path: root/contrib/llvm/tools/lld
diff options
Diffstat (limited to 'contrib/llvm/tools/lld')
-rw-r--r--contrib/llvm/tools/lld/docs/_static/favicon.icobin0 -> 1150 bytes
-rw-r--r--contrib/llvm/tools/lld/docs/hello.pngbin0 -> 27616 bytes
-rw-r--r--contrib/llvm/tools/lld/docs/llvm-theme/static/contents.pngbin0 -> 202 bytes
-rw-r--r--contrib/llvm/tools/lld/docs/llvm-theme/static/logo.pngbin0 -> 9865 bytes
-rw-r--r--contrib/llvm/tools/lld/docs/llvm-theme/static/navigation.pngbin0 -> 218 bytes
228 files changed, 70662 insertions, 0 deletions
diff --git a/contrib/llvm/tools/lld/.arcconfig b/contrib/llvm/tools/lld/.arcconfig
new file mode 100644
index 000000000000..c8a8e079023f
--- /dev/null
+++ b/contrib/llvm/tools/lld/.arcconfig
@@ -0,0 +1,4 @@
+ "repository.callsign" : "LLD",
+ "conduit_uri" : "https://reviews.llvm.org/"
diff --git a/contrib/llvm/tools/lld/.clang-format b/contrib/llvm/tools/lld/.clang-format
new file mode 100644
index 000000000000..9b3aa8b7213b
--- /dev/null
+++ b/contrib/llvm/tools/lld/.clang-format
@@ -0,0 +1 @@
+BasedOnStyle: LLVM
diff --git a/contrib/llvm/tools/lld/.gitignore b/contrib/llvm/tools/lld/.gitignore
new file mode 100644
index 000000000000..0a288ee8ce96
--- /dev/null
+++ b/contrib/llvm/tools/lld/.gitignore
@@ -0,0 +1,24 @@
+# This file specifies intentionally untracked files that git should ignore.
+# See: http://www.kernel.org/pub/software/scm/git/docs/gitignore.html
+# File extensions to be ignored anywhere in the tree.
+# Temp files created by most text editors.
+# Merge files created by git.
+# Byte compiled python modules.
+# vim swap files
+# Mac OS X Finder layout info
+# Directories to be ignored.
+# Sphinx build files.
diff --git a/contrib/llvm/tools/lld/CMakeLists.txt b/contrib/llvm/tools/lld/CMakeLists.txt
new file mode 100644
index 000000000000..e2fbdbfbbb47
--- /dev/null
+++ b/contrib/llvm/tools/lld/CMakeLists.txt
@@ -0,0 +1,226 @@
+# Check if lld is built as a standalone project.
+ project(lld)
+ cmake_minimum_required(VERSION 3.4.3)
+ find_program(LLVM_CONFIG_PATH "llvm-config" DOC "Path to llvm-config binary")
+ message(FATAL_ERROR "llvm-config not found: specify LLVM_CONFIG_PATH")
+ endif()
+ execute_process(COMMAND "${LLVM_CONFIG_PATH}"
+ "--obj-root"
+ "--includedir"
+ "--cmakedir"
+ "--src-root"
+ message(FATAL_ERROR "llvm-config failed with status ${HAD_ERROR}")
+ endif()
+ string(REGEX REPLACE "[ \t]*[\r\n]+[ \t]*" ";" LLVM_CONFIG_OUTPUT "${LLVM_CONFIG_OUTPUT}")
+ set(LLVM_OBJ_ROOT ${OBJ_ROOT} CACHE PATH "path to LLVM build tree")
+ set(LLVM_MAIN_INCLUDE_DIR ${MAIN_INCLUDE_DIR} CACHE PATH "path to llvm/include")
+ set(LLVM_MAIN_SRC_DIR ${MAIN_SRC_DIR} CACHE PATH "Path to LLVM source tree")
+ if(NOT EXISTS "${LLVM_CMAKE_PATH}/LLVMConfig.cmake")
+ message(FATAL_ERROR "LLVMConfig.cmake not found")
+ endif()
+ include("${LLVM_CMAKE_PATH}/LLVMConfig.cmake")
+ include_directories("${LLVM_BINARY_DIR}/include" ${LLVM_INCLUDE_DIRS})
+ link_directories(${LLVM_LIBRARY_DIRS})
+ include(AddLLVM)
+ include(TableGen)
+ include(HandleLLVMOptions)
+ include(FindPythonInterp)
+ message(FATAL_ERROR
+"Unable to find Python interpreter, required for testing.
+Please install Python or specify the PYTHON_EXECUTABLE CMake variable.")
+ endif()
+ message(FATAL_ERROR "Python 2.7 or newer is required")
+ endif()
+ # Check prebuilt llvm/utils.
+ endif()
+ if(EXISTS ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py)
+ # Note: path not really used, except for checking if lit was found
+ set(LLVM_LIT ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py)
+ add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/FileCheck utils/FileCheck)
+ add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/not utils/not)
+ set(LLD_TEST_DEPS FileCheck not)
+ endif()
+ set(UNITTEST_DIR ${LLVM_MAIN_SRC_DIR}/utils/unittest)
+ if(EXISTS ${UNITTEST_DIR}/googletest/include/gtest/gtest.h
+ add_subdirectory(${UNITTEST_DIR} utils/unittest)
+ endif()
+ else()
+ # Seek installed Lit.
+ find_program(LLVM_LIT
+ NAMES llvm-lit lit.py lit
+ PATHS "${LLVM_MAIN_SRC_DIR}/utils/lit"
+ DOC "Path to lit.py")
+ endif()
+ if(LLVM_LIT)
+ # Define the default arguments to use with 'lit', and an option for the user
+ # to override.
+ set(LIT_ARGS_DEFAULT "-sv")
+ set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar")
+ endif()
+ set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" CACHE STRING "Default options for lit")
+ # On Win32 hosts, provide an option to specify the path to the GnuWin32 tools.
+ set(LLVM_LIT_TOOLS_DIR "" CACHE PATH "Path to GnuWin32 tools")
+ endif()
+ else()
+ endif()
+ endif()
+# Compute the LLD version from the LLVM version.
+string(REGEX MATCH "[0-9]+\\.[0-9]+(\\.[0-9]+)?" LLD_VERSION
+message(STATUS "LLD version: ${LLD_VERSION}")
+string(REGEX REPLACE "([0-9]+)\\.[0-9]+(\\.[0-9]+)?" "\\1" LLD_VERSION_MAJOR
+string(REGEX REPLACE "[0-9]+\\.([0-9]+)(\\.[0-9]+)?" "\\1" LLD_VERSION_MINOR
+# Determine LLD revision and repository.
+# TODO: Figure out a way to get the revision and the repository on windows.
+ execute_process(COMMAND ${CMAKE_SOURCE_DIR}/utils/GetSourceVersion ${LLD_SOURCE_DIR}
+ execute_process(COMMAND ${CMAKE_SOURCE_DIR}/utils/GetRepositoryPath ${LLD_SOURCE_DIR}
+ # Replace newline characters with spaces
+ # Remove leading spaces
+ # Remove trailing spaces
+ endif()
+ # Replace newline characters with spaces
+ # Remove leading spaces
+ # Remove trailing spaces
+ endif()
+endif ()
+# Configure the Version.inc file.
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/lld/Common/Version.inc.in
+ ${CMAKE_CURRENT_BINARY_DIR}/include/lld/Common/Version.inc)
+ message(FATAL_ERROR "In-source builds are not allowed. CMake would overwrite "
+"the makefiles distributed with LLVM. Please create a directory and run cmake "
+"from there, passing the path to this source directory as the last argument. "
+"This process created the file `CMakeCache.txt' and the directory "
+"`CMakeFiles'. Please delete them.")
+list (APPEND CMAKE_MODULE_PATH "${LLD_SOURCE_DIR}/cmake/modules")
+ "Enable VTune user task tracking."
+ OFF)
+ find_package(VTune)
+ include_directories(${VTune_INCLUDE_DIRS})
+ add_definitions(-DLLD_HAS_VTUNE)
+ endif()
+ "Build the lld tools. If OFF, just generate build targets." ON)
+if (MSVC)
+ add_definitions(-wd4530) # Suppress 'warning C4530: C++ exception handler used, but unwind semantics are not enabled.'
+ add_definitions(-wd4062) # Suppress 'warning C4062: enumerator X in switch of enum Y is not handled' from system header.
+ )
+ install(DIRECTORY include/
+ PATTERN "*.h"
+ )
+ add_subdirectory(test)
+ add_subdirectory(unittests)
diff --git a/contrib/llvm/tools/lld/CODE_OWNERS.TXT b/contrib/llvm/tools/lld/CODE_OWNERS.TXT
new file mode 100644
index 000000000000..f019a87553aa
--- /dev/null
+++ b/contrib/llvm/tools/lld/CODE_OWNERS.TXT
@@ -0,0 +1,22 @@
+This file is a list of the people responsible for ensuring that patches for a
+particular part of LLD are reviewed, either by themselves or by someone else.
+They are also the gatekeepers for their part of LLD, with the final word on
+what goes in or not.
+The list is sorted by surname and formatted to allow easy grepping and
+beautification by scripts. The fields are: name (N), email (E), web-address
+(W), PGP key ID and fingerprint (P), description (D), and snail-mail address
+(S). Each entry should contain at least the (N), (E) and (D) fields.
+N: Rui Ueyama
+E: ruiu@google.com
+D: COFF, ELF backends (COFF/* ELF/*)
+N: Lang Hames, Nick Kledzik
+E: lhames@gmail.com, kledzik@apple.com
+D: Mach-O backend
+N: Sam Clegg
+E: sbc@chromium.org
+D: WebAssembly backend (wasm/*)
diff --git a/contrib/llvm/tools/lld/COFF/CMakeLists.txt b/contrib/llvm/tools/lld/COFF/CMakeLists.txt
new file mode 100644
index 000000000000..bb241e788c19
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/CMakeLists.txt
@@ -0,0 +1,47 @@
+tablegen(LLVM Options.inc -gen-opt-parser-defs)
+ set(tablegen_deps intrinsics_gen)
+ Chunks.cpp
+ DLL.cpp
+ Driver.cpp
+ DriverUtils.cpp
+ ICF.cpp
+ InputFiles.cpp
+ LTO.cpp
+ MapFile.cpp
+ MarkLive.cpp
+ MinGW.cpp
+ PDB.cpp
+ SymbolTable.cpp
+ Symbols.cpp
+ Writer.cpp
+ BinaryFormat
+ Core
+ DebugInfoCodeView
+ DebugInfoMSF
+ DebugInfoPDB
+ LibDriver
+ MC
+ Object
+ Option
+ Support
+ WindowsManifest
+ lldCommon
+ COFFOptionsTableGen
+ ${tablegen_deps}
+ )
diff --git a/contrib/llvm/tools/lld/COFF/Chunks.cpp b/contrib/llvm/tools/lld/COFF/Chunks.cpp
new file mode 100644
index 000000000000..2bb9aa01e539
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/Chunks.cpp
@@ -0,0 +1,883 @@
+//===- Chunks.cpp ---------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Chunks.h"
+#include "InputFiles.h"
+#include "Symbols.h"
+#include "Writer.h"
+#include "SymbolTable.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::support::endian;
+using namespace llvm::COFF;
+using llvm::support::ulittle32_t;
+namespace lld {
+namespace coff {
+SectionChunk::SectionChunk(ObjFile *F, const coff_section *H)
+ : Chunk(SectionKind), Repl(this), Header(H), File(F),
+ Relocs(File->getCOFFObj()->getRelocations(Header)) {
+ // Initialize SectionName.
+ File->getCOFFObj()->getSectionName(Header, SectionName);
+ Alignment = Header->getAlignment();
+ // If linker GC is disabled, every chunk starts out alive. If linker GC is
+ // enabled, treat non-comdat sections as roots. Generally optimized object
+ // files will be built with -ffunction-sections or /Gy, so most things worth
+ // stripping will be in a comdat.
+ Live = !Config->DoGC || !isCOMDAT();
+// Initialize the RelocTargets vector, to allow redirecting certain relocations
+// to a thunk instead of the actual symbol the relocation's symbol table index
+// indicates.
+void SectionChunk::readRelocTargets() {
+ assert(RelocTargets.empty());
+ RelocTargets.reserve(Relocs.size());
+ for (const coff_relocation &Rel : Relocs)
+ RelocTargets.push_back(File->getSymbol(Rel.SymbolTableIndex));
+// Reset RelocTargets to their original targets before thunks were added.
+void SectionChunk::resetRelocTargets() {
+ for (size_t I = 0, E = Relocs.size(); I < E; ++I)
+ RelocTargets[I] = File->getSymbol(Relocs[I].SymbolTableIndex);
+static void add16(uint8_t *P, int16_t V) { write16le(P, read16le(P) + V); }
+static void add32(uint8_t *P, int32_t V) { write32le(P, read32le(P) + V); }
+static void add64(uint8_t *P, int64_t V) { write64le(P, read64le(P) + V); }
+static void or16(uint8_t *P, uint16_t V) { write16le(P, read16le(P) | V); }
+static void or32(uint8_t *P, uint32_t V) { write32le(P, read32le(P) | V); }
+// Verify that given sections are appropriate targets for SECREL
+// relocations. This check is relaxed because unfortunately debug
+// sections have section-relative relocations against absolute symbols.
+static bool checkSecRel(const SectionChunk *Sec, OutputSection *OS) {
+ if (OS)
+ return true;
+ if (Sec->isCodeView())
+ return false;
+ error("SECREL relocation cannot be applied to absolute symbols");
+ return false;
+static void applySecRel(const SectionChunk *Sec, uint8_t *Off,
+ OutputSection *OS, uint64_t S) {
+ if (!checkSecRel(Sec, OS))
+ return;
+ uint64_t SecRel = S - OS->getRVA();
+ if (SecRel > UINT32_MAX) {
+ error("overflow in SECREL relocation in section: " + Sec->getSectionName());
+ return;
+ }
+ add32(Off, SecRel);
+static void applySecIdx(uint8_t *Off, OutputSection *OS) {
+ // An absolute symbol doesn't have a section index, but a section index
+ // relocation against an absolute symbol should be resolved to one plus the
+ // last output section index. This is required for compatibility with MSVC.
+ if (OS)
+ add16(Off, OS->SectionIndex);
+ else
+ add16(Off, DefinedAbsolute::NumOutputSections + 1);
+void SectionChunk::applyRelX64(uint8_t *Off, uint16_t Type, OutputSection *OS,
+ uint64_t S, uint64_t P) const {
+ switch (Type) {
+ case IMAGE_REL_AMD64_ADDR32: add32(Off, S + Config->ImageBase); break;
+ case IMAGE_REL_AMD64_ADDR64: add64(Off, S + Config->ImageBase); break;
+ case IMAGE_REL_AMD64_ADDR32NB: add32(Off, S); break;
+ case IMAGE_REL_AMD64_REL32: add32(Off, S - P - 4); break;
+ case IMAGE_REL_AMD64_REL32_1: add32(Off, S - P - 5); break;
+ case IMAGE_REL_AMD64_REL32_2: add32(Off, S - P - 6); break;
+ case IMAGE_REL_AMD64_REL32_3: add32(Off, S - P - 7); break;
+ case IMAGE_REL_AMD64_REL32_4: add32(Off, S - P - 8); break;
+ case IMAGE_REL_AMD64_REL32_5: add32(Off, S - P - 9); break;
+ case IMAGE_REL_AMD64_SECTION: applySecIdx(Off, OS); break;
+ case IMAGE_REL_AMD64_SECREL: applySecRel(this, Off, OS, S); break;
+ default:
+ error("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " +
+ toString(File));
+ }
+void SectionChunk::applyRelX86(uint8_t *Off, uint16_t Type, OutputSection *OS,
+ uint64_t S, uint64_t P) const {
+ switch (Type) {
+ case IMAGE_REL_I386_ABSOLUTE: break;
+ case IMAGE_REL_I386_DIR32: add32(Off, S + Config->ImageBase); break;
+ case IMAGE_REL_I386_DIR32NB: add32(Off, S); break;
+ case IMAGE_REL_I386_REL32: add32(Off, S - P - 4); break;
+ case IMAGE_REL_I386_SECTION: applySecIdx(Off, OS); break;
+ case IMAGE_REL_I386_SECREL: applySecRel(this, Off, OS, S); break;
+ default:
+ error("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " +
+ toString(File));
+ }
+static void applyMOV(uint8_t *Off, uint16_t V) {
+ write16le(Off, (read16le(Off) & 0xfbf0) | ((V & 0x800) >> 1) | ((V >> 12) & 0xf));
+ write16le(Off + 2, (read16le(Off + 2) & 0x8f00) | ((V & 0x700) << 4) | (V & 0xff));
+static uint16_t readMOV(uint8_t *Off, bool MOVT) {
+ uint16_t Op1 = read16le(Off);
+ if ((Op1 & 0xfbf0) != (MOVT ? 0xf2c0 : 0xf240))
+ error("unexpected instruction in " + Twine(MOVT ? "MOVT" : "MOVW") +
+ " instruction in MOV32T relocation");
+ uint16_t Op2 = read16le(Off + 2);
+ if ((Op2 & 0x8000) != 0)
+ error("unexpected instruction in " + Twine(MOVT ? "MOVT" : "MOVW") +
+ " instruction in MOV32T relocation");
+ return (Op2 & 0x00ff) | ((Op2 >> 4) & 0x0700) | ((Op1 << 1) & 0x0800) |
+ ((Op1 & 0x000f) << 12);
+void applyMOV32T(uint8_t *Off, uint32_t V) {
+ uint16_t ImmW = readMOV(Off, false); // read MOVW operand
+ uint16_t ImmT = readMOV(Off + 4, true); // read MOVT operand
+ uint32_t Imm = ImmW | (ImmT << 16);
+ V += Imm; // add the immediate offset
+ applyMOV(Off, V); // set MOVW operand
+ applyMOV(Off + 4, V >> 16); // set MOVT operand
+static void applyBranch20T(uint8_t *Off, int32_t V) {
+ if (!isInt<21>(V))
+ error("relocation out of range");
+ uint32_t S = V < 0 ? 1 : 0;
+ uint32_t J1 = (V >> 19) & 1;
+ uint32_t J2 = (V >> 18) & 1;
+ or16(Off, (S << 10) | ((V >> 12) & 0x3f));
+ or16(Off + 2, (J1 << 13) | (J2 << 11) | ((V >> 1) & 0x7ff));
+void applyBranch24T(uint8_t *Off, int32_t V) {
+ if (!isInt<25>(V))
+ error("relocation out of range");
+ uint32_t S = V < 0 ? 1 : 0;
+ uint32_t J1 = ((~V >> 23) & 1) ^ S;
+ uint32_t J2 = ((~V >> 22) & 1) ^ S;
+ or16(Off, (S << 10) | ((V >> 12) & 0x3ff));
+ // Clear out the J1 and J2 bits which may be set.
+ write16le(Off + 2, (read16le(Off + 2) & 0xd000) | (J1 << 13) | (J2 << 11) | ((V >> 1) & 0x7ff));
+void SectionChunk::applyRelARM(uint8_t *Off, uint16_t Type, OutputSection *OS,
+ uint64_t S, uint64_t P) const {
+ // Pointer to thumb code must have the LSB set.
+ uint64_t SX = S;
+ if (OS && (OS->Header.Characteristics & IMAGE_SCN_MEM_EXECUTE))
+ SX |= 1;
+ switch (Type) {
+ case IMAGE_REL_ARM_ADDR32: add32(Off, SX + Config->ImageBase); break;
+ case IMAGE_REL_ARM_ADDR32NB: add32(Off, SX); break;
+ case IMAGE_REL_ARM_MOV32T: applyMOV32T(Off, SX + Config->ImageBase); break;
+ case IMAGE_REL_ARM_BRANCH20T: applyBranch20T(Off, SX - P - 4); break;
+ case IMAGE_REL_ARM_BRANCH24T: applyBranch24T(Off, SX - P - 4); break;
+ case IMAGE_REL_ARM_BLX23T: applyBranch24T(Off, SX - P - 4); break;
+ case IMAGE_REL_ARM_SECTION: applySecIdx(Off, OS); break;
+ case IMAGE_REL_ARM_SECREL: applySecRel(this, Off, OS, S); break;
+ default:
+ error("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " +
+ toString(File));
+ }
+// Interpret the existing immediate value as a byte offset to the
+// target symbol, then update the instruction with the immediate as
+// the page offset from the current instruction to the target.
+void applyArm64Addr(uint8_t *Off, uint64_t S, uint64_t P, int Shift) {
+ uint32_t Orig = read32le(Off);
+ uint64_t Imm = ((Orig >> 29) & 0x3) | ((Orig >> 3) & 0x1FFFFC);
+ S += Imm;
+ Imm = (S >> Shift) - (P >> Shift);
+ uint32_t ImmLo = (Imm & 0x3) << 29;
+ uint32_t ImmHi = (Imm & 0x1FFFFC) << 3;
+ uint64_t Mask = (0x3 << 29) | (0x1FFFFC << 3);
+ write32le(Off, (Orig & ~Mask) | ImmLo | ImmHi);
+// Update the immediate field in an AArch64 ldr, str, or add instruction.
+// Optionally limit the range of the written immediate by one or more bits
+// (RangeLimit).
+void applyArm64Imm(uint8_t *Off, uint64_t Imm, uint32_t RangeLimit) {
+ uint32_t Orig = read32le(Off);
+ Imm += (Orig >> 10) & 0xFFF;
+ Orig &= ~(0xFFF << 10);
+ write32le(Off, Orig | ((Imm & (0xFFF >> RangeLimit)) << 10));
+// Add the 12 bit page offset to the existing immediate.
+// Ldr/str instructions store the opcode immediate scaled
+// by the load/store size (giving a larger range for larger
+// loads/stores). The immediate is always (both before and after
+// fixing up the relocation) stored scaled similarly.
+// Even if larger loads/stores have a larger range, limit the
+// effective offset to 12 bit, since it is intended to be a
+// page offset.
+static void applyArm64Ldr(uint8_t *Off, uint64_t Imm) {
+ uint32_t Orig = read32le(Off);
+ uint32_t Size = Orig >> 30;
+ // 0x04000000 indicates SIMD/FP registers
+ // 0x00800000 indicates 128 bit
+ if ((Orig & 0x4800000) == 0x4800000)
+ Size += 4;
+ if ((Imm & ((1 << Size) - 1)) != 0)
+ error("misaligned ldr/str offset");
+ applyArm64Imm(Off, Imm >> Size, Size);
+static void applySecRelLow12A(const SectionChunk *Sec, uint8_t *Off,
+ OutputSection *OS, uint64_t S) {
+ if (checkSecRel(Sec, OS))
+ applyArm64Imm(Off, (S - OS->getRVA()) & 0xfff, 0);
+static void applySecRelHigh12A(const SectionChunk *Sec, uint8_t *Off,
+ OutputSection *OS, uint64_t S) {
+ if (!checkSecRel(Sec, OS))
+ return;
+ uint64_t SecRel = (S - OS->getRVA()) >> 12;
+ if (0xfff < SecRel) {
+ error("overflow in SECREL_HIGH12A relocation in section: " +
+ Sec->getSectionName());
+ return;
+ }
+ applyArm64Imm(Off, SecRel & 0xfff, 0);
+static void applySecRelLdr(const SectionChunk *Sec, uint8_t *Off,
+ OutputSection *OS, uint64_t S) {
+ if (checkSecRel(Sec, OS))
+ applyArm64Ldr(Off, (S - OS->getRVA()) & 0xfff);
+void applyArm64Branch26(uint8_t *Off, int64_t V) {
+ if (!isInt<28>(V))
+ error("relocation out of range");
+ or32(Off, (V & 0x0FFFFFFC) >> 2);
+static void applyArm64Branch19(uint8_t *Off, int64_t V) {
+ if (!isInt<21>(V))
+ error("relocation out of range");
+ or32(Off, (V & 0x001FFFFC) << 3);
+static void applyArm64Branch14(uint8_t *Off, int64_t V) {
+ if (!isInt<16>(V))
+ error("relocation out of range");
+ or32(Off, (V & 0x0000FFFC) << 3);
+void SectionChunk::applyRelARM64(uint8_t *Off, uint16_t Type, OutputSection *OS,
+ uint64_t S, uint64_t P) const {
+ switch (Type) {
+ case IMAGE_REL_ARM64_PAGEBASE_REL21: applyArm64Addr(Off, S, P, 12); break;
+ case IMAGE_REL_ARM64_REL21: applyArm64Addr(Off, S, P, 0); break;
+ case IMAGE_REL_ARM64_PAGEOFFSET_12A: applyArm64Imm(Off, S & 0xfff, 0); break;
+ case IMAGE_REL_ARM64_PAGEOFFSET_12L: applyArm64Ldr(Off, S & 0xfff); break;
+ case IMAGE_REL_ARM64_BRANCH26: applyArm64Branch26(Off, S - P); break;
+ case IMAGE_REL_ARM64_BRANCH19: applyArm64Branch19(Off, S - P); break;
+ case IMAGE_REL_ARM64_BRANCH14: applyArm64Branch14(Off, S - P); break;
+ case IMAGE_REL_ARM64_ADDR32: add32(Off, S + Config->ImageBase); break;
+ case IMAGE_REL_ARM64_ADDR32NB: add32(Off, S); break;
+ case IMAGE_REL_ARM64_ADDR64: add64(Off, S + Config->ImageBase); break;
+ case IMAGE_REL_ARM64_SECREL: applySecRel(this, Off, OS, S); break;
+ case IMAGE_REL_ARM64_SECREL_LOW12A: applySecRelLow12A(this, Off, OS, S); break;
+ case IMAGE_REL_ARM64_SECREL_HIGH12A: applySecRelHigh12A(this, Off, OS, S); break;
+ case IMAGE_REL_ARM64_SECREL_LOW12L: applySecRelLdr(this, Off, OS, S); break;
+ case IMAGE_REL_ARM64_SECTION: applySecIdx(Off, OS); break;
+ default:
+ error("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " +
+ toString(File));
+ }
+static void maybeReportRelocationToDiscarded(const SectionChunk *FromChunk,
+ Defined *Sym,
+ const coff_relocation &Rel) {
+ // Don't report these errors when the relocation comes from a debug info
+ // section or in mingw mode. MinGW mode object files (built by GCC) can
+ // have leftover sections with relocations against discarded comdat
+ // sections. Such sections are left as is, with relocations untouched.
+ if (FromChunk->isCodeView() || FromChunk->isDWARF() || Config->MinGW)
+ return;
+ // Get the name of the symbol. If it's null, it was discarded early, so we
+ // have to go back to the object file.
+ ObjFile *File = FromChunk->File;
+ StringRef Name;
+ if (Sym) {
+ Name = Sym->getName();
+ } else {
+ COFFSymbolRef COFFSym =
+ check(File->getCOFFObj()->getSymbol(Rel.SymbolTableIndex));
+ File->getCOFFObj()->getSymbolName(COFFSym, Name);
+ }
+ error("relocation against symbol in discarded section: " + Name +
+ getSymbolLocations(File, Rel.SymbolTableIndex));
+void SectionChunk::writeTo(uint8_t *Buf) const {
+ if (!hasData())
+ return;
+ // Copy section contents from source object file to output file.
+ ArrayRef<uint8_t> A = getContents();
+ if (!A.empty())
+ memcpy(Buf + OutputSectionOff, A.data(), A.size());
+ // Apply relocations.
+ size_t InputSize = getSize();
+ for (size_t I = 0, E = Relocs.size(); I < E; I++) {
+ const coff_relocation &Rel = Relocs[I];
+ // Check for an invalid relocation offset. This check isn't perfect, because
+ // we don't have the relocation size, which is only known after checking the
+ // machine and relocation type. As a result, a relocation may overwrite the
+ // beginning of the following input section.
+ if (Rel.VirtualAddress >= InputSize) {
+ error("relocation points beyond the end of its parent section");
+ continue;
+ }
+ uint8_t *Off = Buf + OutputSectionOff + Rel.VirtualAddress;
+ // Use the potentially remapped Symbol instead of the one that the
+ // relocation points to.
+ auto *Sym = dyn_cast_or_null<Defined>(RelocTargets[I]);
+ // Get the output section of the symbol for this relocation. The output
+ // section is needed to compute SECREL and SECTION relocations used in debug
+ // info.
+ Chunk *C = Sym ? Sym->getChunk() : nullptr;
+ OutputSection *OS = C ? C->getOutputSection() : nullptr;
+ // Skip the relocation if it refers to a discarded section, and diagnose it
+ // as an error if appropriate. If a symbol was discarded early, it may be
+ // null. If it was discarded late, the output section will be null, unless
+ // it was an absolute or synthetic symbol.
+ if (!Sym ||
+ (!OS && !isa<DefinedAbsolute>(Sym) && !isa<DefinedSynthetic>(Sym))) {
+ maybeReportRelocationToDiscarded(this, Sym, Rel);
+ continue;
+ }
+ uint64_t S = Sym->getRVA();
+ // Compute the RVA of the relocation for relative relocations.
+ uint64_t P = RVA + Rel.VirtualAddress;
+ switch (Config->Machine) {
+ case AMD64:
+ applyRelX64(Off, Rel.Type, OS, S, P);
+ break;
+ case I386:
+ applyRelX86(Off, Rel.Type, OS, S, P);
+ break;
+ case ARMNT:
+ applyRelARM(Off, Rel.Type, OS, S, P);
+ break;
+ case ARM64:
+ applyRelARM64(Off, Rel.Type, OS, S, P);
+ break;
+ default:
+ llvm_unreachable("unknown machine type");
+ }
+ }
+void SectionChunk::addAssociative(SectionChunk *Child) {
+ AssocChildren.push_back(Child);
+static uint8_t getBaserelType(const coff_relocation &Rel) {
+ switch (Config->Machine) {
+ case AMD64:
+ if (Rel.Type == IMAGE_REL_AMD64_ADDR64)
+ case I386:
+ if (Rel.Type == IMAGE_REL_I386_DIR32)
+ case ARMNT:
+ if (Rel.Type == IMAGE_REL_ARM_ADDR32)
+ if (Rel.Type == IMAGE_REL_ARM_MOV32T)
+ case ARM64:
+ if (Rel.Type == IMAGE_REL_ARM64_ADDR64)
+ default:
+ llvm_unreachable("unknown machine type");
+ }
+// Windows-specific.
+// Collect all locations that contain absolute addresses, which need to be
+// fixed by the loader if load-time relocation is needed.
+// Only called when base relocation is enabled.
+void SectionChunk::getBaserels(std::vector<Baserel> *Res) {
+ for (size_t I = 0, E = Relocs.size(); I < E; I++) {
+ const coff_relocation &Rel = Relocs[I];
+ uint8_t Ty = getBaserelType(Rel);
+ continue;
+ // Use the potentially remapped Symbol instead of the one that the
+ // relocation points to.
+ Symbol *Target = RelocTargets[I];
+ if (!Target || isa<DefinedAbsolute>(Target))
+ continue;
+ Res->emplace_back(RVA + Rel.VirtualAddress, Ty);
+ }
+// MinGW specific.
+// Check whether a static relocation of type Type can be deferred and
+// handled at runtime as a pseudo relocation (for references to a module
+// local variable, which turned out to actually need to be imported from
+// another DLL). This returns the size the relocation is supposed to update,
+// in bits, or 0 if the relocation cannot be handled as a runtime pseudo
+// relocation.
+static int getRuntimePseudoRelocSize(uint16_t Type) {
+ // Only relocations that contain either an absolute address or a plain
+ // relative offset can be handled, since the runtime pseudo reloc
+ // implementation adds 8/16/32/64 bit values to a memory address.
+ //
+ // Given a pseudo relocation entry,
+ //
+ // typedef struct {
+ // DWORD sym;
+ // DWORD target;
+ // DWORD flags;
+ // } runtime_pseudo_reloc_item_v2;
+ //
+ // the runtime relocation performs this adjustment:
+ // *(base + .target) += *(base + .sym) - (base + .sym)
+ //
+ // This works for both absolute addresses (IMAGE_REL_*_ADDR32/64,
+ // IMAGE_REL_I386_DIR32), where the memory location initially contains
+ // the address of the IAT slot, and for relative addresses (IMAGE_REL*_REL32),
+ // where the memory location originally contains the relative offset to the
+ // IAT slot.
+ //
+ // This requires the target address to be writable, either directly out of
+ // the image, or temporarily changed at runtime with VirtualProtect.
+ // Since this only operates on direct address values, it doesn't work for
+ // ARM/ARM64 relocations, other than the plain ADDR32/ADDR64 relocations.
+ switch (Config->Machine) {
+ case AMD64:
+ switch (Type) {
+ case IMAGE_REL_AMD64_ADDR64:
+ return 64;
+ case IMAGE_REL_AMD64_ADDR32:
+ case IMAGE_REL_AMD64_REL32:
+ case IMAGE_REL_AMD64_REL32_1:
+ case IMAGE_REL_AMD64_REL32_2:
+ case IMAGE_REL_AMD64_REL32_3:
+ case IMAGE_REL_AMD64_REL32_4:
+ case IMAGE_REL_AMD64_REL32_5:
+ return 32;
+ default:
+ return 0;
+ }
+ case I386:
+ switch (Type) {
+ case IMAGE_REL_I386_DIR32:
+ case IMAGE_REL_I386_REL32:
+ return 32;
+ default:
+ return 0;
+ }
+ case ARMNT:
+ switch (Type) {
+ return 32;
+ default:
+ return 0;
+ }
+ case ARM64:
+ switch (Type) {
+ case IMAGE_REL_ARM64_ADDR64:
+ return 64;
+ case IMAGE_REL_ARM64_ADDR32:
+ return 32;
+ default:
+ return 0;
+ }
+ default:
+ llvm_unreachable("unknown machine type");
+ }
+// MinGW specific.
+// Append information to the provided vector about all relocations that
+// need to be handled at runtime as runtime pseudo relocations (references
+// to a module local variable, which turned out to actually need to be
+// imported from another DLL).
+void SectionChunk::getRuntimePseudoRelocs(
+ std::vector<RuntimePseudoReloc> &Res) {
+ for (const coff_relocation &Rel : Relocs) {
+ auto *Target =
+ dyn_cast_or_null<Defined>(File->getSymbol(Rel.SymbolTableIndex));
+ if (!Target || !Target->IsRuntimePseudoReloc)
+ continue;
+ int SizeInBits = getRuntimePseudoRelocSize(Rel.Type);
+ if (SizeInBits == 0) {
+ error("unable to automatically import from " + Target->getName() +
+ " with relocation type " +
+ File->getCOFFObj()->getRelocationTypeName(Rel.Type) + " in " +
+ toString(File));
+ continue;
+ }
+ // SizeInBits is used to initialize the Flags field; currently no
+ // other flags are defined.
+ Res.emplace_back(
+ RuntimePseudoReloc(Target, this, Rel.VirtualAddress, SizeInBits));
+ }
+bool SectionChunk::hasData() const {
+ return !(Header->Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA);
+uint32_t SectionChunk::getOutputCharacteristics() const {
+ return Header->Characteristics & (PermMask | TypeMask);
+bool SectionChunk::isCOMDAT() const {
+ return Header->Characteristics & IMAGE_SCN_LNK_COMDAT;
+void SectionChunk::printDiscardedMessage() const {
+ // Removed by dead-stripping. If it's removed by ICF, ICF already
+ // printed out the name, so don't repeat that here.
+ if (Sym && this == Repl)
+ message("Discarded " + Sym->getName());
+StringRef SectionChunk::getDebugName() {
+ if (Sym)
+ return Sym->getName();
+ return "";
+ArrayRef<uint8_t> SectionChunk::getContents() const {
+ ArrayRef<uint8_t> A;
+ File->getCOFFObj()->getSectionContents(Header, A);
+ return A;
+void SectionChunk::replace(SectionChunk *Other) {
+ Alignment = std::max(Alignment, Other->Alignment);
+ Other->Repl = Repl;
+ Other->Live = false;
+uint32_t SectionChunk::getSectionNumber() const {
+ DataRefImpl R;
+ R.p = reinterpret_cast<uintptr_t>(Header);
+ SectionRef S(R, File->getCOFFObj());
+ return S.getIndex() + 1;
+CommonChunk::CommonChunk(const COFFSymbolRef S) : Sym(S) {
+ // Common symbols are aligned on natural boundaries up to 32 bytes.
+ // This is what MSVC link.exe does.
+ Alignment = std::min(uint64_t(32), PowerOf2Ceil(Sym.getValue()));
+uint32_t CommonChunk::getOutputCharacteristics() const {
+void StringChunk::writeTo(uint8_t *Buf) const {
+ memcpy(Buf + OutputSectionOff, Str.data(), Str.size());
+ Buf[OutputSectionOff + Str.size()] = '\0';
+ImportThunkChunkX64::ImportThunkChunkX64(Defined *S) : ImpSymbol(S) {
+ // Intel Optimization Manual says that all branch targets
+ // should be 16-byte aligned. MSVC linker does this too.
+ Alignment = 16;
+void ImportThunkChunkX64::writeTo(uint8_t *Buf) const {
+ memcpy(Buf + OutputSectionOff, ImportThunkX86, sizeof(ImportThunkX86));
+ // The first two bytes are a JMP instruction. Fill its operand.
+ write32le(Buf + OutputSectionOff + 2, ImpSymbol->getRVA() - RVA - getSize());
+void ImportThunkChunkX86::getBaserels(std::vector<Baserel> *Res) {
+ Res->emplace_back(getRVA() + 2);
+void ImportThunkChunkX86::writeTo(uint8_t *Buf) const {
+ memcpy(Buf + OutputSectionOff, ImportThunkX86, sizeof(ImportThunkX86));
+ // The first two bytes are a JMP instruction. Fill its operand.
+ write32le(Buf + OutputSectionOff + 2,
+ ImpSymbol->getRVA() + Config->ImageBase);
+void ImportThunkChunkARM::getBaserels(std::vector<Baserel> *Res) {
+ Res->emplace_back(getRVA(), IMAGE_REL_BASED_ARM_MOV32T);
+void ImportThunkChunkARM::writeTo(uint8_t *Buf) const {
+ memcpy(Buf + OutputSectionOff, ImportThunkARM, sizeof(ImportThunkARM));
+ // Fix movw and movt operands.
+ applyMOV32T(Buf + OutputSectionOff, ImpSymbol->getRVA() + Config->ImageBase);
+void ImportThunkChunkARM64::writeTo(uint8_t *Buf) const {
+ int64_t Off = ImpSymbol->getRVA() & 0xfff;
+ memcpy(Buf + OutputSectionOff, ImportThunkARM64, sizeof(ImportThunkARM64));
+ applyArm64Addr(Buf + OutputSectionOff, ImpSymbol->getRVA(), RVA, 12);
+ applyArm64Ldr(Buf + OutputSectionOff + 4, Off);
+// A Thumb2, PIC, non-interworking range extension thunk.
+const uint8_t ArmThunk[] = {
+ 0x40, 0xf2, 0x00, 0x0c, // P: movw ip,:lower16:S - (P + (L1-P) + 4)
+ 0xc0, 0xf2, 0x00, 0x0c, // movt ip,:upper16:S - (P + (L1-P) + 4)
+ 0xe7, 0x44, // L1: add pc, ip
+size_t RangeExtensionThunkARM::getSize() const {
+ assert(Config->Machine == ARMNT);
+ return sizeof(ArmThunk);
+void RangeExtensionThunkARM::writeTo(uint8_t *Buf) const {
+ assert(Config->Machine == ARMNT);
+ uint64_t Offset = Target->getRVA() - RVA - 12;
+ memcpy(Buf + OutputSectionOff, ArmThunk, sizeof(ArmThunk));
+ applyMOV32T(Buf + OutputSectionOff, uint32_t(Offset));
+// A position independent ARM64 adrp+add thunk, with a maximum range of
+// +/- 4 GB, which is enough for any PE-COFF.
+const uint8_t Arm64Thunk[] = {
+ 0x10, 0x00, 0x00, 0x90, // adrp x16, Dest
+ 0x10, 0x02, 0x00, 0x91, // add x16, x16, :lo12:Dest
+ 0x00, 0x02, 0x1f, 0xd6, // br x16
+// The thunk is exactly as large as the Arm64Thunk template. Asserts that the
+// configured machine is ARM64.
+size_t RangeExtensionThunkARM64::getSize() const {
+ assert(Config->Machine == ARM64);
+ return sizeof(Arm64Thunk);
+// Copies the Arm64Thunk template, then patches the instruction at offset 0
+// (adrp) and the one at offset 4 (add) so that they form Target's address.
+void RangeExtensionThunkARM64::writeTo(uint8_t *Buf) const {
+ assert(Config->Machine == ARM64);
+ memcpy(Buf + OutputSectionOff, Arm64Thunk, sizeof(Arm64Thunk));
+ applyArm64Addr(Buf + OutputSectionOff + 0, Target->getRVA(), RVA, 12);
+ // The add receives the low 12 bits of the target RVA.
+ applyArm64Imm(Buf + OutputSectionOff + 4, Target->getRVA() & 0xfff, 0);
+// The chunk holds an absolute address, so its own RVA is recorded as a base
+// relocation of the default type.
+void LocalImportChunk::getBaserels(std::vector<Baserel> *Res) {
+ Res->emplace_back(getRVA());
+// The chunk holds a single pointer-sized value.
+size_t LocalImportChunk::getSize() const { return Config->Wordsize; }
+// Stores the absolute address of Sym (its RVA plus the image base) as one
+// little-endian value: 64 bits wide on 64-bit targets, 32 bits otherwise.
+void LocalImportChunk::writeTo(uint8_t *Buf) const {
+  uint8_t *Dest = Buf + OutputSectionOff;
+  uint64_t Addr = Sym->getRVA() + Config->ImageBase;
+  if (!Config->is64()) {
+    write32le(Dest, Addr);
+    return;
+  }
+  write64le(Dest, Addr);
+// Writes the RVA of every (chunk, offset) pair in Syms as a 32-bit
+// little-endian value, then sorts the resulting table in place.
+void RVATableChunk::writeTo(uint8_t *Buf) const {
+ ulittle32_t *Begin = reinterpret_cast<ulittle32_t *>(Buf + OutputSectionOff);
+ size_t Cnt = 0;
+ for (const ChunkAndOffset &CO : Syms)
+ Begin[Cnt++] = CO.InputChunk->getRVA() + CO.Offset;
+ std::sort(Begin, Begin + Cnt);
+ // Sanity check only: it is evaluated solely in builds where assert() is
+ // enabled.
+ assert(std::unique(Begin, Begin + Cnt) == Begin + Cnt &&
+ "RVA tables should be de-duplicated");
+// MinGW specific, for the "automatic import of variables from DLLs" feature.
+// An empty table occupies no space. Otherwise the size is 12 bytes for the
+// list header plus 12 bytes per relocation, matching what writeTo() emits.
+size_t PseudoRelocTableChunk::getSize() const {
+ if (Relocs.empty())
+ return 0;
+ return 12 + 12 * Relocs.size();
+// MinGW specific.
+// Emits a three-word list header followed by one three-word record per
+// relocation. Writes nothing when there are no relocations.
+void PseudoRelocTableChunk::writeTo(uint8_t *Buf) const {
+ if (Relocs.empty())
+ return;
+ ulittle32_t *Table = reinterpret_cast<ulittle32_t *>(Buf + OutputSectionOff);
+ // This is the list header, to signal the runtime pseudo relocation v2
+ // format.
+ Table[0] = 0;
+ Table[1] = 0;
+ Table[2] = 1;
+ size_t Idx = 3;
+ for (const RuntimePseudoReloc &RPR : Relocs) {
+ // Each record: RVA of the symbol, RVA of the location to fix up, flags.
+ Table[Idx + 0] = RPR.Sym->getRVA();
+ Table[Idx + 1] = RPR.Target->getRVA() + RPR.TargetOffset;
+ Table[Idx + 2] = RPR.Flags;
+ Idx += 3;
+ }
+// Windows-specific. This class represents a block in the .reloc section.
+// The format is described below.
+// On Windows, each DLL is linked against a fixed base address and
+// usually loaded to that address. However, if there's already another
+// DLL that overlaps, the loader has to relocate it. To do that, DLLs
+// contain .reloc sections which contain offsets that need to be fixed
+// up at runtime. If the loader finds that a DLL cannot be loaded at its
+// desired base address, it loads it somewhere else and adds <actual
+// base address> - <desired base address> to each offset that is
+// specified by the .reloc section. In ELF terms, .reloc sections
+// contain relative relocations in REL format (as opposed to RELA.)
+// This already significantly reduces the size of relocations compared
+// to ELF .rel.dyn, but Windows does more to reduce it (probably because
+// it was invented for PCs in the late '80s or early '90s.) Offsets in
+// .reloc are grouped by page where the page size is 12 bits, and
+// offsets sharing the same page address are stored consecutively to
+// represent them with less space. This is very similar to the page
+// table which is grouped by (multiple stages of) pages.
+// For example, let's say we have 0x00030, 0x00500, 0x00700, 0x00A00,
+// 0x20004, and 0x20008 in a .reloc section for x64. The uppermost 4
+// bits have a type IMAGE_REL_BASED_DIR64 or 0xA. In the section, they
+// are represented like this:
+// 0x00000 -- page address (4 bytes)
+// 16 -- size of this block (4 bytes)
+// 0xA030 -- entries (2 bytes each)
+// 0xA500
+// 0xA700
+// 0xAA00
+// 0x20000 -- page address (4 bytes)
+// 12 -- size of this block (4 bytes)
+// 0xA004 -- entries (2 bytes each)
+// 0xA008
+// Usually we have a lot of relocations for each page, so the number of
+// bytes for one .reloc entry is close to 2 bytes on average.
+// Serializes one .reloc block for the given page: an 8-byte header followed
+// by one 16-bit entry for each relocation in [Begin, End).
+BaserelChunk::BaserelChunk(uint32_t Page, Baserel *Begin, Baserel *End) {
+ // Block header consists of 4 byte page RVA and 4 byte block size.
+ // Each entry is 2 bytes. The last entry may be padding.
+ Data.resize(alignTo((End - Begin) * 2 + 8, 4));
+ uint8_t *P = Data.data();
+ write32le(P, Page);
+ write32le(P + 4, Data.size());
+ P += 8;
+ for (Baserel *I = Begin; I != End; ++I) {
+ // The type goes in the upper 4 bits, the offset within the page below it.
+ write16le(P, (I->Type << 12) | (I->RVA - Page));
+ P += 2;
+ }
+// Copies the block serialized by the constructor into the output buffer.
+void BaserelChunk::writeTo(uint8_t *Buf) const {
+ memcpy(Buf + OutputSectionOff, Data.data(), Data.size());
+// Returns the base relocation type used for a pointer-sized absolute address
+// on the configured machine: IMAGE_REL_BASED_DIR64 on the 64-bit targets and
+// IMAGE_REL_BASED_HIGHLOW on the 32-bit ones. Any other machine type is a
+// programming error.
+uint8_t Baserel::getDefaultType() {
+  switch (Config->Machine) {
+  case AMD64:
+  case ARM64:
+    return IMAGE_REL_BASED_DIR64;
+  case I386:
+  case ARMNT:
+    return IMAGE_REL_BASED_HIGHLOW;
+  default:
+    llvm_unreachable("unknown machine type");
+  }
+}
+std::map<uint32_t, MergeChunk *> MergeChunk::Instances;
+// Creates a merge chunk whose string table builder (in RAW mode) and whose
+// own chunk alignment both use the given alignment.
+MergeChunk::MergeChunk(uint32_t Alignment)
+ : Builder(StringTableBuilder::RAW, Alignment) {
+ this->Alignment = Alignment;
+// Adds C to the MergeChunk registered for C's alignment in Instances,
+// creating that MergeChunk on first use.
+void MergeChunk::addSection(SectionChunk *C) {
+ auto *&MC = Instances[C->Alignment];
+ if (!MC)
+ MC = make<MergeChunk>(C->Alignment);
+ MC->Sections.push_back(C);
+// Builds the merged string table from the live member sections (only on the
+// first call), then places each live member inside this chunk.
+void MergeChunk::finalizeContents() {
+ if (!Finalized) {
+ for (SectionChunk *C : Sections)
+ if (C->Live)
+ Builder.add(toStringRef(C->getContents()));
+ Builder.finalize();
+ Finalized = true;
+ }
+ // Not guarded by Finalized: every call re-derives each member's placement
+ // from this chunk's current Out, RVA and OutputSectionOff.
+ for (SectionChunk *C : Sections) {
+ if (!C->Live)
+ continue;
+ size_t Off = Builder.getOffset(toStringRef(C->getContents()));
+ C->setOutputSection(Out);
+ C->setRVA(RVA + Off);
+ C->OutputSectionOff = OutputSectionOff + Off;
+ }
+// Merged string data is read-only initialized data.
+uint32_t MergeChunk::getOutputCharacteristics() const {
+  return IMAGE_SCN_MEM_READ | IMAGE_SCN_CNT_INITIALIZED_DATA;
+}
+// The chunk's size is the size reported by the string table builder.
+size_t MergeChunk::getSize() const {
+ return Builder.getSize();
+// Has the string table builder write its contents at this chunk's offset.
+void MergeChunk::writeTo(uint8_t *Buf) const {
+ Builder.write(Buf + OutputSectionOff);
+// MinGW specific.
+// The chunk holds a single pointer-sized value.
+size_t AbsolutePointerChunk::getSize() const { return Config->Wordsize; }
+// Stores Value as one little-endian integer: 64 bits wide on 64-bit targets,
+// 32 bits otherwise.
+void AbsolutePointerChunk::writeTo(uint8_t *Buf) const {
+  uint8_t *Dest = Buf + OutputSectionOff;
+  if (!Config->is64()) {
+    write32le(Dest, Value);
+    return;
+  }
+  write64le(Dest, Value);
+} // namespace coff
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/COFF/Chunks.h b/contrib/llvm/tools/lld/COFF/Chunks.h
new file mode 100644
index 000000000000..e132fdf8adfa
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/Chunks.h
@@ -0,0 +1,527 @@
+//===- Chunks.h -------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Config.h"
+#include "InputFiles.h"
+#include "lld/Common/LLVM.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Object/COFF.h"
+#include <utility>
+#include <vector>
+namespace lld {
+namespace coff {
+using llvm::COFF::ImportDirectoryTableEntry;
+using llvm::object::COFFSymbolRef;
+using llvm::object::SectionRef;
+using llvm::object::coff_relocation;
+using llvm::object::coff_section;
+class Baserel;
+class Defined;
+class DefinedImportData;
+class DefinedRegular;
+class ObjFile;
+class OutputSection;
+class RuntimePseudoReloc;
+class Symbol;
+// Mask for permissions (discardable, writable, readable, executable, etc).
+const uint32_t PermMask = 0xFE000000;
+// Mask for section types (code, data, bss).
+const uint32_t TypeMask = 0x000000E0;
+// A Chunk represents a chunk of data that will occupy space in the
+// output (if the resolver chose that). It may or may not be backed by
+// a section of an input file. It could be linker-created data, or
+// doesn't even have actual data (if common or bss).
+class Chunk {
+ enum Kind { SectionKind, OtherKind };
+ Kind kind() const { return ChunkKind; }
+ virtual ~Chunk() = default;
+ // Returns the size of this chunk (even if this is a common or BSS.)
+ virtual size_t getSize() const = 0;
+ // Write this chunk to a mmap'ed file, assuming Buf is pointing to
+ // beginning of the file. Because this function may use RVA values
+ // of other chunks for relocations, you need to set them properly
+ // before calling this function.
+ virtual void writeTo(uint8_t *Buf) const {}
+ // Called by the writer once before assigning addresses and writing
+ // the output.
+ virtual void readRelocTargets() {}
+ // Called if restarting thunk addition.
+ virtual void resetRelocTargets() {}
+ // Called by the writer after an RVA is assigned, but before calling
+ // getSize().
+ virtual void finalizeContents() {}
+ // The writer sets and uses the addresses.
+ uint64_t getRVA() const { return RVA; }
+ void setRVA(uint64_t V) { RVA = V; }
+ // Returns true if this has non-zero data. BSS chunks return
+ // false. If false is returned, the space occupied by this chunk
+ // will be filled with zeros.
+ virtual bool hasData() const { return true; }
+ // Returns readable/writable/executable bits.
+ virtual uint32_t getOutputCharacteristics() const { return 0; }
+ // Returns the section name if this is a section chunk.
+ // It is illegal to call this function on non-section chunks.
+ virtual StringRef getSectionName() const {
+ llvm_unreachable("unimplemented getSectionName");
+ }
+ // An output section has pointers to chunks in the section, and each
+ // chunk has a back pointer to an output section.
+ void setOutputSection(OutputSection *O) { Out = O; }
+ OutputSection *getOutputSection() const { return Out; }
+ // Windows-specific.
+ // Collect all locations that contain absolute addresses for base relocations.
+ virtual void getBaserels(std::vector<Baserel> *Res) {}
+ // Returns a human-readable name of this chunk. Chunks are unnamed chunks of
+ // bytes, so this is used only for logging or debugging.
+ virtual StringRef getDebugName() { return ""; }
+ // The alignment of this chunk. The writer uses the value.
+ uint32_t Alignment = 1;
+ Chunk(Kind K = OtherKind) : ChunkKind(K) {}
+ const Kind ChunkKind;
+ // The RVA of this chunk in the output. The writer sets a value.
+ uint64_t RVA = 0;
+ // The output section for this chunk.
+ OutputSection *Out = nullptr;
+ // The offset from beginning of the output section. The writer sets a value.
+ uint64_t OutputSectionOff = 0;
+ // Whether this section needs to be kept distinct from other sections during
+ // ICF. This is set by the driver using address-significance tables.
+ bool KeepUnique = false;
+// A chunk corresponding to a section of an input file.
+class SectionChunk final : public Chunk {
+ // Identical COMDAT Folding feature accesses section internal data.
+ friend class ICF;
+ // Iterator adaptor over coff_relocation records. Dereferencing yields the
+ // Symbol that File returns for the relocation's SymbolTableIndex.
+ class symbol_iterator : public llvm::iterator_adaptor_base<
+ symbol_iterator, const coff_relocation *,
+ std::random_access_iterator_tag, Symbol *> {
+ friend SectionChunk;
+ ObjFile *File;
+ symbol_iterator(ObjFile *File, const coff_relocation *I)
+ : symbol_iterator::iterator_adaptor_base(I), File(File) {}
+ public:
+ symbol_iterator() = default;
+ Symbol *operator*() const { return File->getSymbol(I->SymbolTableIndex); }
+ };
+ SectionChunk(ObjFile *File, const coff_section *Header);
+ static bool classof(const Chunk *C) { return C->kind() == SectionKind; }
+ void readRelocTargets() override;
+ void resetRelocTargets() override;
+ size_t getSize() const override { return Header->SizeOfRawData; }
+ ArrayRef<uint8_t> getContents() const;
+ void writeTo(uint8_t *Buf) const override;
+ bool hasData() const override;
+ uint32_t getOutputCharacteristics() const override;
+ StringRef getSectionName() const override { return SectionName; }
+ void getBaserels(std::vector<Baserel> *Res) override;
+ bool isCOMDAT() const;
+ void applyRelX64(uint8_t *Off, uint16_t Type, OutputSection *OS, uint64_t S,
+ uint64_t P) const;
+ void applyRelX86(uint8_t *Off, uint16_t Type, OutputSection *OS, uint64_t S,
+ uint64_t P) const;
+ void applyRelARM(uint8_t *Off, uint16_t Type, OutputSection *OS, uint64_t S,
+ uint64_t P) const;
+ void applyRelARM64(uint8_t *Off, uint16_t Type, OutputSection *OS, uint64_t S,
+ uint64_t P) const;
+ void getRuntimePseudoRelocs(std::vector<RuntimePseudoReloc> &Res);
+ // Called if the garbage collector decides to not include this chunk
+ // in a final output. It's supposed to print out a log message to stdout.
+ void printDiscardedMessage() const;
+ // Adds COMDAT associative sections to this COMDAT section. A chunk
+ // and its children are treated as a group by the garbage collector.
+ void addAssociative(SectionChunk *Child);
+ StringRef getDebugName() override;
+ // Reports whether this chunk carries CodeView debug info, i.e. its section
+ // is named ".debug" or starts with ".debug$". Such chunks are not laid out
+ // in the image; they end up in the PDB, if one is requested.
+ bool isCodeView() const {
+   if (SectionName == ".debug")
+     return true;
+   return SectionName.startswith(".debug$");
+ }
+ // Reports whether this chunk holds DWARF debug info or exception handling
+ // data, i.e. its section name starts with ".debug_" or is ".eh_frame".
+ bool isDWARF() const {
+   if (SectionName.startswith(".debug_"))
+     return true;
+   return SectionName == ".eh_frame";
+ }
+ // Allow iteration over the bodies of this chunk's relocated symbols.
+ // The range walks Relocs from begin to end and resolves each relocation
+ // through File.
+ llvm::iterator_range<symbol_iterator> symbols() const {
+ return llvm::make_range(symbol_iterator(File, Relocs.begin()),
+ symbol_iterator(File, Relocs.end()));
+ }
+ // Allow iteration over the associated child chunks for this section.
+ ArrayRef<SectionChunk *> children() const { return AssocChildren; }
+ // The section ID this chunk belongs to in its Obj.
+ uint32_t getSectionNumber() const;
+ // A pointer pointing to a replacement for this chunk.
+ // Initially it points to "this" object. If this chunk is merged
+ // with other chunk by ICF, it points to another chunk,
+ // and this chunk is considered as dead.
+ SectionChunk *Repl;
+ // The CRC of the contents as described in the COFF spec 4.5.5.
+ // Auxiliary Format 5: Section Definitions. Used for ICF.
+ uint32_t Checksum = 0;
+ const coff_section *Header;
+ // The file that this chunk was created from.
+ ObjFile *File;
+ // The COMDAT leader symbol if this is a COMDAT chunk.
+ DefinedRegular *Sym = nullptr;
+ ArrayRef<coff_relocation> Relocs;
+ // Used by the garbage collector.
+ bool Live;
+ // When inserting a thunk, we need to adjust a relocation to point to
+ // the thunk instead of the actual original target Symbol.
+ std::vector<Symbol *> RelocTargets;
+ StringRef SectionName;
+ std::vector<SectionChunk *> AssocChildren;
+ // Used for ICF (Identical COMDAT Folding)
+ void replace(SectionChunk *Other);
+ uint32_t Class[2] = {0, 0};
+// This class is used to implement an lld-specific feature (not implemented in
+// MSVC) that minimizes the output size by finding string literals sharing tail
+// parts and merging them.
+// If string tail merging is enabled and a section is identified as containing a
+// string literal, it is added to a MergeChunk with an appropriate alignment.
+// The MergeChunk then tail merges the strings using the StringTableBuilder
+// class and assigns RVAs and section offsets to each of the member chunks based
+// on the offsets assigned by the StringTableBuilder.
+class MergeChunk : public Chunk {
+ MergeChunk(uint32_t Alignment);
+ static void addSection(SectionChunk *C);
+ void finalizeContents() override;
+ uint32_t getOutputCharacteristics() const override;
+ StringRef getSectionName() const override { return ".rdata"; }
+ size_t getSize() const override;
+ void writeTo(uint8_t *Buf) const override;
+ static std::map<uint32_t, MergeChunk *> Instances;
+ std::vector<SectionChunk *> Sections;
+ llvm::StringTableBuilder Builder;
+ bool Finalized = false;
+// A chunk for common symbols. Common chunks don't have actual data.
+class CommonChunk : public Chunk {
+ CommonChunk(const COFFSymbolRef Sym);
+ size_t getSize() const override { return Sym.getValue(); }
+ bool hasData() const override { return false; }
+ uint32_t getOutputCharacteristics() const override;
+ StringRef getSectionName() const override { return ".bss"; }
+ const COFFSymbolRef Sym;
+// A chunk for linker-created strings.
+class StringChunk : public Chunk {
+ explicit StringChunk(StringRef S) : Str(S) {}
+ size_t getSize() const override { return Str.size() + 1; }
+ void writeTo(uint8_t *Buf) const override;
+ StringRef Str;
+static const uint8_t ImportThunkX86[] = {
+ 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // JMP *0x0
+static const uint8_t ImportThunkARM[] = {
+ 0x40, 0xf2, 0x00, 0x0c, // mov.w ip, #0
+ 0xc0, 0xf2, 0x00, 0x0c, // mov.t ip, #0
+ 0xdc, 0xf8, 0x00, 0xf0, // ldr.w pc, [ip]
+static const uint8_t ImportThunkARM64[] = {
+ 0x10, 0x00, 0x00, 0x90, // adrp x16, #0
+ 0x10, 0x02, 0x40, 0xf9, // ldr x16, [x16]
+ 0x00, 0x02, 0x1f, 0xd6, // br x16
+// Windows-specific.
+// A chunk for DLL import jump table entry. In a final output, its
+// contents will be a JMP instruction to some __imp_ symbol.
+class ImportThunkChunkX64 : public Chunk {
+ explicit ImportThunkChunkX64(Defined *S);
+ size_t getSize() const override { return sizeof(ImportThunkX86); }
+ void writeTo(uint8_t *Buf) const override;
+ Defined *ImpSymbol;
+class ImportThunkChunkX86 : public Chunk {
+ explicit ImportThunkChunkX86(Defined *S) : ImpSymbol(S) {}
+ size_t getSize() const override { return sizeof(ImportThunkX86); }
+ void getBaserels(std::vector<Baserel> *Res) override;
+ void writeTo(uint8_t *Buf) const override;
+ Defined *ImpSymbol;
+class ImportThunkChunkARM : public Chunk {
+ explicit ImportThunkChunkARM(Defined *S) : ImpSymbol(S) {}
+ size_t getSize() const override { return sizeof(ImportThunkARM); }
+ void getBaserels(std::vector<Baserel> *Res) override;
+ void writeTo(uint8_t *Buf) const override;
+ Defined *ImpSymbol;
+class ImportThunkChunkARM64 : public Chunk {
+ explicit ImportThunkChunkARM64(Defined *S) : ImpSymbol(S) {}
+ size_t getSize() const override { return sizeof(ImportThunkARM64); }
+ void writeTo(uint8_t *Buf) const override;
+ Defined *ImpSymbol;
+class RangeExtensionThunkARM : public Chunk {
+ explicit RangeExtensionThunkARM(Defined *T) : Target(T) {}
+ size_t getSize() const override;
+ void writeTo(uint8_t *Buf) const override;
+ Defined *Target;
+class RangeExtensionThunkARM64 : public Chunk {
+ explicit RangeExtensionThunkARM64(Defined *T) : Target(T) {}
+ size_t getSize() const override;
+ void writeTo(uint8_t *Buf) const override;
+ Defined *Target;
+// Windows-specific.
+// See comments for DefinedLocalImport class.
+class LocalImportChunk : public Chunk {
+ // Wraps symbol S. The chunk is aligned to the configured word size.
+ explicit LocalImportChunk(Defined *S) : Sym(S) {
+ Alignment = Config->Wordsize;
+ }
+ size_t getSize() const override;
+ void getBaserels(std::vector<Baserel> *Res) override;
+ void writeTo(uint8_t *Buf) const override;
+ Defined *Sym;
+// Duplicate RVAs are not allowed in RVA tables, so unique symbols by chunk and
+// offset into the chunk. Order does not matter as the RVA table will be sorted
+// later.
+struct ChunkAndOffset {
+ Chunk *InputChunk;
+ uint32_t Offset;
+ // Hashing and equality traits that let ChunkAndOffset be used as a key in
+ // LLVM dense containers. The empty and tombstone keys reuse the Chunk*
+ // sentinel values paired with a zero offset.
+ struct DenseMapInfo {
+ static ChunkAndOffset getEmptyKey() {
+ return {llvm::DenseMapInfo<Chunk *>::getEmptyKey(), 0};
+ }
+ static ChunkAndOffset getTombstoneKey() {
+ return {llvm::DenseMapInfo<Chunk *>::getTombstoneKey(), 0};
+ }
+ // Hashes the (chunk pointer, offset) pair.
+ static unsigned getHashValue(const ChunkAndOffset &CO) {
+ return llvm::DenseMapInfo<std::pair<Chunk *, uint32_t>>::getHashValue(
+ {CO.InputChunk, CO.Offset});
+ }
+ // Two keys are equal when both the chunk pointer and the offset match.
+ static bool isEqual(const ChunkAndOffset &LHS, const ChunkAndOffset &RHS) {
+ return LHS.InputChunk == RHS.InputChunk && LHS.Offset == RHS.Offset;
+ }
+ };
+using SymbolRVASet = llvm::DenseSet<ChunkAndOffset>;
+// Table which contains symbol RVAs. Used for /safeseh and /guard:cf.
+class RVATableChunk : public Chunk {
+ explicit RVATableChunk(SymbolRVASet S) : Syms(std::move(S)) {}
+ size_t getSize() const override { return Syms.size() * 4; }
+ void writeTo(uint8_t *Buf) const override;
+ SymbolRVASet Syms;
+// Windows-specific.
+// This class represents a block in .reloc section.
+// See the PE/COFF spec 5.6 for details.
+class BaserelChunk : public Chunk {
+ BaserelChunk(uint32_t Page, Baserel *Begin, Baserel *End);
+ size_t getSize() const override { return Data.size(); }
+ void writeTo(uint8_t *Buf) const override;
+ std::vector<uint8_t> Data;
+class Baserel {
+ Baserel(uint32_t V, uint8_t Ty) : RVA(V), Type(Ty) {}
+ explicit Baserel(uint32_t V) : Baserel(V, getDefaultType()) {}
+ uint8_t getDefaultType();
+ uint32_t RVA;
+ uint8_t Type;
+// This is a placeholder Chunk, to allow attaching a DefinedSynthetic to a
+// specific place in a section, without any data. This is used for the MinGW
+// specific symbol __RUNTIME_PSEUDO_RELOC_LIST_END__, even though the concept
+// of an empty chunk isn't MinGW specific.
+class EmptyChunk : public Chunk {
+ EmptyChunk() {}
+ size_t getSize() const override { return 0; }
+ void writeTo(uint8_t *Buf) const override {}
+// MinGW specific, for the "automatic import of variables from DLLs" feature.
+// This provides the table of runtime pseudo relocations, for variable
+// references that turned out to need to be imported from a DLL even though
+// the reference didn't use the dllimport attribute. The MinGW runtime will
+// process this table after loading, before handing control over to user
+// code.
+class PseudoRelocTableChunk : public Chunk {
+ // Takes over the contents of the caller's vector by moving from it, so the
+ // argument is left in a moved-from state. The table is 4-byte aligned.
+ PseudoRelocTableChunk(std::vector<RuntimePseudoReloc> &Relocs)
+ : Relocs(std::move(Relocs)) {
+ Alignment = 4;
+ }
+ size_t getSize() const override;
+ void writeTo(uint8_t *Buf) const override;
+ std::vector<RuntimePseudoReloc> Relocs;
+// MinGW specific; information about one individual location in the image
+// that needs to be fixed up at runtime after loading. This represents
+// one individual element in the PseudoRelocTableChunk table.
+class RuntimePseudoReloc {
+ RuntimePseudoReloc(Defined *Sym, SectionChunk *Target, uint32_t TargetOffset,
+ int Flags)
+ : Sym(Sym), Target(Target), TargetOffset(TargetOffset), Flags(Flags) {}
+ Defined *Sym;
+ SectionChunk *Target;
+ uint32_t TargetOffset;
+ // The Flags field contains the size of the relocation, in bits. No other
+ // flags are currently defined.
+ int Flags;
+// MinGW specific. A Chunk that contains one pointer-sized absolute value.
+class AbsolutePointerChunk : public Chunk {
+ // Stores Value and aligns the chunk to its own size, as returned by
+ // getSize().
+ AbsolutePointerChunk(uint64_t Value) : Value(Value) {
+ Alignment = getSize();
+ }
+ size_t getSize() const override;
+ void writeTo(uint8_t *Buf) const override;
+ uint64_t Value;
+void applyMOV32T(uint8_t *Off, uint32_t V);
+void applyBranch24T(uint8_t *Off, int32_t V);
+void applyArm64Addr(uint8_t *Off, uint64_t S, uint64_t P, int Shift);
+void applyArm64Imm(uint8_t *Off, uint64_t Imm, uint32_t RangeLimit);
+void applyArm64Branch26(uint8_t *Off, int64_t V);
+} // namespace coff
+} // namespace lld
+namespace llvm {
+template <>
+struct DenseMapInfo<lld::coff::ChunkAndOffset>
+ : lld::coff::ChunkAndOffset::DenseMapInfo {};
diff --git a/contrib/llvm/tools/lld/COFF/Config.h b/contrib/llvm/tools/lld/COFF/Config.h
new file mode 100644
index 000000000000..8915b6a3bdd8
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/Config.h
@@ -0,0 +1,212 @@
+//===- Config.h -------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/CachePruning.h"
+#include <cstdint>
+#include <map>
+#include <set>
+#include <string>
+namespace lld {
+namespace coff {
+using llvm::COFF::WindowsSubsystem;
+using llvm::StringRef;
+class DefinedAbsolute;
+class DefinedRelative;
+class StringChunk;
+class Symbol;
+// Short aliases.
+static const auto AMD64 = llvm::COFF::IMAGE_FILE_MACHINE_AMD64;
+static const auto ARM64 = llvm::COFF::IMAGE_FILE_MACHINE_ARM64;
+static const auto ARMNT = llvm::COFF::IMAGE_FILE_MACHINE_ARMNT;
+static const auto I386 = llvm::COFF::IMAGE_FILE_MACHINE_I386;
+// Represents an /export option.
+struct Export {
+ StringRef Name; // N in /export:N or /export:E=N
+ StringRef ExtName; // E in /export:E=N
+ Symbol *Sym = nullptr;
+ uint16_t Ordinal = 0;
+ bool Noname = false;
+ bool Data = false;
+ bool Private = false;
+ bool Constant = false;
+ // If an export is a form of /export:foo=dllname.bar, that means
+ // that foo should be exported as an alias to bar in the DLL.
+ // ForwardTo is set to "dllname.bar" part. Usually empty.
+ StringRef ForwardTo;
+ StringChunk *ForwardChunk = nullptr;
+ // True if this /export option was in a .drectve section.
+ bool Directives = false;
+ StringRef SymbolName;
+ StringRef ExportName; // Name in DLL
+ // Two exports compare equal when their Name, ExtName, Ordinal and the
+ // Noname/Data/Private flags all match. The remaining fields are not
+ // compared. Declared const so that const Export objects can be compared.
+ bool operator==(const Export &E) const {
+   return (Name == E.Name && ExtName == E.ExtName &&
+           Ordinal == E.Ordinal && Noname == E.Noname &&
+           Data == E.Data && Private == E.Private);
+ }
+enum class DebugType {
+ None = 0x0,
+ CV = 0x1, /// CodeView
+ PData = 0x2, /// Procedure Data
+ Fixup = 0x4, /// Relocation Table
+enum class GuardCFLevel {
+ Off,
+ NoLongJmp, // Emit gfids but no longjmp tables
+ Full, // Enable all protections.
+// Global configuration.
+struct Configuration {
+ enum ManifestKind { SideBySide, Embed, No };
+ // True when the configured machine is a 64-bit target (AMD64 or ARM64).
+ // Declared const so it can be queried through a const Configuration.
+ bool is64() const { return Machine == AMD64 || Machine == ARM64; }
+ llvm::COFF::MachineTypes Machine = IMAGE_FILE_MACHINE_UNKNOWN;
+ size_t Wordsize;
+ bool Verbose = false;
+ WindowsSubsystem Subsystem = llvm::COFF::IMAGE_SUBSYSTEM_UNKNOWN;
+ Symbol *Entry = nullptr;
+ bool NoEntry = false;
+ std::string OutputFile;
+ std::string ImportName;
+ bool DoGC = true;
+ bool DoICF = true;
+ bool TailMerge;
+ bool Relocatable = true;
+ bool ForceMultiple = false;
+ bool ForceUnresolved = false;
+ bool Debug = false;
+ bool DebugDwarf = false;
+ bool DebugGHashes = false;
+ bool DebugSymtab = false;
+ bool ShowTiming = false;
+ unsigned DebugTypes = static_cast<unsigned>(DebugType::None);
+ std::vector<std::string> NatvisFiles;
+ llvm::SmallString<128> PDBAltPath;
+ llvm::SmallString<128> PDBPath;
+ llvm::SmallString<128> PDBSourcePath;
+ std::vector<llvm::StringRef> Argv;
+ // Symbols in this set are considered as live by the garbage collector.
+ std::vector<Symbol *> GCRoot;
+ std::set<StringRef> NoDefaultLibs;
+ bool NoDefaultLibAll = false;
+ // True if we are creating a DLL.
+ bool DLL = false;
+ StringRef Implib;
+ std::vector<Export> Exports;
+ std::set<std::string> DelayLoads;
+ std::map<std::string, int> DLLOrder;
+ Symbol *DelayLoadHelper = nullptr;
+ bool SaveTemps = false;
+ // /guard:cf
+ GuardCFLevel GuardCF = GuardCFLevel::Off;
+ // Used for SafeSEH.
+ Symbol *SEHTable = nullptr;
+ Symbol *SEHCount = nullptr;
+ // Used for /opt:lldlto=N
+ unsigned LTOO = 2;
+ // Used for /opt:lldltojobs=N
+ unsigned ThinLTOJobs = 0;
+ // Used for /opt:lldltopartitions=N
+ unsigned LTOPartitions = 1;
+ // Used for /opt:lldltocache=path
+ StringRef LTOCache;
+ // Used for /opt:lldltocachepolicy=policy
+ llvm::CachePruningPolicy LTOCachePolicy;
+ // Used for /merge:from=to (e.g. /merge:.rdata=.text)
+ std::map<StringRef, StringRef> Merge;
+ // Used for /section=.name,{DEKPRSW} to set section attributes.
+ std::map<StringRef, uint32_t> Section;
+ // Options for manifest files.
+ ManifestKind Manifest = No;
+ int ManifestID = 1;
+ StringRef ManifestDependency;
+ bool ManifestUAC = true;
+ std::vector<std::string> ManifestInput;
+ StringRef ManifestLevel = "'asInvoker'";
+ StringRef ManifestUIAccess = "'false'";
+ StringRef ManifestFile;
+ // Used for /aligncomm.
+ std::map<std::string, int> AlignComm;
+ // Used for /failifmismatch.
+ std::map<StringRef, StringRef> MustMatch;
+ // Used for /alternatename.
+ std::map<StringRef, StringRef> AlternateNames;
+ // Used for /order.
+ llvm::StringMap<int> Order;
+ // Used for /lldmap.
+ std::string MapFile;
+ uint64_t ImageBase = -1;
+ uint64_t StackReserve = 1024 * 1024;
+ uint64_t StackCommit = 4096;
+ uint64_t HeapReserve = 1024 * 1024;
+ uint64_t HeapCommit = 4096;
+ uint32_t MajorImageVersion = 0;
+ uint32_t MinorImageVersion = 0;
+ uint32_t MajorOSVersion = 6;
+ uint32_t MinorOSVersion = 0;
+ uint32_t Timestamp = 0;
+ bool DynamicBase = true;
+ bool AllowBind = true;
+ bool NxCompat = true;
+ bool AllowIsolation = true;
+ bool TerminalServerAware = true;
+ bool LargeAddressAware = false;
+ bool HighEntropyVA = false;
+ bool AppContainer = false;
+ bool MinGW = false;
+ bool WarnMissingOrderSymbol = true;
+ bool WarnLocallyDefinedImported = true;
+ bool WarnDebugInfoUnusable = true;
+ bool Incremental = true;
+ bool IntegrityCheck = false;
+ bool KillAt = false;
+ bool Repro = false;
+extern Configuration *Config;
+} // namespace coff
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/COFF/DLL.cpp b/contrib/llvm/tools/lld/COFF/DLL.cpp
new file mode 100644
index 000000000000..c06027d3e5c3
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/DLL.cpp
@@ -0,0 +1,645 @@
+//===- DLL.cpp ------------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file defines various types of chunks for the DLL import or export
+// descriptor tables. They are inherently Windows-specific.
+// You need to read Microsoft PE/COFF spec to understand details
+// about the data structures.
+// If you are not particularly interested in linking against Windows
+// DLL, you can skip this file, and you should still be able to
+// understand the rest of the linker.
+#include "DLL.h"
+#include "Chunks.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Path.h"
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::support::endian;
+using namespace llvm::COFF;
+namespace lld {
+namespace coff {
+namespace {
+// Import table
+// A chunk for one Hint/Name entry: a 2-byte hint followed by a null-terminated name.
+class HintNameChunk : public Chunk {
+ HintNameChunk(StringRef N, uint16_t H) : Name(N), Hint(H) {}
+ size_t getSize() const override {
+ // Starts with 2 byte Hint field, followed by a null-terminated string,
+ // ends with 0 or 1 byte padding.
+ return alignTo(Name.size() + 3, 2);
+ }
+ void writeTo(uint8_t *Buf) const override {
+ memset(Buf + OutputSectionOff, 0, getSize()); // zero-fill so the terminator and padding are null
+ write16le(Buf + OutputSectionOff, Hint);
+ memcpy(Buf + OutputSectionOff + 2, Name.data(), Name.size());
+ }
+ StringRef Name; // symbol name copied right after the hint field
+ uint16_t Hint; // value written to the leading 16-bit field
+// A word-sized table slot that stores the RVA of another chunk (the hint/name entry).
+class LookupChunk : public Chunk {
+ explicit LookupChunk(Chunk *C) : HintName(C) { Alignment = Config->Wordsize; }
+ size_t getSize() const override { return Config->Wordsize; }
+ void writeTo(uint8_t *Buf) const override {
+ if (Config->is64())
+ write64le(Buf + OutputSectionOff, HintName->getRVA());
+ else
+ write32le(Buf + OutputSectionOff, HintName->getRVA());
+ }
+ Chunk *HintName; // chunk whose RVA is written into this slot
+// A word-sized table slot for the import descriptor table.
+// This chunk represents import-by-ordinal symbols.
+// See Microsoft PE/COFF spec 7.1. Import Header for details.
+class OrdinalOnlyChunk : public Chunk {
+ explicit OrdinalOnlyChunk(uint16_t V) : Ordinal(V) {
+ Alignment = Config->Wordsize;
+ }
+ size_t getSize() const override { return Config->Wordsize; }
+ void writeTo(uint8_t *Buf) const override {
+ // An import-by-ordinal slot has MSB 1 to indicate that
+ // this is import-by-ordinal (and not import-by-name).
+ if (Config->is64()) {
+ write64le(Buf + OutputSectionOff, (1ULL << 63) | Ordinal);
+ } else {
+ write32le(Buf + OutputSectionOff, (1ULL << 31) | Ordinal);
+ }
+ }
+ uint16_t Ordinal; // ordinal stored in the low bits of the slot
+// A chunk for one entry of the import directory table.
+class ImportDirectoryChunk : public Chunk {
+ explicit ImportDirectoryChunk(Chunk *N) : DLLName(N) {}
+ size_t getSize() const override { return sizeof(ImportDirectoryTableEntry); }
+ void writeTo(uint8_t *Buf) const override {
+ memset(Buf + OutputSectionOff, 0, getSize()); // fields not assigned below stay zero
+ auto *E = (coff_import_directory_table_entry *)(Buf + OutputSectionOff);
+ E->ImportLookupTableRVA = LookupTab->getRVA();
+ E->NameRVA = DLLName->getRVA();
+ E->ImportAddressTableRVA = AddressTab->getRVA();
+ }
+ Chunk *DLLName; // chunk whose RVA becomes NameRVA
+ Chunk *LookupTab; // not set by the constructor; must be assigned before writeTo
+ Chunk *AddressTab; // not set by the constructor; must be assigned before writeTo
+// A chunk representing a null terminator in the import table.
+// The contents of this chunk are always null bytes.
+class NullChunk : public Chunk {
+ explicit NullChunk(size_t N) : Size(N) {}
+ bool hasData() const override { return false; }
+ size_t getSize() const override { return Size; }
+ void writeTo(uint8_t *Buf) const override {
+ memset(Buf + OutputSectionOff, 0, Size);
+ }
+ size_t Size; // number of zero bytes this chunk occupies
+static std::vector<std::vector<DefinedImportData *>>
+binImports(const std::vector<DefinedImportData *> &Imports) {
+ // Group DLL-imported symbols by DLL name because that's how
+ // symbols are laid out in the import descriptor table.
+ auto Less = [](const std::string &A, const std::string &B) {
+ return Config->DLLOrder[A] < Config->DLLOrder[B]; // NOTE(review): names with equal DLLOrder values would share one bucket in M; presumably every DLL has a distinct entry, confirm
+ };
+ std::map<std::string, std::vector<DefinedImportData *>,
+ bool(*)(const std::string &, const std::string &)> M(Less);
+ for (DefinedImportData *Sym : Imports)
+ M[Sym->getDLLName().lower()].push_back(Sym); // key is the lower-cased DLL name
+ std::vector<std::vector<DefinedImportData *>> V;
+ for (auto &KV : M) {
+ // Sort symbols by name for each group.
+ std::vector<DefinedImportData *> &Syms = KV.second;
+ std::sort(Syms.begin(), Syms.end(),
+ [](DefinedImportData *A, DefinedImportData *B) {
+ return A->getName() < B->getName();
+ });
+ V.push_back(std::move(Syms));
+ }
+ return V; // one inner vector per DLL, in the order defined by Less
+// Delay-load import table
+// See Microsoft PE/COFF spec 4.3 for details.
+// A chunk for the delay import descriptor table entry.
+class DelayDirectoryChunk : public Chunk {
+ explicit DelayDirectoryChunk(Chunk *N) : DLLName(N) {}
+ size_t getSize() const override {
+ return sizeof(delay_import_directory_table_entry);
+ }
+ void writeTo(uint8_t *Buf) const override {
+ memset(Buf + OutputSectionOff, 0, getSize()); // fields not assigned below stay zero
+ auto *E = (delay_import_directory_table_entry *)(Buf + OutputSectionOff);
+ E->Attributes = 1;
+ E->Name = DLLName->getRVA();
+ E->ModuleHandle = ModuleHandle->getRVA();
+ E->DelayImportAddressTable = AddressTab->getRVA();
+ E->DelayImportNameTable = NameTab->getRVA();
+ }
+ Chunk *DLLName; // chunk whose RVA becomes the Name field
+ Chunk *ModuleHandle; // not set by the constructor; must be assigned before writeTo
+ Chunk *AddressTab; // not set by the constructor; must be assigned before writeTo
+ Chunk *NameTab; // not set by the constructor; must be assigned before writeTo
+// Initial contents for delay-loaded functions.
+// This code calls __delayLoadHelper2 function to resolve a symbol
+// and then overwrites its jump table slot with the result
+// for subsequent function calls.
+static const uint8_t ThunkX64[] = { // x86-64 template; the zeroed operands are patched by ThunkChunkX64::writeTo
+ 0x51, // push rcx
+ 0x52, // push rdx
+ 0x41, 0x50, // push r8
+ 0x41, 0x51, // push r9
+ 0x48, 0x83, 0xEC, 0x48, // sub rsp, 48h
+ 0x66, 0x0F, 0x7F, 0x04, 0x24, // movdqa xmmword ptr [rsp], xmm0
+ 0x66, 0x0F, 0x7F, 0x4C, 0x24, 0x10, // movdqa xmmword ptr [rsp+10h], xmm1
+ 0x66, 0x0F, 0x7F, 0x54, 0x24, 0x20, // movdqa xmmword ptr [rsp+20h], xmm2
+ 0x66, 0x0F, 0x7F, 0x5C, 0x24, 0x30, // movdqa xmmword ptr [rsp+30h], xmm3
+ 0x48, 0x8D, 0x15, 0, 0, 0, 0, // lea rdx, [__imp_<FUNCNAME>]
+ 0x48, 0x8D, 0x0D, 0, 0, 0, 0, // lea rcx, [___DELAY_IMPORT_...]
+ 0xE8, 0, 0, 0, 0, // call __delayLoadHelper2
+ 0x66, 0x0F, 0x6F, 0x04, 0x24, // movdqa xmm0, xmmword ptr [rsp]
+ 0x66, 0x0F, 0x6F, 0x4C, 0x24, 0x10, // movdqa xmm1, xmmword ptr [rsp+10h]
+ 0x66, 0x0F, 0x6F, 0x54, 0x24, 0x20, // movdqa xmm2, xmmword ptr [rsp+20h]
+ 0x66, 0x0F, 0x6F, 0x5C, 0x24, 0x30, // movdqa xmm3, xmmword ptr [rsp+30h]
+ 0x48, 0x83, 0xC4, 0x48, // add rsp, 48h
+ 0x41, 0x59, // pop r9
+ 0x41, 0x58, // pop r8
+ 0x5A, // pop rdx
+ 0x59, // pop rcx
+ 0xFF, 0xE0, // jmp rax
+static const uint8_t ThunkX86[] = { // 32-bit x86 template; the zeroed operands are patched by ThunkChunkX86::writeTo
+ 0x51, // push ecx
+ 0x52, // push edx
+ 0x68, 0, 0, 0, 0, // push offset ___imp__<FUNCNAME>
+ 0x68, 0, 0, 0, 0, // push offset ___DELAY_IMPORT_DESCRIPTOR_<DLLNAME>_dll
+ 0xE8, 0, 0, 0, 0, // call ___delayLoadHelper2@8
+ 0x5A, // pop edx
+ 0x59, // pop ecx
+ 0xFF, 0xE0, // jmp eax
+static const uint8_t ThunkARM[] = { // ARM (Thumb) template; the #0 immediates are patched by ThunkChunkARM::writeTo
+ 0x40, 0xf2, 0x00, 0x0c, // mov.w ip, #0 __imp_<FUNCNAME>
+ 0xc0, 0xf2, 0x00, 0x0c, // mov.t ip, #0 __imp_<FUNCNAME>
+ 0x2d, 0xe9, 0x0f, 0x48, // push.w {r0, r1, r2, r3, r11, lr}
+ 0x0d, 0xf2, 0x10, 0x0b, // addw r11, sp, #16
+ 0x2d, 0xed, 0x10, 0x0b, // vpush {d0, d1, d2, d3, d4, d5, d6, d7}
+ 0x61, 0x46, // mov r1, ip
+ 0x40, 0xf2, 0x00, 0x00, // mov.w r0, #0 DELAY_IMPORT_DESCRIPTOR
+ 0xc0, 0xf2, 0x00, 0x00, // mov.t r0, #0 DELAY_IMPORT_DESCRIPTOR
+ 0x00, 0xf0, 0x00, 0xd0, // bl #0 __delayLoadHelper2
+ 0x84, 0x46, // mov ip, r0
+ 0xbd, 0xec, 0x10, 0x0b, // vpop {d0, d1, d2, d3, d4, d5, d6, d7}
+ 0xbd, 0xe8, 0x0f, 0x48, // pop.w {r0, r1, r2, r3, r11, lr}
+ 0x60, 0x47, // bx ip
+static const uint8_t ThunkARM64[] = { // AArch64 template; the #0 immediates are patched by ThunkChunkARM64::writeTo
+ 0x11, 0x00, 0x00, 0x90, // adrp x17, #0 __imp_<FUNCNAME>
+ 0x31, 0x02, 0x00, 0x91, // add x17, x17, #0 :lo12:__imp_<FUNCNAME>
+ 0xfd, 0x7b, 0xb3, 0xa9, // stp x29, x30, [sp, #-208]!
+ 0xfd, 0x03, 0x00, 0x91, // mov x29, sp
+ 0xe0, 0x07, 0x01, 0xa9, // stp x0, x1, [sp, #16]
+ 0xe2, 0x0f, 0x02, 0xa9, // stp x2, x3, [sp, #32]
+ 0xe4, 0x17, 0x03, 0xa9, // stp x4, x5, [sp, #48]
+ 0xe6, 0x1f, 0x04, 0xa9, // stp x6, x7, [sp, #64]
+ 0xe0, 0x87, 0x02, 0xad, // stp q0, q1, [sp, #80]
+ 0xe2, 0x8f, 0x03, 0xad, // stp q2, q3, [sp, #112]
+ 0xe4, 0x97, 0x04, 0xad, // stp q4, q5, [sp, #144]
+ 0xe6, 0x9f, 0x05, 0xad, // stp q6, q7, [sp, #176]
+ 0xe1, 0x03, 0x11, 0xaa, // mov x1, x17
+ 0x00, 0x00, 0x00, 0x90, // adrp x0, #0 DELAY_IMPORT_DESCRIPTOR
+ 0x00, 0x00, 0x00, 0x91, // add x0, x0, #0 :lo12:DELAY_IMPORT_DESCRIPTOR
+ 0x00, 0x00, 0x00, 0x94, // bl #0 __delayLoadHelper2
+ 0xf0, 0x03, 0x00, 0xaa, // mov x16, x0
+ 0xe6, 0x9f, 0x45, 0xad, // ldp q6, q7, [sp, #176]
+ 0xe4, 0x97, 0x44, 0xad, // ldp q4, q5, [sp, #144]
+ 0xe2, 0x8f, 0x43, 0xad, // ldp q2, q3, [sp, #112]
+ 0xe0, 0x87, 0x42, 0xad, // ldp q0, q1, [sp, #80]
+ 0xe6, 0x1f, 0x44, 0xa9, // ldp x6, x7, [sp, #64]
+ 0xe4, 0x17, 0x43, 0xa9, // ldp x4, x5, [sp, #48]
+ 0xe2, 0x0f, 0x42, 0xa9, // ldp x2, x3, [sp, #32]
+ 0xe0, 0x07, 0x41, 0xa9, // ldp x0, x1, [sp, #16]
+ 0xfd, 0x7b, 0xcd, 0xa8, // ldp x29, x30, [sp], #208
+ 0x00, 0x02, 0x1f, 0xd6, // br x16
+// A chunk for the delay import thunk (x86-64): copies ThunkX64 and patches its three operands.
+class ThunkChunkX64 : public Chunk {
+ ThunkChunkX64(Defined *I, Chunk *D, Defined *H)
+ : Imp(I), Desc(D), Helper(H) {}
+ size_t getSize() const override { return sizeof(ThunkX64); }
+ void writeTo(uint8_t *Buf) const override {
+ memcpy(Buf + OutputSectionOff, ThunkX64, sizeof(ThunkX64));
+ write32le(Buf + OutputSectionOff + 36, Imp->getRVA() - RVA - 40); // lea rdx operand, relative to the end of that instruction (offset 40)
+ write32le(Buf + OutputSectionOff + 43, Desc->getRVA() - RVA - 47); // lea rcx operand, relative to offset 47
+ write32le(Buf + OutputSectionOff + 48, Helper->getRVA() - RVA - 52); // call operand, relative to offset 52
+ }
+ Defined *Imp = nullptr; // symbol whose RVA is loaded by the first lea
+ Chunk *Desc = nullptr; // chunk whose RVA is loaded by the second lea
+ Defined *Helper = nullptr; // call target
+class ThunkChunkX86 : public Chunk { // Delay import thunk for 32-bit x86: copies ThunkX86 and patches its operands.
+ ThunkChunkX86(Defined *I, Chunk *D, Defined *H)
+ : Imp(I), Desc(D), Helper(H) {}
+ size_t getSize() const override { return sizeof(ThunkX86); }
+ void writeTo(uint8_t *Buf) const override {
+ memcpy(Buf + OutputSectionOff, ThunkX86, sizeof(ThunkX86));
+ write32le(Buf + OutputSectionOff + 3, Imp->getRVA() + Config->ImageBase); // absolute address pushed by the first push
+ write32le(Buf + OutputSectionOff + 8, Desc->getRVA() + Config->ImageBase); // absolute address pushed by the second push
+ write32le(Buf + OutputSectionOff + 13, Helper->getRVA() - RVA - 17); // call operand, relative to offset 17
+ }
+ void getBaserels(std::vector<Baserel> *Res) override {
+ Res->emplace_back(RVA + 3); // the two absolute addresses written above
+ Res->emplace_back(RVA + 8);
+ }
+ Defined *Imp = nullptr; // symbol whose address is pushed first
+ Chunk *Desc = nullptr; // chunk whose address is pushed second
+ Defined *Helper = nullptr; // call target
+class ThunkChunkARM : public Chunk { // Delay import thunk for ARM: copies ThunkARM and patches its operands.
+ ThunkChunkARM(Defined *I, Chunk *D, Defined *H)
+ : Imp(I), Desc(D), Helper(H) {}
+ size_t getSize() const override { return sizeof(ThunkARM); }
+ void writeTo(uint8_t *Buf) const override {
+ memcpy(Buf + OutputSectionOff, ThunkARM, sizeof(ThunkARM));
+ applyMOV32T(Buf + OutputSectionOff + 0, Imp->getRVA() + Config->ImageBase); // mov.w/mov.t pair that loads ip
+ applyMOV32T(Buf + OutputSectionOff + 22, Desc->getRVA() + Config->ImageBase); // mov.w/mov.t pair that loads r0
+ applyBranch24T(Buf + OutputSectionOff + 30, Helper->getRVA() - RVA - 34); // bl operand, relative to offset 34
+ }
+ void getBaserels(std::vector<Baserel> *Res) override {
+ Res->emplace_back(RVA + 0, IMAGE_REL_BASED_ARM_MOV32T); // the two absolute addresses written above
+ Res->emplace_back(RVA + 22, IMAGE_REL_BASED_ARM_MOV32T);
+ }
+ Defined *Imp = nullptr; // symbol whose address is loaded into ip
+ Chunk *Desc = nullptr; // chunk whose address is loaded into r0
+ Defined *Helper = nullptr; // branch target
+class ThunkChunkARM64 : public Chunk { // Delay import thunk for ARM64: copies ThunkARM64 and patches its operands.
+ ThunkChunkARM64(Defined *I, Chunk *D, Defined *H)
+ : Imp(I), Desc(D), Helper(H) {}
+ size_t getSize() const override { return sizeof(ThunkARM64); }
+ void writeTo(uint8_t *Buf) const override {
+ memcpy(Buf + OutputSectionOff, ThunkARM64, sizeof(ThunkARM64));
+ applyArm64Addr(Buf + OutputSectionOff + 0, Imp->getRVA(), RVA + 0, 12); // adrp x17
+ applyArm64Imm(Buf + OutputSectionOff + 4, Imp->getRVA() & 0xfff, 0); // add x17: low 12 bits of the RVA
+ applyArm64Addr(Buf + OutputSectionOff + 52, Desc->getRVA(), RVA + 52, 12); // adrp x0
+ applyArm64Imm(Buf + OutputSectionOff + 56, Desc->getRVA() & 0xfff, 0); // add x0: low 12 bits of the RVA
+ applyArm64Branch26(Buf + OutputSectionOff + 60,
+ Helper->getRVA() - RVA - 60); // bl operand, relative to the bl instruction itself
+ }
+ Defined *Imp = nullptr; // symbol whose address is formed in x17
+ Chunk *Desc = nullptr; // chunk whose address is formed in x0
+ Defined *Helper = nullptr; // branch target
+// A word-sized slot of the delay import address table; initially holds the address of a thunk.
+class DelayAddressChunk : public Chunk {
+ explicit DelayAddressChunk(Chunk *C) : Thunk(C) {
+ Alignment = Config->Wordsize;
+ }
+ size_t getSize() const override { return Config->Wordsize; }
+ void writeTo(uint8_t *Buf) const override {
+ if (Config->is64()) {
+ write64le(Buf + OutputSectionOff, Thunk->getRVA() + Config->ImageBase);
+ } else {
+ uint32_t Bit = 0;
+ // Pointer to thumb code must have the LSB set, so adjust it.
+ if (Config->Machine == ARMNT)
+ Bit = 1;
+ write32le(Buf + OutputSectionOff, (Thunk->getRVA() + Config->ImageBase) | Bit);
+ }
+ }
+ void getBaserels(std::vector<Baserel> *Res) override {
+ Res->emplace_back(RVA); // the slot holds an absolute address (RVA + ImageBase)
+ }
+ Chunk *Thunk; // chunk whose address is written into this slot
+// Export table
+// Read Microsoft PE/COFF spec 5.3 for details.
+// A chunk for the export descriptor table.
+class ExportDirectoryChunk : public Chunk {
+ ExportDirectoryChunk(int I, int J, Chunk *D, Chunk *A, Chunk *N, Chunk *O)
+ : MaxOrdinal(I), NameTabSize(J), DLLName(D), AddressTab(A), NameTab(N),
+ OrdinalTab(O) {}
+ size_t getSize() const override {
+ return sizeof(export_directory_table_entry);
+ }
+ void writeTo(uint8_t *Buf) const override {
+ memset(Buf + OutputSectionOff, 0, getSize()); // fields not assigned below stay zero
+ auto *E = (export_directory_table_entry *)(Buf + OutputSectionOff);
+ E->NameRVA = DLLName->getRVA();
+ E->OrdinalBase = 0;
+ E->AddressTableEntries = MaxOrdinal + 1; // ordinals start at base 0, so the count is one past the maximum
+ E->NumberOfNamePointers = NameTabSize;
+ E->ExportAddressTableRVA = AddressTab->getRVA();
+ E->NamePointerRVA = NameTab->getRVA();
+ E->OrdinalTableRVA = OrdinalTab->getRVA();
+ }
+ uint16_t MaxOrdinal; // note: the int constructor argument is narrowed to 16 bits
+ uint16_t NameTabSize; // note: the int constructor argument is narrowed to 16 bits
+ Chunk *DLLName; // chunk whose RVA becomes NameRVA
+ Chunk *AddressTab; // chunk whose RVA becomes ExportAddressTableRVA
+ Chunk *NameTab; // chunk whose RVA becomes NamePointerRVA
+ Chunk *OrdinalTab; // chunk whose RVA becomes OrdinalTableRVA
+class AddressTableChunk : public Chunk { // Table of 4-byte entries indexed by export ordinal.
+ explicit AddressTableChunk(size_t MaxOrdinal) : Size(MaxOrdinal + 1) {}
+ size_t getSize() const override { return Size * 4; }
+ void writeTo(uint8_t *Buf) const override {
+ memset(Buf + OutputSectionOff, 0, getSize()); // slots for ordinals not in Config->Exports stay zero
+ for (const Export &E : Config->Exports) {
+ uint8_t *P = Buf + OutputSectionOff + E.Ordinal * 4;
+ uint32_t Bit = 0;
+ // Pointer to thumb code must have the LSB set, so adjust it.
+ if (Config->Machine == ARMNT && !E.Data)
+ Bit = 1;
+ if (E.ForwardChunk) {
+ write32le(P, E.ForwardChunk->getRVA() | Bit); // forwarded export: RVA of the forward chunk
+ } else {
+ write32le(P, cast<Defined>(E.Sym)->getRVA() | Bit);
+ }
+ }
+ }
+ size_t Size; // number of entries (maximum ordinal + 1)
+class NamePointersChunk : public Chunk { // Array of 4-byte RVAs, one per chunk passed to the constructor.
+ explicit NamePointersChunk(std::vector<Chunk *> &V) : Chunks(V) {}
+ size_t getSize() const override { return Chunks.size() * 4; }
+ void writeTo(uint8_t *Buf) const override {
+ uint8_t *P = Buf + OutputSectionOff;
+ for (Chunk *C : Chunks) {
+ write32le(P, C->getRVA());
+ P += 4;
+ }
+ }
+ std::vector<Chunk *> Chunks; // copy of the vector given to the constructor
+class ExportOrdinalChunk : public Chunk { // Array of 2-byte ordinals, one per export that is not Noname.
+ explicit ExportOrdinalChunk(size_t I) : Size(I) {}
+ size_t getSize() const override { return Size * 2; }
+ void writeTo(uint8_t *Buf) const override {
+ uint8_t *P = Buf + OutputSectionOff;
+ for (Export &E : Config->Exports) {
+ if (E.Noname)
+ continue;
+ write16le(P, E.Ordinal);
+ P += 2;
+ }
+ }
+ size_t Size; // number of 2-byte entries; callers are expected to pass the count of named exports
+} // anonymous namespace
+void IdataContents::create() { // Populates Dirs, Lookups, Addresses, Hints and DLLNames from Imports.
+ std::vector<std::vector<DefinedImportData *>> V = binImports(Imports);
+ // Create .idata contents for each DLL.
+ for (std::vector<DefinedImportData *> &Syms : V) {
+ // Create lookup and address tables. If they have external names,
+ // we need to create HintName chunks to store the names.
+ // If they don't (if they are import-by-ordinals), we store only
+ // ordinal values to the table.
+ size_t Base = Lookups.size(); // index of this DLL's first slot in Lookups/Addresses
+ for (DefinedImportData *S : Syms) {
+ uint16_t Ord = S->getOrdinal();
+ if (S->getExternalName().empty()) {
+ Lookups.push_back(make<OrdinalOnlyChunk>(Ord));
+ Addresses.push_back(make<OrdinalOnlyChunk>(Ord));
+ continue;
+ }
+ auto *C = make<HintNameChunk>(S->getExternalName(), Ord);
+ Lookups.push_back(make<LookupChunk>(C));
+ Addresses.push_back(make<LookupChunk>(C));
+ Hints.push_back(C);
+ }
+ // Terminate with null values.
+ Lookups.push_back(make<NullChunk>(Config->Wordsize));
+ Addresses.push_back(make<NullChunk>(Config->Wordsize));
+ for (int I = 0, E = Syms.size(); I < E; ++I)
+ Syms[I]->setLocation(Addresses[Base + I]); // each symbol is located at its own address-table slot
+ // Create the import table header.
+ DLLNames.push_back(make<StringChunk>(Syms[0]->getDLLName()));
+ auto *Dir = make<ImportDirectoryChunk>(DLLNames.back());
+ Dir->LookupTab = Lookups[Base];
+ Dir->AddressTab = Addresses[Base];
+ Dirs.push_back(Dir);
+ }
+ // Add null terminator.
+ Dirs.push_back(make<NullChunk>(sizeof(ImportDirectoryTableEntry)));
+std::vector<Chunk *> DelayLoadContents::getChunks() { // Returns Dirs, Names, HintNames and DLLNames concatenated in that order.
+ std::vector<Chunk *> V;
+ V.insert(V.end(), Dirs.begin(), Dirs.end());
+ V.insert(V.end(), Names.begin(), Names.end());
+ V.insert(V.end(), HintNames.begin(), HintNames.end());
+ V.insert(V.end(), DLLNames.begin(), DLLNames.end());
+ return V;
+std::vector<Chunk *> DelayLoadContents::getDataChunks() { // Returns ModuleHandles followed by Addresses.
+ std::vector<Chunk *> V;
+ V.insert(V.end(), ModuleHandles.begin(), ModuleHandles.end());
+ V.insert(V.end(), Addresses.begin(), Addresses.end());
+ return V;
+uint64_t DelayLoadContents::getDirSize() { // Size in bytes of all entries in Dirs, including the null terminator added by create().
+ return Dirs.size() * sizeof(delay_import_directory_table_entry);
+void DelayLoadContents::create(Defined *H) { // Builds all delay-load chunks from Imports; H is stored as the helper the thunks call.
+ Helper = H;
+ std::vector<std::vector<DefinedImportData *>> V = binImports(Imports);
+ // Create .didat contents for each DLL.
+ for (std::vector<DefinedImportData *> &Syms : V) {
+ // Create the delay import table header.
+ DLLNames.push_back(make<StringChunk>(Syms[0]->getDLLName()));
+ auto *Dir = make<DelayDirectoryChunk>(DLLNames.back());
+ size_t Base = Addresses.size(); // index of this DLL's first slot in Addresses
+ for (DefinedImportData *S : Syms) {
+ Chunk *T = newThunkChunk(S, Dir);
+ auto *A = make<DelayAddressChunk>(T);
+ Addresses.push_back(A);
+ Thunks.push_back(T);
+ StringRef ExtName = S->getExternalName();
+ if (ExtName.empty()) {
+ Names.push_back(make<OrdinalOnlyChunk>(S->getOrdinal()));
+ } else {
+ auto *C = make<HintNameChunk>(ExtName, 0);
+ Names.push_back(make<LookupChunk>(C));
+ HintNames.push_back(C);
+ }
+ }
+ // Terminate with null values.
+ Addresses.push_back(make<NullChunk>(8)); // NOTE(review): fixed 8 bytes here, whereas IdataContents::create uses Config->Wordsize; confirm this is intended on 32-bit targets
+ Names.push_back(make<NullChunk>(8));
+ for (int I = 0, E = Syms.size(); I < E; ++I)
+ Syms[I]->setLocation(Addresses[Base + I]); // each symbol is located at its own address-table slot
+ auto *MH = make<NullChunk>(8);
+ MH->Alignment = 8;
+ ModuleHandles.push_back(MH);
+ // Fill the delay import table header fields.
+ Dir->ModuleHandle = MH;
+ Dir->AddressTab = Addresses[Base];
+ Dir->NameTab = Names[Base]; // NOTE(review): indexes Names with the offset taken from Addresses; both grow by one per symbol plus one terminator, so the sizes appear to stay in step
+ Dirs.push_back(Dir);
+ }
+ // Add null terminator.
+ Dirs.push_back(make<NullChunk>(sizeof(delay_import_directory_table_entry)));
+Chunk *DelayLoadContents::newThunkChunk(DefinedImportData *S, Chunk *Dir) { // Creates the thunk chunk matching Config->Machine.
+ switch (Config->Machine) {
+ case AMD64:
+ return make<ThunkChunkX64>(S, Dir, Helper);
+ case I386:
+ return make<ThunkChunkX86>(S, Dir, Helper);
+ case ARMNT:
+ return make<ThunkChunkARM>(S, Dir, Helper);
+ case ARM64:
+ return make<ThunkChunkARM64>(S, Dir, Helper);
+ default:
+ llvm_unreachable("unsupported machine type");
+ }
+EdataContents::EdataContents() { // Builds every export-table chunk from Config->Exports and stores them in Chunks.
+ uint16_t MaxOrdinal = 0;
+ for (Export &E : Config->Exports)
+ MaxOrdinal = std::max(MaxOrdinal, E.Ordinal);
+ auto *DLLName = make<StringChunk>(sys::path::filename(Config->OutputFile));
+ auto *AddressTab = make<AddressTableChunk>(MaxOrdinal);
+ std::vector<Chunk *> Names;
+ for (Export &E : Config->Exports)
+ if (!E.Noname)
+ Names.push_back(make<StringChunk>(E.ExportName));
+ std::vector<Chunk *> Forwards;
+ for (Export &E : Config->Exports) {
+ if (E.ForwardTo.empty())
+ continue;
+ E.ForwardChunk = make<StringChunk>(E.ForwardTo); // read back later by AddressTableChunk::writeTo
+ Forwards.push_back(E.ForwardChunk);
+ }
+ auto *NameTab = make<NamePointersChunk>(Names);
+ auto *OrdinalTab = make<ExportOrdinalChunk>(Names.size());
+ auto *Dir = make<ExportDirectoryChunk>(MaxOrdinal, Names.size(), DLLName,
+ AddressTab, NameTab, OrdinalTab);
+ Chunks.push_back(Dir); // the directory comes first; getRVA() in the header reads Chunks[0]
+ Chunks.push_back(DLLName);
+ Chunks.push_back(AddressTab);
+ Chunks.push_back(NameTab);
+ Chunks.push_back(OrdinalTab);
+ Chunks.insert(Chunks.end(), Names.begin(), Names.end());
+ Chunks.insert(Chunks.end(), Forwards.begin(), Forwards.end());
+} // namespace coff
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/COFF/DLL.h b/contrib/llvm/tools/lld/COFF/DLL.h
new file mode 100644
index 000000000000..a298271e2c0d
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/DLL.h
@@ -0,0 +1,82 @@
+//===- DLL.h ----------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#ifndef LLD_COFF_DLL_H
+#define LLD_COFF_DLL_H
+#include "Chunks.h"
+#include "Symbols.h"
+namespace lld {
+namespace coff {
+// Windows-specific.
+// IdataContents creates all chunks for the DLL import table.
+// You are supposed to call add() to add symbols and then
+// call create() to populate the chunk vectors.
+class IdataContents {
+ void add(DefinedImportData *Sym) { Imports.push_back(Sym); }
+ bool empty() { return Imports.empty(); }
+ void create();
+ std::vector<DefinedImportData *> Imports; // symbols registered through add()
+ std::vector<Chunk *> Dirs; // import directory entries plus a null terminator
+ std::vector<Chunk *> Lookups; // lookup table slots, null-terminated per DLL
+ std::vector<Chunk *> Addresses; // address table slots, parallel to Lookups
+ std::vector<Chunk *> Hints; // hint/name entries for imports that have an external name
+ std::vector<Chunk *> DLLNames; // one name string chunk per DLL
+// Windows-specific.
+// DelayLoadContents creates all chunks for the delay-load DLL import table.
+class DelayLoadContents {
+ void add(DefinedImportData *Sym) { Imports.push_back(Sym); }
+ bool empty() { return Imports.empty(); }
+ void create(Defined *Helper);
+ std::vector<Chunk *> getChunks();
+ std::vector<Chunk *> getDataChunks();
+ ArrayRef<Chunk *> getCodeChunks() { return Thunks; }
+ uint64_t getDirRVA() { return Dirs[0]->getRVA(); } // Dirs is filled by create(), so call this only afterwards
+ uint64_t getDirSize();
+ Chunk *newThunkChunk(DefinedImportData *S, Chunk *Dir);
+ Defined *Helper; // set by create(); passed to every thunk chunk
+ std::vector<DefinedImportData *> Imports; // symbols registered through add()
+ std::vector<Chunk *> Dirs; // delay import directory entries plus a null terminator
+ std::vector<Chunk *> ModuleHandles; // one 8-byte zeroed chunk per DLL
+ std::vector<Chunk *> Addresses; // address table slots, null-terminated per DLL
+ std::vector<Chunk *> Names; // name table slots, null-terminated per DLL
+ std::vector<Chunk *> HintNames; // hint/name entries for imports that have an external name
+ std::vector<Chunk *> Thunks; // one thunk per imported symbol
+ std::vector<Chunk *> DLLNames; // one name string chunk per DLL
+// Windows-specific.
+// EdataContents creates all chunks for the DLL export table.
+class EdataContents {
+ EdataContents();
+ std::vector<Chunk *> Chunks; // filled by the constructor; the export directory chunk is first
+ uint64_t getRVA() { return Chunks[0]->getRVA(); }
+ uint64_t getSize() {
+ return Chunks.back()->getRVA() + Chunks.back()->getSize() - getRVA(); // distance from the first chunk's start to the last chunk's end
+ }
+} // namespace coff
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/COFF/Driver.cpp b/contrib/llvm/tools/lld/COFF/Driver.cpp
new file mode 100644
index 000000000000..2e4b1e6d3147
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/Driver.cpp
@@ -0,0 +1,1681 @@
+//===- Driver.cpp ---------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Driver.h"
+#include "Config.h"
+#include "ICF.h"
+#include "InputFiles.h"
+#include "MarkLive.h"
+#include "MinGW.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "Writer.h"
+#include "lld/Common/Args.h"
+#include "lld/Common/Driver.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+#include "lld/Common/Timer.h"
+#include "lld/Common/Version.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Object/ArchiveWriter.h"
+#include "llvm/Object/COFFImportFile.h"
+#include "llvm/Object/COFFModuleDefinition.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/TarWriter.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ToolDrivers/llvm-lib/LibDriver.h"
+#include <algorithm>
+#include <future>
+#include <memory>
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::COFF;
+using llvm::sys::Process;
+namespace lld {
+namespace coff {
+static Timer InputFileTimer("Input File Reading", Timer::root());
+Configuration *Config;
+LinkerDriver *Driver;
+bool link(ArrayRef<const char *> Args, bool CanExitEarly, raw_ostream &Diag) { // Runs one COFF link; returns true when no errors were counted.
+ errorHandler().LogName = args::getFilenameWithoutExe(Args[0]);
+ errorHandler().ErrorOS = &Diag;
+ errorHandler().ColorDiagnostics = Diag.has_colors();
+ errorHandler().ErrorLimitExceededMsg =
+ "too many errors emitted, stopping now"
+ " (use /errorlimit:0 to see all errors)";
+ errorHandler().ExitEarly = CanExitEarly;
+ Config = make<Configuration>(); // fresh global state for this invocation
+ Symtab = make<SymbolTable>();
+ Driver = make<LinkerDriver>();
+ Driver->link(Args);
+ // Call exit() if we can to avoid calling destructors.
+ if (CanExitEarly)
+ exitLld(errorCount() ? 1 : 0);
+ freeArena(); // reached only when CanExitEarly is false; presumably releases the make<> allocations
+ ObjFile::Instances.clear();
+ ImportFile::Instances.clear();
+ BitcodeFile::Instances.clear();
+ return !errorCount();
+// Drop directory components and replace extension with ".exe" or ".dll".
+static std::string getOutputPath(StringRef Path) {
+ auto P = Path.find_last_of("\\/"); // accepts both separator styles
+ StringRef S = (P == StringRef::npos) ? Path : Path.substr(P + 1);
+ const char* E = Config->DLL ? ".dll" : ".exe";
+ return (S.substr(0, S.rfind('.')) + E).str(); // with no '.', rfind yields npos and the whole name is kept
+// ErrorOr is not default constructible, so it cannot be used as the type
+// parameter of a future.
+// FIXME: We could open the file in createFutureForFile and avoid needing to
+// return an error here, but for the moment that would cost us a file descriptor
+// (a limited resource on Windows) for the duration that the future is pending.
+typedef std::pair<std::unique_ptr<MemoryBuffer>, std::error_code> MBErrPair; // first: the buffer (null on failure); second: the error, default-constructed on success
+// Create a std::future that opens and maps a file using the best strategy for
+// the host platform.
+static std::future<MBErrPair> createFutureForFile(std::string Path) {
+#if _WIN32
+ // On Windows, file I/O is relatively slow so it is best to do this
+ // asynchronously.
+ auto Strategy = std::launch::async;
+ auto Strategy = std::launch::deferred; // NOTE(review): this looks like the non-Windows alternative; the preprocessor lines separating the two declarations seem to be missing from this rendering
+ return std::async(Strategy, [=]() {
+ auto MBOrErr = MemoryBuffer::getFile(Path,
+ /*FileSize*/ -1,
+ /*RequiresNullTerminator*/ false);
+ if (!MBOrErr)
+ return MBErrPair{nullptr, MBOrErr.getError()};
+ return MBErrPair{std::move(*MBOrErr), std::error_code()};
+ });
+// Symbol names are mangled by prepending "_" on x86.
+static StringRef mangle(StringRef Sym) {
+ assert(Config->Machine != IMAGE_FILE_MACHINE_UNKNOWN); // the machine type must already be known
+ if (Config->Machine == I386)
+ return Saver.save("_" + Sym); // presumably Saver owns the new string so the returned StringRef stays valid; verify
+ return Sym; // other machines: returned unchanged
+static bool findUnderscoreMangle(StringRef Sym) { // True when the mangled form of Sym resolves to a symbol that is not Undefined.
+ StringRef Entry = Symtab->findMangle(mangle(Sym));
+ return !Entry.empty() && !isa<Undefined>(Symtab->find(Entry));
+MemoryBufferRef LinkerDriver::takeBuffer(std::unique_ptr<MemoryBuffer> MB) { // Takes ownership of MB and returns a non-owning reference to it.
+ MemoryBufferRef MBRef = *MB;
+ make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take ownership
+ if (Driver->Tar) // when a tar writer is set, also record the file contents in it
+ Driver->Tar->append(relativeToRoot(MBRef.getBufferIdentifier()),
+ MBRef.getBuffer());
+ return MBRef;
+void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> MB, // Takes ownership of MB and registers it according to its detected file type.
+ bool WholeArchive) {
+ StringRef Filename = MB->getBufferIdentifier();
+ MemoryBufferRef MBRef = takeBuffer(std::move(MB));
+ FilePaths.push_back(Filename);
+ // File type is detected by contents, not by file extension.
+ switch (identify_magic(MBRef.getBuffer())) {
+ case file_magic::windows_resource:
+ Resources.push_back(MBRef);
+ break;
+ case file_magic::archive:
+ if (WholeArchive) { // add every member immediately
+ std::unique_ptr<Archive> File =
+ CHECK(Archive::create(MBRef), Filename + ": failed to parse archive");
+ for (MemoryBufferRef M : getArchiveMembers(File.get()))
+ addArchiveBuffer(M, "<whole-archive>", Filename);
+ return;
+ }
+ Symtab->addFile(make<ArchiveFile>(MBRef));
+ break;
+ case file_magic::bitcode:
+ Symtab->addFile(make<BitcodeFile>(MBRef));
+ break;
+ case file_magic::coff_object:
+ case file_magic::coff_import_library:
+ Symtab->addFile(make<ObjFile>(MBRef));
+ break;
+ case file_magic::coff_cl_gl_object:
+ error(Filename + ": is not a native COFF file. Recompile without /GL");
+ break;
+ case file_magic::pecoff_executable:
+ if (Filename.endswith_lower(".dll")) {
+ error(Filename + ": bad file type. Did you specify a DLL instead of an "
+ "import library?");
+ break;
+ } // NOTE(review): no break here, so a PE executable not named *.dll falls through to the default error; looks intentional, confirm
+ default:
+ error(MBRef.getBufferIdentifier() + ": unknown file type");
+ break;
+ }
+void LinkerDriver::enqueuePath(StringRef Path, bool WholeArchive) { // Starts loading Path and queues a task that adds the resulting buffer.
+ auto Future =
+ std::make_shared<std::future<MBErrPair>>(createFutureForFile(Path)); // shared_ptr so the by-copy lambda below can capture it
+ std::string PathStr = Path; // owned copy captured by the task for its error message
+ enqueueTask([=]() {
+ auto MBOrErr = Future->get();
+ if (MBOrErr.second)
+ error("could not open " + PathStr + ": " + MBOrErr.second.message());
+ else
+ Driver->addBuffer(std::move(MBOrErr.first), WholeArchive);
+ });
+void LinkerDriver::addArchiveBuffer(MemoryBufferRef MB, StringRef SymName, // Adds one archive member to the symbol table, choosing the file class by its contents.
+ StringRef ParentName) {
+ file_magic Magic = identify_magic(MB.getBuffer());
+ if (Magic == file_magic::coff_import_library) {
+ Symtab->addFile(make<ImportFile>(MB)); // import files are added without ParentName and without the log line below
+ return;
+ }
+ InputFile *Obj;
+ if (Magic == file_magic::coff_object) {
+ Obj = make<ObjFile>(MB);
+ } else if (Magic == file_magic::bitcode) {
+ Obj = make<BitcodeFile>(MB);
+ } else {
+ error("unknown file type: " + MB.getBufferIdentifier());
+ return;
+ }
+ Obj->ParentName = ParentName;
+ Symtab->addFile(Obj);
+ log("Loaded " + toString(Obj) + " for " + SymName);
+void LinkerDriver::enqueueArchiveMember(const Archive::Child &C, // Queues a task that adds archive member C, requested for symbol SymName.
+ StringRef SymName,
+ StringRef ParentName) {
+ if (!C.getParent()->isThin()) { // regular archive: use the member's buffer directly
+ MemoryBufferRef MB = CHECK(
+ C.getMemoryBufferRef(),
+ "could not get the buffer for the member defining symbol " + SymName);
+ enqueueTask([=]() { Driver->addArchiveBuffer(MB, SymName, ParentName); });
+ return;
+ }
+ auto Future = std::make_shared<std::future<MBErrPair>>(createFutureForFile( // thin archive: load the member from the file named by getFullName()
+ CHECK(C.getFullName(),
+ "could not get the filename for the member defining symbol " +
+ SymName)));
+ enqueueTask([=]() {
+ auto MBOrErr = Future->get();
+ if (MBOrErr.second)
+ fatal("could not get the buffer for the member defining " + SymName +
+ ": " + MBOrErr.second.message());
+ Driver->addArchiveBuffer(takeBuffer(std::move(MBOrErr.first)), SymName,
+ ParentName);
+ });
+static bool isDecorated(StringRef Sym) { // True when Sym starts with '@' or '?', contains "@@", or (outside MinGW mode) contains any '@'.
+ return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
+ (!Config->MinGW && Sym.contains('@'));
+// Parses .drectve section contents and applies each embedded option.
+// Files named by /defaultlib are enqueued for loading.
+void LinkerDriver::parseDirectives(StringRef S) {
+ ArgParser Parser;
+ // .drectve is always tokenized using Windows shell rules.
+ // /EXPORT: option can appear too many times, processing in fastpath.
+ opt::InputArgList Args;
+ std::vector<StringRef> Exports;
+ std::tie(Args, Exports) = Parser.parseDirectives(S);
+ for (StringRef E : Exports) {
+ // If a common header file contains dllexported function
+ // declarations, many object files may end up with having the
+ // same /EXPORT options. In order to save cost of parsing them,
+ // we dedup them first.
+ if (!DirectivesExports.insert(E).second)
+ continue;
+ Export Exp = parseExport(E);
+ if (Config->Machine == I386 && Config->MinGW) { // in this mode, undecorated names get a leading "_"
+ if (!isDecorated(Exp.Name))
+ Exp.Name = Saver.save("_" + Exp.Name);
+ if (!Exp.ExtName.empty() && !isDecorated(Exp.ExtName))
+ Exp.ExtName = Saver.save("_" + Exp.ExtName);
+ }
+ Exp.Directives = true; // marks the export as coming from a .drectve section
+ Config->Exports.push_back(Exp);
+ }
+ for (auto *Arg : Args) {
+ switch (Arg->getOption().getUnaliasedOption().getID()) {
+ case OPT_aligncomm:
+ parseAligncomm(Arg->getValue());
+ break;
+ case OPT_alternatename:
+ parseAlternateName(Arg->getValue());
+ break;
+ case OPT_defaultlib:
+ if (Optional<StringRef> Path = findLib(Arg->getValue()))
+ enqueuePath(*Path, false);
+ break;
+ case OPT_entry:
+ Config->Entry = addUndefined(mangle(Arg->getValue()));
+ break;
+ case OPT_failifmismatch:
+ checkFailIfMismatch(Arg->getValue());
+ break;
+ case OPT_incl:
+ addUndefined(Arg->getValue());
+ break;
+ case OPT_merge:
+ parseMerge(Arg->getValue());
+ break;
+ case OPT_nodefaultlib:
+ Config->NoDefaultLibs.insert(doFindLib(Arg->getValue()));
+ break;
+ case OPT_section:
+ parseSection(Arg->getValue());
+ break;
+ case OPT_subsystem:
+ parseSubsystem(Arg->getValue(), &Config->Subsystem,
+ &Config->MajorOSVersion, &Config->MinorOSVersion);
+ break;
+ case OPT_editandcontinue: // these options are accepted and ignored
+ case OPT_fastfail:
+ case OPT_guardsym:
+ case OPT_natvis:
+ case OPT_throwingnew:
+ break;
+ default:
+ error(Arg->getSpelling() + " is not allowed in .drectve");
+ }
+ }
+// Looks Filename up in each directory of SearchPaths, in order. A name that
+// contains a path separator is returned untouched. If the name contains no
+// '.', "<name>.obj" is tried in each directory as well. When nothing matches,
+// Filename itself is returned, so the result is not guaranteed to exist.
+StringRef LinkerDriver::doFindFile(StringRef Filename) {
+  // Names with a directory component are never searched for.
+  if (Filename.find_first_of("/\\") != StringRef::npos)
+    return Filename;
+  const bool TryObjSuffix = !Filename.contains('.');
+  for (StringRef SearchDir : SearchPaths) {
+    SmallString<128> Candidate = SearchDir;
+    sys::path::append(Candidate, Filename);
+    if (sys::fs::exists(Candidate.str()))
+      return Saver.save(Candidate.str());
+    if (!TryObjSuffix)
+      continue;
+    Candidate.append(".obj");
+    if (sys::fs::exists(Candidate.str()))
+      return Saver.save(Candidate.str());
+  }
+  return Filename;
+}
+// Returns the filesystem unique ID of Path, or None if the lookup reports an
+// error.
+static Optional<sys::fs::UniqueID> getUniqueID(StringRef Path) {
+  sys::fs::UniqueID ID;
+  std::error_code EC = sys::fs::getUniqueID(Path, ID);
+  if (EC)
+    return None;
+  return ID;
+}
+// Resolves Filename through the search paths. A file whose unique ID is
+// already recorded in VisitedFiles yields None, so the same file is never
+// returned twice. The file name of a returned ".lib" path is also recorded
+// in VisitedLibs.
+Optional<StringRef> LinkerDriver::findFile(StringRef Filename) {
+  StringRef Resolved = doFindFile(Filename);
+  Optional<sys::fs::UniqueID> ID = getUniqueID(Resolved);
+  if (ID && !VisitedFiles.insert(*ID).second)
+    return None;
+  if (Resolved.endswith_lower(".lib"))
+    VisitedLibs.insert(sys::path::filename(Resolved));
+  return Resolved;
+}
+// MinGW specific. Rewrites a library name such as foo.lib to libfoo.a and
+// looks that name up in the search paths. Names that contain a path
+// separator are returned unchanged.
+StringRef LinkerDriver::doFindLibMinGW(StringRef Filename) {
+  if (Filename.find_first_of("/\\") != StringRef::npos)
+    return Filename;
+  SmallString<128> Stem = Filename;
+  sys::path::replace_extension(Stem, ".a");
+  return doFindFile(Saver.save("lib" + Stem.str()));
+}
+// Looks a library up in the search paths, appending ".lib" first when the
+// name contains no '.'. In MinGW mode, a lookup that merely hands the name
+// back is retried with the MinGW library naming scheme.
+StringRef LinkerDriver::doFindLib(StringRef Filename) {
+  if (!Filename.contains('.'))
+    Filename = Saver.save(Filename + ".lib");
+  StringRef Found = doFindFile(Filename);
+  // doFindFile() returns its argument when the search found nothing.
+  const bool Unresolved = (Found == Filename);
+  if (Unresolved && Config->MinGW)
+    return doFindLibMinGW(Filename);
+  return Found;
+}
+// Resolves a library path. Returns None when Config->NoDefaultLibAll is set,
+// when this name (compared in lower case) was requested before, when the
+// resolved path is listed in Config->NoDefaultLibs, or when the resolved
+// file was already visited. Otherwise returns the resolved path.
+Optional<StringRef> LinkerDriver::findLib(StringRef Filename) {
+  if (Config->NoDefaultLibAll)
+    return None;
+  const bool FirstRequest = VisitedLibs.insert(Filename.lower()).second;
+  if (!FirstRequest)
+    return None;
+  StringRef Resolved = doFindLib(Filename);
+  if (Config->NoDefaultLibs.count(Resolved) != 0)
+    return None;
+  Optional<sys::fs::UniqueID> ID = getUniqueID(Resolved);
+  if (ID && !VisitedFiles.insert(*ID).second)
+    return None;
+  return Resolved;
+}
+// Appends each ';'-separated entry of the LIB environment variable to
+// SearchPaths. Does nothing when LIB is not set.
+void LinkerDriver::addLibSearchPaths() {
+  Optional<std::string> LibEnv = Process::GetEnv("LIB");
+  if (!LibEnv)
+    return;
+  // Keep the text alive in Saver; SearchPaths stores StringRefs into it.
+  for (StringRef Rest = Saver.save(*LibEnv); !Rest.empty();) {
+    std::pair<StringRef, StringRef> HeadTail = Rest.split(';');
+    SearchPaths.push_back(HeadTail.first);
+    Rest = HeadTail.second;
+  }
+}
+Symbol *LinkerDriver::addUndefined(StringRef Name) {
+ Symbol *B = Symtab->addUndefined(Name);
+ if (!B->IsGCRoot) {
+ B->IsGCRoot = true;
+ Config->GCRoot.push_back(B);
+ }
+ return B;
+// Windows specific -- picks the default entry point.
+// The user-defined functions main, wmain, WinMain and wWinMain correspond to
+// the startup routines mainCRTStartup, wmainCRTStartup, WinMainCRTStartup and
+// wWinMainCRTStartup. The subsystem selects the GUI or console pair, and the
+// wide variant is chosen only when the wide function is found and the narrow
+// one is not. In MinGW mode only the subsystem is consulted.
+StringRef LinkerDriver::findDefaultEntry() {
+  assert(Config->Subsystem != IMAGE_SUBSYSTEM_UNKNOWN &&
+         "must handle /subsystem before calling this");
+  const bool IsGUI = Config->Subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI;
+  if (Config->MinGW) {
+    const char *Startup = IsGUI ? "WinMainCRTStartup" : "mainCRTStartup";
+    return mangle(Startup);
+  }
+  if (IsGUI) {
+    if (findUnderscoreMangle("wWinMain")) {
+      if (findUnderscoreMangle("WinMain"))
+        warn("found both wWinMain and WinMain; using latter");
+      else
+        return mangle("wWinMainCRTStartup");
+    }
+    return mangle("WinMainCRTStartup");
+  }
+  if (findUnderscoreMangle("wmain")) {
+    if (findUnderscoreMangle("main"))
+      warn("found both wmain and main; using latter");
+    else
+      return mangle("wmainCRTStartup");
+  }
+  return mangle("mainCRTStartup");
+}
+// Infers the subsystem from Config->DLL, Config->MinGW and from which of the
+// user entry points main, wmain, WinMain and wWinMain can be found.
+// NOTE(review): this listing appears to be missing statements -- several `if`
+// conditions below have no visible body (presumably return statements).
+// Compare against the upstream source before changing anything here.
+WindowsSubsystem LinkerDriver::inferSubsystem() {
+ if (Config->DLL)
+ if (Config->MinGW)
+ // Note that link.exe infers the subsystem from the presence of these
+ // functions even if /entry: or /nodefaultlib are passed which causes them
+ // to not be called.
+ bool HaveMain = findUnderscoreMangle("main");
+ bool HaveWMain = findUnderscoreMangle("wmain");
+ bool HaveWinMain = findUnderscoreMangle("WinMain");
+ bool HaveWWinMain = findUnderscoreMangle("wWinMain");
+ if (HaveMain || HaveWMain) {
+ // Both a console-style and a GUI-style entry point exist; warn that the
+ // console subsystem is the one being chosen.
+ if (HaveWinMain || HaveWWinMain) {
+ warn(std::string("found ") + (HaveMain ? "main" : "wmain") + " and " +
+ (HaveWinMain ? "WinMain" : "wWinMain") +
+ "; defaulting to /subsystem:console");
+ }
+ }
+ if (HaveWinMain || HaveWWinMain)
+// Returns the default image base: 0x180000000 for a DLL or 0x140000000
+// otherwise when Config->is64() holds; 0x10000000 for a DLL or 0x400000
+// otherwise when it does not.
+static uint64_t getDefaultImageBase() {
+  const bool IsDLL = Config->DLL;
+  if (!Config->is64())
+    return IsDLL ? 0x10000000 : 0x400000;
+  return IsDLL ? 0x180000000 : 0x140000000;
+}
+// Builds response-file text, one entry per line. Arguments are copied
+// verbatim except /linkrepro, positional inputs, /defaultlib, /libpath and
+// the /manifest* options, which are dropped. Each search path is then
+// emitted as a quoted /libpath: option and each file path as a quoted name,
+// both rewritten by relativeToRoot().
+static std::string createResponseFile(const opt::InputArgList &Args,
+                                      ArrayRef<StringRef> FilePaths,
+                                      ArrayRef<StringRef> SearchPaths) {
+  SmallString<0> Buffer;
+  raw_svector_ostream Out(Buffer);
+  for (auto *Arg : Args) {
+    const unsigned ID = Arg->getOption().getID();
+    const bool Dropped =
+        ID == OPT_linkrepro || ID == OPT_INPUT || ID == OPT_defaultlib ||
+        ID == OPT_libpath || ID == OPT_manifest || ID == OPT_manifest_colon ||
+        ID == OPT_manifestdependency || ID == OPT_manifestfile ||
+        ID == OPT_manifestinput || ID == OPT_manifestuac;
+    if (!Dropped)
+      Out << toString(*Arg) << "\n";
+  }
+  for (StringRef Dir : SearchPaths)
+    Out << "/libpath:" << quote(relativeToRoot(Dir)) << "\n";
+  for (StringRef File : FilePaths)
+    Out << quote(relativeToRoot(File)) << "\n";
+  return Buffer.str();
+}
+// The kinds of debug output selectable with /debug. parseDebugKind() never
+// returns Unknown or FastLink; it maps them to None and Full respectively.
+enum class DebugKind {
+  Unknown,
+  None,
+  Full,
+  FastLink,
+  GHash,
+  Dwarf,
+  Symtab
+};
+// Determines the debug kind from the last /debug or /debug:<kind> option.
+// No option gives None, and a bare /debug gives Full. The value is matched
+// case-insensitively. "fastlink" warns and falls back to Full; an
+// unrecognized value is reported as an error and gives None.
+static DebugKind parseDebugKind(const opt::InputArgList &Args) {
+  auto *A = Args.getLastArg(OPT_debug, OPT_debug_opt);
+  if (A == nullptr)
+    return DebugKind::None;
+  if (A->getNumValues() == 0)
+    return DebugKind::Full;
+  const DebugKind Kind = StringSwitch<DebugKind>(A->getValue())
+                             .CaseLower("none", DebugKind::None)
+                             .CaseLower("full", DebugKind::Full)
+                             .CaseLower("fastlink", DebugKind::FastLink)
+                             // LLD extensions
+                             .CaseLower("ghash", DebugKind::GHash)
+                             .CaseLower("dwarf", DebugKind::Dwarf)
+                             .CaseLower("symtab", DebugKind::Symtab)
+                             .Default(DebugKind::Unknown);
+  switch (Kind) {
+  case DebugKind::FastLink:
+    warn("/debug:fastlink unsupported; using /debug:full");
+    return DebugKind::Full;
+  case DebugKind::Unknown:
+    error("/debug: unknown option: " + Twine(A->getValue()));
+    return DebugKind::None;
+  default:
+    return Kind;
+  }
+}
+// Computes the DebugType bit mask for the link.
+// With /debugtype:<list>, the comma-separated list is matched
+// case-insensitively against "cv", "pdata" and "fixup"; unknown entries
+// produce a warning and are skipped, and empty entries are ignored.
+// Without /debugtype, the default is CV, plus PData when /driver is given
+// and Fixup when /profile is given.
+static unsigned parseDebugTypes(const opt::InputArgList &Args) {
+  unsigned DebugTypes = static_cast<unsigned>(DebugType::None);
+  if (auto *A = Args.getLastArg(OPT_debugtype)) {
+    SmallVector<StringRef, 3> Types;
+    // Split the option's value, not its spelling (the spelling is the option
+    // name itself). MaxSplit must be passed explicitly: the fourth parameter
+    // is KeepEmpty, and a bare `false` in third position would be taken as
+    // MaxSplit == 0, which disables splitting altogether.
+    StringRef(A->getValue())
+        .split(Types, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+    for (StringRef Type : Types) {
+      unsigned V = StringSwitch<unsigned>(Type.lower())
+                       .Case("cv", static_cast<unsigned>(DebugType::CV))
+                       .Case("pdata", static_cast<unsigned>(DebugType::PData))
+                       .Case("fixup", static_cast<unsigned>(DebugType::Fixup))
+                       .Default(0);
+      if (V == 0) {
+        // Name the offending entry rather than the whole list.
+        warn("/debugtype: unknown option: " + Twine(Type));
+        continue;
+      }
+      DebugTypes |= V;
+    }
+    return DebugTypes;
+  }
+  // Default debug types
+  DebugTypes = static_cast<unsigned>(DebugType::CV);
+  if (Args.hasArg(OPT_driver))
+    DebugTypes |= static_cast<unsigned>(DebugType::PData);
+  if (Args.hasArg(OPT_profile))
+    DebugTypes |= static_cast<unsigned>(DebugType::Fixup);
+  return DebugTypes;
+}
+// Returns the map file path selected by the last /lldmap option, or an empty
+// string when none is given. The form that carries a file name returns that
+// name; the bare form derives the path from Config->OutputFile by replacing
+// everything from its last '.' with ".map".
+static std::string getMapFile(const opt::InputArgList &Args) {
+  auto *MapArg = Args.getLastArg(OPT_lldmap, OPT_lldmap_file);
+  if (MapArg == nullptr)
+    return "";
+  if (MapArg->getOption().getID() == OPT_lldmap_file)
+    return MapArg->getValue();
+  assert(MapArg->getOption().getID() == OPT_lldmap);
+  StringRef Output = Config->OutputFile;
+  StringRef Stem = Output.substr(0, Output.rfind('.'));
+  return (Stem + ".map").str();
+}
+// Returns the import library path: Config->Implib when it is set, otherwise
+// Config->OutputFile with its extension replaced by ".lib".
+static std::string getImplibPath() {
+  if (Config->Implib.empty()) {
+    SmallString<128> Derived = StringRef(Config->OutputFile);
+    sys::path::replace_extension(Derived, ".lib");
+    return Derived.str();
+  }
+  return Config->Implib;
+}
+// The import name is calculated as follows:
+// | LIBRARY w/ ext | LIBRARY w/o ext | no LIBRARY
+// -----+----------------+---------------------+------------------
+// LINK | {value} | {value}.{.dll/.exe} | {output name}
+// LIB | {value} | {value}.dll | {output name}.dll
+// "LIBRARY" is Config->ImportName and "output name" is the file name part of
+// Config->OutputFile; AsLib selects the LIB row.
+static std::string getImportName(bool AsLib) {
+  SmallString<128> Name;
+  if (!Config->ImportName.empty()) {
+    Name.assign(Config->ImportName);
+    if (!sys::path::has_extension(Name)) {
+      const char *Ext = (Config->DLL || AsLib) ? ".dll" : ".exe";
+      sys::path::replace_extension(Name, Ext);
+    }
+    return Name.str();
+  }
+  Name.assign(sys::path::filename(Config->OutputFile));
+  if (AsLib)
+    sys::path::replace_extension(Name, ".dll");
+  return Name.str();
+}
+// Writes the import library for Config->Exports to the path returned by
+// getImplibPath(). In non-incremental mode, or when no existing file can be
+// read at that path, the library is written directly. Otherwise it is first
+// written to a temporary file, which replaces the existing library only if
+// the contents differ; an identical temporary file is removed.
+// Errors from writing or renaming are reported through error().
+static void createImportLibrary(bool AsLib) {
+ // Convert the linker's Export records into the COFFShortExport form that
+ // writeImportLibrary() takes.
+ std::vector<COFFShortExport> Exports;
+ for (Export &E1 : Config->Exports) {
+ COFFShortExport E2;
+ E2.Name = E1.Name;
+ E2.SymbolName = E1.SymbolName;
+ E2.ExtName = E1.ExtName;
+ E2.Ordinal = E1.Ordinal;
+ E2.Noname = E1.Noname;
+ E2.Data = E1.Data;
+ E2.Private = E1.Private;
+ E2.Constant = E1.Constant;
+ Exports.push_back(E2);
+ }
+ // Reports every error contained in E via error().
+ auto HandleError = [](Error &&E) {
+ handleAllErrors(std::move(E),
+ [](ErrorInfoBase &EIB) { error(EIB.message()); });
+ };
+ std::string LibName = getImportName(AsLib);
+ std::string Path = getImplibPath();
+ if (!Config->Incremental) {
+ HandleError(writeImportLibrary(LibName, Path, Exports, Config->Machine,
+ Config->MinGW));
+ return;
+ }
+ // If the import library already exists, replace it only if the contents
+ // have changed.
+ ErrorOr<std::unique_ptr<MemoryBuffer>> OldBuf = MemoryBuffer::getFile(
+ Path, /*FileSize*/ -1, /*RequiresNullTerminator*/ false);
+ // No readable existing library: there is nothing to compare against.
+ if (!OldBuf) {
+ HandleError(writeImportLibrary(LibName, Path, Exports, Config->Machine,
+ Config->MinGW));
+ return;
+ }
+ SmallString<128> TmpName;
+ if (std::error_code EC =
+ sys::fs::createUniqueFile(Path + ".tmp-%%%%%%%%.lib", TmpName))
+ fatal("cannot create temporary file for import library " + Path + ": " +
+ EC.message());
+ if (Error E = writeImportLibrary(LibName, TmpName, Exports, Config->Machine,
+ Config->MinGW)) {
+ HandleError(std::move(E));
+ return;
+ }
+ std::unique_ptr<MemoryBuffer> NewBuf = check(MemoryBuffer::getFile(
+ TmpName, /*FileSize*/ -1, /*RequiresNullTerminator*/ false));
+ if ((*OldBuf)->getBuffer() != NewBuf->getBuffer()) {
+ // Drop the buffer for the old file before renaming over it -- presumably
+ // so that the old file is no longer held open or mapped; TODO confirm.
+ OldBuf->reset();
+ HandleError(errorCodeToError(sys::fs::rename(TmpName, Path)));
+ } else {
+ sys::fs::remove(TmpName);
+ }
+// Reads the module-definition file at Path and merges it into Config.
+// The file's output name is used only when Config->OutputFile is still
+// empty; the import name is always taken from the file. Each numeric setting
+// overrides the one in Config only when its value in the file is nonzero.
+// Every export in the file is appended to Config->Exports.
+static void parseModuleDefs(StringRef Path) {
+  std::unique_ptr<MemoryBuffer> Buffer = CHECK(
+      MemoryBuffer::getFile(Path, -1, false, true), "could not open " + Path);
+  COFFModuleDefinition Def = check(parseCOFFModuleDefinition(
+      Buffer->getMemBufferRef(), Config->Machine, Config->MinGW));
+  if (Config->OutputFile.empty())
+    Config->OutputFile = Saver.save(Def.OutputFile);
+  Config->ImportName = Saver.save(Def.ImportName);
+  if (Def.ImageBase)
+    Config->ImageBase = Def.ImageBase;
+  if (Def.StackReserve)
+    Config->StackReserve = Def.StackReserve;
+  if (Def.StackCommit)
+    Config->StackCommit = Def.StackCommit;
+  if (Def.HeapReserve)
+    Config->HeapReserve = Def.HeapReserve;
+  if (Def.HeapCommit)
+    Config->HeapCommit = Def.HeapCommit;
+  if (Def.MajorImageVersion)
+    Config->MajorImageVersion = Def.MajorImageVersion;
+  if (Def.MinorImageVersion)
+    Config->MinorImageVersion = Def.MinorImageVersion;
+  if (Def.MajorOSVersion)
+    Config->MajorOSVersion = Def.MajorOSVersion;
+  if (Def.MinorOSVersion)
+    Config->MinorOSVersion = Def.MinorOSVersion;
+  for (COFFShortExport Parsed : Def.Exports) {
+    Export Exp;
+    // In simple cases, only Name is set. Renamed exports are parsed
+    // and set as "ExtName = Name". If Name has the form "OtherDll.Func",
+    // it shouldn't be a normal exported function but a forward to another
+    // DLL instead. This is supported by both MS and GNU linkers.
+    const bool IsForward = Parsed.ExtName != Parsed.Name &&
+                           StringRef(Parsed.Name).contains('.');
+    if (IsForward) {
+      Exp.Name = Saver.save(Parsed.ExtName);
+      Exp.ForwardTo = Saver.save(Parsed.Name);
+    } else {
+      Exp.Name = Saver.save(Parsed.Name);
+      Exp.ExtName = Saver.save(Parsed.ExtName);
+      Exp.Ordinal = Parsed.Ordinal;
+      Exp.Noname = Parsed.Noname;
+      Exp.Data = Parsed.Data;
+      Exp.Private = Parsed.Private;
+      Exp.Constant = Parsed.Constant;
+    }
+    Config->Exports.push_back(Exp);
+  }
+}
+// Appends Task to the end of TaskQueue; run() executes queued tasks in order.
+void LinkerDriver::enqueueTask(std::function<void()> Task) {
+  TaskQueue.emplace_back(std::move(Task));
+}
+// Runs queued tasks front to back until TaskQueue is empty, timing the work
+// with InputFileTimer. Returns true if at least one task was run.
+bool LinkerDriver::run() {
+  ScopedTimer T(InputFileTimer);
+  if (TaskQueue.empty())
+    return false;
+  // Each task is invoked while still at the front of the queue and removed
+  // afterwards. The queue is re-checked every iteration, so tasks added
+  // while draining are executed too.
+  do {
+    TaskQueue.front()();
+    TaskQueue.pop_front();
+  } while (!TaskQueue.empty());
+  return true;
+}
+// Handles the argument of /order, which must have the form "@<file>". The
+// file lists one symbol per line; each symbol that names a known section is
+// given a priority in Config->Order, starting at INT_MIN and increasing in
+// file order. Symbols missing from the file keep the default priority 0 and
+// end up at the end of an output section.
+static void parseOrderFile(StringRef Arg) {
+  // For some reason, the MSVC linker requires a filename to be
+  // preceded by "@".
+  if (!Arg.startswith("@")) {
+    error("malformed /order option: '@' missing");
+    return;
+  }
+  // Collect the symbol names of all section chunks so that unknown names in
+  // the order file can be diagnosed.
+  DenseSet<StringRef> KnownSyms;
+  for (Chunk *C : Symtab->getChunks()) {
+    auto *Sec = dyn_cast<SectionChunk>(C);
+    if (Sec && Sec->Sym)
+      KnownSyms.insert(Sec->Sym->getName());
+  }
+  // Open the file named after the '@'.
+  StringRef Path = Arg.substr(1);
+  std::unique_ptr<MemoryBuffer> MB = CHECK(
+      MemoryBuffer::getFile(Path, -1, false, true), "could not open " + Path);
+  for (std::string S : args::getLines(MB->getMemBufferRef())) {
+    // On 32-bit x86, undecorated names get a leading underscore.
+    if (Config->Machine == I386 && !isDecorated(S))
+      S = "_" + S;
+    if (KnownSyms.count(S) == 0) {
+      if (Config->WarnMissingOrderSymbol)
+        warn("/order:" + Arg + ": missing symbol: " + S + " [LNK4037]");
+      continue;
+    }
+    Config->Order[S] = INT_MIN + Config->Order.size();
+  }
+}
+// Sets KeepUnique on the chunk of S when S is a non-null Defined symbol that
+// has a chunk; does nothing otherwise.
+static void markAddrsig(Symbol *S) {
+  auto *Def = dyn_cast_or_null<Defined>(S);
+  if (!Def)
+    return;
+  Chunk *C = Def->getChunk();
+  if (C)
+    C->KeepUnique = true;
+}
+// Marks the chunks of all address-significant symbols via markAddrsig():
+// every exported symbol, every symbol referenced from an object file's
+// address-significance table, and every symbol of an object file that has
+// no such table. A table that cannot be read or decoded is a fatal error.
+static void findKeepUniqueSections() {
+  // Exported symbols could be address-significant in other executables or
+  // DSOs, so we conservatively mark them as address-significant.
+  for (Export &R : Config->Exports)
+    markAddrsig(R.Sym);
+  // Visit the address-significance table in each object file and mark each
+  // referenced symbol as address-significant.
+  for (ObjFile *Obj : ObjFile::Instances) {
+    ArrayRef<Symbol *> Syms = Obj->getSymbols();
+    if (!Obj->AddrsigSec) {
+      // If an object file does not have an address-significance table,
+      // conservatively mark all of its symbols as address-significant.
+      for (Symbol *S : Syms)
+        markAddrsig(S);
+      continue;
+    }
+    ArrayRef<uint8_t> Contents;
+    // Do not ignore a failed read: Contents would stay empty and none of
+    // this object's symbols would be marked address-significant.
+    if (std::error_code EC = Obj->getCOFFObj()->getSectionContents(
+            Obj->AddrsigSec, Contents))
+      fatal(toString(Obj) +
+            ": could not read addrsig section: " + EC.message());
+    // The table is a sequence of ULEB128-encoded symbol indices.
+    const uint8_t *Cur = Contents.begin();
+    while (Cur != Contents.end()) {
+      unsigned Size;
+      const char *Err;
+      uint64_t SymIndex = decodeULEB128(Cur, &Size, Contents.end(), &Err);
+      if (Err)
+        fatal(toString(Obj) + ": could not decode addrsig section: " + Err);
+      if (SymIndex >= Syms.size())
+        fatal(toString(Obj) + ": invalid symbol index in addrsig section");
+      markAddrsig(Syms[SymIndex]);
+      Cur += Size;
+    }
+  }
+}
+// link.exe replaces each %foo% in AltPath with the contents of environment
+// variable foo, and adds the two magic env vars _PDB (expands to the basename
+// of pdb's output path) and _EXT (expands to the extension of the output
+// binary).
+// lld only supports %_PDB% and %_EXT% and warns on references to all other env
+// vars.
+// The expanded string is stored in Config->PDBAltPath. Variable names are
+// matched case-insensitively, and unsupported references are kept as literal
+// text.
+static void parsePDBAltPath(StringRef AltPath) {
+ SmallString<128> Buf;
+ StringRef PDBBasename =
+ sys::path::filename(Config->PDBPath, sys::path::Style::windows);
+ StringRef BinaryExtension =
+ sys::path::extension(Config->OutputFile, sys::path::Style::windows);
+ if (!BinaryExtension.empty())
+ BinaryExtension = BinaryExtension.substr(1); // %_EXT% does not include '.'.
+ // Invariant:
+ // +--------- Cursor ('a...' might be the empty string).
+ // | +----- FirstMark
+ // | | +- SecondMark
+ // v v v
+ // a...%...%...
+ size_t Cursor = 0;
+ while (Cursor < AltPath.size()) {
+ size_t FirstMark, SecondMark;
+ // SecondMark is only searched for (and assigned) when FirstMark was found.
+ if ((FirstMark = AltPath.find('%', Cursor)) == StringRef::npos ||
+ (SecondMark = AltPath.find('%', FirstMark + 1)) == StringRef::npos) {
+ // Didn't find another full fragment, treat rest of string as literal.
+ Buf.append(AltPath.substr(Cursor));
+ break;
+ }
+ // Found a full fragment. Append text in front of first %, and interpret
+ // text between first and second % as variable name.
+ Buf.append(AltPath.substr(Cursor, FirstMark - Cursor));
+ // Var includes both surrounding '%' characters.
+ StringRef Var = AltPath.substr(FirstMark, SecondMark - FirstMark + 1);
+ if (Var.equals_lower("%_pdb%"))
+ Buf.append(PDBBasename);
+ else if (Var.equals_lower("%_ext%"))
+ Buf.append(BinaryExtension);
+ else {
+ warn("only %_PDB% and %_EXT% supported in /pdbaltpath:, keeping " +
+ Var + " as literal");
+ Buf.append(Var);
+ }
+ // Continue scanning just past the closing '%'.
+ Cursor = SecondMark + 1;
+ }
+ Config->PDBAltPath = Buf;
+void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
+ // If the first command line argument is "/lib", link.exe acts like lib.exe.
+ // We call our own implementation of lib.exe that understands bitcode files.
+ if (ArgsArr.size() > 1 && StringRef(ArgsArr[1]).equals_lower("/lib")) {
+ if (llvm::libDriverMain(ArgsArr.slice(1)) != 0)
+ fatal("lib failed");
+ return;
+ }
+ // Needed for LTO.
+ InitializeAllTargetInfos();
+ InitializeAllTargets();
+ InitializeAllTargetMCs();
+ InitializeAllAsmParsers();
+ InitializeAllAsmPrinters();
+ // Parse command line options.
+ ArgParser Parser;
+ opt::InputArgList Args = Parser.parseLINK(ArgsArr);
+ // Parse and evaluate -mllvm options.
+ std::vector<const char *> V;
+ V.push_back("lld-link (LLVM option parsing)");
+ for (auto *Arg : Args.filtered(OPT_mllvm))
+ V.push_back(Arg->getValue());
+ cl::ParseCommandLineOptions(V.size(), V.data());
+ // Handle /errorlimit early, because error() depends on it.
+ if (auto *Arg = Args.getLastArg(OPT_errorlimit)) {
+ int N = 20;
+ StringRef S = Arg->getValue();
+ if (S.getAsInteger(10, N))
+ error(Arg->getSpelling() + " number expected, but got " + S);
+ errorHandler().ErrorLimit = N;
+ }
+ // Handle /help
+ if (Args.hasArg(OPT_help)) {
+ printHelp(ArgsArr[0]);
+ return;
+ }
+ if (Args.hasArg(OPT_show_timing))
+ Config->ShowTiming = true;
+ ScopedTimer T(Timer::root());
+ // Handle --version, which is an lld extension. This option is a bit odd
+ // because it doesn't start with "/", but we deliberately chose "--" to
+ // avoid conflict with /version and for compatibility with clang-cl.
+ if (Args.hasArg(OPT_dash_dash_version)) {
+ outs() << getLLDVersion() << "\n";
+ return;
+ }
+ // Handle /lldmingw early, since it can potentially affect how other
+ // options are handled.
+ Config->MinGW = Args.hasArg(OPT_lldmingw);
+ if (auto *Arg = Args.getLastArg(OPT_linkrepro)) {
+ SmallString<64> Path = StringRef(Arg->getValue());
+ sys::path::append(Path, "repro.tar");
+ Expected<std::unique_ptr<TarWriter>> ErrOrWriter =
+ TarWriter::create(Path, "repro");
+ if (ErrOrWriter) {
+ Tar = std::move(*ErrOrWriter);
+ } else {
+ error("/linkrepro: failed to open " + Path + ": " +
+ toString(ErrOrWriter.takeError()));
+ }
+ }
+ if (!Args.hasArg(OPT_INPUT)) {
+ if (Args.hasArg(OPT_deffile))
+ Config->NoEntry = true;
+ else
+ fatal("no input files");
+ }
+ // Construct search path list.
+ SearchPaths.push_back("");
+ for (auto *Arg : Args.filtered(OPT_libpath))
+ SearchPaths.push_back(Arg->getValue());
+ addLibSearchPaths();
+ // Handle /ignore
+ for (auto *Arg : Args.filtered(OPT_ignore)) {
+ SmallVector<StringRef, 8> Vec;
+ StringRef(Arg->getValue()).split(Vec, ',');
+ for (StringRef S : Vec) {
+ if (S == "4037")
+ Config->WarnMissingOrderSymbol = false;
+ else if (S == "4099")
+ Config->WarnDebugInfoUnusable = false;
+ else if (S == "4217")
+ Config->WarnLocallyDefinedImported = false;
+ // Other warning numbers are ignored.
+ }
+ }
+ // Handle /out
+ if (auto *Arg = Args.getLastArg(OPT_out))
+ Config->OutputFile = Arg->getValue();
+ // Handle /verbose
+ if (Args.hasArg(OPT_verbose))
+ Config->Verbose = true;
+ errorHandler().Verbose = Config->Verbose;
+ // Handle /force or /force:unresolved
+ if (Args.hasArg(OPT_force, OPT_force_unresolved))
+ Config->ForceUnresolved = true;
+ // Handle /force or /force:multiple
+ if (Args.hasArg(OPT_force, OPT_force_multiple))
+ Config->ForceMultiple = true;
+ // Handle /debug
+ DebugKind Debug = parseDebugKind(Args);
+ if (Debug == DebugKind::Full || Debug == DebugKind::Dwarf ||
+ Debug == DebugKind::GHash) {
+ Config->Debug = true;
+ Config->Incremental = true;
+ }
+ // Handle /debugtype
+ Config->DebugTypes = parseDebugTypes(Args);
+ // Handle /pdb
+ bool ShouldCreatePDB =
+ (Debug == DebugKind::Full || Debug == DebugKind::GHash);
+ if (ShouldCreatePDB) {
+ if (auto *Arg = Args.getLastArg(OPT_pdb))
+ Config->PDBPath = Arg->getValue();
+ if (auto *Arg = Args.getLastArg(OPT_pdbaltpath))
+ Config->PDBAltPath = Arg->getValue();
+ if (Args.hasArg(OPT_natvis))
+ Config->NatvisFiles = Args.getAllArgValues(OPT_natvis);
+ if (auto *Arg = Args.getLastArg(OPT_pdb_source_path))
+ Config->PDBSourcePath = Arg->getValue();
+ }
+ // Handle /noentry
+ if (Args.hasArg(OPT_noentry)) {
+ if (Args.hasArg(OPT_dll))
+ Config->NoEntry = true;
+ else
+ error("/noentry must be specified with /dll");
+ }
+ // Handle /dll
+ if (Args.hasArg(OPT_dll)) {
+ Config->DLL = true;
+ Config->ManifestID = 2;
+ }
+ // Handle /dynamicbase and /fixed. We can't use hasFlag for /dynamicbase
+ // because we need to explicitly check whether that option or its inverse was
+ // present in the argument list in order to handle /fixed.
+ auto *DynamicBaseArg = Args.getLastArg(OPT_dynamicbase, OPT_dynamicbase_no);
+ if (DynamicBaseArg &&
+ DynamicBaseArg->getOption().getID() == OPT_dynamicbase_no)
+ Config->DynamicBase = false;
+ // MSDN claims "/FIXED:NO is the default setting for a DLL, and /FIXED is the
+ // default setting for any other project type.", but link.exe defaults to
+ // /FIXED:NO for exe outputs as well. Match behavior, not docs.
+ bool Fixed = Args.hasFlag(OPT_fixed, OPT_fixed_no, false);
+ if (Fixed) {
+ if (DynamicBaseArg &&
+ DynamicBaseArg->getOption().getID() == OPT_dynamicbase) {
+ error("/fixed must not be specified with /dynamicbase");
+ } else {
+ Config->Relocatable = false;
+ Config->DynamicBase = false;
+ }
+ }
+ // Handle /appcontainer
+ Config->AppContainer =
+ Args.hasFlag(OPT_appcontainer, OPT_appcontainer_no, false);
+ // Handle /machine
+ if (auto *Arg = Args.getLastArg(OPT_machine))
+ Config->Machine = getMachineType(Arg->getValue());
+ // Handle /nodefaultlib:<filename>
+ for (auto *Arg : Args.filtered(OPT_nodefaultlib))
+ Config->NoDefaultLibs.insert(doFindLib(Arg->getValue()));
+ // Handle /nodefaultlib
+ if (Args.hasArg(OPT_nodefaultlib_all))
+ Config->NoDefaultLibAll = true;
+ // Handle /base
+ if (auto *Arg = Args.getLastArg(OPT_base))
+ parseNumbers(Arg->getValue(), &Config->ImageBase);
+ // Handle /stack
+ if (auto *Arg = Args.getLastArg(OPT_stack))
+ parseNumbers(Arg->getValue(), &Config->StackReserve, &Config->StackCommit);
+ // Handle /guard:cf
+ if (auto *Arg = Args.getLastArg(OPT_guard))
+ parseGuard(Arg->getValue());
+ // Handle /heap
+ if (auto *Arg = Args.getLastArg(OPT_heap))
+ parseNumbers(Arg->getValue(), &Config->HeapReserve, &Config->HeapCommit);
+ // Handle /version
+ if (auto *Arg = Args.getLastArg(OPT_version))
+ parseVersion(Arg->getValue(), &Config->MajorImageVersion,
+ &Config->MinorImageVersion);
+ // Handle /subsystem
+ if (auto *Arg = Args.getLastArg(OPT_subsystem))
+ parseSubsystem(Arg->getValue(), &Config->Subsystem, &Config->MajorOSVersion,
+ &Config->MinorOSVersion);
+ // Handle /timestamp
+ if (llvm::opt::Arg *Arg = Args.getLastArg(OPT_timestamp, OPT_repro)) {
+ if (Arg->getOption().getID() == OPT_repro) {
+ Config->Timestamp = 0;
+ Config->Repro = true;
+ } else {
+ Config->Repro = false;
+ StringRef Value(Arg->getValue());
+ if (Value.getAsInteger(0, Config->Timestamp))
+ fatal(Twine("invalid timestamp: ") + Value +
+ ". Expected 32-bit integer");
+ }
+ } else {
+ Config->Repro = false;
+ Config->Timestamp = time(nullptr);
+ }
+ // Handle /alternatename
+ for (auto *Arg : Args.filtered(OPT_alternatename))
+ parseAlternateName(Arg->getValue());
+ // Handle /include
+ for (auto *Arg : Args.filtered(OPT_incl))
+ addUndefined(Arg->getValue());
+ // Handle /implib
+ if (auto *Arg = Args.getLastArg(OPT_implib))
+ Config->Implib = Arg->getValue();
+ // Handle /opt.
+ bool DoGC = Debug == DebugKind::None || Args.hasArg(OPT_profile);
+ unsigned ICFLevel =
+ Args.hasArg(OPT_profile) ? 0 : 1; // 0: off, 1: limited, 2: on
+ unsigned TailMerge = 1;
+ for (auto *Arg : Args.filtered(OPT_opt)) {
+ std::string Str = StringRef(Arg->getValue()).lower();
+ SmallVector<StringRef, 1> Vec;
+ StringRef(Str).split(Vec, ',');
+ for (StringRef S : Vec) {
+ if (S == "ref") {
+ DoGC = true;
+ } else if (S == "noref") {
+ DoGC = false;
+ } else if (S == "icf" || S.startswith("icf=")) {
+ ICFLevel = 2;
+ } else if (S == "noicf") {
+ ICFLevel = 0;
+ } else if (S == "lldtailmerge") {
+ TailMerge = 2;
+ } else if (S == "nolldtailmerge") {
+ TailMerge = 0;
+ } else if (S.startswith("lldlto=")) {
+ StringRef OptLevel = S.substr(7);
+ if (OptLevel.getAsInteger(10, Config->LTOO) || Config->LTOO > 3)
+ error("/opt:lldlto: invalid optimization level: " + OptLevel);
+ } else if (S.startswith("lldltojobs=")) {
+ StringRef Jobs = S.substr(11);
+ if (Jobs.getAsInteger(10, Config->ThinLTOJobs) ||
+ Config->ThinLTOJobs == 0)
+ error("/opt:lldltojobs: invalid job count: " + Jobs);
+ } else if (S.startswith("lldltopartitions=")) {
+ StringRef N = S.substr(17);
+ if (N.getAsInteger(10, Config->LTOPartitions) ||
+ Config->LTOPartitions == 0)
+ error("/opt:lldltopartitions: invalid partition count: " + N);
+ } else if (S != "lbr" && S != "nolbr")
+ error("/opt: unknown option: " + S);
+ }
+ }
+ // Limited ICF is enabled if GC is enabled and ICF was never mentioned
+ // explicitly.
+ // FIXME: LLD only implements "limited" ICF, i.e. it only merges identical
+ // code. If the user passes /OPT:ICF explicitly, LLD should merge identical
+ // comdat readonly data.
+ if (ICFLevel == 1 && !DoGC)
+ ICFLevel = 0;
+ Config->DoGC = DoGC;
+ Config->DoICF = ICFLevel > 0;
+ Config->TailMerge = (TailMerge == 1 && Config->DoICF) || TailMerge == 2;
+ // Handle /lldsavetemps
+ if (Args.hasArg(OPT_lldsavetemps))
+ Config->SaveTemps = true;
+ // Handle /kill-at
+ if (Args.hasArg(OPT_kill_at))
+ Config->KillAt = true;
+ // Handle /lldltocache
+ if (auto *Arg = Args.getLastArg(OPT_lldltocache))
+ Config->LTOCache = Arg->getValue();
+ // Handle /lldltocachepolicy
+ if (auto *Arg = Args.getLastArg(OPT_lldltocachepolicy))
+ Config->LTOCachePolicy = CHECK(
+ parseCachePruningPolicy(Arg->getValue()),
+ Twine("/lldltocachepolicy: invalid cache policy: ") + Arg->getValue());
+ // Handle /failifmismatch
+ for (auto *Arg : Args.filtered(OPT_failifmismatch))
+ checkFailIfMismatch(Arg->getValue());
+ // Handle /merge
+ for (auto *Arg : Args.filtered(OPT_merge))
+ parseMerge(Arg->getValue());
+ // Add default section merging rules after user rules. User rules take
+ // precedence, but we will emit a warning if there is a conflict.
+ parseMerge(".idata=.rdata");
+ parseMerge(".didat=.rdata");
+ parseMerge(".edata=.rdata");
+ parseMerge(".xdata=.rdata");
+ parseMerge(".bss=.data");
+ if (Config->MinGW) {
+ parseMerge(".ctors=.rdata");
+ parseMerge(".dtors=.rdata");
+ parseMerge(".CRT=.rdata");
+ }
+ // Handle /section
+ for (auto *Arg : Args.filtered(OPT_section))
+ parseSection(Arg->getValue());
+ // Handle /aligncomm
+ for (auto *Arg : Args.filtered(OPT_aligncomm))
+ parseAligncomm(Arg->getValue());
+ // Handle /manifestdependency. This enables /manifest unless /manifest:no is
+ // also passed.
+ if (auto *Arg = Args.getLastArg(OPT_manifestdependency)) {
+ Config->ManifestDependency = Arg->getValue();
+ Config->Manifest = Configuration::SideBySide;
+ }
+ // Handle /manifest and /manifest:
+ if (auto *Arg = Args.getLastArg(OPT_manifest, OPT_manifest_colon)) {
+ if (Arg->getOption().getID() == OPT_manifest)
+ Config->Manifest = Configuration::SideBySide;
+ else
+ parseManifest(Arg->getValue());
+ }
+ // Handle /manifestuac
+ if (auto *Arg = Args.getLastArg(OPT_manifestuac))
+ parseManifestUAC(Arg->getValue());
+ // Handle /manifestfile
+ if (auto *Arg = Args.getLastArg(OPT_manifestfile))
+ Config->ManifestFile = Arg->getValue();
+ // Handle /manifestinput
+ for (auto *Arg : Args.filtered(OPT_manifestinput))
+ Config->ManifestInput.push_back(Arg->getValue());
+ if (!Config->ManifestInput.empty() &&
+ Config->Manifest != Configuration::Embed) {
+ fatal("/manifestinput: requires /manifest:embed");
+ }
+ // Handle miscellaneous boolean flags.
+ Config->AllowBind = Args.hasFlag(OPT_allowbind, OPT_allowbind_no, true);
+ Config->AllowIsolation =
+ Args.hasFlag(OPT_allowisolation, OPT_allowisolation_no, true);
+ Config->Incremental =
+ Args.hasFlag(OPT_incremental, OPT_incremental_no,
+ !Config->DoGC && !Config->DoICF && !Args.hasArg(OPT_order) &&
+ !Args.hasArg(OPT_profile));
+ Config->IntegrityCheck =
+ Args.hasFlag(OPT_integritycheck, OPT_integritycheck_no, false);
+ Config->NxCompat = Args.hasFlag(OPT_nxcompat, OPT_nxcompat_no, true);
+ Config->TerminalServerAware =
+ !Config->DLL && Args.hasFlag(OPT_tsaware, OPT_tsaware_no, true);
+ Config->DebugDwarf = Debug == DebugKind::Dwarf;
+ Config->DebugGHashes = Debug == DebugKind::GHash;
+ Config->DebugSymtab = Debug == DebugKind::Symtab;
+ Config->MapFile = getMapFile(Args);
+ if (Config->Incremental && Args.hasArg(OPT_profile)) {
+ warn("ignoring '/incremental' due to '/profile' specification");
+ Config->Incremental = false;
+ }
+ if (Config->Incremental && Args.hasArg(OPT_order)) {
+ warn("ignoring '/incremental' due to '/order' specification");
+ Config->Incremental = false;
+ }
+ if (Config->Incremental && Config->DoGC) {
+ warn("ignoring '/incremental' because REF is enabled; use '/opt:noref' to "
+ "disable");
+ Config->Incremental = false;
+ }
+ if (Config->Incremental && Config->DoICF) {
+ warn("ignoring '/incremental' because ICF is enabled; use '/opt:noicf' to "
+ "disable");
+ Config->Incremental = false;
+ }
+ if (errorCount())
+ return;
+ std::set<sys::fs::UniqueID> WholeArchives;
+ AutoExporter Exporter;
+ for (auto *Arg : Args.filtered(OPT_wholearchive_file)) {
+ if (Optional<StringRef> Path = doFindFile(Arg->getValue())) {
+ if (Optional<sys::fs::UniqueID> ID = getUniqueID(*Path))
+ WholeArchives.insert(*ID);
+ Exporter.addWholeArchive(*Path);
+ }
+ }
+ // A predicate returning true if a given path is an argument for
+ // /wholearchive:, or /wholearchive is enabled globally.
+ // This function is a bit tricky because "foo.obj /wholearchive:././foo.obj"
+ // needs to be handled as "/wholearchive:foo.obj foo.obj".
+ auto IsWholeArchive = [&](StringRef Path) -> bool {
+ if (Args.hasArg(OPT_wholearchive_flag))
+ return true;
+ if (Optional<sys::fs::UniqueID> ID = getUniqueID(Path))
+ return WholeArchives.count(*ID);
+ return false;
+ };
+ // Create a list of input files. Files can be given as arguments
+ // for /defaultlib option.
+ for (auto *Arg : Args.filtered(OPT_INPUT, OPT_wholearchive_file))
+ if (Optional<StringRef> Path = findFile(Arg->getValue()))
+ enqueuePath(*Path, IsWholeArchive(*Path));
+ for (auto *Arg : Args.filtered(OPT_defaultlib))
+ if (Optional<StringRef> Path = findLib(Arg->getValue()))
+ enqueuePath(*Path, false);
+ // Windows specific -- Create a resource file containing a manifest file.
+ if (Config->Manifest == Configuration::Embed)
+ addBuffer(createManifestRes(), false);
+ // Read all input files given via the command line.
+ run();
+ if (errorCount())
+ return;
+ // We should have inferred a machine type by now from the input files, but if
+ // not we assume x64.
+ if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) {
+ warn("/machine is not specified. x64 is assumed");
+ Config->Machine = AMD64;
+ }
+ Config->Wordsize = Config->is64() ? 8 : 4;
+ // Input files can be Windows resource files (.res files). We use
+ // WindowsResource to convert resource files to a regular COFF file,
+ // then link the resulting file normally.
+ if (!Resources.empty())
+ Symtab->addFile(make<ObjFile>(convertResToCOFF(Resources)));
+ if (Tar)
+ Tar->append("response.txt",
+ createResponseFile(Args, FilePaths,
+ ArrayRef<StringRef>(SearchPaths).slice(1)));
+ // Handle /largeaddressaware
+ Config->LargeAddressAware = Args.hasFlag(
+ OPT_largeaddressaware, OPT_largeaddressaware_no, Config->is64());
+ // Handle /highentropyva
+ Config->HighEntropyVA =
+ Config->is64() &&
+ Args.hasFlag(OPT_highentropyva, OPT_highentropyva_no, true);
+ if (!Config->DynamicBase &&
+ (Config->Machine == ARMNT || Config->Machine == ARM64))
+ error("/dynamicbase:no is not compatible with " +
+ machineToStr(Config->Machine));
+ // Handle /export
+ for (auto *Arg : Args.filtered(OPT_export)) {
+ Export E = parseExport(Arg->getValue());
+ if (Config->Machine == I386) {
+ if (!isDecorated(E.Name))
+ E.Name = Saver.save("_" + E.Name);
+ if (!E.ExtName.empty() && !isDecorated(E.ExtName))
+ E.ExtName = Saver.save("_" + E.ExtName);
+ }
+ Config->Exports.push_back(E);
+ }
+ // Handle /def
+ if (auto *Arg = Args.getLastArg(OPT_deffile)) {
+ // parseModuleDefs mutates Config object.
+ parseModuleDefs(Arg->getValue());
+ }
+ // Handle generation of import library from a def file.
+ if (!Args.hasArg(OPT_INPUT)) {
+ fixupExports();
+ createImportLibrary(/*AsLib=*/true);
+ return;
+ }
+ // Windows specific -- if no /subsystem is given, we need to infer
+ // that from entry point name. Must happen before /entry handling,
+ // and after the early return when just writing an import library.
+ if (Config->Subsystem == IMAGE_SUBSYSTEM_UNKNOWN) {
+ Config->Subsystem = inferSubsystem();
+ if (Config->Subsystem == IMAGE_SUBSYSTEM_UNKNOWN)
+ fatal("subsystem must be defined");
+ }
+ // Handle /entry and /dll
+ if (auto *Arg = Args.getLastArg(OPT_entry)) {
+ Config->Entry = addUndefined(mangle(Arg->getValue()));
+ } else if (!Config->Entry && !Config->NoEntry) {
+ if (Args.hasArg(OPT_dll)) {
+ StringRef S = (Config->Machine == I386) ? "__DllMainCRTStartup@12"
+ : "_DllMainCRTStartup";
+ Config->Entry = addUndefined(S);
+ } else {
+ // Windows specific -- If entry point name is not given, we need to
+ // infer that from user-defined entry name.
+ StringRef S = findDefaultEntry();
+ if (S.empty())
+ fatal("entry point must be defined");
+ Config->Entry = addUndefined(S);
+ log("Entry name inferred: " + S);
+ }
+ }
+ // Handle /delayload
+ for (auto *Arg : Args.filtered(OPT_delayload)) {
+ Config->DelayLoads.insert(StringRef(Arg->getValue()).lower());
+ if (Config->Machine == I386) {
+ Config->DelayLoadHelper = addUndefined("___delayLoadHelper2@8");
+ } else {
+ Config->DelayLoadHelper = addUndefined("__delayLoadHelper2");
+ }
+ }
+ // Set default image name if neither /out nor /def set it.
+ if (Config->OutputFile.empty()) {
+ Config->OutputFile =
+ getOutputPath((*Args.filtered(OPT_INPUT).begin())->getValue());
+ }
+ if (ShouldCreatePDB) {
+ // Put the PDB next to the image if no /pdb flag was passed.
+ if (Config->PDBPath.empty()) {
+ Config->PDBPath = Config->OutputFile;
+ sys::path::replace_extension(Config->PDBPath, ".pdb");
+ }
+ // The embedded PDB path should be the absolute path to the PDB if no
+ // /pdbaltpath flag was passed.
+ if (Config->PDBAltPath.empty()) {
+ Config->PDBAltPath = Config->PDBPath;
+ // It's important to make the path absolute and remove dots. This path
+ // will eventually be written into the PE header, and certain Microsoft
+ // tools won't work correctly if these assumptions are not held.
+ sys::fs::make_absolute(Config->PDBAltPath);
+ sys::path::remove_dots(Config->PDBAltPath);
+ } else {
+ // Don't do this earlier, so that Config->OutputFile is ready.
+ parsePDBAltPath(Config->PDBAltPath);
+ }
+ }
+ // Set default image base if /base is not given.
+ if (Config->ImageBase == uint64_t(-1))
+ Config->ImageBase = getDefaultImageBase();
+ Symtab->addSynthetic(mangle("__ImageBase"), nullptr);
+ if (Config->Machine == I386) {
+ Symtab->addAbsolute("___safe_se_handler_table", 0);
+ Symtab->addAbsolute("___safe_se_handler_count", 0);
+ }
+ Symtab->addAbsolute(mangle("__guard_fids_count"), 0);
+ Symtab->addAbsolute(mangle("__guard_fids_table"), 0);
+ Symtab->addAbsolute(mangle("__guard_flags"), 0);
+ Symtab->addAbsolute(mangle("__guard_iat_count"), 0);
+ Symtab->addAbsolute(mangle("__guard_iat_table"), 0);
+ Symtab->addAbsolute(mangle("__guard_longjmp_count"), 0);
+ Symtab->addAbsolute(mangle("__guard_longjmp_table"), 0);
+ // Needed for MSVC 2017 15.5 CRT.
+ Symtab->addAbsolute(mangle("__enclave_config"), 0);
+ if (Config->MinGW) {
+ Symtab->addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0);
+ Symtab->addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0);
+ Symtab->addAbsolute(mangle("__CTOR_LIST__"), 0);
+ Symtab->addAbsolute(mangle("__DTOR_LIST__"), 0);
+ }
+ // This code may add new undefined symbols to the link, which may enqueue more
+ // symbol resolution tasks, so we need to continue executing tasks until we
+ // converge.
+ do {
+ // Windows specific -- if entry point is not found,
+ // search for its mangled names.
+ if (Config->Entry)
+ Symtab->mangleMaybe(Config->Entry);
+ // Windows specific -- Make sure we resolve all dllexported symbols.
+ for (Export &E : Config->Exports) {
+ if (!E.ForwardTo.empty())
+ continue;
+ E.Sym = addUndefined(E.Name);
+ if (!E.Directives)
+ Symtab->mangleMaybe(E.Sym);
+ }
+ // Add weak aliases. Weak aliases are a mechanism to give remaining
+ // undefined symbols a final chance to be resolved successfully.
+ for (auto Pair : Config->AlternateNames) {
+ StringRef From = Pair.first;
+ StringRef To = Pair.second;
+ Symbol *Sym = Symtab->find(From);
+ if (!Sym)
+ continue;
+ if (auto *U = dyn_cast<Undefined>(Sym))
+ if (!U->WeakAlias)
+ U->WeakAlias = Symtab->addUndefined(To);
+ }
+ // Windows specific -- if __load_config_used can be resolved, resolve it.
+ if (Symtab->findUnderscore("_load_config_used"))
+ addUndefined(mangle("_load_config_used"));
+ } while (run());
+ if (errorCount())
+ return;
+ // Do LTO by compiling bitcode input files to a set of native COFF files then
+ // link those files.
+ Symtab->addCombinedLTOObjects();
+ run();
+ if (Config->MinGW) {
+ // Load any further object files that might be needed for doing automatic
+ // imports.
+ //
+ // For cases with no automatically imported symbols, this iterates once
+ // over the symbol table and doesn't do anything.
+ //
+ // For the normal case with a few automatically imported symbols, this
+ // should only need to be run once, since each new object file imported
+ // is an import library and wouldn't add any new undefined references,
+ // but there's nothing stopping the __imp_ symbols from coming from a
+ // normal object file as well (although that won't be used for the
+ // actual autoimport later on). If this pass adds new undefined references,
+ // we won't iterate further to resolve them.
+ Symtab->loadMinGWAutomaticImports();
+ run();
+ }
+ // Make sure we have resolved all symbols.
+ Symtab->reportRemainingUndefines();
+ if (errorCount())
+ return;
+ // Handle /safeseh.
+ if (Args.hasFlag(OPT_safeseh, OPT_safeseh_no, false)) {
+ for (ObjFile *File : ObjFile::Instances)
+ if (!File->hasSafeSEH())
+ error("/safeseh: " + File->getName() + " is not compatible with SEH");
+ if (errorCount())
+ return;
+ }
+ // In MinGW, all symbols are automatically exported if no symbols
+ // are chosen to be exported.
+ if (Config->DLL && ((Config->MinGW && Config->Exports.empty()) ||
+ Args.hasArg(OPT_export_all_symbols))) {
+ Exporter.initSymbolExcludes();
+ Symtab->forEachSymbol([=](Symbol *S) {
+ auto *Def = dyn_cast<Defined>(S);
+ if (!Exporter.shouldExport(Def))
+ return;
+ Export E;
+ E.Name = Def->getName();
+ E.Sym = Def;
+ if (Def->getChunk() &&
+ !(Def->getChunk()->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE))
+ E.Data = true;
+ Config->Exports.push_back(E);
+ });
+ }
+ // Windows specific -- when we are creating a .dll file, we also
+ // need to create a .lib file.
+ if (!Config->Exports.empty() || Config->DLL) {
+ fixupExports();
+ createImportLibrary(/*AsLib=*/false);
+ assignExportOrdinals();
+ }
+ // Handle /output-def (MinGW specific).
+ if (auto *Arg = Args.getLastArg(OPT_output_def))
+ writeDefFile(Arg->getValue());
+ // Set extra alignment for .comm symbols
+ for (auto Pair : Config->AlignComm) {
+ StringRef Name = Pair.first;
+ uint32_t Alignment = Pair.second;
+ Symbol *Sym = Symtab->find(Name);
+ if (!Sym) {
+ warn("/aligncomm symbol " + Name + " not found");
+ continue;
+ }
+ // If the symbol isn't common, it must have been replaced with a regular
+ // symbol, which will carry its own alignment.
+ auto *DC = dyn_cast<DefinedCommon>(Sym);
+ if (!DC)
+ continue;
+ CommonChunk *C = DC->getChunk();
+ C->Alignment = std::max(C->Alignment, Alignment);
+ }
+ // Windows specific -- Create a side-by-side manifest file.
+ if (Config->Manifest == Configuration::SideBySide)
+ createSideBySideManifest();
+ // Handle /order. We want to do this at this moment because we
+ // need a complete list of comdat sections to warn on nonexistent
+ // functions.
+ if (auto *Arg = Args.getLastArg(OPT_order))
+ parseOrderFile(Arg->getValue());
+ // Identify unreferenced COMDAT sections.
+ if (Config->DoGC)
+ markLive(Symtab->getChunks());
+ // Identify identical COMDAT sections to merge them.
+ if (Config->DoICF) {
+ findKeepUniqueSections();
+ doICF(Symtab->getChunks());
+ }
+ // Write the result.
+ writeResult();
+ // Stop early so we can print the results.
+ Timer::root().stop();
+ if (Config->ShowTiming)
+ Timer::root().print();
+} // namespace coff
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/COFF/Driver.h b/contrib/llvm/tools/lld/COFF/Driver.h
new file mode 100644
index 000000000000..e779721ab75d
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/Driver.h
@@ -0,0 +1,197 @@
+//===- Driver.h -------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Config.h"
+#include "SymbolTable.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Common/Reproduce.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/TarWriter.h"
+#include <memory>
+#include <set>
+#include <vector>
+namespace lld {
+namespace coff {
+class LinkerDriver;
+extern LinkerDriver *Driver;
+using llvm::COFF::MachineTypes;
+using llvm::COFF::WindowsSubsystem;
+using llvm::Optional;
+// Option table type for the COFF driver; derives from llvm::opt::OptTable.
+// An instance is held by ArgParser (member Table).
+class COFFOptTable : public llvm::opt::OptTable {
+ COFFOptTable();
+// Turns command-line style input (argument vectors, plain strings, .drectve
+// contents) into llvm::opt::InputArgList objects. Holds its own COFFOptTable.
+class ArgParser {
+ // Concatenate LINK environment variable and given arguments and parse them.
+ llvm::opt::InputArgList parseLINK(std::vector<const char *> Args);
+ // Tokenizes a given string and then parses as command line options.
+ llvm::opt::InputArgList parse(StringRef S) { return parse(tokenize(S)); }
+ // Tokenizes a given string and then parses it as command line options found
+ // in a .drectve section. /EXPORT options are returned in the second element
+ // of the pair so that they can be processed in a fast path.
+ std::pair<llvm::opt::InputArgList, std::vector<StringRef>>
+ parseDirectives(StringRef S);
+ // Parses command line options.
+ llvm::opt::InputArgList parse(llvm::ArrayRef<const char *> Args);
+ // Splits S into individual argument strings; parse(StringRef) passes the
+ // result on to parse(ArrayRef).
+ std::vector<const char *> tokenize(StringRef S);
+ // Option table owned by this parser (presumably consulted by the parse
+ // functions -- their definitions are not in this header).
+ COFFOptTable Table;
+// The COFF link driver. link() is the entry point that takes the command-line
+// arguments; the remaining members are the helpers and state it uses to find,
+// open and enqueue input files.
+class LinkerDriver {
+ void link(llvm::ArrayRef<const char *> Args);
+ // Used by the resolver to parse .drectve section contents.
+ void parseDirectives(StringRef S);
+ // Used by ArchiveFile to enqueue members.
+ void enqueueArchiveMember(const Archive::Child &C, StringRef SymName,
+ StringRef ParentName);
+ MemoryBufferRef takeBuffer(std::unique_ptr<MemoryBuffer> MB);
+ std::unique_ptr<llvm::TarWriter> Tar; // for /linkrepro
+ // Opens a file. Path has to be resolved already.
+ MemoryBufferRef openFile(StringRef Path);
+ // Searches a file from search paths.
+ Optional<StringRef> findFile(StringRef Filename);
+ Optional<StringRef> findLib(StringRef Filename);
+ StringRef doFindFile(StringRef Filename);
+ StringRef doFindLib(StringRef Filename);
+ StringRef doFindLibMinGW(StringRef Filename);
+ // Parses LIB environment which contains a list of search paths.
+ void addLibSearchPaths();
+ // Library search path. The first element is always "" (current directory).
+ std::vector<StringRef> SearchPaths;
+ // We don't want to add the same file more than once.
+ // Files are uniquified by their filesystem and file number.
+ std::set<llvm::sys::fs::UniqueID> VisitedFiles;
+ std::set<std::string> VisitedLibs;
+ Symbol *addUndefined(StringRef Sym);
+ // Windows specific -- "main" is not the only main function in Windows.
+ // You can choose one from these four -- {w,}{WinMain,main}.
+ // There are four different entry point functions for them,
+ // {w,}{WinMain,main}CRTStartup, respectively. The linker needs to
+ // choose the right one depending on which "main" function is defined.
+ // This function looks up the symbol table and resolves the corresponding
+ // entry point name.
+ StringRef findDefaultEntry();
+ WindowsSubsystem inferSubsystem();
+ void addBuffer(std::unique_ptr<MemoryBuffer> MB, bool WholeArchive);
+ void addArchiveBuffer(MemoryBufferRef MBRef, StringRef SymName,
+ StringRef ParentName);
+ void enqueuePath(StringRef Path, bool WholeArchive);
+ void enqueueTask(std::function<void()> Task);
+ // Runs queued work. link() keeps looping on symbol resolution for as long
+ // as this returns true.
+ bool run();
+ // Queue of pending tasks (presumably filled by enqueueTask() and drained by
+ // run() -- their definitions are not in this header).
+ std::list<std::function<void()>> TaskQueue;
+ std::vector<StringRef> FilePaths;
+ // Buffers of Windows resource (.res) inputs; link() converts them to a COFF
+ // object with convertResToCOFF() when this is non-empty.
+ std::vector<MemoryBufferRef> Resources;
+ llvm::StringSet<> DirectivesExports;
+// Functions below this line are defined in DriverUtils.cpp.
+void printHelp(const char *Argv0);
+// For /machine option: convert between the option's string value and
+// MachineTypes.
+MachineTypes getMachineType(StringRef Arg);
+StringRef machineToStr(MachineTypes MT);
+// Parses a string in the form of "<integer>[,<integer>]".
+void parseNumbers(StringRef Arg, uint64_t *Addr, uint64_t *Size = nullptr);
+// Parses a comma-separated /guard argument (no, nolongjmp, cf, longjmp) and
+// stores the resulting level in Config->GuardCF.
+void parseGuard(StringRef Arg);
+// Parses a string in the form of "<integer>[.<integer>]".
+// Minor's default value is 0.
+void parseVersion(StringRef Arg, uint32_t *Major, uint32_t *Minor);
+// Parses a string in the form of "<subsystem>[,<integer>[.<integer>]]".
+void parseSubsystem(StringRef Arg, WindowsSubsystem *Sys, uint32_t *Major,
+ uint32_t *Minor);
+// Parse a string in the form of "<from>=<to>" for /alternatename and /merge
+// respectively. Results are written to Config.
+void parseAlternateName(StringRef);
+void parseMerge(StringRef);
+// Parses a /section option argument ("<name>,<attributes>").
+void parseSection(StringRef);
+// Parses an /aligncomm option argument ("<symbol>,<integer>").
+void parseAligncomm(StringRef);
+// Parses a string in the form of "EMBED[,ID=<integer>]|NO".
+void parseManifest(StringRef Arg);
+// Parses a string in the form of "level=<string>|uiAccess=<string>|NO".
+void parseManifestUAC(StringRef Arg);
+// Create a resource file containing a manifest XML.
+std::unique_ptr<MemoryBuffer> createManifestRes();
+// Writes the manifest XML to Config->ManifestFile, or to
+// "<output file>.manifest" when no manifest file name is configured.
+void createSideBySideManifest();
+// Used for dllexported symbols.
+Export parseExport(StringRef Arg);
+void fixupExports();
+void assignExportOrdinals();
+// Parses a string in the form of "key=value" and checks
+// if value matches previous values for the key.
+// This feature is used in the directive section to reject
+// incompatible objects.
+void checkFailIfMismatch(StringRef Arg);
+// Convert Windows resource files (.res files) to a .obj file.
+MemoryBufferRef convertResToCOFF(ArrayRef<MemoryBufferRef> MBs);
+void runMSVCLinker(std::string Rsp, ArrayRef<StringRef> Objects);
+// Create enum with OPT_xxx values for each option in Options.td.
+// The OPTION macro keeps only its third parameter (ID) and turns it into an
+// OPT_<ID> enumerator; Options.inc is included with that macro in effect and
+// the macro is undefined again afterwards.
+enum {
+#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID,
+#include "Options.inc"
+#undef OPTION
+} // namespace coff
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/COFF/DriverUtils.cpp b/contrib/llvm/tools/lld/COFF/DriverUtils.cpp
new file mode 100644
index 000000000000..3a11895497a4
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/DriverUtils.cpp
@@ -0,0 +1,872 @@
+//===- DriverUtils.cpp ----------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file contains utility functions for the driver. Because there
+// are so many small functions, we created this separate file to make
+// Driver.cpp less cluttered.
+#include "Config.h"
+#include "Driver.h"
+#include "Symbols.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/WindowsResource.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/WindowsManifest/WindowsManifestMerger.h"
+#include <memory>
+using namespace llvm::COFF;
+using namespace llvm;
+using llvm::sys::Process;
+namespace lld {
+namespace coff {
+namespace {
+// Value written to the Language field of the manifest resource entry header.
+// NOTE(review): 0x0409 looks like the complete en-US language identifier
+// rather than a bare sub-language value -- confirm the name is intentional.
+const uint16_t SUBLANG_ENGLISH_US = 0x0409;
+// Resource type ID assigned to the manifest resource entry.
+const uint16_t RT_MANIFEST = 24;
+// Helper for running an external program: collects arguments with add() and
+// executes the program with run(). All strings are copied through Saver, so
+// the stored StringRefs do not depend on the caller's buffers.
+class Executor {
+ explicit Executor(StringRef S) : Prog(Saver.save(S)) {}
+ // Appends one argument; overloaded for the string types callers pass in.
+ void add(StringRef S) { Args.push_back(Saver.save(S)); }
+ void add(std::string &S) { Args.push_back(Saver.save(S)); }
+ void add(Twine S) { Args.push_back(Saver.save(S)); }
+ void add(const char *S) { Args.push_back(Saver.save(S)); }
+ // Looks Prog up by name, prepends the resolved executable path to Args and
+ // runs it, waiting for completion. Calls fatal() if the program cannot be
+ // found or if ExecuteAndWait returns a nonzero value.
+ void run() {
+ ErrorOr<std::string> ExeOrErr = sys::findProgramByName(Prog);
+ if (auto EC = ExeOrErr.getError())
+ fatal("unable to find " + Prog + " in PATH: " + EC.message());
+ StringRef Exe = Saver.save(*ExeOrErr);
+ // The resolved path becomes element 0 of the argument vector.
+ Args.insert(Args.begin(), Exe);
+ if (sys::ExecuteAndWait(Args[0], Args) != 0)
+ fatal("ExecuteAndWait failed: " +
+ llvm::join(Args.begin(), Args.end(), " "));
+ }
+ // Program name as given to the constructor (not yet resolved to a path).
+ StringRef Prog;
+ // Arguments collected so far; run() inserts the executable path in front.
+ std::vector<StringRef> Args;
+} // anonymous namespace
+// Returns /machine's value. The argument is matched case-insensitively
+// (it is lowercased first): "x64"/"amd64", "x86"/"i386", "arm" and "arm64"
+// are recognized.
+// NOTE(review): as shown here the StringSwitch chain has no terminating
+// default clause and the fatal() below follows an unconditional return --
+// lines appear to have been lost from this excerpt; confirm against the
+// original file before relying on it.
+MachineTypes getMachineType(StringRef S) {
+ MachineTypes MT = StringSwitch<MachineTypes>(S.lower())
+ .Cases("x64", "amd64", AMD64)
+ .Cases("x86", "i386", I386)
+ .Case("arm", ARMNT)
+ .Case("arm64", ARM64)
+ return MT;
+ fatal("unknown /machine argument: " + S);
+// Returns the /machine spelling for MT: "arm", "arm64", "x64" or "x86".
+// Any other machine type reaches llvm_unreachable.
+StringRef machineToStr(MachineTypes MT) {
+ switch (MT) {
+ case ARMNT:
+ return "arm";
+ case ARM64:
+ return "arm64";
+ case AMD64:
+ return "x64";
+ case I386:
+ return "x86";
+ default:
+ llvm_unreachable("unknown machine type");
+ }
+// Parses a string in the form of "<integer>[,<integer>]".
+// The first number is always stored to *Addr. The second number is stored to
+// *Size only when Size is non-null and text follows the comma; otherwise
+// *Size is left untouched. A malformed number is a fatal error.
+void parseNumbers(StringRef Arg, uint64_t *Addr, uint64_t *Size) {
+ StringRef S1, S2;
+ std::tie(S1, S2) = Arg.split(',');
+ // getAsInteger returns true on failure.
+ if (S1.getAsInteger(0, *Addr))
+ fatal("invalid number: " + S1);
+ if (Size && !S2.empty() && S2.getAsInteger(0, *Size))
+ fatal("invalid number: " + S2);
+// Parses a string in the form of "<integer>[.<integer>]".
+// If second number is not present, Minor is set to 0.
+// A malformed number is a fatal error.
+void parseVersion(StringRef Arg, uint32_t *Major, uint32_t *Minor) {
+ StringRef S1, S2;
+ std::tie(S1, S2) = Arg.split('.');
+ // getAsInteger returns true on failure.
+ if (S1.getAsInteger(0, *Major))
+ fatal("invalid number: " + S1);
+ // Reset Minor first so that a missing second component yields 0.
+ *Minor = 0;
+ if (!S2.empty() && S2.getAsInteger(0, *Minor))
+ fatal("invalid number: " + S2);
+// Parses the argument of /guard, a comma-separated list of case-insensitive
+// keywords, and stores the result in Config->GuardCF:
+//   "no" -> Off, "nolongjmp" -> NoLongJmp, "cf" or "longjmp" -> Full.
+// Every keyword assigns Config->GuardCF, so the last one in the list wins.
+// An unknown keyword is a fatal error.
+void parseGuard(StringRef FullArg) {
+ SmallVector<StringRef, 1> SplitArgs;
+ FullArg.split(SplitArgs, ",");
+ for (StringRef Arg : SplitArgs) {
+ if (Arg.equals_lower("no"))
+ Config->GuardCF = GuardCFLevel::Off;
+ else if (Arg.equals_lower("nolongjmp"))
+ Config->GuardCF = GuardCFLevel::NoLongJmp;
+ else if (Arg.equals_lower("cf") || Arg.equals_lower("longjmp"))
+ Config->GuardCF = GuardCFLevel::Full;
+ else
+ fatal("invalid argument to /guard: " + Arg);
+ }
+// Parses a string in the form of "<subsystem>[,<integer>[.<integer>]]".
+// The subsystem name is matched case-insensitively and stored to *Sys. When a
+// version follows the comma it is handed to parseVersion(), which fills in
+// *Major and *Minor; otherwise those are left untouched.
+// NOTE(review): as shown here the StringSwitch chain is not terminated and
+// the fatal() call is unconditional -- further cases, the default clause and
+// the guarding condition appear to be missing from this excerpt; confirm
+// against the original file.
+void parseSubsystem(StringRef Arg, WindowsSubsystem *Sys, uint32_t *Major,
+ uint32_t *Minor) {
+ StringRef SysStr, Ver;
+ std::tie(SysStr, Ver) = Arg.split(',');
+ *Sys = StringSwitch<WindowsSubsystem>(SysStr.lower())
+ .Case("efi_application", IMAGE_SUBSYSTEM_EFI_APPLICATION)
+ .Case("efi_boot_service_driver", IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER)
+ .Case("efi_rom", IMAGE_SUBSYSTEM_EFI_ROM)
+ .Case("efi_runtime_driver", IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER)
+ fatal("unknown subsystem: " + SysStr);
+ if (!Ver.empty())
+ parseVersion(Ver, Major, Minor);
+// Parses a string of the form "<from>=<to>" (the /alternatename argument).
+// Results are directly written to Config->AlternateNames.
+// Both sides must be non-empty. Repeating an existing mapping is accepted;
+// mapping an already-known <from> to a different <to> is a fatal error.
+void parseAlternateName(StringRef S) {
+ StringRef From, To;
+ std::tie(From, To) = S.split('=');
+ if (From.empty() || To.empty())
+ fatal("/alternatename: invalid argument: " + S);
+ auto It = Config->AlternateNames.find(From);
+ if (It != Config->AlternateNames.end() && It->second != To)
+ fatal("/alternatename: conflicts: " + S);
+ // It doubles as the insertion hint.
+ Config->AlternateNames.insert(It, std::make_pair(From, To));
+// Parses a string of the form "<from>=<to>" (the /merge argument).
+// Results are directly written to Config->Merge.
+// Both sides must be non-empty, and neither side may be ".rsrc" or ".reloc";
+// violations are fatal errors. If <from> already has a different target, the
+// existing entry is kept and a warning is printed.
+void parseMerge(StringRef S) {
+ StringRef From, To;
+ std::tie(From, To) = S.split('=');
+ if (From.empty() || To.empty())
+ fatal("/merge: invalid argument: " + S);
+ if (From == ".rsrc" || To == ".rsrc")
+ fatal("/merge: cannot merge '.rsrc' with any section");
+ if (From == ".reloc" || To == ".reloc")
+ fatal("/merge: cannot merge '.reloc' with any section");
+ // insert() does not overwrite; Pair.second tells whether From was new.
+ auto Pair = Config->Merge.insert(std::make_pair(From, To));
+ bool Inserted = Pair.second;
+ if (!Inserted) {
+ StringRef Existing = Pair.first->second;
+ if (Existing != To)
+ warn(S + ": already merged into " + Existing);
+ }
+// Converts the attribute letters of a /section argument into a bit mask.
+// Letters are matched case-insensitively (the string is lowercased first);
+// d, e, k, p, r, s and w are accepted, and any other character is a fatal
+// error.
+// NOTE(review): as shown here every case only breaks, so Ret stays 0 for any
+// valid input -- the per-letter flag assignments appear to be missing from
+// this excerpt; confirm against the original file.
+static uint32_t parseSectionAttributes(StringRef S) {
+ uint32_t Ret = 0;
+ for (char C : S.lower()) {
+ switch (C) {
+ case 'd':
+ break;
+ case 'e':
+ break;
+ case 'k':
+ break;
+ case 'p':
+ break;
+ case 'r':
+ break;
+ case 's':
+ break;
+ case 'w':
+ break;
+ default:
+ fatal("/section: invalid argument: " + S);
+ }
+ }
+ return Ret;
+// Parses /section option argument, which has the form "<name>,<attributes>".
+// Both parts must be non-empty (fatal error otherwise). The attribute mask
+// computed by parseSectionAttributes() is stored in Config->Section[<name>],
+// replacing any earlier value for the same name.
+void parseSection(StringRef S) {
+ StringRef Name, Attrs;
+ std::tie(Name, Attrs) = S.split(',');
+ if (Name.empty() || Attrs.empty())
+ fatal("/section: invalid argument: " + S);
+ Config->Section[Name] = parseSectionAttributes(Attrs);
+// Parses /aligncomm option argument, which has the form "<symbol>,<integer>".
+// The integer is a power-of-two exponent: the alignment recorded in
+// Config->AlignComm[<symbol>] is 1 << <integer>. If the symbol is given more
+// than once, the largest alignment is kept.
+// Malformed input is reported with error() (not fatal()) and ignored.
+void parseAligncomm(StringRef S) {
+ StringRef Name, Align;
+ std::tie(Name, Align) = S.split(',');
+ if (Name.empty() || Align.empty()) {
+ error("/aligncomm: invalid argument: " + S);
+ return;
+ }
+ int V;
+ // getAsInteger returns true on failure. Also reject exponents that cannot
+ // be shifted into an int: a negative shift count, or one that moves the bit
+ // into or past the sign bit, is undefined behavior.
+ if (Align.getAsInteger(0, V) || V < 0 || V > 30) {
+ error("/aligncomm: invalid argument: " + S);
+ return;
+ }
+ Config->AlignComm[Name] = std::max(Config->AlignComm[Name], 1 << V);
+// Parses a string in the form of "EMBED[,ID=<integer>]|NO".
+// Results are directly written to Config.
+// Keywords are matched case-insensitively. "NO" selects Configuration::No;
+// "EMBED" selects Configuration::Embed and the optional ID is stored in
+// Config->ManifestID. Anything else is a fatal error.
+void parseManifest(StringRef Arg) {
+ if (Arg.equals_lower("no")) {
+ Config->Manifest = Configuration::No;
+ return;
+ }
+ if (!Arg.startswith_lower("embed"))
+ fatal("invalid option " + Arg);
+ Config->Manifest = Configuration::Embed;
+ // Drop the keyword; what remains is either empty or ",ID=<integer>".
+ Arg = Arg.substr(strlen("embed"));
+ if (Arg.empty())
+ return;
+ if (!Arg.startswith_lower(",id="))
+ fatal("invalid option " + Arg);
+ Arg = Arg.substr(strlen(",id="));
+ // getAsInteger returns true on failure.
+ if (Arg.getAsInteger(0, Config->ManifestID))
+ fatal("invalid option " + Arg);
+// Parses a string in the form of "level=<string>|uiAccess=<string>|NO".
+// Results are directly written to Config.
+// "NO" clears Config->ManifestUAC. Otherwise the argument is consumed as a
+// sequence of "level=" / "uiaccess=" items (keys matched case-insensitively);
+// each value runs up to the next space. Any other text is a fatal error.
+void parseManifestUAC(StringRef Arg) {
+ if (Arg.equals_lower("no")) {
+ Config->ManifestUAC = false;
+ return;
+ }
+ // Each iteration strips leading whitespace and consumes one item; the loop
+ // ends when nothing is left.
+ for (;;) {
+ Arg = Arg.ltrim();
+ if (Arg.empty())
+ return;
+ if (Arg.startswith_lower("level=")) {
+ Arg = Arg.substr(strlen("level="));
+ std::tie(Config->ManifestLevel, Arg) = Arg.split(" ");
+ continue;
+ }
+ if (Arg.startswith_lower("uiaccess=")) {
+ Arg = Arg.substr(strlen("uiaccess="));
+ std::tie(Config->ManifestUIAccess, Arg) = Arg.split(" ");
+ continue;
+ }
+ fatal("invalid option " + Arg);
+ }
+// An RAII temporary file class that automatically removes a temporary file.
+namespace {
+class TemporaryFile {
+ // Creates a temporary file named from "lld-" + Prefix and the extension
+ // Extn, and writes Contents into it when Contents is non-empty.
+ // Failure to create or open the file is a fatal error.
+ TemporaryFile(StringRef Prefix, StringRef Extn, StringRef Contents = "") {
+ SmallString<128> S;
+ if (auto EC = sys::fs::createTemporaryFile("lld-" + Prefix, Extn, S))
+ fatal("cannot create a temporary file: " + EC.message());
+ Path = S.str();
+ if (!Contents.empty()) {
+ std::error_code EC;
+ raw_fd_ostream OS(Path, EC, sys::fs::F_None);
+ if (EC)
+ fatal("failed to open " + Path + ": " + EC.message());
+ OS << Contents;
+ }
+ }
+ // Move constructor: takes over Obj's path. Obj is left with the empty
+ // default path, so its destructor removes nothing.
+ TemporaryFile(TemporaryFile &&Obj) {
+ std::swap(Path, Obj.Path);
+ }
+ // Removes the file unless this object has no path (moved-from).
+ // Failure to remove the file is a fatal error.
+ ~TemporaryFile() {
+ if (Path.empty())
+ return;
+ if (sys::fs::remove(Path))
+ fatal("failed to remove " + Path);
+ }
+ // Returns a memory buffer of this temporary file.
+ // Note that this function does not leave the file open,
+ // so it is safe to remove the file immediately after this function
+ // is called (you cannot remove an opened file on Windows.)
+ std::unique_ptr<MemoryBuffer> getMemoryBuffer() {
+ // IsVolatileSize=true forces MemoryBuffer to not use mmap().
+ return CHECK(MemoryBuffer::getFile(Path, /*FileSize=*/-1,
+ /*RequiresNullTerminator=*/false,
+ /*IsVolatileSize=*/true),
+ "could not open " + Path);
+ }
+ // Path of the temporary file; empty once moved from.
+ std::string Path;
+// Builds the default manifest XML from Config and returns it as a string.
+// A <trustInfo> element is emitted when Config->ManifestUAC is set, and a
+// <dependency> element when Config->ManifestDependency is non-empty.
+static std::string createDefaultXml() {
+ std::string Ret;
+ raw_string_ostream OS(Ret);
+ // Emit the XML. Note that we do *not* verify that the XML attributes are
+ // syntactically correct. This is intentional for link.exe compatibility.
+ OS << "<?xml version=\"1.0\" standalone=\"yes\"?>\n"
+ << "<assembly xmlns=\"urn:schemas-microsoft-com:asm.v1\"\n"
+ << " manifestVersion=\"1.0\">\n";
+ if (Config->ManifestUAC) {
+ // ManifestLevel and ManifestUIAccess are inserted verbatim; no quotes are
+ // added around them here.
+ OS << " <trustInfo>\n"
+ << " <security>\n"
+ << " <requestedPrivileges>\n"
+ << " <requestedExecutionLevel level=" << Config->ManifestLevel
+ << " uiAccess=" << Config->ManifestUIAccess << "/>\n"
+ << " </requestedPrivileges>\n"
+ << " </security>\n"
+ << " </trustInfo>\n";
+ }
+ if (!Config->ManifestDependency.empty()) {
+ OS << " <dependency>\n"
+ << " <dependentAssembly>\n"
+ << " <assemblyIdentity " << Config->ManifestDependency << " />\n"
+ << " </dependentAssembly>\n"
+ << " </dependency>\n";
+ }
+ OS << "</assembly>\n";
+ // str() flushes the stream into Ret and returns it.
+ return OS.str();
+// Merges DefaultXml with every file listed in Config->ManifestInput using
+// the in-process WindowsManifestMerger and returns the merged manifest text.
+// Any merge failure is a fatal error.
+static std::string createManifestXmlWithInternalMt(StringRef DefaultXml) {
+ // The merger takes a MemoryBuffer, so wrap a copy of the default XML in one.
+ std::unique_ptr<MemoryBuffer> DefaultXmlCopy =
+ MemoryBuffer::getMemBufferCopy(DefaultXml);
+ windows_manifest::WindowsManifestMerger Merger;
+ if (auto E = Merger.merge(*DefaultXmlCopy.get()))
+ fatal("internal manifest tool failed on default xml: " +
+ toString(std::move(E)));
+ for (StringRef Filename : Config->ManifestInput) {
+ std::unique_ptr<MemoryBuffer> Manifest =
+ check(MemoryBuffer::getFile(Filename));
+ if (auto E = Merger.merge(*Manifest.get()))
+ fatal("internal manifest tool failed on file " + Filename + ": " +
+ toString(std::move(E)));
+ }
+ return Merger.getMergedManifest().get()->getBuffer();
+// Merges DefaultXml with every file listed in Config->ManifestInput by
+// running the external mt.exe tool, and returns the merged manifest text.
+// Both the default manifest and mt.exe's output go through temporary files.
+static std::string createManifestXmlWithExternalMt(StringRef DefaultXml) {
+ // Create the default manifest file as a temporary file.
+ TemporaryFile Default("defaultxml", "manifest");
+ std::error_code EC;
+ raw_fd_ostream OS(Default.Path, EC, sys::fs::F_Text);
+ if (EC)
+ fatal("failed to open " + Default.Path + ": " + EC.message());
+ OS << DefaultXml;
+ // Close explicitly before mt.exe is run on the file.
+ OS.close();
+ // Merge user-supplied manifests if they are given. Since libxml2 is not
+ // enabled, we must shell out to Microsoft's mt.exe tool.
+ TemporaryFile User("user", "manifest");
+ Executor E("mt.exe");
+ E.add("/manifest");
+ E.add(Default.Path);
+ for (StringRef Filename : Config->ManifestInput) {
+ E.add("/manifest");
+ E.add(Filename);
+ }
+ E.add("/nologo");
+ E.add("/out:" + StringRef(User.Path));
+ E.run();
+ // Read back the merged result that mt.exe wrote to the second temp file.
+ return CHECK(MemoryBuffer::getFile(User.Path), "could not open " + User.Path)
+ .get()
+ ->getBuffer();
+// Returns the final manifest XML. Without user-supplied manifest inputs this
+// is just the default XML; otherwise the inputs are merged in, using the
+// internal merger when windows_manifest::isAvailable() and the external tool
+// otherwise.
+static std::string createManifestXml() {
+ std::string DefaultXml = createDefaultXml();
+ if (Config->ManifestInput.empty())
+ return DefaultXml;
+ if (windows_manifest::isAvailable())
+ return createManifestXmlWithInternalMt(DefaultXml);
+ return createManifestXmlWithExternalMt(DefaultXml);
+// Allocates a writable buffer, named "<output file>.manifest.res", sized for
+// one resource entry that holds ManifestSize bytes of manifest data. The size
+// starts from the resource header parts (prefix, IDs, suffix) plus
+// ManifestSize, passed through alignTo().
+// NOTE(review): the alignTo() call is not closed in this excerpt -- its
+// alignment argument (and possibly further size terms) appear to be missing;
+// confirm against the original file.
+static std::unique_ptr<WritableMemoryBuffer>
+createMemoryBufferForManifestRes(size_t ManifestSize) {
+ size_t ResSize = alignTo(
+ sizeof(object::WinResHeaderPrefix) + sizeof(object::WinResIDs) +
+ sizeof(object::WinResHeaderSuffix) + ManifestSize,
+ return WritableMemoryBuffer::getNewMemBuffer(ResSize, Config->OutputFile +
+ ".manifest.res");
+// Writes the .res file header at Buf: the COFF::WinResMagic bytes followed by
+// WIN_RES_NULL_ENTRY_SIZE zero bytes. Buf is taken by reference and is
+// advanced past everything written.
+static void writeResFileHeader(char *&Buf) {
+ memcpy(Buf, COFF::WinResMagic, sizeof(COFF::WinResMagic));
+ Buf += sizeof(COFF::WinResMagic);
+ memset(Buf, 0, object::WIN_RES_NULL_ENTRY_SIZE);
+ Buf += object::WIN_RES_NULL_ENTRY_SIZE;
+// Writes the header of the manifest resource entry at Buf, in three parts:
+// prefix (data and header sizes), type/name IDs, and suffix. Buf is taken by
+// reference and is advanced past the header, so on return it points at the
+// position where the ManifestSize bytes of manifest data belong.
+static void writeResEntryHeader(char *&Buf, size_t ManifestSize) {
+ // Write the prefix.
+ auto *Prefix = reinterpret_cast<object::WinResHeaderPrefix *>(Buf);
+ Prefix->DataSize = ManifestSize;
+ // HeaderSize covers all three header parts.
+ Prefix->HeaderSize = sizeof(object::WinResHeaderPrefix) +
+ sizeof(object::WinResIDs) +
+ sizeof(object::WinResHeaderSuffix);
+ Buf += sizeof(object::WinResHeaderPrefix);
+ // Write the Type/Name IDs.
+ auto *IDs = reinterpret_cast<object::WinResIDs *>(Buf);
+ IDs->setType(RT_MANIFEST);
+ IDs->setName(Config->ManifestID);
+ Buf += sizeof(object::WinResIDs);
+ // Write the suffix.
+ auto *Suffix = reinterpret_cast<object::WinResHeaderSuffix *>(Buf);
+ Suffix->DataVersion = 0;
+ Suffix->MemoryFlags = object::WIN_RES_PURE_MOVEABLE;
+ Suffix->Language = SUBLANG_ENGLISH_US;
+ Suffix->Version = 0;
+ Suffix->Characteristics = 0;
+ Buf += sizeof(object::WinResHeaderSuffix);
+// Create a resource file containing a manifest XML.
+// The returned buffer holds, in order: the .res file header, one resource
+// entry header, and the manifest text.
+std::unique_ptr<MemoryBuffer> createManifestRes() {
+ std::string Manifest = createManifestXml();
+ std::unique_ptr<WritableMemoryBuffer> Res =
+ createMemoryBufferForManifestRes(Manifest.size());
+ // Buf is a write cursor; each helper advances it past what it wrote.
+ char *Buf = Res->getBufferStart();
+ writeResFileHeader(Buf);
+ writeResEntryHeader(Buf, Manifest.size());
+ // Copy the manifest data into the .res file.
+ std::copy(Manifest.begin(), Manifest.end(), Buf);
+ return std::move(Res);
+// Writes the manifest XML to a standalone file. The file name is
+// Config->ManifestFile, or "<output file>.manifest" when that is empty.
+// Failure to open the file is a fatal error.
+void createSideBySideManifest() {
+ std::string Path = Config->ManifestFile;
+ if (Path == "")
+ Path = Config->OutputFile + ".manifest";
+ std::error_code EC;
+ raw_fd_ostream Out(Path, EC, sys::fs::F_Text);
+ if (EC)
+ fatal("failed to create manifest: " + EC.message());
+ Out << createManifestXml();
+// Parse a string in the form of
+// "<name>[=<internalname>][,@ordinal[,NONAME]][,DATA][,PRIVATE]"
+// or "<name>=<dllname>.<name>".
+// Used for parsing /export arguments.
+// The keywords NONAME, DATA, CONSTANT and PRIVATE are matched
+// case-insensitively. Ordinals must be in the range 1..65535.
+// NOTE(review): the "goto err" statements have no visible "err:" label and
+// the fatal() at the end follows an unconditional return -- the label line
+// appears to be missing from this excerpt; confirm against the original file.
+Export parseExport(StringRef Arg) {
+ Export E;
+ StringRef Rest;
+ // Everything up to the first comma is the name part; Rest holds the flags.
+ std::tie(E.Name, Rest) = Arg.split(",");
+ if (E.Name.empty())
+ goto err;
+ if (E.Name.contains('=')) {
+ StringRef X, Y;
+ std::tie(X, Y) = E.Name.split("=");
+ // If "<name>=<dllname>.<name>".
+ if (Y.contains(".")) {
+ E.Name = X;
+ E.ForwardTo = Y;
+ // A forwarder returns here, so any trailing flags in Rest are ignored.
+ return E;
+ }
+ // Otherwise X is the exported (external) name and Y the internal name.
+ E.ExtName = X;
+ E.Name = Y;
+ if (E.Name.empty())
+ goto err;
+ }
+ // If "<name>=<internalname>[,@ordinal[,NONAME]][,DATA][,PRIVATE]"
+ while (!Rest.empty()) {
+ StringRef Tok;
+ std::tie(Tok, Rest) = Rest.split(",");
+ if (Tok.equals_lower("noname")) {
+ // NONAME is only valid after an ordinal has been given.
+ if (E.Ordinal == 0)
+ goto err;
+ E.Noname = true;
+ continue;
+ }
+ if (Tok.equals_lower("data")) {
+ E.Data = true;
+ continue;
+ }
+ if (Tok.equals_lower("constant")) {
+ E.Constant = true;
+ continue;
+ }
+ if (Tok.equals_lower("private")) {
+ E.Private = true;
+ continue;
+ }
+ if (Tok.startswith("@")) {
+ int32_t Ord;
+ // getAsInteger returns true on failure.
+ if (Tok.substr(1).getAsInteger(0, Ord))
+ goto err;
+ if (Ord <= 0 || 65535 < Ord)
+ goto err;
+ E.Ordinal = Ord;
+ continue;
+ }
+ // Unknown token.
+ goto err;
+ }
+ return E;
+ fatal("invalid /export: " + Arg);
+// Returns the name under which Sym is exported. The input is returned
+// unchanged unless the target machine is I386 and Sym starts with an
+// underscore.
+static StringRef undecorate(StringRef Sym) {
+ bool HasUnderscore = Sym.startswith("_");
+ if (Config->Machine != I386 || !HasUnderscore)
+ return Sym;
+ // In MSVC mode, a fully decorated stdcall function is exported
+ // as-is with the leading underscore (with type IMPORT_NAME).
+ // In MinGW mode, a decorated stdcall function gets the underscore
+ // removed, just like normal cdecl functions.
+ bool HasAt = Sym.contains('@');
+ if (HasAt && !Config->MinGW)
+ return Sym;
+ return Sym.substr(1);
+// Convert stdcall/fastcall style symbols into unsuffixed symbols,
+// with or without a leading underscore. (MinGW specific.)
+// Sym is the decorated name; Prefix selects whether the result should carry a
+// leading underscore. Newly built strings are allocated through Saver.
+static StringRef killAt(StringRef Sym, bool Prefix) {
+ if (Sym.empty())
+ return Sym;
+ // Strip any trailing stdcall suffix
+ // The search starts at index 1 so that a leading '@' is kept for the
+ // fastcall check below.
+ Sym = Sym.substr(0, Sym.find('@', 1));
+ if (!Sym.startswith("@")) {
+ // No leading '@': add the underscore only if requested and not present.
+ if (Prefix && !Sym.startswith("_"))
+ return Saver.save("_" + Sym);
+ return Sym;
+ }
+ // For fastcall, remove the leading @ and replace it with an
+ // underscore, if prefixes are used.
+ Sym = Sym.substr(1);
+ if (Prefix)
+ Sym = Saver.save("_" + Sym);
+ return Sym;
+// Performs error checking on all /export arguments.
+// It also fills in the SymbolName and ExportName of every export, removes
+// duplicate exports and sorts the list by export name. Ordinals that are
+// still 0 are left untouched here.
+void fixupExports() {
+ // Symbol ordinals must be unique.
+ std::set<uint16_t> Ords;
+ for (Export &E : Config->Exports) {
+ // 0 means "no ordinal given", so it cannot clash.
+ if (E.Ordinal == 0)
+ continue;
+ if (!Ords.insert(E.Ordinal).second)
+ fatal("duplicate export ordinal: " + E.Name);
+ }
+ // Compute SymbolName: the export's own name for forwarders and exports
+ // without a symbol, otherwise the name of the symbol (following a weak
+ // alias if the symbol is an Undefined with one set).
+ for (Export &E : Config->Exports) {
+ Symbol *Sym = E.Sym;
+ if (!E.ForwardTo.empty() || !Sym) {
+ E.SymbolName = E.Name;
+ } else {
+ if (auto *U = dyn_cast<Undefined>(Sym))
+ if (U->WeakAlias)
+ Sym = U->WeakAlias;
+ E.SymbolName = Sym->getName();
+ }
+ }
+ // Compute ExportName: the undecorated external name if one was given,
+ // otherwise the undecorated name.
+ for (Export &E : Config->Exports) {
+ if (!E.ForwardTo.empty()) {
+ E.ExportName = undecorate(E.Name);
+ } else {
+ E.ExportName = undecorate(E.ExtName.empty() ? E.Name : E.ExtName);
+ }
+ }
+ // With KillAt on I386, strip the '@' decoration from all names. Only
+ // ExportName is produced without a leading underscore.
+ if (Config->KillAt && Config->Machine == I386) {
+ for (Export &E : Config->Exports) {
+ E.Name = killAt(E.Name, true);
+ E.ExportName = killAt(E.ExportName, false);
+ E.ExtName = killAt(E.ExtName, true);
+ E.SymbolName = killAt(E.SymbolName, true);
+ }
+ }
+ // Uniquify by name.
+ // The first export seen for a given ExportName wins. A warning is emitted
+ // only for a later entry that has the same Name but otherwise differs.
+ DenseMap<StringRef, Export *> Map(Config->Exports.size());
+ std::vector<Export> V;
+ for (Export &E : Config->Exports) {
+ auto Pair = Map.insert(std::make_pair(E.ExportName, &E));
+ bool Inserted = Pair.second;
+ if (Inserted) {
+ V.push_back(E);
+ continue;
+ }
+ Export *Existing = Pair.first->second;
+ if (E == *Existing || E.Name != Existing->Name)
+ continue;
+ warn("duplicate /export option: " + E.Name);
+ }
+ Config->Exports = std::move(V);
+ // Sort by name.
+ std::sort(Config->Exports.begin(), Config->Exports.end(),
+ [](const Export &A, const Export &B) {
+ return A.ExportName < B.ExportName;
+ });
+// Assigns an ordinal to every export that does not have one yet
+// (Ordinal == 0). New ordinals continue upwards from the largest ordinal
+// already in use. Running past the 16-bit ordinal range is a fatal error.
+void assignExportOrdinals() {
+ // Assign unique ordinals if default (= 0).
+ // The running maximum is kept in 32 bits: with a 16-bit counter, ++Max
+ // would silently wrap around to 0 after 65535 and hand out ordinal 0 or
+ // ordinals that are already taken.
+ uint32_t Max = 0;
+ for (Export &E : Config->Exports)
+ Max = std::max(Max, (uint32_t)E.Ordinal);
+ for (Export &E : Config->Exports)
+ if (E.Ordinal == 0)
+ E.Ordinal = ++Max;
+ // Ordinals are 16-bit values (parseExport rejects anything above 65535).
+ if (Max > 65535)
+ fatal("too many exported symbols (got " + Twine(Max) + ", max 65535)");
+// Handles one /failifmismatch argument of the form "key=value".
+// The first value seen for a key is recorded in Config->MustMatch; a later
+// argument with the same key but a different value is a fatal error, as is
+// an argument with an empty key or value.
+void checkFailIfMismatch(StringRef Arg) {
+ std::pair<StringRef, StringRef> KeyValue = Arg.split('=');
+ StringRef Key = KeyValue.first;
+ StringRef Value = KeyValue.second;
+ if (Key.empty() || Value.empty())
+ fatal("/failifmismatch: invalid argument: " + Arg);
+ StringRef Previous = Config->MustMatch[Key];
+ if (!Previous.empty() && Value != Previous)
+ fatal("/failifmismatch: mismatch detected: " + Previous + " and " + Value +
+ " for key " + Key);
+ Config->MustMatch[Key] = Value;
+// Convert Windows resource files (.res files) to a .obj file.
+// Every buffer in MBs must be recognized as a Windows resource file; anything
+// else, and any parse or write failure, is a fatal error. The returned
+// reference points into a buffer that is handed to make<>() before returning.
+MemoryBufferRef convertResToCOFF(ArrayRef<MemoryBufferRef> MBs) {
+ object::WindowsResourceParser Parser;
+ // Feed all inputs into a single parser.
+ for (MemoryBufferRef MB : MBs) {
+ std::unique_ptr<object::Binary> Bin = check(object::createBinary(MB));
+ object::WindowsResource *RF = dyn_cast<object::WindowsResource>(Bin.get());
+ if (!RF)
+ fatal("cannot compile non-resource file as resource");
+ if (auto EC = Parser.parse(RF))
+ fatal("failed to parse .res file: " + toString(std::move(EC)));
+ }
+ Expected<std::unique_ptr<MemoryBuffer>> E =
+ llvm::object::writeWindowsResourceCOFF(Config->Machine, Parser);
+ if (!E)
+ fatal("failed to write .res to COFF: " + toString(E.takeError()));
+ // Take the reference before the unique_ptr is moved out of E.
+ MemoryBufferRef MBRef = **E;
+ make<std::unique_ptr<MemoryBuffer>>(std::move(*E)); // take ownership
+ return MBRef;
+// Create OptTable
+// Options.inc is included twice below, each time with a different macro
+// defined, so that the same file expands first to the prefix arrays and then
+// to the rows of InfoTable.
+// Create prefix string literals used in Options.td
+#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
+#include "Options.inc"
+#undef PREFIX
+// Create table mapping all options defined in Options.td
+static const llvm::opt::OptTable::Info InfoTable[] = {
+// The macro arguments are emitted in a different order than they are
+// received; keep the order below in sync with llvm::opt::OptTable::Info.
+#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X7, X8, X9, X10, X11, X12) \
+ {X1, X2, X10, X11, OPT_##ID, llvm::opt::Option::KIND##Class, \
+ X9, X8, OPT_##GROUP, OPT_##ALIAS, X7, X12},
+#include "Options.inc"
+#undef OPTION
+// NOTE(review): the second constructor argument is presumably the
+// ignore-case flag of OptTable -- confirm against the OptTable constructor.
+COFFOptTable::COFFOptTable() : OptTable(InfoTable, true) {}
+// Set color diagnostics according to --color-diagnostics={auto,always,never}
+// or --no-color-diagnostics flags.
+// Only the last of these options on the command line is honored. If none is
+// given, or the value is "auto", ColorDiagnostics is left unchanged.
+static void handleColorDiagnostics(opt::InputArgList &Args) {
+ auto *Arg = Args.getLastArg(OPT_color_diagnostics, OPT_color_diagnostics_eq,
+ OPT_no_color_diagnostics);
+ if (!Arg)
+ return;
+ if (Arg->getOption().getID() == OPT_color_diagnostics) {
+ errorHandler().ColorDiagnostics = true;
+ } else if (Arg->getOption().getID() == OPT_no_color_diagnostics) {
+ errorHandler().ColorDiagnostics = false;
+ } else {
+ // The remaining possibility is the "=<value>" form.
+ StringRef S = Arg->getValue();
+ if (S == "always")
+ errorHandler().ColorDiagnostics = true;
+ else if (S == "never")
+ errorHandler().ColorDiagnostics = false;
+ else if (S != "auto")
+ error("unknown option: --color-diagnostics=" + S);
+ }
+// Selects the tokenizer used for response files, based on the last
+// rsp-quoting option. "windows" selects the Windows tokenizer; any other value
+// selects the GNU tokenizer, after an error is reported if it is not "posix".
+static cl::TokenizerCallback getQuotingStyle(opt::InputArgList &Args) {
+ auto *QuotingArg = Args.getLastArg(OPT_rsp_quoting);
+ // The COFF linker always defaults to Windows quoting.
+ if (!QuotingArg)
+ return cl::TokenizeWindowsCommandLine;
+ StringRef Style = QuotingArg->getValue();
+ if (Style == "windows")
+ return cl::TokenizeWindowsCommandLine;
+ if (Style != "posix")
+ error("invalid response file quoting: " + Style);
+ return cl::TokenizeGNUCommandLine;
+// Parses a given list of options.
+// The arguments are parsed twice: once to find the response file quoting
+// style, and again after response files have been expanded. A missing option
+// argument is fatal; unknown options only produce a warning.
+opt::InputArgList ArgParser::parse(ArrayRef<const char *> Argv) {
+ // Make InputArgList from string vectors.
+ unsigned MissingIndex;
+ unsigned MissingCount;
+ // We need to get the quoting style for response files before parsing all
+ // options so we parse here before and ignore all the options but
+ // --rsp-quoting.
+ opt::InputArgList Args = Table.ParseArgs(Argv, MissingIndex, MissingCount);
+ // Expand response files (arguments in the form of @<filename>)
+ // and then parse the argument again.
+ SmallVector<const char *, 256> ExpandedArgv(Argv.data(), Argv.data() + Argv.size());
+ cl::ExpandResponseFiles(Saver, getQuotingStyle(Args), ExpandedArgv);
+ // NOTE(review): drop_front() skips the first element, presumably the
+ // program name -- confirm against the callers.
+ Args = Table.ParseArgs(makeArrayRef(ExpandedArgv).drop_front(), MissingIndex,
+ MissingCount);
+ // Print the real command line if response files are expanded.
+ // Expansion is detected by a change in the number of arguments.
+ if (Args.hasArg(OPT_verbose) && Argv.size() != ExpandedArgv.size()) {
+ std::string Msg = "Command line:";
+ for (const char *S : ExpandedArgv)
+ Msg += " " + std::string(S);
+ message(Msg);
+ }
+ // Save the command line after response file expansion so we can write it to
+ // the PDB if necessary.
+ Config->Argv = {ExpandedArgv.begin(), ExpandedArgv.end()};
+ // Handle /WX early since it converts missing argument warnings to errors.
+ errorHandler().FatalWarnings = Args.hasFlag(OPT_WX, OPT_WX_no, false);
+ if (MissingCount)
+ fatal(Twine(Args.getArgString(MissingIndex)) + ": missing argument");
+ handleColorDiagnostics(Args);
+ for (auto *Arg : Args.filtered(OPT_UNKNOWN))
+ warn("ignoring unknown argument: " + Arg->getSpelling());
+ if (Args.hasArg(OPT_lib))
+ warn("ignoring /lib since it's not the first argument");
+ return Args;
+// Tokenizes and parses a given string as command line in .drectve section.
+// /EXPORT options are processed in fastpath.
+// Returns the parsed non-export options together with the raw values of all
+// export options (the text after "/export:" or "-export:").
+std::pair<opt::InputArgList, std::vector<StringRef>>
+ArgParser::parseDirectives(StringRef S) {
+ std::vector<StringRef> Exports;
+ SmallVector<const char *, 16> Rest;
+ for (StringRef Tok : tokenize(S)) {
+ // Both spellings have the same length, so one strlen() serves for both.
+ if (Tok.startswith_lower("/export:") || Tok.startswith_lower("-export:"))
+ Exports.push_back(Tok.substr(strlen("/export:")));
+ else
+ Rest.push_back(Tok.data());
+ }
+ // Make InputArgList from unparsed string vectors.
+ unsigned MissingIndex;
+ unsigned MissingCount;
+ opt::InputArgList Args = Table.ParseArgs(Rest, MissingIndex, MissingCount);
+ if (MissingCount)
+ fatal(Twine(Args.getArgString(MissingIndex)) + ": missing argument");
+ for (auto *Arg : Args.filtered(OPT_UNKNOWN))
+ warn("ignoring unknown argument: " + Arg->getSpelling());
+ return {std::move(Args), std::move(Exports)};
+// link.exe has an interesting feature. If LINK or _LINK_ environment
+// variables exist, their contents are handled as command line strings.
+// So you can pass extra arguments using them.
+// Both variables are inserted right after the first element of Argv. Because
+// _LINK_ is inserted second at the same position, its tokens end up before
+// those of LINK. Argv is assumed to be non-empty (std::next(begin) is used).
+opt::InputArgList ArgParser::parseLINK(std::vector<const char *> Argv) {
+ // Concatenate LINK env and command line arguments, and then parse them.
+ if (Optional<std::string> S = Process::GetEnv("LINK")) {
+ std::vector<const char *> V = tokenize(*S);
+ Argv.insert(std::next(Argv.begin()), V.begin(), V.end());
+ }
+ if (Optional<std::string> S = Process::GetEnv("_LINK_")) {
+ std::vector<const char *> V = tokenize(*S);
+ Argv.insert(std::next(Argv.begin()), V.begin(), V.end());
+ }
+ return parse(Argv);
+// Splits S into arguments using Windows command line quoting rules and
+// returns the pieces. Storage for the pieces is obtained through Saver.
+std::vector<const char *> ArgParser::tokenize(StringRef S) {
+ SmallVector<const char *, 16> Pieces;
+ cl::TokenizeWindowsCommandLine(S, Saver, Pieces);
+ std::vector<const char *> Result(Pieces.begin(), Pieces.end());
+ return Result;
+// Prints the option help text to outs(). The usage line is built from Argv0
+// followed by " [options] file...", and the title is "LLVM Linker".
+void printHelp(const char *Argv0) {
+ COFFOptTable().PrintHelp(outs(),
+ (std::string(Argv0) + " [options] file...").c_str(),
+ "LLVM Linker", false);
+} // namespace coff
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/COFF/ICF.cpp b/contrib/llvm/tools/lld/COFF/ICF.cpp
new file mode 100644
index 000000000000..f6904eb7d24f
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/ICF.cpp
@@ -0,0 +1,318 @@
+//===- ICF.cpp ------------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// ICF is short for Identical Code Folding. That is a size optimization to
+// identify and merge two or more read-only sections (typically functions)
+// that happened to have the same contents. It usually reduces output size
+// by a few percent.
+// On Windows, ICF is enabled by default.
+// See ELF/ICF.cpp for the details about the algorithm.
+#include "ICF.h"
+#include "Chunks.h"
+#include "Symbols.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Threads.h"
+#include "lld/Common/Timer.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Parallel.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/xxhash.h"
+#include <algorithm>
+#include <atomic>
+#include <vector>
+using namespace llvm;
+namespace lld {
+namespace coff {
+static Timer ICFTimer("ICF", Timer::root());
+// Implements one Identical Code Folding pass over a list of chunks.
+// run() is the only member called from outside the class (see doICF()), so
+// it is the only public one; everything else is an implementation detail.
+class ICF {
+public:
+ void run(ArrayRef<Chunk *> V);
+private:
+ // Splits the class [Begin, End) using equalsConstant (Constant == true) or
+ // equalsVariable (Constant == false).
+ void segregate(size_t Begin, size_t End, bool Constant);
+ bool assocEquals(const SectionChunk *A, const SectionChunk *B);
+ bool equalsConstant(const SectionChunk *A, const SectionChunk *B);
+ bool equalsVariable(const SectionChunk *A, const SectionChunk *B);
+ // NOTE(review): no definition of getHash is visible in this part of the
+ // file -- confirm whether it is still needed.
+ uint32_t getHash(SectionChunk *C);
+ bool isEligible(SectionChunk *C);
+ size_t findBoundary(size_t Begin, size_t End);
+ void forEachClassRange(size_t Begin, size_t End,
+ std::function<void(size_t, size_t)> Fn);
+ void forEachClass(std::function<void(size_t, size_t)> Fn);
+ // Sections that passed isEligible(); filled in by run().
+ std::vector<SectionChunk *> Chunks;
+ // Incremented once per forEachClass() call. Cnt % 2 selects the "current"
+ // slot of SectionChunk::Class and (Cnt + 1) % 2 the slot being written.
+ int Cnt = 0;
+ // Set by segregate() whenever a class was split.
+ std::atomic<bool> Repeat = {false};
+// Returns true if section C is subject of ICF.
+// Microsoft's documentation
+// (https://msdn.microsoft.com/en-us/library/bxwfs976.aspx; visited April
+// 2017) says that /opt:icf folds both functions and read-only data.
+// Despite that, the MSVC linker folds only functions. We found
+// a few instances of programs that are not safe for data merging.
+// Therefore, we merge only functions just like the MSVC tool. However, we also
+// merge read-only sections in a couple of cases where the address of the
+// section is insignificant to the user program and the behaviour matches that
+// of the Visual C++ linker.
+bool ICF::isEligible(SectionChunk *C) {
+ // Non-comdat chunks, dead chunks, and writable chunks are not eligible.
+ bool Writable = C->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_WRITE;
+ if (!C->isCOMDAT() || !C->Live || Writable)
+ return false;
+ // Code sections are eligible.
+ if (C->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE)
+ return true;
+ // .pdata and .xdata unwind info sections are eligible.
+ // Only the part of the section name before the first '$' is compared.
+ StringRef OutSecName = C->getSectionName().split('$').first;
+ if (OutSecName == ".pdata" || OutSecName == ".xdata")
+ return true;
+ // So are vtables.
+ // They are recognized by the "??_7" prefix of the section's symbol name.
+ if (C->Sym && C->Sym->getName().startswith("??_7"))
+ return true;
+ // Anything else not in an address-significance table is eligible.
+ return !C->KeepUnique;
+// Split an equivalence class into smaller classes.
+// [Begin, End) is a range of Chunks that currently form one class. Constant
+// selects the comparison: equalsConstant when true, equalsVariable otherwise.
+// New class IDs are written to the (Cnt + 1) % 2 slot of each chunk.
+void ICF::segregate(size_t Begin, size_t End, bool Constant) {
+ while (Begin < End) {
+ // Divide [Begin, End) into two. Let Mid be the start index of the
+ // second group.
+ // Chunks[Begin] is the representative: everything equal to it is moved to
+ // the front, keeping relative order.
+ auto Bound = std::stable_partition(
+ Chunks.begin() + Begin + 1, Chunks.begin() + End, [&](SectionChunk *S) {
+ if (Constant)
+ return equalsConstant(Chunks[Begin], S);
+ return equalsVariable(Chunks[Begin], S);
+ });
+ size_t Mid = Bound - Chunks.begin();
+ // Split [Begin, End) into [Begin, Mid) and [Mid, End). We use Mid as an
+ // equivalence class ID because every group ends with a unique index.
+ for (size_t I = Begin; I < Mid; ++I)
+ Chunks[I]->Class[(Cnt + 1) % 2] = Mid;
+ // If we created a group, we need to iterate the main loop again.
+ if (Mid != End)
+ Repeat = true;
+ // Continue with the remainder, which gets a new representative.
+ Begin = Mid;
+ }
+// Returns true if two sections' associative children are equal.
+// Children are compared by their current class IDs, in order. Children whose
+// section name starts with ".debug" or is ".gfids$y" / ".gljmp$y" are
+// left out of the comparison.
+bool ICF::assocEquals(const SectionChunk *A, const SectionChunk *B) {
+ // Collects the current class ID of every child that takes part in the
+ // comparison.
+ auto ChildClasses = [&](const SectionChunk *SC) {
+ std::vector<uint32_t> Classes;
+ for (const SectionChunk *C : SC->children())
+ if (!C->SectionName.startswith(".debug") &&
+ C->SectionName != ".gfids$y" && C->SectionName != ".gljmp$y")
+ Classes.push_back(C->Class[Cnt % 2]);
+ return Classes;
+ };
+ return ChildClasses(A) == ChildClasses(B);
+// Compare "non-moving" part of two sections, namely everything
+// except relocation targets.
+// Relocation targets are still compared, but only up to their current class:
+// two DefinedRegular targets match if they have the same value and their
+// chunks are currently in the same class.
+bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) {
+ // The three-iterator std::equal below requires equal lengths.
+ if (A->Relocs.size() != B->Relocs.size())
+ return false;
+ // Compare relocations.
+ auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) {
+ if (R1.Type != R2.Type ||
+ R1.VirtualAddress != R2.VirtualAddress) {
+ return false;
+ }
+ Symbol *B1 = A->File->getSymbol(R1.SymbolTableIndex);
+ Symbol *B2 = B->File->getSymbol(R2.SymbolTableIndex);
+ if (B1 == B2)
+ return true;
+ if (auto *D1 = dyn_cast<DefinedRegular>(B1))
+ if (auto *D2 = dyn_cast<DefinedRegular>(B2))
+ return D1->getValue() == D2->getValue() &&
+ D1->getChunk()->Class[Cnt % 2] == D2->getChunk()->Class[Cnt % 2];
+ return false;
+ };
+ if (!std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq))
+ return false;
+ // Compare section attributes and contents.
+ // The cheaper scalar comparisons come before the content comparison.
+ return A->getOutputCharacteristics() == B->getOutputCharacteristics() &&
+ A->SectionName == B->SectionName &&
+ A->Header->SizeOfRawData == B->Header->SizeOfRawData &&
+ A->Checksum == B->Checksum && A->getContents() == B->getContents() &&
+ assocEquals(A, B);
+// Compare "moving" part of two sections, namely relocation targets.
+// NOTE(review): unlike equalsConstant, the relocation counts are not checked
+// here; this presumably relies on A and B already being in a class produced
+// by the constant pass -- confirm.
+bool ICF::equalsVariable(const SectionChunk *A, const SectionChunk *B) {
+ // Compare relocations.
+ auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) {
+ Symbol *B1 = A->File->getSymbol(R1.SymbolTableIndex);
+ Symbol *B2 = B->File->getSymbol(R2.SymbolTableIndex);
+ if (B1 == B2)
+ return true;
+ // Different symbols still match if both are DefinedRegular and their
+ // chunks are currently in the same class.
+ if (auto *D1 = dyn_cast<DefinedRegular>(B1))
+ if (auto *D2 = dyn_cast<DefinedRegular>(B2))
+ return D1->getChunk()->Class[Cnt % 2] == D2->getChunk()->Class[Cnt % 2];
+ return false;
+ };
+ return std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(),
+ Eq) &&
+ assocEquals(A, B);
+// Returns the index of the first chunk in (Begin, End) whose current class
+// differs from that of Chunks[Begin], or End if there is no such chunk.
+size_t ICF::findBoundary(size_t Begin, size_t End) {
+ size_t Pos = Begin + 1;
+ for (; Pos < End; ++Pos)
+ if (Chunks[Begin]->Class[Cnt % 2] != Chunks[Pos]->Class[Cnt % 2])
+ break;
+ return Pos < End ? Pos : End;
+// Calls Fn(First, Last) once for every run of chunks in [Begin, End) that
+// share the same current class, in increasing index order.
+void ICF::forEachClassRange(size_t Begin, size_t End,
+ std::function<void(size_t, size_t)> Fn) {
+ for (size_t First = Begin; First < End;) {
+ size_t Last = findBoundary(First, End);
+ Fn(First, Last);
+ First = Last;
+ }
+// Call Fn on each class group.
+// Fn receives the half-open index range [Begin, End) of one class. Cnt is
+// incremented once per call, which swaps the "current" and "next" Class slots.
+void ICF::forEachClass(std::function<void(size_t, size_t)> Fn) {
+ // If the number of sections are too small to use threading,
+ // call Fn sequentially.
+ if (Chunks.size() < 1024) {
+ forEachClassRange(0, Chunks.size(), Fn);
+ ++Cnt;
+ return;
+ }
+ // Shard into non-overlapping intervals, and call Fn in parallel.
+ // The sharding must be completed before any calls to Fn are made
+ // so that Fn can modify the Chunks in its shard without causing data
+ // races.
+ const size_t NumShards = 256;
+ size_t Step = Chunks.size() / NumShards;
+ size_t Boundaries[NumShards + 1];
+ Boundaries[0] = 0;
+ Boundaries[NumShards] = Chunks.size();
+ // Each interior boundary is moved forward to the next class boundary so
+ // that no class is split across two shards.
+ // NOTE(review): this assumes parallelForEachN excludes its end index, which
+ // matches Boundaries[NumShards] being set by hand above -- confirm.
+ parallelForEachN(1, NumShards, [&](size_t I) {
+ Boundaries[I] = findBoundary((I - 1) * Step, Chunks.size());
+ });
+ parallelForEachN(1, NumShards + 1, [&](size_t I) {
+ // Skip shards that ended up empty or inverted.
+ if (Boundaries[I - 1] < Boundaries[I]) {
+ forEachClassRange(Boundaries[I - 1], Boundaries[I], Fn);
+ }
+ });
+ ++Cnt;
+// Merge identical COMDAT sections.
+// Two sections are considered the same if their section headers,
+// contents and relocations are all the same.
+// Vec is the list of all chunks; only SectionChunks that pass isEligible()
+// take part in folding.
+void ICF::run(ArrayRef<Chunk *> Vec) {
+ ScopedTimer T(ICFTimer);
+ // Collect only mergeable sections and group by hash value.
+ // Ineligible sections each get their own unique class ID instead.
+ uint32_t NextId = 1;
+ for (Chunk *C : Vec) {
+ if (auto *SC = dyn_cast<SectionChunk>(C)) {
+ if (isEligible(SC))
+ Chunks.push_back(SC);
+ else
+ SC->Class[0] = NextId++;
+ }
+ }
+ // Make sure that ICF doesn't merge sections that are being handled by string
+ // tail merging.
+ for (auto &P : MergeChunk::Instances)
+ for (SectionChunk *SC : P.second->Sections)
+ SC->Class[0] = NextId++;
+ // Initially, we use hash values to partition sections.
+ parallelForEach(Chunks, [&](SectionChunk *SC) {
+ SC->Class[0] = xxHash64(SC->getContents());
+ });
+ // Combine the hashes of the sections referenced by each section into its
+ // hash.
+ // Note that this loop variable shadows the member Cnt. After the two
+ // rounds the result is back in Class[0].
+ for (unsigned Cnt = 0; Cnt != 2; ++Cnt) {
+ parallelForEach(Chunks, [&](SectionChunk *SC) {
+ uint32_t Hash = SC->Class[Cnt % 2];
+ for (Symbol *B : SC->symbols())
+ if (auto *Sym = dyn_cast_or_null<DefinedRegular>(B))
+ Hash += Sym->getChunk()->Class[Cnt % 2];
+ // Set MSB to 1 to avoid collisions with non-hash classes.
+ SC->Class[(Cnt + 1) % 2] = Hash | (1U << 31);
+ });
+ }
+ // From now on, sections in Chunks are ordered so that sections in
+ // the same group are consecutive in the vector.
+ std::stable_sort(Chunks.begin(), Chunks.end(),
+ [](SectionChunk *A, SectionChunk *B) {
+ return A->Class[0] < B->Class[0];
+ });
+ // Compare static contents and assign unique IDs for each static content.
+ forEachClass([&](size_t Begin, size_t End) { segregate(Begin, End, true); });
+ // Split groups by comparing relocations until convergence is obtained.
+ // segregate() sets Repeat whenever it splits a class.
+ do {
+ Repeat = false;
+ forEachClass(
+ [&](size_t Begin, size_t End) { segregate(Begin, End, false); });
+ } while (Repeat);
+ log("ICF needed " + Twine(Cnt) + " iterations");
+ // Merge sections in the same class.
+ // The first chunk of each class is kept; the others are folded into it.
+ forEachClass([&](size_t Begin, size_t End) {
+ if (End - Begin == 1)
+ return;
+ log("Selected " + Chunks[Begin]->getDebugName());
+ for (size_t I = Begin + 1; I < End; ++I) {
+ log(" Removed " + Chunks[I]->getDebugName());
+ Chunks[Begin]->replace(Chunks[I]);
+ }
+ });
+// Entry point to ICF: runs a single folding pass over the given chunks using
+// a fresh ICF object.
+void doICF(ArrayRef<Chunk *> Chunks) {
+ ICF Pass;
+ Pass.run(Chunks);
+}
+} // namespace coff
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/COFF/ICF.h b/contrib/llvm/tools/lld/COFF/ICF.h
new file mode 100644
index 000000000000..9c54e0c9ec2d
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/ICF.h
@@ -0,0 +1,26 @@
+//===- ICF.h --------------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#ifndef LLD_COFF_ICF_H
+#define LLD_COFF_ICF_H
+#include "lld/Common/LLVM.h"
+#include "llvm/ADT/ArrayRef.h"
+namespace lld {
+namespace coff {
+// Forward declaration; this header only needs pointers to Chunk.
+class Chunk;
+// Entry point of Identical Code Folding. Chunks is the list of chunks to
+// consider for folding.
+void doICF(ArrayRef<Chunk *> Chunks);
+} // namespace coff
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/COFF/InputFiles.cpp b/contrib/llvm/tools/lld/COFF/InputFiles.cpp
new file mode 100644
index 000000000000..236c90ef0388
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/InputFiles.cpp
@@ -0,0 +1,590 @@
+//===- InputFiles.cpp -----------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "InputFiles.h"
+#include "Chunks.h"
+#include "Config.h"
+#include "Driver.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+#include "llvm-c/lto.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Target/TargetOptions.h"
+#include <cstring>
+#include <system_error>
+#include <utility>
+using namespace llvm;
+using namespace llvm::COFF;
+using namespace llvm::object;
+using namespace llvm::support::endian;
+using llvm::Triple;
+using llvm::support::ulittle32_t;
+namespace lld {
+namespace coff {
+// Definitions of the static per-file-kind instance lists.
+std::vector<ObjFile *> ObjFile::Instances;
+std::vector<ImportFile *> ImportFile::Instances;
+std::vector<BitcodeFile *> BitcodeFile::Instances;
+/// Checks that Source is compatible with being a weak alias to Target.
+/// If Source is Undefined and has no weak alias set, makes it a weak
+/// alias to Target.
+/// Does nothing if Source is not an Undefined symbol. F is the file passed to
+/// reportDuplicate() when a conflicting alias is found.
+static void checkAndSetWeakAlias(SymbolTable *Symtab, InputFile *F,
+ Symbol *Source, Symbol *Target) {
+ if (auto *U = dyn_cast<Undefined>(Source)) {
+ if (U->WeakAlias && U->WeakAlias != Target) {
+ // Weak aliases as produced by GCC are named in the form
+ // .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name
+ // of another symbol emitted near the weak symbol.
+ // Just use the definition from the first object file that defined
+ // this weak symbol.
+ if (Config->MinGW)
+ return;
+ Symtab->reportDuplicate(Source, F);
+ }
+ // Reached when no alias was set, the alias already equals Target, or a
+ // duplicate was reported above.
+ U->WeakAlias = Target;
+ }
+// Constructs an archive input file over buffer M. The archive itself is not
+// parsed here; see parse().
+ArchiveFile::ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {}
+// Opens the buffer as an archive and registers every entry of the archive's
+// symbol table with the symbol table as a lazy symbol.
+void ArchiveFile::parse() {
+ // Parse a MemoryBufferRef as an archive file.
+ File = CHECK(Archive::create(MB), this);
+ // Read the symbol table to construct Lazy objects.
+ for (const Archive::Symbol &Sym : File->symbols())
+ Symtab->addLazy(this, Sym);
+// Enqueues the archive member that contains the given symbol so that the
+// driver loads it. Each member is enqueued at most once.
+void ArchiveFile::addMember(const Archive::Symbol *Sym) {
+ const Archive::Child &C =
+ CHECK(Sym->getMember(),
+ "could not get the member for symbol " + Sym->getName());
+ // Do nothing if this member has already been enqueued. Members are
+ // identified by their offset within the archive.
+ if (!Seen.insert(C.getChildOffset()).second)
+ return;
+ Driver->enqueueArchiveMember(C, Sym->getName(), getName());
+// Returns the memory buffers of all members of the given archive, in
+// iteration order. Any failure to read a child, its buffer, or to iterate the
+// archive is a fatal error.
+std::vector<MemoryBufferRef> getArchiveMembers(Archive *File) {
+ std::vector<MemoryBufferRef> V;
+ // Err is filled in by the children() iteration and checked after the loop.
+ Error Err = Error::success();
+ for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) {
+ Archive::Child C =
+ CHECK(COrErr,
+ File->getFileName() + ": could not get the child of the archive");
+ MemoryBufferRef MBRef =
+ CHECK(C.getMemoryBufferRef(),
+ File->getFileName() +
+ ": could not get the buffer for a child of the archive");
+ V.push_back(MBRef);
+ }
+ if (Err)
+ fatal(File->getFileName() +
+ ": Archive::children failed: " + toString(std::move(Err)));
+ return V;
+// Parses the buffer as a COFF object file and reads its sections and symbols.
+// A buffer that is not a COFF object is a fatal error.
+void ObjFile::parse() {
+ // Parse a memory buffer as a COFF file.
+ std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), this);
+ if (auto *Obj = dyn_cast<COFFObjectFile>(Bin.get())) {
+ // Transfer ownership from Bin to COFFObj under the more specific type.
+ Bin.release();
+ COFFObj.reset(Obj);
+ } else {
+ fatal(toString(this) + " is not a COFF file");
+ }
+ // Read section and symbol tables.
+ initializeChunks();
+ initializeSymbols();
+// We set SectionChunk pointers in the SparseChunks vector to this value
+// temporarily to mark comdat sections as having an unknown resolution. As we
+// walk the object file's symbol table, once we visit either a leader symbol or
+// an associative section definition together with the parent comdat's leader,
+// we set the pointer to either nullptr (to mark the section as discarded) or a
+// valid SectionChunk for that section.
+// The value 1 is only a marker that differs from nullptr; it must never be
+// dereferenced.
+static SectionChunk *const PendingComdat = reinterpret_cast<SectionChunk *>(1);
+// Fills SparseChunks with one slot per section. Non-comdat sections are read
+// right away; comdat sections are marked PendingComdat for later resolution.
+void ObjFile::initializeChunks() {
+ uint32_t NumSections = COFFObj->getNumberOfSections();
+ Chunks.reserve(NumSections);
+ // Section numbers are 1-based here, so slot 0 stays unused.
+ SparseChunks.resize(NumSections + 1);
+ for (uint32_t I = 1; I < NumSections + 1; ++I) {
+ const coff_section *Sec;
+ if (auto EC = COFFObj->getSection(I, Sec))
+ fatal("getSection failed: #" + Twine(I) + ": " + EC.message());
+ if (Sec->Characteristics & IMAGE_SCN_LNK_COMDAT)
+ SparseChunks[I] = PendingComdat;
+ else
+ SparseChunks[I] = readSection(I, nullptr, "");
+ }
+// Creates a SectionChunk for section SectionNumber and files it into the
+// matching per-kind list. Returns nullptr, without creating a chunk, for
+// .drectve, .llvm_addrsig, DWARF sections when /debug is off, and sections
+// flagged IMAGE_SCN_LNK_REMOVE.
+// Def, if non-null, supplies the checksum stored on the chunk. LeaderName is
+// only used to detect string literal sections for tail merging.
+SectionChunk *ObjFile::readSection(uint32_t SectionNumber,
+ const coff_aux_section_definition *Def,
+ StringRef LeaderName) {
+ const coff_section *Sec;
+ if (auto EC = COFFObj->getSection(SectionNumber, Sec))
+ fatal("getSection failed: #" + Twine(SectionNumber) + ": " + EC.message());
+ StringRef Name;
+ if (auto EC = COFFObj->getSectionName(Sec, Name))
+ fatal("getSectionName failed: #" + Twine(SectionNumber) + ": " +
+ EC.message());
+ if (Name == ".drectve") {
+ // The section contents are kept as the directive string of this file.
+ // NOTE(review): the result of getSectionContents is not checked here.
+ ArrayRef<uint8_t> Data;
+ COFFObj->getSectionContents(Sec, Data);
+ Directives = std::string((const char *)Data.data(), Data.size());
+ return nullptr;
+ }
+ if (Name == ".llvm_addrsig") {
+ // Only remembered here; not turned into a chunk.
+ AddrsigSec = Sec;
+ return nullptr;
+ }
+ // Object files may have DWARF debug info or MS CodeView debug info
+ // (or both).
+ //
+ // DWARF sections don't need any special handling from the perspective
+ // of the linker; they are just a data section containing relocations.
+ // We can just link them to complete debug info.
+ //
+ // CodeView needs linker support. We need to interpret debug info,
+ // and then write it to a separate .pdb file.
+ // Ignore DWARF debug info unless /debug is given.
+ if (!Config->Debug && Name.startswith(".debug_"))
+ return nullptr;
+ if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
+ return nullptr;
+ auto *C = make<SectionChunk>(this, Sec);
+ if (Def)
+ C->Checksum = Def->CheckSum;
+ // CodeView sections are stored to a different vector because they are not
+ // linked in the regular manner.
+ if (C->isCodeView())
+ DebugChunks.push_back(C);
+ else if (Config->GuardCF != GuardCFLevel::Off && Name == ".gfids$y")
+ GuardFidChunks.push_back(C);
+ else if (Config->GuardCF != GuardCFLevel::Off && Name == ".gljmp$y")
+ GuardLJmpChunks.push_back(C);
+ else if (Name == ".sxdata")
+ SXDataChunks.push_back(C);
+ else if (Config->TailMerge && Sec->NumberOfRelocations == 0 &&
+ Name == ".rdata" && LeaderName.startswith("??_C@"))
+ // COFF sections that look like string literal sections (i.e. no
+ // relocations, in .rdata, leader symbol name matches the MSVC name mangling
+ // for string literals) are subject to string tail merging.
+ MergeChunk::addSection(C);
+ else
+ Chunks.push_back(C);
+ return C;
+// Convenience overload: takes the parent section number from the section
+// definition itself and forwards to the three-argument overload.
+void ObjFile::readAssociativeDefinition(
+ COFFSymbolRef Sym, const coff_aux_section_definition *Def) {
+ readAssociativeDefinition(Sym, Def, Def->getNumber(Sym.isBigObj()));
+// Resolves the section of Sym as an associative child of ParentSection.
+// The section is read and attached to the parent if the parent has a chunk,
+// discarded if the parent was discarded, and left untouched if the parent is
+// still pending.
+void ObjFile::readAssociativeDefinition(COFFSymbolRef Sym,
+ const coff_aux_section_definition *Def,
+ uint32_t ParentSection) {
+ SectionChunk *Parent = SparseChunks[ParentSection];
+ // If the parent is pending, it probably means that its section definition
+ // appears after us in the symbol table. Leave the associated section as
+ // pending; we will handle it during the second pass in initializeSymbols().
+ if (Parent == PendingComdat)
+ return;
+ // Check whether the parent is prevailing. If it is, so are we, and we read
+ // the section; otherwise mark it as discarded.
+ int32_t SectionNumber = Sym.getSectionNumber();
+ if (Parent) {
+ SparseChunks[SectionNumber] = readSection(SectionNumber, Def, "");
+ // readSection() may return nullptr, in which case there is nothing to
+ // attach.
+ if (SparseChunks[SectionNumber])
+ Parent->addAssociative(SparseChunks[SectionNumber]);
+ } else {
+ SparseChunks[SectionNumber] = nullptr;
+ }
+// Records, in PrevailingSectionMap, the section number of Sym under Sym's
+// name, provided the section has a chunk and that chunk is executable.
+void ObjFile::recordPrevailingSymbolForMingw(
+ COFFSymbolRef Sym, DenseMap<StringRef, uint32_t> &PrevailingSectionMap) {
+ // For comdat symbols in executable sections, where this is the copy
+ // of the section chunk we actually include instead of discarding it,
+ // add the symbol to a map to allow using it for implicitly
+ // associating .[px]data$<func> sections to it.
+ int32_t SectionNumber = Sym.getSectionNumber();
+ SectionChunk *SC = SparseChunks[SectionNumber];
+ if (SC && SC->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) {
+ // NOTE(review): the result of getSymbolName is not checked here.
+ StringRef Name;
+ COFFObj->getSymbolName(Sym, Name);
+ PrevailingSectionMap[Name] = SectionNumber;
+ }
+// If Sym is named ".pdata$<func>" or ".xdata$<func>" and <func> was recorded
+// in PrevailingSectionMap, treats Sym's section as associative to the section
+// recorded for <func>. Otherwise does nothing.
+void ObjFile::maybeAssociateSEHForMingw(
+ COFFSymbolRef Sym, const coff_aux_section_definition *Def,
+ const DenseMap<StringRef, uint32_t> &PrevailingSectionMap) {
+ StringRef Name;
+ COFFObj->getSymbolName(Sym, Name);
+ // consume_front() strips the prefix, leaving just <func> in Name.
+ if (Name.consume_front(".pdata$") || Name.consume_front(".xdata$")) {
+ // For MinGW, treat .[px]data$<func> as implicitly associative to
+ // the symbol <func>.
+ auto ParentSym = PrevailingSectionMap.find(Name);
+ if (ParentSym != PrevailingSectionMap.end())
+ readAssociativeDefinition(Sym, Def, ParentSym->second);
+ }
+// Creates the Symbol for a regular (section-defined) COFF symbol.
+// External symbols go through the symbol table: as a regular definition if
+// their section has a chunk, otherwise as an undefined symbol. Non-external
+// symbols become a nameless DefinedRegular, or nullptr if the section has no
+// chunk.
+Symbol *ObjFile::createRegular(COFFSymbolRef Sym) {
+ SectionChunk *SC = SparseChunks[Sym.getSectionNumber()];
+ if (Sym.isExternal()) {
+ StringRef Name;
+ COFFObj->getSymbolName(Sym, Name);
+ if (SC)
+ return Symtab->addRegular(this, Name, Sym.getGeneric(), SC);
+ // For MinGW symbols named .weak.* that point to a discarded section,
+ // don't create an Undefined symbol. If nothing ever refers to the symbol,
+ // everything should be fine. If something actually refers to the symbol
+ // (e.g. the undefined weak alias), linking will fail due to undefined
+ // references at the end.
+ if (Config->MinGW && Name.startswith(".weak."))
+ return nullptr;
+ return Symtab->addUndefined(Name, this, false);
+ }
+ if (SC)
+ return make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false,
+ /*IsExternal*/ false, Sym.getGeneric(), SC);
+ return nullptr;
+// Populates Symbols with one entry per slot of the COFF symbol table.
+//
+// The table is walked once; symbols whose section is still pending when they
+// are first seen are remembered and resolved in a second pass. Weak aliases
+// are wired up last, once every symbol they may point at has been created.
+void ObjFile::initializeSymbols() {
+  uint32_t NumSymbols = COFFObj->getNumberOfSymbols();
+  Symbols.resize(NumSymbols);
+
+  SmallVector<std::pair<Symbol *, uint32_t>, 8> WeakAliases;
+  std::vector<uint32_t> PendingIndexes;
+  PendingIndexes.reserve(NumSymbols);
+
+  DenseMap<StringRef, uint32_t> PrevailingSectionMap;
+  // Indexed by section number, hence the extra slot.
+  std::vector<const coff_aux_section_definition *> ComdatDefs(
+      COFFObj->getNumberOfSections() + 1);
+
+  for (uint32_t I = 0; I < NumSymbols; ++I) {
+    COFFSymbolRef COFFSym = check(COFFObj->getSymbol(I));
+    bool PrevailingComdat;
+    if (COFFSym.isUndefined()) {
+      Symbols[I] = createUndefined(COFFSym);
+    } else if (COFFSym.isWeakExternal()) {
+      Symbols[I] = createUndefined(COFFSym);
+      uint32_t TagIndex = COFFSym.getAux<coff_aux_weak_external>()->TagIndex;
+      WeakAliases.emplace_back(Symbols[I], TagIndex);
+    } else if (Optional<Symbol *> OptSym =
+                   createDefined(COFFSym, ComdatDefs, PrevailingComdat)) {
+      Symbols[I] = *OptSym;
+      if (Config->MinGW && PrevailingComdat)
+        recordPrevailingSymbolForMingw(COFFSym, PrevailingSectionMap);
+    } else {
+      // createDefined() returns None if a symbol belongs to a section that
+      // was pending at the point when the symbol was read. This can happen in
+      // two cases:
+      // 1) section definition symbol for a comdat leader;
+      // 2) symbol belongs to a comdat section associated with a section whose
+      //    section definition symbol appears later in the symbol table.
+      // In both of these cases, we can expect the section to be resolved by
+      // the time we finish visiting the remaining symbols in the symbol
+      // table. So we postpone the handling of this symbol until that time.
+      PendingIndexes.push_back(I);
+    }
+    // Auxiliary records occupy symbol table slots of their own; skip them.
+    I += COFFSym.getNumberOfAuxSymbols();
+  }
+
+  for (uint32_t I : PendingIndexes) {
+    COFFSymbolRef Sym = check(COFFObj->getSymbol(I));
+    if (const coff_aux_section_definition *Def = Sym.getSectionDefinition()) {
+      // Only associative comdats name a parent section in their definition;
+      // for MinGW, any other pending definition may still be tied to a
+      // function through its .[px]data$<func> name.
+      if (Def->Selection == llvm::COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
+        readAssociativeDefinition(Sym, Def);
+      else if (Config->MinGW)
+        maybeAssociateSEHForMingw(Sym, Def, PrevailingSectionMap);
+    }
+    if (SparseChunks[Sym.getSectionNumber()] == PendingComdat) {
+      StringRef Name;
+      COFFObj->getSymbolName(Sym, Name);
+      log("comdat section " + Name +
+          " without leader and unassociated, discarding");
+      continue;
+    }
+    Symbols[I] = createRegular(Sym);
+  }
+
+  for (auto &KV : WeakAliases) {
+    Symbol *Sym = KV.first;
+    uint32_t Idx = KV.second;
+    // The tag index comes straight from the input file; reject it rather
+    // than read past the end of Symbols.
+    if (Idx >= Symbols.size())
+      fatal(toString(this) +
+            ": weak external refers to non-existent symbol index " +
+            Twine(Idx));
+    checkAndSetWeakAlias(Symtab, this, Sym, Symbols[Idx]);
+  }
+}
+Symbol *ObjFile::createUndefined(COFFSymbolRef Sym) { // Registers Sym, by name, as an undefined symbol referenced from this file.
+ StringRef Name;
+ COFFObj->getSymbolName(Sym, Name);
+ return Symtab->addUndefined(Name, this, Sym.isWeakExternal()); // last argument marks weak externals; exact meaning is defined by SymbolTable — TODO confirm
+// Creates the Symbol for a defined (common, absolute or section-relative)
+// COFF symbol.
+//
+// Returns None when the symbol's section is still a pending comdat, in which
+// case the caller must retry later. An engaged Optional holding nullptr means
+// "no Symbol for this entry". Prevailing is set to true only when Sym is a
+// comdat leader whose section was chosen from this file.
+Optional<Symbol *> ObjFile::createDefined(
+    COFFSymbolRef Sym,
+    std::vector<const coff_aux_section_definition *> &ComdatDefs,
+    bool &Prevailing) {
+  Prevailing = false;
+  auto GetName = [&]() {
+    StringRef S;
+    COFFObj->getSymbolName(Sym, S);
+    return S;
+  };
+
+  if (Sym.isCommon()) {
+    auto *C = make<CommonChunk>(Sym);
+    Chunks.push_back(C);
+    return Symtab->addCommon(this, GetName(), Sym.getValue(), Sym.getGeneric(),
+                             C);
+  }
+
+  if (Sym.isAbsolute()) {
+    StringRef Name = GetName();
+
+    // Skip special symbols.
+    if (Name == "@comp.id")
+      return nullptr;
+    if (Name == "@feat.00") {
+      Feat00Flags = Sym.getValue();
+      return nullptr;
+    }
+
+    if (Sym.isExternal())
+      return Symtab->addAbsolute(Name, Sym);
+    return make<DefinedAbsolute>(Name, Sym);
+  }
+
+  int32_t SectionNumber = Sym.getSectionNumber();
+  if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG)
+    return nullptr;
+
+  if (llvm::COFF::isReservedSectionNumber(SectionNumber))
+    fatal(toString(this) + ": " + GetName() +
+          " should not refer to special section " + Twine(SectionNumber));
+
+  if ((uint32_t)SectionNumber >= SparseChunks.size())
+    fatal(toString(this) + ": " + GetName() +
+          " should not refer to non-existent section " + Twine(SectionNumber));
+
+  // Handle comdat leader symbols.
+  if (const coff_aux_section_definition *Def = ComdatDefs[SectionNumber]) {
+    // The first symbol seen after the section definition is the leader;
+    // clear the slot so later symbols in this section take the normal path.
+    ComdatDefs[SectionNumber] = nullptr;
+    Symbol *Leader;
+    if (Sym.isExternal()) {
+      std::tie(Leader, Prevailing) =
+          Symtab->addComdat(this, GetName(), Sym.getGeneric());
+    } else {
+      Leader = make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false,
+                                    /*IsExternal*/ false, Sym.getGeneric());
+      Prevailing = true;
+    }
+
+    if (Prevailing) {
+      SectionChunk *C = readSection(SectionNumber, Def, GetName());
+      SparseChunks[SectionNumber] = C;
+      C->Sym = cast<DefinedRegular>(Leader);
+      cast<DefinedRegular>(Leader)->Data = &C->Repl;
+    } else {
+      SparseChunks[SectionNumber] = nullptr;
+    }
+    return Leader;
+  }
+
+  // Read associative section definitions and prepare to handle the comdat
+  // leader symbol by setting the section's ComdatDefs pointer if we encounter a
+  // non-associative comdat.
+  if (SparseChunks[SectionNumber] == PendingComdat) {
+    if (const coff_aux_section_definition *Def = Sym.getSectionDefinition()) {
+      if (Def->Selection == llvm::COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
+        readAssociativeDefinition(Sym, Def);
+      else
+        ComdatDefs[SectionNumber] = Def;
+    }
+  }
+
+  // readAssociativeDefinition() writes to SparseChunks, so need to check again.
+  if (SparseChunks[SectionNumber] == PendingComdat)
+    return None;
+
+  return createRegular(Sym);
+}
+// Returns the machine type of the underlying COFF object, or
+// IMAGE_FILE_MACHINE_UNKNOWN when COFFObj is not set.
+MachineTypes ObjFile::getMachineType() {
+  if (COFFObj)
+    return static_cast<MachineTypes>(COFFObj->getMachine());
+  // Every path must return a value; this matches the InputFile default.
+  return IMAGE_FILE_MACHINE_UNKNOWN;
+}
+StringRef ltrim1(StringRef S, const char *Chars) { // Drops at most one leading character of S, and only if it occurs in Chars.
+ if (!S.empty() && strchr(Chars, S[0]))
+ return S.substr(1);
+ return S; // empty input or no match: returned unchanged
+// Parses a short import library member: validates its header, reads the
+// symbol name and DLL name stored after it, derives the external (exported)
+// name from the header's name type, and registers the import symbols.
+void ImportFile::parse() {
+  const char *Buf = MB.getBufferStart();
+  const char *End = MB.getBufferEnd();
+  const auto *Hdr = reinterpret_cast<const coff_import_header *>(Buf);
+
+  // Check if the total size is valid. The buffer has to hold a complete
+  // header before Hdr->SizeOfData may be read at all.
+  size_t Size = End - Buf;
+  if (Size < sizeof(*Hdr) || Size != sizeof(*Hdr) + Hdr->SizeOfData)
+    fatal("broken import library");
+
+  // Read names and create an __imp_ symbol.
+  // NOTE(review): both names are read as NUL-terminated strings; nothing
+  // here verifies that the terminators lie inside the buffer.
+  StringRef Name = Saver.save(StringRef(Buf + sizeof(*Hdr)));
+  StringRef ImpName = Saver.save("__imp_" + Name);
+  const char *NameStart = Buf + sizeof(coff_import_header) + Name.size() + 1;
+  DLLName = StringRef(NameStart);
+  StringRef ExtName;
+  switch (Hdr->getNameType()) {
+  case llvm::COFF::IMPORT_ORDINAL:
+    // Imported by ordinal: there is no external name.
+    ExtName = "";
+    break;
+  case llvm::COFF::IMPORT_NAME:
+    ExtName = Name;
+    break;
+  case llvm::COFF::IMPORT_NAME_NOPREFIX:
+    ExtName = ltrim1(Name, "?@_");
+    break;
+  case llvm::COFF::IMPORT_NAME_UNDECORATE:
+    // Strip one leading decoration character and everything from the
+    // first '@' onwards.
+    ExtName = ltrim1(Name, "?@_");
+    ExtName = ExtName.substr(0, ExtName.find('@'));
+    break;
+  }
+
+  this->Hdr = Hdr;
+  ExternalName = ExtName;
+
+  ImpSym = Symtab->addImportData(ImpName, this);
+  // If this was a duplicate, we logged an error but may continue;
+  // in this case, ImpSym is nullptr.
+  if (!ImpSym)
+    return;
+
+  if (Hdr->getType() == llvm::COFF::IMPORT_CONST)
+    static_cast<void>(Symtab->addImportData(Name, this));
+
+  // If type is function, we need to create a thunk which jump to an
+  // address pointed by the __imp_ symbol. (This allows you to call
+  // DLL functions just like regular non-DLL functions.)
+  if (Hdr->getType() == llvm::COFF::IMPORT_CODE)
+    ThunkSym = Symtab->addImportThunk(
+        Name, cast_or_null<DefinedImportData>(ImpSym), Hdr->Machine);
+}
+void BitcodeFile::parse() { // Loads the bitcode module and registers each of its symbols with the symbol table.
+ Obj = check(lto::InputFile::create(MemoryBufferRef(
+ MB.getBuffer(), Saver.save(ParentName + MB.getBufferIdentifier()))));
+ std::vector<std::pair<Symbol *, bool>> Comdat(Obj->getComdatTable().size()); // one (leader symbol, prevailing?) pair per comdat table entry
+ for (size_t I = 0; I != Obj->getComdatTable().size(); ++I)
+ Comdat[I] = Symtab->addComdat(this, Saver.save(Obj->getComdatTable()[I]));
+ for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) {
+ StringRef SymName = Saver.save(ObjSym.getName());
+ int ComdatIndex = ObjSym.getComdatIndex(); // -1 means the symbol is not in any comdat (see the check below)
+ Symbol *Sym;
+ if (ObjSym.isUndefined()) {
+ Sym = Symtab->addUndefined(SymName, this, false);
+ } else if (ObjSym.isCommon()) {
+ Sym = Symtab->addCommon(this, SymName, ObjSym.getCommonSize());
+ } else if (ObjSym.isWeak() && ObjSym.isIndirect()) {
+ // Weak external.
+ Sym = Symtab->addUndefined(SymName, this, true);
+ std::string Fallback = ObjSym.getCOFFWeakExternalFallback();
+ Symbol *Alias = Symtab->addUndefined(Saver.save(Fallback));
+ checkAndSetWeakAlias(Symtab, this, Sym, Alias);
+ } else if (ComdatIndex != -1) {
+ if (SymName == Obj->getComdatTable()[ComdatIndex]) // the symbol is the comdat's own leader
+ Sym = Comdat[ComdatIndex].first;
+ else if (Comdat[ComdatIndex].second) // non-leader member of a comdat whose flag from addComdat() is true
+ Sym = Symtab->addRegular(this, SymName);
+ else
+ Sym = Symtab->addUndefined(SymName, this, false); // flag is false: the member is only referenced, not defined, by this file
+ } else {
+ Sym = Symtab->addRegular(this, SymName);
+ }
+ Symbols.push_back(Sym); // kept in the same order as Obj->symbols()
+ }
+ Directives = Obj->getCOFFLinkerOpts();
+// Maps the architecture of the module's target triple onto a COFF machine
+// type. Architectures without a mapping yield IMAGE_FILE_MACHINE_UNKNOWN.
+MachineTypes BitcodeFile::getMachineType() {
+  switch (Triple(Obj->getTargetTriple()).getArch()) {
+  case Triple::x86_64:
+    return AMD64;
+  case Triple::x86:
+    return I386;
+  case Triple::arm:
+    return ARMNT;
+  case Triple::aarch64:
+    return ARM64;
+  default:
+    // A label needs a statement, and every path must return a value.
+    return IMAGE_FILE_MACHINE_UNKNOWN;
+  }
+}
+} // namespace coff
+} // namespace lld
+// Returns the last element of a path, which is supposed to be a filename.
+static StringRef getBasename(StringRef Path) {
+ return sys::path::filename(Path, sys::path::Style::windows);
+// Returns a string in the format of "foo.obj" or "bar.lib(foo.obj)".
+std::string lld::toString(const coff::InputFile *File) {
+ if (!File)
+ return "<internal>";
+ if (File->ParentName.empty()) // not an archive member: report the file name as-is
+ return File->getName();
+ return (getBasename(File->ParentName) + "(" + getBasename(File->getName()) +
+ ")")
+ .str(); // archive basename first, member basename inside the parentheses
diff --git a/contrib/llvm/tools/lld/COFF/InputFiles.h b/contrib/llvm/tools/lld/COFF/InputFiles.h
new file mode 100644
index 000000000000..ec802f2d0300
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/InputFiles.h
@@ -0,0 +1,280 @@
+//===- InputFiles.h ---------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Config.h"
+#include "lld/Common/LLVM.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/LTO/LTO.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/StringSaver.h"
+#include <memory>
+#include <set>
+#include <vector>
+namespace llvm {
+namespace pdb {
+class DbiModuleDescriptorBuilder;
+namespace lld {
+namespace coff {
+std::vector<MemoryBufferRef> getArchiveMembers(llvm::object::Archive *File);
+using llvm::COFF::MachineTypes;
+using llvm::object::Archive;
+using llvm::object::COFFObjectFile;
+using llvm::object::COFFSymbolRef;
+using llvm::object::coff_import_header;
+using llvm::object::coff_section;
+class Chunk;
+class Defined;
+class DefinedImportData;
+class DefinedImportThunk;
+class Lazy;
+class SectionChunk;
+class Symbol;
+class Undefined;
+// The root class of input files.
+class InputFile {
+ enum Kind { ArchiveKind, ObjectKind, ImportKind, BitcodeKind };
+ Kind kind() const { return FileKind; }
+ virtual ~InputFile() {}
+ // Returns the filename.
+ StringRef getName() const { return MB.getBufferIdentifier(); }
+ // Reads a file (the constructor doesn't do that).
+ virtual void parse() = 0;
+ // Returns the CPU type this file was compiled to.
+ virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; }
+ MemoryBufferRef MB;
+ // An archive file name if this file is created from an archive.
+ StringRef ParentName;
+ // Returns .drectve section contents if exist.
+ StringRef getDirectives() { return StringRef(Directives).trim(); }
+ InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {}
+ std::string Directives;
+ const Kind FileKind;
+// .lib or .a file.
+class ArchiveFile : public InputFile {
+ explicit ArchiveFile(MemoryBufferRef M);
+ static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; }
+ void parse() override;
+ // Enqueues an archive member load for the given symbol. If we've already
+ // enqueued a load for the same archive member, this function does nothing,
+ // which ensures that we don't load the same member more than once.
+ void addMember(const Archive::Symbol *Sym);
+ std::unique_ptr<Archive> File;
+ std::string Filename;
+ llvm::DenseSet<uint64_t> Seen;
+// .obj or .o file. This may be a member of an archive file.
+class ObjFile : public InputFile {
+ explicit ObjFile(MemoryBufferRef M) : InputFile(ObjectKind, M) {}
+ static bool classof(const InputFile *F) { return F->kind() == ObjectKind; }
+ void parse() override;
+ MachineTypes getMachineType() override;
+ ArrayRef<Chunk *> getChunks() { return Chunks; }
+ ArrayRef<SectionChunk *> getDebugChunks() { return DebugChunks; }
+ ArrayRef<SectionChunk *> getSXDataChunks() { return SXDataChunks; }
+ ArrayRef<SectionChunk *> getGuardFidChunks() { return GuardFidChunks; }
+ ArrayRef<SectionChunk *> getGuardLJmpChunks() { return GuardLJmpChunks; }
+ ArrayRef<Symbol *> getSymbols() { return Symbols; }
+ // Returns a Symbol object for the SymbolIndex'th symbol in the
+ // underlying object file.
+ Symbol *getSymbol(uint32_t SymbolIndex) {
+ return Symbols[SymbolIndex];
+ }
+ // Returns the underlying COFF file.
+ COFFObjectFile *getCOFFObj() { return COFFObj.get(); }
+ // Whether the object was already merged into the final PDB or not
+ bool wasProcessedForPDB() const { return !!ModuleDBI; }
+ static std::vector<ObjFile *> Instances;
+ // Flags in the absolute @feat.00 symbol if it is present. These usually
+ // indicate if an object was compiled with certain security features enabled
+ // like stack guard, safeseh, /guard:cf, or other things.
+ uint32_t Feat00Flags = 0;
+ // True if this object file is compatible with SEH. COFF-specific and
+ // x86-only. COFF spec 5.10.1. The .sxdata section.
+ bool hasSafeSEH() { return Feat00Flags & 0x1; }
+ // True if this file was compiled with /guard:cf.
+ bool hasGuardCF() { return Feat00Flags & 0x800; }
+ // Pointer to the PDB module descriptor builder. Various debug info records
+ // will reference object files by "module index", which is here. Things like
+ // source files and section contributions are also recorded here. Will be null
+ // if we are not producing a PDB.
+ llvm::pdb::DbiModuleDescriptorBuilder *ModuleDBI = nullptr;
+ const coff_section *AddrsigSec = nullptr;
+ // When using Microsoft precompiled headers, this is the PCH's key.
+ // The same key is used by both the precompiled object, and objects using the
+ // precompiled object. Any difference indicates out-of-date objects.
+ llvm::Optional<uint32_t> PCHSignature;
+ void initializeChunks();
+ void initializeSymbols();
+ SectionChunk *
+ readSection(uint32_t SectionNumber,
+ const llvm::object::coff_aux_section_definition *Def,
+ StringRef LeaderName);
+ void readAssociativeDefinition(
+ COFFSymbolRef COFFSym,
+ const llvm::object::coff_aux_section_definition *Def);
+ void readAssociativeDefinition(
+ COFFSymbolRef COFFSym,
+ const llvm::object::coff_aux_section_definition *Def,
+ uint32_t ParentSection);
+ void recordPrevailingSymbolForMingw(
+ COFFSymbolRef COFFSym,
+ llvm::DenseMap<StringRef, uint32_t> &PrevailingSectionMap);
+ void maybeAssociateSEHForMingw(
+ COFFSymbolRef Sym, const llvm::object::coff_aux_section_definition *Def,
+ const llvm::DenseMap<StringRef, uint32_t> &PrevailingSectionMap);
+ llvm::Optional<Symbol *>
+ createDefined(COFFSymbolRef Sym,
+ std::vector<const llvm::object::coff_aux_section_definition *>
+ &ComdatDefs,
+ bool &PrevailingComdat);
+ Symbol *createRegular(COFFSymbolRef Sym);
+ Symbol *createUndefined(COFFSymbolRef Sym);
+ std::unique_ptr<COFFObjectFile> COFFObj;
+ // List of all chunks defined by this file. This includes both section
+ // chunks and non-section chunks for common symbols.
+ std::vector<Chunk *> Chunks;
+ // CodeView debug info sections.
+ std::vector<SectionChunk *> DebugChunks;
+ // Chunks containing symbol table indices of exception handlers. Only used for
+ // 32-bit x86.
+ std::vector<SectionChunk *> SXDataChunks;
+ // Chunks containing symbol table indices of address taken symbols and longjmp
+ // targets. These are not linked into the final binary when /guard:cf is set.
+ std::vector<SectionChunk *> GuardFidChunks;
+ std::vector<SectionChunk *> GuardLJmpChunks;
+ // This vector contains the same chunks as Chunks, but they are
+ // indexed such that you can get a SectionChunk by section index.
+ // Nonexistent section indices are filled with null pointers.
+ // (Because section number is 1-based, the first slot is always a
+ // null pointer.)
+ std::vector<SectionChunk *> SparseChunks;
+ // This vector contains a list of all symbols defined or referenced by this
+ // file. They are indexed such that you can get a Symbol by symbol
+ // index. Nonexistent indices (which are occupied by auxiliary
+ // symbols in the real symbol table) are filled with null pointers.
+ std::vector<Symbol *> Symbols;
+// This type represents import library members that contain DLL names
+// and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7
+// for details about the format.
+class ImportFile : public InputFile {
+ explicit ImportFile(MemoryBufferRef M) : InputFile(ImportKind, M) {}
+ static bool classof(const InputFile *F) { return F->kind() == ImportKind; }
+ static std::vector<ImportFile *> Instances;
+ Symbol *ImpSym = nullptr; // the __imp_ symbol; set by parse(), stays null for a duplicate
+ Symbol *ThunkSym = nullptr; // set by parse() only for IMPORT_CODE members
+ std::string DLLName;
+ void parse() override;
+ StringRef ExternalName;
+ const coff_import_header *Hdr;
+ Chunk *Location = nullptr;
+ // We want to eliminate dllimported symbols if nothing actually refers to them.
+ // These "Live" bits are used to keep track of which import library members
+ // are actually in use.
+ //
+ // If the Live bit is turned off by MarkLive, Writer will ignore dllimported
+ // symbols provided by this import library member. We also track whether the
+ // imported symbol is used separately from whether the thunk is used in order
+ // to avoid creating unnecessary thunks.
+ bool Live = !Config->DoGC;
+ bool ThunkLive = !Config->DoGC;
+// Used for LTO.
+class BitcodeFile : public InputFile {
+ explicit BitcodeFile(MemoryBufferRef M) : InputFile(BitcodeKind, M) {}
+ static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; }
+ ArrayRef<Symbol *> getSymbols() { return Symbols; }
+ MachineTypes getMachineType() override;
+ static std::vector<BitcodeFile *> Instances;
+ std::unique_ptr<llvm::lto::InputFile> Obj;
+ void parse() override;
+ std::vector<Symbol *> Symbols;
+} // namespace coff
+std::string toString(const coff::InputFile *File);
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/COFF/LTO.cpp b/contrib/llvm/tools/lld/COFF/LTO.cpp
new file mode 100644
index 000000000000..92d9ff0937c0
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/LTO.cpp
@@ -0,0 +1,152 @@
+//===- LTO.cpp ------------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "LTO.h"
+#include "Config.h"
+#include "InputFiles.h"
+#include "Symbols.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Strings.h"
+#include "lld/Common/TargetOptionsCommandFlags.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/LTO/Caching.h"
+#include "llvm/LTO/Config.h"
+#include "llvm/LTO/LTO.h"
+#include "llvm/Object/SymbolicFile.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <vector>
+using namespace llvm;
+using namespace llvm::object;
+using namespace lld;
+using namespace lld::coff;
+static std::unique_ptr<lto::LTO> createLTO() { // Builds the lto::LTO instance from the linker's global Config.
+ lto::Config C;
+ C.Options = InitTargetOptionsFromCodeGenFlags();
+ // Always emit a section per function/datum with LTO. LLVM LTO should get most
+ // of the benefit of linker GC, but there are still opportunities for ICF.
+ C.Options.FunctionSections = true;
+ C.Options.DataSections = true;
+ // Use static reloc model on 32-bit x86 because it usually results in more
+ // compact code, and because there are also known code generation bugs when
+ // using the PIC model (see PR34306).
+ if (Config->Machine == COFF::IMAGE_FILE_MACHINE_I386)
+ C.RelocModel = Reloc::Static;
+ else
+ C.RelocModel = Reloc::PIC_;
+ C.DisableVerify = true;
+ C.DiagHandler = diagnosticHandler;
+ C.OptLevel = Config->LTOO;
+ C.CPU = GetCPUStr();
+ C.MAttrs = GetMAttrs();
+ if (Config->SaveTemps)
+ checkError(C.addSaveTemps(std::string(Config->OutputFile) + ".",
+ /*UseInputModulePath*/ true));
+ lto::ThinBackend Backend; // stays default-constructed when ThinLTOJobs is 0
+ if (Config->ThinLTOJobs != 0)
+ Backend = lto::createInProcessThinBackend(Config->ThinLTOJobs);
+ return llvm::make_unique<lto::LTO>(std::move(C), Backend,
+ Config->LTOPartitions);
+BitcodeCompiler::BitcodeCompiler() : LTOObj(createLTO()) {}
+BitcodeCompiler::~BitcodeCompiler() = default;
+static void undefine(Symbol *S) { replaceSymbol<Undefined>(S, S->getName()); }
+void BitcodeCompiler::add(BitcodeFile &F) { // Hands F's module to the LTO object together with one resolution per symbol.
+ lto::InputFile &Obj = *F.Obj;
+ unsigned SymNum = 0;
+ std::vector<Symbol *> SymBodies = F.getSymbols();
+ std::vector<lto::SymbolResolution> Resols(SymBodies.size());
+ // Provide a resolution to the LTO API for each symbol.
+ for (const lto::InputFile::Symbol &ObjSym : Obj.symbols()) {
+ Symbol *Sym = SymBodies[SymNum]; // SymBodies is walked in step with Obj.symbols()
+ lto::SymbolResolution &R = Resols[SymNum];
+ ++SymNum;
+ // Ideally we shouldn't check for SF_Undefined but currently IRObjectFile
+ // reports two symbols for module ASM defined. Without this check, lld
+ // flags an undefined in IR with a definition in ASM as prevailing.
+ // Once IRObjectFile is fixed to report only one symbol this hack can
+ // be removed.
+ R.Prevailing = !ObjSym.isUndefined() && Sym->getFile() == &F;
+ R.VisibleToRegularObj = Sym->IsUsedInRegularObj;
+ if (R.Prevailing)
+ undefine(Sym); // replaced by an Undefined of the same name — presumably so the compiled object can define it later; confirm
+ }
+ checkError(LTOObj->add(std::move(F.Obj), Resols)); // F.Obj is moved from here
+// Merge all the bitcode files we have seen, codegen the result
+// and return the resulting objects.
+std::vector<StringRef> BitcodeCompiler::compile() {
+ unsigned MaxTasks = LTOObj->getMaxTasks();
+ Buf.resize(MaxTasks); // one in-memory output buffer per task
+ Files.resize(MaxTasks); // filled per task by the cache callback below
+ // The /lldltocache option specifies the path to a directory in which to cache
+ // native object files for ThinLTO incremental builds. If a path was
+ // specified, configure LTO to use it as the cache directory.
+ lto::NativeObjectCache Cache;
+ if (!Config->LTOCache.empty())
+ Cache = check(lto::localCache(
+ Config->LTOCache, [&](size_t Task, std::unique_ptr<MemoryBuffer> MB) {
+ Files[Task] = std::move(MB);
+ }));
+ checkError(LTOObj->run(
+ [&](size_t Task) {
+ return llvm::make_unique<lto::NativeObjectStream>(
+ llvm::make_unique<raw_svector_ostream>(Buf[Task]));
+ },
+ Cache));
+ if (!Config->LTOCache.empty())
+ pruneCache(Config->LTOCache, Config->LTOCachePolicy);
+ std::vector<StringRef> Ret;
+ for (unsigned I = 0; I != MaxTasks; ++I) {
+ if (Buf[I].empty()) // nothing was written to this task's buffer
+ continue;
+ if (Config->SaveTemps) {
+ if (I == 0)
+ saveBuffer(Buf[I], Config->OutputFile + ".lto.obj");
+ else
+ saveBuffer(Buf[I], Config->OutputFile + Twine(I) + ".lto.obj");
+ }
+ Ret.emplace_back(Buf[I].data(), Buf[I].size()); // a view into Buf, not a copy
+ }
+ for (std::unique_ptr<MemoryBuffer> &File : Files)
+ if (File)
+ Ret.push_back(File->getBuffer()); // likewise a view into a buffer held in Files
+ return Ret;
diff --git a/contrib/llvm/tools/lld/COFF/LTO.h b/contrib/llvm/tools/lld/COFF/LTO.h
new file mode 100644
index 000000000000..f00924654780
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/LTO.h
@@ -0,0 +1,57 @@
+//===- LTO.h ----------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file provides a way to combine bitcode files into one COFF
+// file by compiling them using LLVM.
+// If LTO is in use, your input files are not in regular COFF files
+// but instead LLVM bitcode files. In that case, the linker has to
+// convert bitcode files into the native format so that we can create
+// a COFF file that contains native code. This file provides that
+// functionality.
+#ifndef LLD_COFF_LTO_H
+#define LLD_COFF_LTO_H
+#include "lld/Common/LLVM.h"
+#include "llvm/ADT/SmallString.h"
+#include <memory>
+#include <vector>
+namespace llvm {
+namespace lto {
+class LTO;
+namespace lld {
+namespace coff {
+class BitcodeFile;
+class InputFile;
+class BitcodeCompiler {
+ BitcodeCompiler();
+ ~BitcodeCompiler();
+ void add(BitcodeFile &F);
+ std::vector<StringRef> compile();
+ std::unique_ptr<llvm::lto::LTO> LTOObj;
+ std::vector<SmallString<0>> Buf;
+ std::vector<std::unique_ptr<MemoryBuffer>> Files;
diff --git a/contrib/llvm/tools/lld/COFF/MapFile.cpp b/contrib/llvm/tools/lld/COFF/MapFile.cpp
new file mode 100644
index 000000000000..fd4894250223
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/MapFile.cpp
@@ -0,0 +1,125 @@
+//===- MapFile.cpp --------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file implements the /lldmap option. It lists, in order and
+// hierarchically, the output sections, input sections, input files and
+// symbols:
+// Address Size Align Out File Symbol
+// 00201000 00000015 4 .text
+// 00201000 0000000e 4 test.o:(.text)
+// 0020100e 00000000 0 local
+// 00201005 00000000 0 f(int)
+#include "MapFile.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "Writer.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/Support/Parallel.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace llvm::object;
+using namespace lld;
+using namespace lld::coff;
+typedef DenseMap<const SectionChunk *, SmallVector<DefinedRegular *, 4>>
+ SymbolMapTy;
+// Column padding used to nest input sections and symbols under their
+// output section in the map file.
+static const std::string Indent8 = "        ";          // 8 spaces
+static const std::string Indent16 = "                "; // 16 spaces
+// Print out the first three columns of a line: address and size as
+// zero-padded 8-digit hex, alignment as decimal, each followed by a space.
+static void writeHeader(raw_ostream &OS, uint64_t Addr, uint64_t Size,
+                        uint64_t Align) {
+  // uint64_t is not `unsigned long long` on every platform, so convert
+  // explicitly to the type the `ll` conversions expect. The alignment is
+  // unsigned, hence %llu.
+  OS << format("%08llx %08llx %5llu ", (unsigned long long)Addr,
+               (unsigned long long)Size, (unsigned long long)Align);
+}
+// Returns a list of all symbols that we want to print out: every
+// DefinedRegular of every object file, except section definition symbols.
+static std::vector<DefinedRegular *> getSymbols() {
+  std::vector<DefinedRegular *> V;
+  for (ObjFile *File : ObjFile::Instances)
+    for (Symbol *B : File->getSymbols())
+      // dyn_cast_or_null already yields null for null entries and for other
+      // symbol kinds, so Sym needs no second null test.
+      if (auto *Sym = dyn_cast_or_null<DefinedRegular>(B))
+        if (!Sym->getCOFFSymbol().isSectionDefinition())
+          V.push_back(Sym);
+  return V;
+}
+// Returns a map from sections to their symbols, each list ordered by
+// address.
+static SymbolMapTy getSectionSyms(ArrayRef<DefinedRegular *> Syms) {
+  SymbolMapTy Ret;
+  for (DefinedRegular *S : Syms)
+    Ret[S->getChunk()].push_back(S);
+
+  // Sort symbols by address. A stable sort keeps symbols that share an
+  // address in their input order, so the map file is reproducible.
+  for (auto &It : Ret) {
+    SmallVectorImpl<DefinedRegular *> &V = It.second;
+    std::stable_sort(V.begin(), V.end(),
+                     [](DefinedRegular *A, DefinedRegular *B) {
+                       return A->getRVA() < B->getRVA();
+                     });
+  }
+  return Ret;
+}
+// Construct a map from symbols to their stringified representations.
+static DenseMap<DefinedRegular *, std::string>
+getSymbolStrings(ArrayRef<DefinedRegular *> Syms) {
+ std::vector<std::string> Str(Syms.size());
+ for_each_n(parallel::par, (size_t)0, Syms.size(), [&](size_t I) { // each iteration writes only its own Str[I]
+ raw_string_ostream OS(Str[I]);
+ writeHeader(OS, Syms[I]->getRVA(), 0, 0); // size and alignment columns are printed as 0 for symbols
+ OS << Indent16 << toString(*Syms[I]);
+ });
+ DenseMap<DefinedRegular *, std::string> Ret;
+ for (size_t I = 0, E = Syms.size(); I < E; ++I) // the map is filled sequentially, after the parallel phase
+ Ret[Syms[I]] = std::move(Str[I]);
+ return Ret;
+// Writes the link map to Config->MapFile; does nothing when no map file was
+// requested. Output sections are listed in the given order, each followed by
+// its section chunks and, under every chunk, the symbols defined in it.
+void coff::writeMapFile(ArrayRef<OutputSection *> OutputSections) {
+  if (Config->MapFile.empty())
+    return;
+
+  std::error_code EC;
+  raw_fd_ostream OS(Config->MapFile, EC, sys::fs::F_None);
+  if (EC)
+    fatal("cannot open " + Config->MapFile + ": " + EC.message());
+
+  // Collect symbol info that we want to print out.
+  std::vector<DefinedRegular *> Syms = getSymbols();
+  SymbolMapTy SectionSyms = getSectionSyms(Syms);
+  DenseMap<DefinedRegular *, std::string> SymStr = getSymbolStrings(Syms);
+
+  // Print out the header line. The titles are padded to the widths that
+  // writeHeader() and the Indent8/Indent16 prefixes give the data columns.
+  OS << "Address  Size     Align Out     In      Symbol\n";
+
+  // Print out file contents.
+  for (OutputSection *Sec : OutputSections) {
+    writeHeader(OS, Sec->getRVA(), Sec->getVirtualSize(), /*Align=*/PageSize);
+    OS << Sec->Name << '\n';
+
+    for (Chunk *C : Sec->Chunks) {
+      // Only section chunks are listed; other chunk kinds are skipped.
+      auto *SC = dyn_cast<SectionChunk>(C);
+      if (!SC)
+        continue;
+
+      writeHeader(OS, SC->getRVA(), SC->getSize(), SC->Alignment);
+      OS << Indent8 << SC->File->getName() << ":(" << SC->getSectionName()
+         << ")\n";
+      for (DefinedRegular *Sym : SectionSyms[SC])
+        OS << SymStr[Sym] << '\n';
+    }
+  }
+}
diff --git a/contrib/llvm/tools/lld/COFF/MapFile.h b/contrib/llvm/tools/lld/COFF/MapFile.h
new file mode 100644
index 000000000000..0d0d68ce3ead
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/MapFile.h
@@ -0,0 +1,22 @@
+//===- MapFile.h ------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "llvm/ADT/ArrayRef.h"
+namespace lld {
+namespace coff {
+class OutputSection;
+void writeMapFile(llvm::ArrayRef<OutputSection *> OutputSections);
diff --git a/contrib/llvm/tools/lld/COFF/MarkLive.cpp b/contrib/llvm/tools/lld/COFF/MarkLive.cpp
new file mode 100644
index 000000000000..18b1c9c2529f
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/MarkLive.cpp
@@ -0,0 +1,74 @@
+//===- MarkLive.cpp -------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Chunks.h"
+#include "Symbols.h"
+#include "lld/Common/Timer.h"
+#include "llvm/ADT/STLExtras.h"
+#include <vector>
+namespace lld {
+namespace coff {
+static Timer GCTimer("GC", Timer::root());
+// Set live bit on for each reachable chunk. Unmarked (unreachable)
+// COMDAT chunks will be ignored by Writer, so they will be excluded
+// from the final output.
+void markLive(ArrayRef<Chunk *> Chunks) {
+ ScopedTimer T(GCTimer);
+ // We build up a worklist of sections which have been marked as live. We only
+ // push into the worklist when we discover an unmarked section, and we mark
+ // as we push, so sections never appear twice in the list.
+ SmallVector<SectionChunk *, 256> Worklist;
+ // COMDAT section chunks are dead by default. Add non-COMDAT chunks.
+ for (Chunk *C : Chunks)
+ if (auto *SC = dyn_cast<SectionChunk>(C))
+ if (SC->Live) // already live before marking starts: seeds the worklist
+ Worklist.push_back(SC);
+ auto Enqueue = [&](SectionChunk *C) {
+ if (C->Live)
+ return;
+ C->Live = true;
+ Worklist.push_back(C);
+ };
+ auto AddSym = [&](Symbol *B) { // regular symbols enqueue their chunk; import symbols set flags on their ImportFile
+ if (auto *Sym = dyn_cast<DefinedRegular>(B))
+ Enqueue(Sym->getChunk());
+ else if (auto *Sym = dyn_cast<DefinedImportData>(B))
+ Sym->File->Live = true;
+ else if (auto *Sym = dyn_cast<DefinedImportThunk>(B))
+ Sym->WrappedSym->File->Live = Sym->WrappedSym->File->ThunkLive = true; // a used thunk keeps both the import entry and the thunk
+ };
+ // Add GC root chunks.
+ for (Symbol *B : Config->GCRoot)
+ AddSym(B);
+ while (!Worklist.empty()) {
+ SectionChunk *SC = Worklist.pop_back_val();
+ assert(SC->Live && "We mark as live when pushing onto the worklist!");
+ // Mark all symbols listed in the relocation table for this section.
+ for (Symbol *B : SC->symbols())
+ if (B) // entries may be null and are skipped
+ AddSym(B);
+ // Mark associative sections if any.
+ for (SectionChunk *C : SC->children())
+ Enqueue(C);
+ }
diff --git a/contrib/llvm/tools/lld/COFF/MarkLive.h b/contrib/llvm/tools/lld/COFF/MarkLive.h
new file mode 100644
index 000000000000..5b652dd48196
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/MarkLive.h
@@ -0,0 +1,24 @@
+//===- MarkLive.h -----------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/LLVM.h"
+#include "llvm/ADT/ArrayRef.h"
+namespace lld {
+namespace coff {
+void markLive(ArrayRef<Chunk *> Chunks);
+} // namespace coff
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/COFF/MinGW.cpp b/contrib/llvm/tools/lld/COFF/MinGW.cpp
new file mode 100644
index 000000000000..b2c8c4eadca4
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/MinGW.cpp
@@ -0,0 +1,176 @@
+//===- MinGW.cpp ----------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "MinGW.h"
+#include "SymbolTable.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace lld;
+using namespace lld::coff;
+using namespace llvm;
+using namespace llvm::COFF;
+// Fill the symbol-name exclusion sets consulted by shouldExport():
+// ExcludeSymbolPrefixes, ExcludeSymbolSuffixes and ExcludeSymbols. The exact
+// names and the "head" prefix differ depending on whether Config->Machine is
+// I386; the I386 variants carry one extra leading underscore.
+void AutoExporter::initSymbolExcludes() {
+ ExcludeSymbolPrefixes = {
+ // Import symbols
+ "__imp_",
+ // Extra import symbols from GNU import libraries
+ "__nm_",
+ // C++ symbols
+ "__rtti_",
+ "__builtin_",
+ // Artificial symbols such as .refptr
+ ".",
+ };
+ ExcludeSymbolSuffixes = {
+ "_iname",
+ };
+ if (Config->Machine == I386) {
+ ExcludeSymbols = {
+ "__pei386_runtime_relocator",
+ "_do_pseudo_reloc",
+ "_impure_ptr",
+ "__impure_ptr",
+ "__fmode",
+ "_environ",
+ "___dso_handle",
+ // These are the MinGW names that differ from the standard
+ // ones (lacking an extra underscore).
+ "_DllMain@12",
+ "_DllEntryPoint@12",
+ "_DllMainCRTStartup@12",
+ };
+ ExcludeSymbolPrefixes.insert("__head_");
+ } else {
+ ExcludeSymbols = {
+ "_pei386_runtime_relocator",
+ "do_pseudo_reloc",
+ "impure_ptr",
+ "_impure_ptr",
+ "_fmode",
+ "environ",
+ "__dso_handle",
+ // These are the MinGW names that differ from the standard
+ // ones (lacking an extra underscore).
+ "DllMain",
+ "DllEntryPoint",
+ "DllMainCRTStartup",
+ };
+ ExcludeSymbolPrefixes.insert("_head_");
+ }
+// Seed the library and object-file exclusion sets. shouldExport() refuses to
+// export a symbol whose containing library name (file name of ParentName with
+// the extension dropped) is in ExcludeLibs, or, when that name is empty,
+// whose object file name is in ExcludeObjects.
+AutoExporter::AutoExporter() {
+ // Keyed by library file name without its extension.
+ ExcludeLibs = {
+ "libgcc",
+ "libgcc_s",
+ "libstdc++",
+ "libmingw32",
+ "libmingwex",
+ "libg2c",
+ "libsupc++",
+ "libobjc",
+ "libgcj",
+ "libclang_rt.builtins",
+ "libclang_rt.builtins-aarch64",
+ "libclang_rt.builtins-arm",
+ "libclang_rt.builtins-i386",
+ "libclang_rt.builtins-x86_64",
+ "libc++",
+ "libc++abi",
+ "libunwind",
+ "libmsvcrt",
+ "libucrtbase",
+ };
+ // Keyed by object file name, extension included.
+ ExcludeObjects = {
+ "crt0.o",
+ "crt1.o",
+ "crt1u.o",
+ "crt2.o",
+ "crt2u.o",
+ "dllcrt1.o",
+ "dllcrt2.o",
+ "gcrt0.o",
+ "gcrt1.o",
+ "gcrt2.o",
+ "crtbegin.o",
+ "crtend.o",
+ };
+// Remove the library named by Path from ExcludeLibs, so that shouldExport()
+// no longer rejects symbols merely for coming from that library.
+void AutoExporter::addWholeArchive(StringRef Path) {
+  // ExcludeLibs is keyed by file name without extension (shouldExport()
+  // strips it the same way), so strip it here before erasing. A name with no
+  // '.' is used unchanged because rfind() then yields npos.
+  StringRef Base = sys::path::filename(Path);
+  size_t ExtPos = Base.rfind('.');
+  ExcludeLibs.erase(Base.substr(0, ExtPos));
+}
+// Decide whether Sym should be exported automatically.
+//
+// Returns false when Sym is null, not live or has no chunk; when it is
+// neither a DefinedRegular nor a DefinedCommon; when its name matches one of
+// the exclusion sets (exact name, prefix or suffix); when a symbol named
+// "__imp_" + name is present in the symbol table; or when it has no file.
+// Otherwise the result depends on ExcludeLibs (if the file's ParentName
+// yields a non-empty library name) or on ExcludeObjects.
+bool AutoExporter::shouldExport(Defined *Sym) const {
+ if (!Sym || !Sym->isLive() || !Sym->getChunk())
+ return false;
+ // Only allow the symbol kinds that make sense to export; in particular,
+ // disallow import symbols.
+ if (!isa<DefinedRegular>(Sym) && !isa<DefinedCommon>(Sym))
+ return false;
+ if (ExcludeSymbols.count(Sym->getName()))
+ return false;
+ for (StringRef Prefix : ExcludeSymbolPrefixes.keys())
+ if (Sym->getName().startswith(Prefix))
+ return false;
+ for (StringRef Suffix : ExcludeSymbolSuffixes.keys())
+ if (Sym->getName().endswith(Suffix))
+ return false;
+ // If a corresponding __imp_ symbol is present in the symbol table, don't
+ // export it. NOTE(review): only presence is tested here, not whether that
+ // symbol is defined.
+ if (Symtab->find(("__imp_" + Sym->getName()).str()))
+ return false;
+ // Check that file is non-null before dereferencing it, symbols not
+ // originating in regular object files probably shouldn't be exported.
+ if (!Sym->getFile())
+ return false;
+ StringRef LibName = sys::path::filename(Sym->getFile()->ParentName);
+ // Drop the file extension.
+ LibName = LibName.substr(0, LibName.rfind('.'));
+ // A non-empty library name decides the outcome on its own; the object file
+ // name is only consulted when there is none.
+ if (!LibName.empty())
+ return !ExcludeLibs.count(LibName);
+ StringRef FileName = sys::path::filename(Sym->getFile()->getName());
+ return !ExcludeObjects.count(FileName);
+// Write a module-definition file called Name that lists every entry of
+// Config->Exports under an EXPORTS header, one "name @ordinal" line each.
+// Calls fatal() if the file cannot be opened.
+void coff::writeDefFile(StringRef Name) {
+  std::error_code OpenErr;
+  raw_fd_ostream Out(Name, OpenErr, sys::fs::F_None);
+  if (OpenErr)
+    fatal("cannot open " + Name + ": " + OpenErr.message());
+  Out << "EXPORTS\n";
+  for (Export &Exp : Config->Exports) {
+    Out << " " << Exp.ExportName << " " << "@" << Exp.Ordinal;
+    // Tag the export as DATA when its symbol is a Defined that has a chunk
+    // whose output characteristics lack IMAGE_SCN_MEM_EXECUTE.
+    auto *Def = dyn_cast_or_null<Defined>(Exp.Sym);
+    auto *Owner = Def ? Def->getChunk() : nullptr;
+    if (Owner && !(Owner->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE))
+      Out << " DATA";
+    Out << "\n";
+  }
+}
diff --git a/contrib/llvm/tools/lld/COFF/MinGW.h b/contrib/llvm/tools/lld/COFF/MinGW.h
new file mode 100644
index 000000000000..f9c5e3e5c2cc
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/MinGW.h
@@ -0,0 +1,44 @@
+//===- MinGW.h --------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Config.h"
+#include "Symbols.h"
+#include "lld/Common/LLVM.h"
+namespace lld {
+namespace coff {
+// Logic for deciding what symbols to export, when exporting all
+// symbols for MinGW.
+class AutoExporter {
+ // Fills ExcludeLibs and ExcludeObjects with their built-in defaults.
+ AutoExporter();
+ // Fills ExcludeSymbols, ExcludeSymbolPrefixes and ExcludeSymbolSuffixes; the
+ // contents depend on Config->Machine.
+ void initSymbolExcludes();
+ // Removes the library named by Path (file name without extension) from
+ // ExcludeLibs.
+ void addWholeArchive(StringRef Path);
+ // Exact symbol names that are never exported.
+ llvm::StringSet<> ExcludeSymbols;
+ // Symbols whose name starts with one of these are never exported.
+ llvm::StringSet<> ExcludeSymbolPrefixes;
+ // Symbols whose name ends with one of these are never exported.
+ llvm::StringSet<> ExcludeSymbolSuffixes;
+ // Library names (without extension) whose symbols are not exported.
+ llvm::StringSet<> ExcludeLibs;
+ // Object file names whose symbols are not exported.
+ llvm::StringSet<> ExcludeObjects;
+ // Returns true if Sym passes all of the exclusion filters above.
+ bool shouldExport(Defined *Sym) const;
+void writeDefFile(StringRef Name);
+} // namespace coff
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/COFF/Options.td b/contrib/llvm/tools/lld/COFF/Options.td
new file mode 100644
index 000000000000..acf1bc5c8b1d
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/Options.td
@@ -0,0 +1,194 @@
+include "llvm/Option/OptParser.td"
+// link.exe accepts options starting with either a dash or a slash.
+// Flag that takes no arguments.
+class F<string name> : Flag<["/", "-", "-?"], name>;
+// Flag that takes one argument after ":".
+class P<string name, string help> :
+ Joined<["/", "-", "-?"], name#":">, HelpText<help>;
+// Boolean flag which can be suffixed by ":no". Using it unsuffixed turns the
+// flag on and using it suffixed by ":no" turns it off.
+multiclass B<string name, string help_on, string help_off> {
+ def "" : F<name>, HelpText<help_on>;
+ def _no : F<name#":no">, HelpText<help_off>;
+def align : P<"align", "Section alignment">;
+def aligncomm : P<"aligncomm", "Set common symbol alignment">;
+def alternatename : P<"alternatename", "Define weak alias">;
+def base : P<"base", "Base address of the program">;
+def color_diagnostics: Flag<["--"], "color-diagnostics">,
+ HelpText<"Use colors in diagnostics">;
+def color_diagnostics_eq: Joined<["--"], "color-diagnostics=">,
+ HelpText<"Use colors in diagnostics; one of 'always', 'never', 'auto'">;
+def defaultlib : P<"defaultlib", "Add the library to the list of input files">;
+def delayload : P<"delayload", "Delay loaded DLL name">;
+def entry : P<"entry", "Name of entry point symbol">;
+def errorlimit : P<"errorlimit",
+ "Maximum number of errors to emit before stopping (0 = no limit)">;
+def export : P<"export", "Export a function">;
+// No help text because /failifmismatch is not intended to be used by the user.
+def failifmismatch : P<"failifmismatch", "">;
+def guard : P<"guard", "Control flow guard">;
+def heap : P<"heap", "Size of the heap">;
+def ignore : P<"ignore", "Specify warning codes to ignore">;
+def implib : P<"implib", "Import library name">;
+def lib : F<"lib">,
+ HelpText<"Act like lib.exe; must be first argument if present">;
+def libpath : P<"libpath", "Additional library search path">;
+def linkrepro : P<"linkrepro", "Dump linker invocation and input files for debugging">;
+def lldltocache : P<"lldltocache", "Path to ThinLTO cached object file directory">;
+def lldltocachepolicy : P<"lldltocachepolicy", "Pruning policy for the ThinLTO cache">;
+def lldsavetemps : F<"lldsavetemps">,
+ HelpText<"Save temporary files instead of deleting them">;
+def machine : P<"machine", "Specify target platform">;
+def merge : P<"merge", "Combine sections">;
+def mllvm : P<"mllvm", "Options to pass to LLVM">;
+def nodefaultlib : P<"nodefaultlib", "Remove a default library">;
+def opt : P<"opt", "Control optimizations">;
+def order : P<"order", "Put functions in order">;
+def out : P<"out", "Path to file to write output">;
+def natvis : P<"natvis", "Path to natvis file to embed in the PDB">;
+def no_color_diagnostics: F<"no-color-diagnostics">,
+ HelpText<"Do not use colors in diagnostics">;
+def pdb : P<"pdb", "PDB file path">;
+def pdbaltpath : P<"pdbaltpath", "PDB file path to embed in the image">;
+def section : P<"section", "Specify section attributes">;
+def stack : P<"stack", "Size of the stack">;
+def stub : P<"stub", "Specify DOS stub file">;
+def subsystem : P<"subsystem", "Specify subsystem">;
+def timestamp : P<"timestamp", "Specify the PE header timestamp">;
+def version : P<"version", "Specify a version number in the PE header">;
+def wholearchive_file : P<"wholearchive", "Include all object files from this archive">;
+def disallowlib : Joined<["/", "-", "-?"], "disallowlib:">, Alias<nodefaultlib>;
+def manifest : F<"manifest">, HelpText<"Create .manifest file">;
+def manifest_colon : P<
+ "manifest",
+ "NO disables manifest output; EMBED[,ID=#] embeds manifest as resource in the image">;
+def manifestuac : P<"manifestuac", "User access control">;
+def manifestfile : P<"manifestfile", "Manifest output path, with /manifest">;
+def manifestdependency : P<
+ "manifestdependency",
+ "Attributes for <dependency> element in manifest file; implies /manifest">;
+def manifestinput : P<
+ "manifestinput",
+ "Additional manifest inputs; only valid with /manifest:embed">;
+// We cannot use class P here because the def name "incl" is different
+// from its command line option name. We do this because "include" is
+// a reserved keyword in tablegen.
+def incl : Joined<["/", "-"], "include:">,
+ HelpText<"Force symbol to be added to symbol table as undefined one">;
+// "def" is also a keyword.
+def deffile : Joined<["/", "-"], "def:">,
+ HelpText<"Use module-definition file">;
+def debug : F<"debug">, HelpText<"Embed a symbol table in the image">;
+def debug_opt : P<"debug", "Embed a symbol table in the image with option">;
+def debugtype : P<"debugtype", "Debug Info Options">;
+def dll : F<"dll">, HelpText<"Create a DLL">;
+def driver : P<"driver", "Generate a Windows NT Kernel Mode Driver">;
+def nodefaultlib_all : F<"nodefaultlib">,
+ HelpText<"Remove all default libraries">;
+def noentry : F<"noentry">,
+ HelpText<"Don't add reference to DllMainCRTStartup; only valid with /dll">;
+def profile : F<"profile">;
+def repro : F<"Brepro">,
+ HelpText<"Use a hash of the executable as the PE header timestamp">;
+def swaprun_cd : F<"swaprun:cd">;
+def swaprun_net : F<"swaprun:net">;
+def verbose : F<"verbose">;
+def wholearchive_flag : F<"wholearchive">;
+def force : F<"force">,
+ HelpText<"Allow undefined and multiply defined symbols when creating executables">;
+def force_unresolved : F<"force:unresolved">,
+ HelpText<"Allow undefined symbols when creating executables">;
+def force_multiple : F<"force:multiple">,
+ HelpText<"Allow multiply defined symbols when creating executables">;
+defm WX : B<"WX", "Treat warnings as errors", "Don't treat warnings as errors">;
+defm allowbind : B<"allowbind", "Enable DLL binding (default)",
+ "Disable DLL binding">;
+defm allowisolation : B<"allowisolation", "Enable DLL isolation (default)",
+ "Disable DLL isolation">;
+defm appcontainer : B<"appcontainer",
+ "Image can only be run in an app container",
+ "Image can run outside an app container (default)">;
+defm dynamicbase : B<"dynamicbase", "Enable ASLR (default unless /fixed)",
+ "Disable ASLR (default when /fixed)">;
+defm fixed : B<"fixed", "Disable base relocations",
+ "Enable base relocations (default)">;
+defm highentropyva : B<"highentropyva",
+ "Enable 64-bit ASLR (default on 64-bit)",
+ "Disable 64-bit ASLR">;
+defm incremental : B<"incremental",
+ "Keep original import library if contents are unchanged",
+ "Overwrite import library even if contents are unchanged">;
+defm integritycheck : B<"integritycheck",
+ "Set FORCE_INTEGRITY bit in PE header",
+ "No effect (default)">;
+defm largeaddressaware : B<"largeaddressaware",
+ "Enable large addresses (default on 64-bit)",
+ "Disable large addresses (default on 32-bit)">;
+// /nxcompat and /nxcompat:no; the two strings are the help texts for the
+// on and off forms respectively.
+defm nxcompat : B<"nxcompat", "Enable data execution prevention (default)",
+                  "Disable data execution prevention">;
+defm safeseh : B<"safeseh",
+ "Produce an image with Safe Exception Handler (only for x86)",
+ "Don't produce an image with Safe Exception Handler">;
+defm tsaware : B<"tsaware",
+ "Create Terminal Server aware executable (default)",
+ "Create non-Terminal Server aware executable">;
+def help : F<"help">;
+def help_q : Flag<["/?", "-?"], "">, Alias<help>;
+// LLD extensions
+def export_all_symbols : F<"export-all-symbols">;
+def kill_at : F<"kill-at">;
+def lldmingw : F<"lldmingw">;
+def output_def : Joined<["/", "-"], "output-def:">;
+def pdb_source_path : P<"pdbsourcepath",
+ "Base path used to make relative source file path absolute in PDB">;
+def rsp_quoting : Joined<["--"], "rsp-quoting=">,
+ HelpText<"Quoting style for response files, 'windows' (default) or 'posix'">;
+def dash_dash_version : Flag<["--"], "version">,
+ HelpText<"Print version information">;
+// Flags for debugging
+def lldmap : F<"lldmap">;
+def lldmap_file : Joined<["/", "-"], "lldmap:">;
+def show_timing : F<"time">;
+// The flags below do nothing. They are defined only for link.exe compatibility.
+class QF<string name> : Joined<["/", "-", "-?"], name#":">;
+multiclass QB<string name> {
+ def "" : F<name>;
+ def _no : F<name#":no">;
+def functionpadmin : F<"functionpadmin">;
+def ignoreidl : F<"ignoreidl">;
+def nologo : F<"nologo">;
+def throwingnew : F<"throwingnew">;
+def editandcontinue : F<"editandcontinue">;
+def fastfail : F<"fastfail">;
+def delay : QF<"delay">;
+def errorreport : QF<"errorreport">;
+def idlout : QF<"idlout">;
+def maxilksize : QF<"maxilksize">;
+def tlbid : QF<"tlbid">;
+def tlbout : QF<"tlbout">;
+def verbose_all : QF<"verbose">;
+def guardsym : QF<"guardsym">;
diff --git a/contrib/llvm/tools/lld/COFF/PDB.cpp b/contrib/llvm/tools/lld/COFF/PDB.cpp
new file mode 100644
index 000000000000..7757b89e2b36
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/PDB.cpp
@@ -0,0 +1,1771 @@
+//===- PDB.cpp ------------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "PDB.h"
+#include "Chunks.h"
+#include "Config.h"
+#include "Driver.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "Writer.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Timer.h"
+#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
+#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
+#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
+#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
+#include "llvm/DebugInfo/CodeView/RecordName.h"
+#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecordHelpers.h"
+#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
+#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
+#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
+#include "llvm/DebugInfo/MSF/MSFBuilder.h"
+#include "llvm/DebugInfo/MSF/MSFCommon.h"
+#include "llvm/DebugInfo/PDB/GenericError.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
+#include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h"
+#include "llvm/DebugInfo/PDB/PDB.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/CVDebugRecord.h"
+#include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/JamCRC.h"
+#include "llvm/Support/Parallel.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include <memory>
+using namespace lld;
+using namespace lld::coff;
+using namespace llvm;
+using namespace llvm::codeview;
+using llvm::object::coff_section;
+static ExitOnError ExitOnErr;
+static Timer TotalPdbLinkTimer("PDB Emission (Cumulative)", Timer::root());
+static Timer AddObjectsTimer("Add Objects", TotalPdbLinkTimer);
+static Timer TypeMergingTimer("Type Merging", AddObjectsTimer);
+static Timer SymbolMergingTimer("Symbol Merging", AddObjectsTimer);
+static Timer GlobalsLayoutTimer("Globals Stream Layout", TotalPdbLinkTimer);
+static Timer TpiStreamLayoutTimer("TPI Stream Layout", TotalPdbLinkTimer);
+static Timer DiskCommitTimer("Commit to Disk", TotalPdbLinkTimer);
+namespace {
+/// Map from type index and item index in a type server PDB to the
+/// corresponding index in the destination PDB.
+struct CVIndexMap {
+ // Type index remapping table; mergeDebugT() passes it to
+ // mergeTypeAndIdRecords() to be filled in.
+ SmallVector<TypeIndex, 0> TPIMap;
+ // Presumably the equivalent table for item (IPI) indices - TODO confirm; it
+ // is not written by the code visible in this part of the file.
+ SmallVector<TypeIndex, 0> IPIMap;
+ // NOTE(review): presumably set for maps that belong to a type server PDB;
+ // the code that sets it is not visible here.
+ bool IsTypeServerMap = false;
+ // When already set, mergeDebugT() does not register the map again for a
+ // precompiled headers object.
+ bool IsPrecompiledTypeMap = false;
+class DebugSHandler;
+/// Builds the output PDB. Owns the PDB file builder, the merged type and item
+/// tables (both the plain and the /DEBUG:GHASH variants), the global PDB
+/// string table, and the caches of index maps for type server PDBs and
+/// precompiled headers objects seen so far.
+class PDBLinker {
+ friend DebugSHandler;
+ PDBLinker(SymbolTable *Symtab)
+ : Alloc(), Symtab(Symtab), Builder(Alloc), TypeTable(Alloc),
+ IDTable(Alloc), GlobalTypeTable(Alloc), GlobalIDTable(Alloc) {
+ // This isn't strictly necessary, but link.exe usually puts an empty string
+ // as the first "valid" string in the string table, so we do the same in
+ // order to maintain as much byte-for-byte compatibility as possible.
+ PDBStrTab.insert("");
+ }
+ /// Emit the basic PDB structure: initial streams, headers, etc.
+ void initialize(llvm::codeview::DebugInfo *BuildId);
+ /// Add natvis files specified on the command line.
+ void addNatvisFiles();
+ /// Link CodeView from each object file in the symbol table into the PDB.
+ void addObjectsToPDB();
+ /// Link CodeView from a single object file into the target (output) PDB.
+ /// When a precompiled headers object is linked, its TPI map might be provided
+ /// externally.
+ void addObjFile(ObjFile *File, CVIndexMap *ExternIndexMap = nullptr);
+ /// Produce a mapping from the type and item indices used in the object
+ /// file to those in the destination PDB.
+ ///
+ /// If the object file uses a type server PDB (compiled with /Zi), merge TPI
+ /// and IPI from the type server PDB and return a map for it. Each unique type
+ /// server PDB is merged at most once, so this may return an existing index
+ /// mapping.
+ ///
+ /// If the object does not use a type server PDB (compiled with /Z7), we merge
+ /// all the type and item records from its .debug$T (or .debug$P) section and
+ /// fill in the caller-provided ObjectIndexMap.
+ Expected<const CVIndexMap &> mergeDebugT(ObjFile *File,
+ CVIndexMap *ObjectIndexMap);
+ /// Reads and makes available a PDB.
+ Expected<const CVIndexMap &> maybeMergeTypeServerPDB(ObjFile *File,
+ const CVType &FirstType);
+ /// Merges a precompiled headers TPI map into the current TPI map. The
+ /// precompiled headers object will also be loaded and remapped in the
+ /// process.
+ Expected<const CVIndexMap &>
+ mergeInPrecompHeaderObj(ObjFile *File, const CVType &FirstType,
+ CVIndexMap *ObjectIndexMap);
+ /// Reads and makes available a precompiled headers object.
+ ///
+ /// This is a requirement for objects compiled with cl.exe /Yu. In that
+ /// case, the referenced object (which was compiled with /Yc) has to be loaded
+ /// first. This is mainly because the current object's TPI stream has external
+ /// references to the precompiled headers object.
+ ///
+ /// If the precompiled headers object was already loaded, this function will
+ /// simply return its (remapped) TPI map.
+ Expected<const CVIndexMap &> aquirePrecompObj(ObjFile *File,
+ PrecompRecord Precomp);
+ /// Adds a precompiled headers object signature -> TPI mapping.
+ /// The bool of the returned pair is true when a mapping for Signature was
+ /// already registered.
+ std::pair<CVIndexMap &, bool /*already there*/>
+ registerPrecompiledHeaders(uint32_t Signature);
+ void mergeSymbolRecords(ObjFile *File, const CVIndexMap &IndexMap,
+ std::vector<ulittle32_t *> &StringTableRefs,
+ BinaryStreamRef SymData);
+ /// Add the section map and section contributions to the PDB.
+ void addSections(ArrayRef<OutputSection *> OutputSections,
+ ArrayRef<uint8_t> SectionTable);
+ /// Get the type table or the global type table if /DEBUG:GHASH is enabled.
+ TypeCollection &getTypeTable() {
+ if (Config->DebugGHashes)
+ return GlobalTypeTable;
+ return TypeTable;
+ }
+ /// Get the ID table or the global ID table if /DEBUG:GHASH is enabled.
+ TypeCollection &getIDTable() {
+ if (Config->DebugGHashes)
+ return GlobalIDTable;
+ return IDTable;
+ }
+ /// Write the PDB to disk and store the Guid generated for it in *Guid.
+ void commit(codeview::GUID *Guid);
+ /// Allocator handed to the builder and to all four type/ID tables.
+ BumpPtrAllocator Alloc;
+ SymbolTable *Symtab;
+ pdb::PDBFileBuilder Builder;
+ /// Type records that will go into the PDB TPI stream.
+ MergingTypeTableBuilder TypeTable;
+ /// Item records that will go into the PDB IPI stream.
+ MergingTypeTableBuilder IDTable;
+ /// Type records that will go into the PDB TPI stream (for /DEBUG:GHASH)
+ GlobalTypeTableBuilder GlobalTypeTable;
+ /// Item records that will go into the PDB IPI stream (for /DEBUG:GHASH)
+ GlobalTypeTableBuilder GlobalIDTable;
+ /// PDBs use a single global string table for filenames in the file checksum
+ /// table.
+ DebugStringTableSubsection PDBStrTab;
+ llvm::SmallString<128> NativePath;
+ /// A list of other PDBs which are loaded during the linking process and which
+ /// we need to keep around since the linking operation may reference pointers
+ /// inside of these PDBs.
+ llvm::SmallVector<std::unique_ptr<pdb::NativeSession>, 2> LoadedPDBs;
+ std::vector<pdb::SecMapEntry> SectionMap;
+ /// Type index mappings of type server PDBs that we've loaded so far.
+ std::map<codeview::GUID, CVIndexMap> TypeServerIndexMappings;
+ /// Type index mappings of precompiled objects type map that we've loaded so
+ /// far.
+ std::map<uint32_t, CVIndexMap> PrecompTypeIndexMappings;
+ /// List of TypeServer PDBs which cannot be loaded.
+ /// Cached to prevent repeated load attempts.
+ std::map<codeview::GUID, std::string> MissingTypeServerPDBs;
+/// Per-object-file state used while processing that file's .debug$S
+/// sections on behalf of a PDBLinker: the string table and checksum
+/// subsections found so far, the frame data subsections collected for later
+/// rewriting, and the raw locations of string table references that still
+/// need to be rewritten.
+class DebugSHandler {
+ PDBLinker &Linker;
+ /// The object file whose .debug$S sections we're processing.
+ ObjFile &File;
+ /// The result of merging type indices.
+ const CVIndexMap &IndexMap;
+ /// The DEBUG_S_STRINGTABLE subsection. These strings are referred to by
+ /// index from other records in the .debug$S section. All of these strings
+ /// need to be added to the global PDB string table, and all references to
+ /// these strings need to have their indices re-written to refer to the
+ /// global PDB string table.
+ DebugStringTableSubsectionRef CVStrTab;
+ /// The DEBUG_S_FILECHKSMS subsection. As above, these are referred to
+ /// by other records in the .debug$S section and need to be merged into the
+ /// PDB.
+ DebugChecksumsSubsectionRef Checksums;
+ /// The DEBUG_S_FRAMEDATA subsection(s). There can be more than one of
+ /// these and they need not appear in any specific order. However, they
+ /// contain string table references which need to be re-written, so we
+ /// collect them all here and re-write them after all subsections have been
+ /// discovered and processed.
+ std::vector<DebugFrameDataSubsectionRef> NewFpoFrames;
+ /// Pointers to raw memory that we determine have string table references
+ /// that need to be re-written. We first process all .debug$S subsections
+ /// to ensure that we can handle subsections written in any order, building
+ /// up this list as we go. At the end, we use the string table (which must
+ /// have been discovered by now else it is an error) to re-write these
+ /// references.
+ std::vector<ulittle32_t *> StringTableReferences;
+ /// Binds the handler to its linker, object file and merged index map; all
+ /// three are held by reference.
+ DebugSHandler(PDBLinker &Linker, ObjFile &File, const CVIndexMap &IndexMap)
+ : Linker(Linker), File(File), IndexMap(IndexMap) {}
+ /// NOTE(review): presumably processes one .debug$S section chunk; the body
+ /// is not visible in this part of the file.
+ void handleDebugS(lld::coff::SectionChunk &DebugS);
+ /// NOTE(review): presumably performs the deferred rewriting described on
+ /// the members above; the body is not visible in this part of the file.
+ void finish();
+// Visual Studio's debugger requires absolute paths in various places in the
+// PDB to work without additional configuration:
+// https://docs.microsoft.com/en-us/visualstudio/debugger/debug-source-files-common-properties-solution-property-pages-dialog-box
+static void pdbMakeAbsolute(SmallVectorImpl<char> &FileName) {
+ // The default behavior is to produce paths that are valid within the context
+ // of the machine that you perform the link on. If the linker is running on
+ // a POSIX system, we will output absolute POSIX paths. If the linker is
+ // running on a Windows system, we will output absolute Windows paths. If the
+ // user desires any other kind of behavior, they should explicitly pass
+ // /pdbsourcepath, in which case the string the user passed in is used as
+ // the base directory and its path style is guessed (see below).
+ //
+ // A name that is already absolute in either syntax is left untouched.
+ if (sys::path::is_absolute(FileName, sys::path::Style::windows) ||
+ sys::path::is_absolute(FileName, sys::path::Style::posix))
+ return;
+ // It's not absolute in any path syntax. Relative paths necessarily refer to
+ // the local file system, so we can make it native without ending up with a
+ // nonsensical path.
+ if (Config->PDBSourcePath.empty()) {
+ sys::path::native(FileName);
+ sys::fs::make_absolute(FileName);
+ return;
+ }
+ // Try to guess whether /PDBSOURCEPATH is a unix path or a windows path.
+ // Since PDB's are more of a Windows thing, we make this conservative and only
+ // decide that it's a unix path if we're fairly certain. Specifically, if
+ // it starts with a forward slash.
+ SmallString<128> AbsoluteFileName = Config->PDBSourcePath;
+ sys::path::Style GuessedStyle = AbsoluteFileName.startswith("/")
+ ? sys::path::Style::posix
+ : sys::path::Style::windows;
+ // Join base and relative name, then convert separators and drop "." / ".."
+ // components, all using the guessed style rather than the host's.
+ sys::path::append(AbsoluteFileName, GuessedStyle, FileName);
+ sys::path::native(AbsoluteFileName, GuessedStyle);
+ sys::path::remove_dots(AbsoluteFileName, true, GuessedStyle);
+ FileName = std::move(AbsoluteFileName);
+// Return the first chunk in Sections whose section name equals Name, or
+// nullptr when no chunk matches.
+static SectionChunk *findByName(ArrayRef<SectionChunk *> Sections,
+                                StringRef Name) {
+  auto Match = find_if(Sections, [&](SectionChunk *Candidate) {
+    return Candidate->getSectionName() == Name;
+  });
+  return Match == Sections.end() ? nullptr : *Match;
+}
+// Check the 4-byte little-endian magic at the start of Data and return the
+// bytes that follow it. Calls fatal(), naming SecName, when Data is shorter
+// than the magic or the magic is not COFF::DEBUG_SECTION_MAGIC.
+static ArrayRef<uint8_t> consumeDebugMagic(ArrayRef<uint8_t> Data,
+                                           StringRef SecName) {
+  const size_t MagicSize = 4;
+  if (Data.size() < MagicSize)
+    fatal(SecName + " too short");
+  uint32_t Magic = support::endian::read32le(Data.data());
+  if (Magic != COFF::DEBUG_SECTION_MAGIC)
+    fatal(SecName + " has an invalid magic");
+  return Data.drop_front(MagicSize);
+}
+// Return the contents of File's debug chunk named SecName with the leading
+// magic stripped, or an empty array when File has no such chunk.
+static ArrayRef<uint8_t> getDebugSection(ObjFile *File, StringRef SecName) {
+  SectionChunk *Found = findByName(File->getDebugChunks(), SecName);
+  if (!Found)
+    return {};
+  return consumeDebugMagic(Found->getContents(), SecName);
+}
+// A COFF .debug$H section is currently a clang extension. This function checks
+// if a .debug$H section is in a format that we expect / understand, so that we
+// can ignore any sections which are coincidentally also named .debug$H but do
+// not contain a format we recognize.
+static bool canUseDebugH(ArrayRef<uint8_t> DebugH) {
+  const size_t HeaderSize = sizeof(object::debug_h_header);
+  // Too small to even hold the header: not ours.
+  if (DebugH.size() < HeaderSize)
+    return false;
+  auto *Header =
+      reinterpret_cast<const object::debug_h_header *>(DebugH.data());
+  if (Header->Magic != COFF::DEBUG_HASHES_SECTION_MAGIC)
+    return false;
+  if (Header->Version != 0)
+    return false;
+  if (Header->HashAlgorithm != uint16_t(GlobalTypeHashAlg::SHA1_8))
+    return false;
+  // Whatever follows the header must be a multiple of 8 bytes long.
+  size_t PayloadSize = DebugH.size() - HeaderSize;
+  return PayloadSize % 8 == 0;
+}
+// Return the raw contents of File's .debug$H chunk when one exists and
+// canUseDebugH() accepts it; otherwise return None.
+static Optional<ArrayRef<uint8_t>> getDebugH(ObjFile *File) {
+  if (SectionChunk *HashSec = findByName(File->getDebugChunks(), ".debug$H")) {
+    ArrayRef<uint8_t> Raw = HashSec->getContents();
+    if (canUseDebugH(Raw))
+      return Raw;
+  }
+  return None;
+}
+// Reinterpret the bytes after the debug_h_header of an already validated
+// .debug$H blob as an array of GloballyHashedType. The returned array points
+// into DebugH; nothing is copied.
+static ArrayRef<GloballyHashedType>
+getHashesFromDebugH(ArrayRef<uint8_t> DebugH) {
+  assert(canUseDebugH(DebugH));
+  ArrayRef<uint8_t> Payload =
+      DebugH.drop_front(sizeof(object::debug_h_header));
+  auto *First = reinterpret_cast<const GloballyHashedType *>(Payload.data());
+  uint32_t Count = Payload.size() / sizeof(GloballyHashedType);
+  return {First, Count};
+}
+// Copy every record of TypeTable into TpiBuilder together with its hash, after
+// setting the stream version to PdbTpiV80. Calls fatal() if hashing a record
+// fails.
+static void addTypeInfo(pdb::TpiStreamBuilder &TpiBuilder,
+ TypeCollection &TypeTable) {
+ // Start the TPI or IPI stream header.
+ TpiBuilder.setVersionHeader(pdb::PdbTpiV80);
+ // Flatten the in memory type table and hash each type.
+ TypeTable.ForEachRecord([&](TypeIndex TI, const CVType &Type) {
+ auto Hash = pdb::hashTypeRecord(Type);
+ // The error must be inspected before *Hash is read below; presumably
+ // fatal() does not return, so the dereference only happens on success.
+ if (auto E = Hash.takeError())
+ fatal("type hashing error");
+ TpiBuilder.addTypeRecord(Type.RecordData, *Hash);
+ });
+// OBJs usually start their symbol stream with a S_OBJNAME record. This record
+// also contains the signature/key of the current PCH session. The signature
+// must be same for all objects which depend on the precompiled object.
+// Recompiling the precompiled headers will generate a new PCH key and thus
+// invalidate all the dependent objects.
+// Returns the Signature field of the S_OBJNAME record found at the start of
+// a symbols subsection of File's .debug$S section, or 0 when File has no
+// .debug$S section or no such record can be read.
+static uint32_t extractPCHSignature(ObjFile *File) {
+  // Look the chunk up by name and test the result for null. The previous
+  // code tested the iterator returned by find_if() for null, which does not
+  // detect "not found" (that is signalled by end()), so a file without
+  // .debug$S went on to dereference the end iterator.
+  SectionChunk *DebugS = findByName(File->getDebugChunks(), ".debug$S");
+  if (!DebugS)
+    return 0;
+  ArrayRef<uint8_t> Contents =
+      consumeDebugMagic(DebugS->getContents(), ".debug$S");
+  DebugSubsectionArray Subsections;
+  BinaryStreamReader Reader(Contents, support::little);
+  ExitOnErr(Reader.readArray(Subsections, Contents.size()));
+  for (const DebugSubsectionRecord &SS : Subsections) {
+    if (SS.kind() != DebugSubsectionKind::Symbols)
+      continue;
+    // If it's there, the S_OBJNAME record shall come first in the stream.
+    Expected<CVSymbol> Sym = readSymbolFromStream(SS.getRecordData(), 0);
+    if (!Sym) {
+      consumeError(Sym.takeError());
+      continue;
+    }
+    auto ObjName = SymbolDeserializer::deserializeAs<ObjNameSym>(Sym.get());
+    if (ObjName)
+      return ObjName->Signature;
+    // Same treatment as for Sym above: a failed Expected must have its error
+    // consumed before it is destroyed, then we move on to the next subsection.
+    consumeError(ObjName.takeError());
+  }
+  return 0;
+}
+// Merge the type records of File into the output type tables and return the
+// index map to use for that file.
+//
+// The records are read from .debug$T, or from .debug$P when .debug$T is
+// empty (precompiled headers object). Outcomes:
+//  - no data, or no records: *ObjectIndexMap is returned as is;
+//  - first record is LF_TYPESERVER2: the result of maybeMergeTypeServerPDB();
+//  - first record is LF_PRECOMP: the precompiled headers object is merged
+//    first (its error, if any, is returned), then the remaining records;
+//  - otherwise the records are merged and *ObjectIndexMap is returned.
+// Malformed input and merge failures end in fatal().
+Expected<const CVIndexMap &>
+PDBLinker::mergeDebugT(ObjFile *File, CVIndexMap *ObjectIndexMap) {
+ ScopedTimer T(TypeMergingTimer);
+ bool IsPrecompiledHeader = false;
+ ArrayRef<uint8_t> Data = getDebugSection(File, ".debug$T");
+ if (Data.empty()) {
+ // Try again, Microsoft precompiled headers use .debug$P instead of
+ // .debug$T
+ Data = getDebugSection(File, ".debug$P");
+ IsPrecompiledHeader = true;
+ }
+ if (Data.empty())
+ return *ObjectIndexMap; // no debug info
+ // Precompiled headers objects need to save the index map for further
+ // reference by other objects which use the precompiled headers.
+ if (IsPrecompiledHeader) {
+ uint32_t PCHSignature = extractPCHSignature(File);
+ if (PCHSignature == 0)
+ fatal("No signature found for the precompiled headers OBJ (" +
+ File->getName() + ")");
+ // When a precompiled headers object comes first on the command-line, we
+ // update the mapping here. Otherwise, if an object referencing the
+ // precompiled headers object comes first, the mapping is created in
+ // aquirePrecompObj(), thus we would skip this block.
+ if (!ObjectIndexMap->IsPrecompiledTypeMap) {
+ auto R = registerPrecompiledHeaders(PCHSignature);
+ // R.second is the "already there" flag of the returned pair.
+ if (R.second)
+ fatal(
+ "A precompiled headers OBJ with the same signature was already "
+ "provided! (" +
+ File->getName() + ")");
+ // From here on the registered map is filled in instead of the map the
+ // caller passed.
+ ObjectIndexMap = &R.first;
+ }
+ }
+ BinaryByteStream Stream(Data, support::little);
+ CVTypeArray Types;
+ BinaryStreamReader Reader(Stream);
+ if (auto EC = Reader.readArray(Types, Reader.getLength()))
+ fatal("Reader::readArray failed: " + toString(std::move(EC)));
+ auto FirstType = Types.begin();
+ if (FirstType == Types.end())
+ return *ObjectIndexMap;
+ if (FirstType->kind() == LF_TYPESERVER2) {
+ // Look through type servers. If we've already seen this type server,
+ // don't merge any type information.
+ return maybeMergeTypeServerPDB(File, *FirstType);
+ } else if (FirstType->kind() == LF_PRECOMP) {
+ // This object was compiled with /Yu, so process the corresponding
+ // precompiled headers object (/Yc) first. Some type indices in the current
+ // object are referencing data in the precompiled headers object, so we need
+ // both to be loaded.
+ auto E = mergeInPrecompHeaderObj(File, *FirstType, ObjectIndexMap);
+ if (!E)
+ return E.takeError();
+ // Drop LF_PRECOMP record from the input stream, as it needs to be replaced
+ // with the precompiled headers object type stream.
+ // Note that we can't just call Types.drop_front(), as we explicitly want to
+ // rebase the stream.
+ Types.setUnderlyingStream(
+ Types.getUnderlyingStream().drop_front(FirstType->RecordData.size()));
+ }
+ // Fill in the temporary, caller-provided ObjectIndexMap.
+ if (Config->DebugGHashes) {
+ ArrayRef<GloballyHashedType> Hashes;
+ // Backing storage for Hashes when they have to be computed here; unused
+ // when the object supplies a usable .debug$H section.
+ std::vector<GloballyHashedType> OwnedHashes;
+ if (Optional<ArrayRef<uint8_t>> DebugH = getDebugH(File))
+ Hashes = getHashesFromDebugH(*DebugH);
+ else {
+ OwnedHashes = GloballyHashedType::hashTypes(Types);
+ Hashes = OwnedHashes;
+ }
+ if (auto Err = mergeTypeAndIdRecords(GlobalIDTable, GlobalTypeTable,
+ ObjectIndexMap->TPIMap, Types, Hashes,
+ File->PCHSignature))
+ fatal("codeview::mergeTypeAndIdRecords failed: " +
+ toString(std::move(Err)));
+ } else {
+ if (auto Err =
+ mergeTypeAndIdRecords(IDTable, TypeTable, ObjectIndexMap->TPIMap,
+ Types, File->PCHSignature))
+ fatal("codeview::mergeTypeAndIdRecords failed: " +
+ toString(std::move(Err)));
+ }
+ return *ObjectIndexMap;
+/// Try to open the PDB at \p TSPath and check that it is the one an object
+/// refers to.
+///
+/// \param GuidFromObj  GUID the PDB's info stream must carry.
+/// \param TSPath       path of the candidate PDB file.
+/// \returns the opened native session, or an error when the file does not
+///          exist (ENOENT), cannot be read, is not a loadable PDB, has no info
+///          stream, or carries a different GUID (signature_out_of_date).
+static Expected<std::unique_ptr<pdb::NativeSession>>
+tryToLoadPDB(const codeview::GUID &GuidFromObj, StringRef TSPath) {
+  // Ensure the file exists before anything else. We want to return ENOENT,
+  // "file not found", even if the path points to a removable device (in which
+  // case the return message would be EAGAIN, "resource unavailable try again")
+  if (!llvm::sys::fs::exists(TSPath))
+    return errorCodeToError(std::error_code(ENOENT, std::generic_category()));
+  ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getFile(
+      TSPath, /*FileSize=*/-1, /*RequiresNullTerminator=*/false);
+  if (!MBOrErr)
+    return errorCodeToError(MBOrErr.getError());
+  std::unique_ptr<pdb::IPDBSession> ThisSession;
+  // The file buffer is handed over to the driver; the session only gets a
+  // non-owning view of it.
+  if (auto EC = pdb::NativeSession::createFromPdb(
+          MemoryBuffer::getMemBuffer(Driver->takeBuffer(std::move(*MBOrErr)),
+                                     /*RequiresNullTerminator=*/false),
+          ThisSession))
+    return std::move(EC);
+  std::unique_ptr<pdb::NativeSession> NS(
+      static_cast<pdb::NativeSession *>(ThisSession.release()));
+  pdb::PDBFile &File = NS->getPDBFile();
+  auto ExpectedInfo = File.getPDBInfoStream();
+  // All PDB Files should have an Info stream.
+  if (!ExpectedInfo)
+    return ExpectedInfo.takeError();
+  // Just because a file with a matching name was found and it was an actual
+  // PDB file doesn't mean it matches. For it to match the InfoStream's GUID
+  // must match the GUID specified in the TypeServer2 record.
+  if (ExpectedInfo->getGuid() != GuidFromObj)
+    return make_error<pdb::PDBError>(pdb::pdb_error_code::signature_out_of_date);
+  return std::move(NS);
+}
+/// Merge the type server PDB named by an LF_TYPESERVER2 record, unless a PDB
+/// with the same GUID has already been merged or has already failed to load.
+///
+/// \param File       object whose first type record is \p FirstType; its
+///                   location is used as a fallback directory for the PDB.
+/// \param FirstType  the LF_TYPESERVER2 record.
+/// \returns the index map of the type server (cached per GUID), or a file
+///          error naming the PDB path when it cannot be loaded.
+Expected<const CVIndexMap &>
+PDBLinker::maybeMergeTypeServerPDB(ObjFile *File, const CVType &FirstType) {
+  TypeServer2Record TS;
+  if (auto EC =
+          TypeDeserializer::deserializeAs(const_cast<CVType &>(FirstType), TS))
+    fatal("error reading record: " + toString(std::move(EC)));
+  const codeview::GUID &TSId = TS.getGuid();
+  StringRef TSPath = TS.getName();
+  // First, check if the PDB has previously failed to load.
+  auto PrevErr = MissingTypeServerPDBs.find(TSId);
+  if (PrevErr != MissingTypeServerPDBs.end())
+    return createFileError(
+        TSPath,
+        make_error<StringError>(PrevErr->second, inconvertibleErrorCode()));
+  // Second, check if we already loaded a PDB with this GUID. Return the type
+  // index mapping if we have it.
+  auto Insertion = TypeServerIndexMappings.insert({TSId, CVIndexMap()});
+  CVIndexMap &IndexMap = Insertion.first->second;
+  if (!Insertion.second)
+    return IndexMap;
+  // Mark this map as a type server map.
+  IndexMap.IsTypeServerMap = true;
+  // Check for a PDB at:
+  // 1. The given file path
+  // 2. Next to the object file or archive file
+  auto ExpectedSession = handleExpected(
+      tryToLoadPDB(TSId, TSPath),
+      [&]() {
+        StringRef LocalPath =
+            !File->ParentName.empty() ? File->ParentName : File->getName();
+        SmallString<128> Path = sys::path::parent_path(LocalPath);
+        // Currently, type server PDBs are only created by cl, which only runs
+        // on Windows, so we can assume type server paths are Windows style.
+        sys::path::append(
+            Path, sys::path::filename(TSPath, sys::path::Style::windows));
+        return tryToLoadPDB(TSId, Path);
+      },
+      [&](std::unique_ptr<ECError> EC) -> Error {
+        auto SysErr = EC->convertToErrorCode();
+        // Only re-try loading if the previous error was "No such file or
+        // directory"
+        if (SysErr.category() == std::generic_category() &&
+            SysErr.value() == ENOENT)
+          return Error::success();
+        return Error(std::move(EC));
+      });
+  if (auto E = ExpectedSession.takeError()) {
+    // Drop the placeholder map inserted above so a failed PDB is not later
+    // mistaken for an already merged one.
+    TypeServerIndexMappings.erase(TSId);
+    // Flatten the error to a string, for later display, if the error occurs
+    // again on the same PDB.
+    std::string ErrMsg;
+    raw_string_ostream S(ErrMsg);
+    S << E;
+    MissingTypeServerPDBs.emplace(TSId, S.str());
+    return createFileError(TSPath, std::move(E));
+  }
+  pdb::NativeSession *Session = ExpectedSession->get();
+  // Keep a strong reference to this PDB, so that it's safe to hold pointers
+  // into the file.
+  LoadedPDBs.push_back(std::move(*ExpectedSession));
+  auto ExpectedTpi = Session->getPDBFile().getPDBTpiStream();
+  if (auto E = ExpectedTpi.takeError())
+    fatal("Type server does not have TPI stream: " + toString(std::move(E)));
+  auto ExpectedIpi = Session->getPDBFile().getPDBIpiStream();
+  // Report the stream that is actually missing (this used to say "TPI").
+  if (auto E = ExpectedIpi.takeError())
+    fatal("Type server does not have IPI stream: " + toString(std::move(E)));
+  if (Config->DebugGHashes) {
+    // PDBs do not actually store global hashes, so when merging a type server
+    // PDB we have to synthesize global hashes. To do this, we first synthesize
+    // global hashes for the TPI stream, since it is independent, then we
+    // synthesize hashes for the IPI stream, using the hashes for the TPI stream
+    // as inputs.
+    auto TpiHashes = GloballyHashedType::hashTypes(ExpectedTpi->typeArray());
+    auto IpiHashes =
+        GloballyHashedType::hashIds(ExpectedIpi->typeArray(), TpiHashes);
+    Optional<uint32_t> EndPrecomp;
+    // Merge TPI first, because the IPI stream will reference type indices.
+    if (auto Err = mergeTypeRecords(GlobalTypeTable, IndexMap.TPIMap,
+                                    ExpectedTpi->typeArray(), TpiHashes,
+                                    EndPrecomp))
+      fatal("codeview::mergeTypeRecords failed: " + toString(std::move(Err)));
+    // Merge IPI.
+    if (auto Err =
+            mergeIdRecords(GlobalIDTable, IndexMap.TPIMap, IndexMap.IPIMap,
+                           ExpectedIpi->typeArray(), IpiHashes))
+      fatal("codeview::mergeIdRecords failed: " + toString(std::move(Err)));
+  } else {
+    // Merge TPI first, because the IPI stream will reference type indices.
+    if (auto Err = mergeTypeRecords(TypeTable, IndexMap.TPIMap,
+                                    ExpectedTpi->typeArray()))
+      fatal("codeview::mergeTypeRecords failed: " + toString(std::move(Err)));
+    // Merge IPI.
+    if (auto Err = mergeIdRecords(IDTable, IndexMap.TPIMap, IndexMap.IPIMap,
+                                  ExpectedIpi->typeArray()))
+      fatal("codeview::mergeIdRecords failed: " + toString(std::move(Err)));
+  }
+  return IndexMap;
+}
+/// Seed \p ObjectIndexMap with the already remapped type indices of the
+/// precompiled headers object referenced by an LF_PRECOMP record.
+///
+/// \param File            object that starts with \p FirstType.
+/// \param FirstType       the LF_PRECOMP record.
+/// \param ObjectIndexMap  map of \p File; receives the first getTypesCount()
+///                        entries of the precompiled object's TPI map.
+/// \returns \p ObjectIndexMap on success, the (empty) precompiled map when it
+///          holds no types, or the error from aquirePrecompObj().
+Expected<const CVIndexMap &>
+PDBLinker::mergeInPrecompHeaderObj(ObjFile *File, const CVType &FirstType,
+                                   CVIndexMap *ObjectIndexMap) {
+  PrecompRecord Precomp;
+  if (auto EC = TypeDeserializer::deserializeAs(const_cast<CVType &>(FirstType),
+                                                Precomp))
+    fatal("error reading record: " + toString(std::move(EC)));
+  auto E = aquirePrecompObj(File, Precomp);
+  if (!E)
+    return E.takeError();
+  const CVIndexMap &PrecompIndexMap = *E;
+  assert(PrecompIndexMap.IsPrecompiledTypeMap);
+  // Nothing to copy over when the precompiled object contributed no types.
+  if (PrecompIndexMap.TPIMap.empty())
+    return PrecompIndexMap;
+  assert(Precomp.getStartTypeIndex() == TypeIndex::FirstNonSimpleIndex);
+  assert(Precomp.getTypesCount() <= PrecompIndexMap.TPIMap.size());
+  // Use the previously remapped index map from the precompiled headers.
+  ObjectIndexMap->TPIMap.append(PrecompIndexMap.TPIMap.begin(),
+                                PrecompIndexMap.TPIMap.begin() +
+                                    Precomp.getTypesCount());
+  return *ObjectIndexMap;
+}
+/// Compare two path strings: ignoring case when built for Windows (_WIN32),
+/// exactly otherwise.
+static bool equals_path(StringRef path1, StringRef path2) {
+#if defined(_WIN32)
+  return path1.equals_lower(path2);
+#else
+  return path1.equals(path2);
+#endif
+}
+// Find by name an OBJ provided on the command line.
+//
+// FileNameOnly is matched against the file-name component of every known
+// object file using equals_path(). Returns the first match, or nullptr when no
+// object has that file name.
+static ObjFile *findObjByName(StringRef FileNameOnly) {
+  for (ObjFile *F : ObjFile::Instances) {
+    StringRef CurrentFileName = sys::path::filename(F->getName());
+    // Compare based solely on the file name (link.exe behavior)
+    if (equals_path(CurrentFileName, FileNameOnly))
+      return F;
+  }
+  return nullptr;
+}
+/// Look up, or create, the type index map registered for the precompiled
+/// headers signature \p Signature.
+///
+/// \returns the map and a flag that is true when a map for this signature
+///          already existed. A newly created map is marked as a precompiled
+///          types map.
+std::pair<CVIndexMap &, bool /*already there*/>
+PDBLinker::registerPrecompiledHeaders(uint32_t Signature) {
+  auto Insertion = PrecompTypeIndexMappings.insert({Signature, CVIndexMap()});
+  CVIndexMap &IndexMap = Insertion.first->second;
+  if (!Insertion.second)
+    return {IndexMap, true};
+  // Mark this map as a precompiled types map.
+  IndexMap.IsPrecompiledTypeMap = true;
+  return {IndexMap, false};
+}
+/// Return the type index map of the precompiled headers object referenced by
+/// \p Precomp, processing that object first if it has not been seen yet.
+///
+/// \param File     object containing the LF_PRECOMP record (not read here).
+/// \param Precomp  deserialized LF_PRECOMP record.
+/// \returns the map registered for the record's signature, or a file error
+///          when the precompiled object was not given on the command line
+///          (external_cmdline_ref) or its signature differs
+///          (signature_out_of_date).
+Expected<const CVIndexMap &>
+PDBLinker::aquirePrecompObj(ObjFile *File, PrecompRecord Precomp) {
+  // First, check if we already loaded the precompiled headers object with this
+  // signature. Return the type index mapping if we've already seen it.
+  auto R = registerPrecompiledHeaders(Precomp.getSignature());
+  if (R.second)
+    return R.first;
+  CVIndexMap &IndexMap = R.first;
+  // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
+  // records, we assume the OBJ comes from a Windows build of cl.exe. Thusly,
+  // the paths embedded in the OBJs are in the Windows format.
+  SmallString<128> PrecompFileName = sys::path::filename(
+      Precomp.getPrecompFilePath(), sys::path::Style::windows);
+  // link.exe requires that a precompiled headers object must always be provided
+  // on the command-line, even if that's not necessary.
+  auto PrecompFile = findObjByName(PrecompFileName);
+  if (!PrecompFile)
+    return createFileError(
+        PrecompFileName.str(),
+        make_error<pdb::PDBError>(pdb::pdb_error_code::external_cmdline_ref));
+  // Process the precompiled object now, filling the map registered above.
+  addObjFile(PrecompFile, &IndexMap);
+  if (!PrecompFile->PCHSignature)
+    fatal(PrecompFile->getName() + " is not a precompiled headers object");
+  if (Precomp.getSignature() != PrecompFile->PCHSignature.getValueOr(0))
+    return createFileError(
+        Precomp.getPrecompFilePath().str(),
+        make_error<pdb::PDBError>(pdb::pdb_error_code::signature_out_of_date));
+  return IndexMap;
+}
+/// Rewrite \p TI through \p TypeIndexMap.
+///
+/// Simple type indices are left untouched. \returns false, leaving \p TI
+/// unchanged, when the index lies outside the map.
+static bool remapTypeIndex(TypeIndex &TI, ArrayRef<TypeIndex> TypeIndexMap) {
+  if (TI.isSimple())
+    return true;
+  if (TI.toArrayIndex() >= TypeIndexMap.size())
+    return false;
+  TI = TypeIndexMap[TI.toArrayIndex()];
+  return true;
+}
+/// Remap, in place, every type or item index of one symbol record.
+///
+/// \param File         object the record comes from (used in log messages).
+/// \param SymKind      kind of the record (used in log messages).
+/// \param RecordBytes  the whole record, including its RecordPrefix.
+/// \param IndexMap     index map of the object.
+/// \param TypeRefs     locations of the indices, relative to the record
+///                     contents (the bytes after the prefix).
+///
+/// An index that cannot be remapped is logged and replaced by
+/// SimpleTypeKind::NotTranslated. A reference that runs past the end of the
+/// record is fatal.
+static void remapTypesInSymbolRecord(ObjFile *File, SymbolKind SymKind,
+                                     MutableArrayRef<uint8_t> RecordBytes,
+                                     const CVIndexMap &IndexMap,
+                                     ArrayRef<TiReference> TypeRefs) {
+  MutableArrayRef<uint8_t> Contents =
+      RecordBytes.drop_front(sizeof(RecordPrefix));
+  for (const TiReference &Ref : TypeRefs) {
+    unsigned ByteSize = Ref.Count * sizeof(TypeIndex);
+    if (Contents.size() < Ref.Offset + ByteSize)
+      fatal("symbol record too short");
+    // This can be an item index or a type index. Choose the appropriate map.
+    // The IPI map is only consulted for type server maps; otherwise item
+    // indices are looked up in the TPI map as well.
+    ArrayRef<TypeIndex> TypeOrItemMap = IndexMap.TPIMap;
+    bool IsItemIndex = Ref.Kind == TiRefKind::IndexRef;
+    if (IsItemIndex && IndexMap.IsTypeServerMap)
+      TypeOrItemMap = IndexMap.IPIMap;
+    MutableArrayRef<TypeIndex> TIs(
+        reinterpret_cast<TypeIndex *>(Contents.data() + Ref.Offset), Ref.Count);
+    for (TypeIndex &TI : TIs) {
+      if (!remapTypeIndex(TI, TypeOrItemMap)) {
+        log("ignoring symbol record of kind 0x" + utohexstr(SymKind) + " in " +
+            File->getName() + " with bad " + (IsItemIndex ? "item" : "type") +
+            " index 0x" + utohexstr(TI.getIndex()));
+        TI = TypeIndex(SimpleTypeKind::NotTranslated);
+      }
+    }
+  }
+}
+/// Remember the address of the 32-bit little-endian string table index stored
+/// \p Offset bytes into \p Contents by appending it to \p StrTableRefs.
+static void
+recordStringTableReferenceAtOffset(MutableArrayRef<uint8_t> Contents,
+                                   uint32_t Offset,
+                                   std::vector<ulittle32_t *> &StrTableRefs) {
+  Contents =
+      Contents.drop_front(Offset).take_front(sizeof(support::ulittle32_t));
+  ulittle32_t *Index = reinterpret_cast<ulittle32_t *>(Contents.data());
+  StrTableRefs.push_back(Index);
+}
+/// Collect the string table references embedded in a symbol record of kind
+/// \p Kind so they can be rewritten later.
+///
+/// \param Kind          kind of the symbol record.
+/// \param Contents      bytes the reference offsets are applied to.
+/// \param StrTableRefs  receives a pointer to each reference found.
+static void
+recordStringTableReferences(SymbolKind Kind, MutableArrayRef<uint8_t> Contents,
+                            std::vector<ulittle32_t *> &StrTableRefs) {
+  // For now we only handle S_FILESTATIC, but we may need the same logic for
+  // S_DEFRANGE and S_DEFRANGE_SUBFIELD. However, I cannot seem to generate any
+  // PDBs that contain these types of records, so because of the uncertainty
+  // they are omitted here until we can prove that it's necessary.
+  switch (Kind) {
+  case SymbolKind::S_FILESTATIC:
+    // FileStaticSym::ModFileOffset
+    recordStringTableReferenceAtOffset(Contents, 8, StrTableRefs);
+    break;
+  case SymbolKind::S_DEFRANGE:
+  case SymbolKind::S_DEFRANGE_SUBFIELD:
+    log("Not fixing up string table reference in S_DEFRANGE / "
+        "S_DEFRANGE_SUBFIELD record");
+    break;
+  default:
+    break;
+  }
+}
+/// Read the symbol kind from the RecordPrefix at the start of \p RecordData.
+static SymbolKind symbolKind(ArrayRef<uint8_t> RecordData) {
+  const RecordPrefix *Prefix =
+      reinterpret_cast<const RecordPrefix *>(RecordData.data());
+  return static_cast<SymbolKind>(uint16_t(Prefix->RecordKind));
+}
+/// MSVC translates S_PROC_ID_END to S_END, and S_[LG]PROC32_ID to S_[LG]PROC32
+///
+/// The record in \p RecordData is rewritten in place. \p IDTable is the table
+/// used to look up the function ID record a procedure symbol refers to. Other
+/// symbol kinds are left untouched.
+static void translateIdSymbols(MutableArrayRef<uint8_t> &RecordData,
+                               TypeCollection &IDTable) {
+  RecordPrefix *Prefix = reinterpret_cast<RecordPrefix *>(RecordData.data());
+  SymbolKind Kind = symbolKind(RecordData);
+  if (Kind == SymbolKind::S_PROC_ID_END) {
+    Prefix->RecordKind = SymbolKind::S_END;
+    return;
+  }
+  // In an object file, GPROC32_ID has an embedded reference which refers to the
+  // single object file type index namespace. This has already been translated
+  // to the PDB file's ID stream index space, but we need to convert this to a
+  // symbol that refers to the type stream index space. So we remap again from
+  // ID index space to type index space.
+  if (Kind == SymbolKind::S_GPROC32_ID || Kind == SymbolKind::S_LPROC32_ID) {
+    SmallVector<TiReference, 1> Refs;
+    auto Content = RecordData.drop_front(sizeof(RecordPrefix));
+    CVSymbol Sym(Kind, RecordData);
+    discoverTypeIndicesInSymbol(Sym, Refs);
+    assert(Refs.size() == 1);
+    assert(Refs.front().Count == 1);
+    TypeIndex *TI =
+        reinterpret_cast<TypeIndex *>(Content.data() + Refs[0].Offset);
+    // `TI` is the index of a FuncIdRecord or MemberFuncIdRecord which lives in
+    // the IPI stream, whose `FunctionType` member refers to the TPI stream.
+    // Note that LF_FUNC_ID and LF_MEMFUNC_ID have the same record layout, and
+    // in both cases we just need the second type index.
+    if (!TI->isSimple() && !TI->isNoneType()) {
+      CVType FuncIdData = IDTable.getType(*TI);
+      SmallVector<TypeIndex, 2> Indices;
+      discoverTypeIndices(FuncIdData, Indices);
+      assert(Indices.size() == 2);
+      *TI = Indices[1];
+    }
+    Kind = (Kind == SymbolKind::S_GPROC32_ID) ? SymbolKind::S_GPROC32
+                                              : SymbolKind::S_LPROC32;
+    Prefix->RecordKind = uint16_t(Kind);
+  }
+}
+/// Copy the symbol record. In a PDB, symbol records must be 4 byte aligned.
+/// The object file may not be aligned.
+///
+/// \param Sym         record to copy.
+/// \param AlignedMem  preallocated destination; the bytes consumed by the copy
+///                    are dropped from its front.
+/// \returns the copy, padded with zero bytes up to the PDB alignment and with
+///          its prefix length updated to match.
+static MutableArrayRef<uint8_t>
+copyAndAlignSymbol(const CVSymbol &Sym, MutableArrayRef<uint8_t> &AlignedMem) {
+  size_t Size = alignTo(Sym.length(), alignOf(CodeViewContainer::Pdb));
+  assert(Size >= 4 && "record too short");
+  assert(Size <= MaxRecordLength && "record too long");
+  assert(AlignedMem.size() >= Size && "didn't preallocate enough");
+  // Copy the symbol record and zero out any padding bytes.
+  MutableArrayRef<uint8_t> NewData = AlignedMem.take_front(Size);
+  AlignedMem = AlignedMem.drop_front(Size);
+  memcpy(NewData.data(), Sym.data().data(), Sym.length());
+  memset(NewData.data() + Sym.length(), 0, Size - Sym.length());
+  // Update the record prefix length. It should point to the beginning of the
+  // next record.
+  auto *Prefix = reinterpret_cast<RecordPrefix *>(NewData.data());
+  Prefix->RecordLen = Size - 2;
+  return NewData;
+}
+/// The two leading fields of a scope-opening symbol record, as overlaid on
+/// the record contents by scopeStackOpen().
+struct ScopeRecord {
+  ulittle32_t PtrParent; // Offset of the enclosing scope, or 0 when none.
+  ulittle32_t PtrEnd;    // Offset recorded when the scope is closed.
+};
+/// One entry of the stack of currently open symbol scopes.
+struct SymbolScope {
+  ScopeRecord *OpeningRecord; // Record that opened the scope.
+  uint32_t ScopeOffset;       // Symbol offset at which the scope was opened.
+};
+/// Push the scope opened by \p Sym, located at \p CurOffset, onto \p Stack and
+/// store the offset of the enclosing scope (0 when there is none) in the
+/// record's PtrParent field.
+static void scopeStackOpen(SmallVectorImpl<SymbolScope> &Stack,
+                           uint32_t CurOffset, CVSymbol &Sym) {
+  assert(symbolOpensScope(Sym.kind()));
+  SymbolScope S;
+  S.ScopeOffset = CurOffset;
+  // The record contents are written through this pointer, hence the
+  // const_cast.
+  S.OpeningRecord = const_cast<ScopeRecord *>(
+      reinterpret_cast<const ScopeRecord *>(Sym.content().data()));
+  S.OpeningRecord->PtrParent = Stack.empty() ? 0 : Stack.back().ScopeOffset;
+  Stack.push_back(S);
+}
+/// Pop the innermost open scope from \p Stack and store \p CurOffset in the
+/// PtrEnd field of the record that opened it. Warns, naming \p File, and does
+/// nothing else when no scope is open.
+static void scopeStackClose(SmallVectorImpl<SymbolScope> &Stack,
+                            uint32_t CurOffset, ObjFile *File) {
+  if (Stack.empty()) {
+    warn("symbol scopes are not balanced in " + File->getName());
+    return;
+  }
+  SymbolScope S = Stack.pop_back_val();
+  S.OpeningRecord->PtrEnd = CurOffset;
+}
+/// Decide whether \p Sym is written to the module's symbol stream.
+///
+/// \param Sym            the symbol record.
+/// \param IsGlobalScope  true when no symbol scope is currently open.
+static bool symbolGoesInModuleStream(const CVSymbol &Sym, bool IsGlobalScope) {
+  switch (Sym.kind()) {
+  case SymbolKind::S_GDATA32:
+  case SymbolKind::S_CONSTANT:
+  // We really should not be seeing S_PROCREF and S_LPROCREF in the first place
+  // since they are synthesized by the linker in response to S_GPROC32 and
+  // S_LPROC32, but if we do see them, don't put them in the module stream I
+  // guess.
+  case SymbolKind::S_PROCREF:
+  case SymbolKind::S_LPROCREF:
+    return false;
+  // S_UDT records go in the module stream if it is not a global S_UDT.
+  case SymbolKind::S_UDT:
+    return !IsGlobalScope;
+  // S_GDATA32 does not go in the module stream, but S_LDATA32 does.
+  case SymbolKind::S_LDATA32:
+  default:
+    return true;
+  }
+}
+/// Decide whether \p Sym is added to the globals stream.
+///
+/// \param Sym            the symbol record.
+/// \param IsGlobalScope  true when no symbol scope is currently open.
+static bool symbolGoesInGlobalsStream(const CVSymbol &Sym, bool IsGlobalScope) {
+  switch (Sym.kind()) {
+  case SymbolKind::S_CONSTANT:
+  case SymbolKind::S_GDATA32:
+  // S_LDATA32 goes in both the module stream and the globals stream.
+  case SymbolKind::S_LDATA32:
+  case SymbolKind::S_GPROC32:
+  case SymbolKind::S_LPROC32:
+  // We really should not be seeing S_PROCREF and S_LPROCREF in the first place
+  // since they are synthesized by the linker in response to S_GPROC32 and
+  // S_LPROC32, but if we do see them, copy them straight through.
+  case SymbolKind::S_PROCREF:
+  case SymbolKind::S_LPROCREF:
+    return true;
+  // S_UDT records go in the globals stream if it is a global S_UDT.
+  case SymbolKind::S_UDT:
+    return IsGlobalScope;
+  default:
+    return false;
+  }
+}
+/// Add \p Sym to the globals stream builder.
+///
+/// Data, constant, UDT and procedure-reference records are added as they are.
+/// For S_GPROC32 / S_LPROC32 a procedure reference record is added instead,
+/// naming the module and the symbol's offset.
+///
+/// \param Builder    globals stream builder.
+/// \param ModIndex   index of the module that contains \p Sym.
+/// \param SymOffset  offset of \p Sym in that module's symbol stream.
+/// \param Sym        the symbol; any kind not listed above is a programming
+///                   error (llvm_unreachable).
+static void addGlobalSymbol(pdb::GSIStreamBuilder &Builder, uint16_t ModIndex,
+                            unsigned SymOffset, const CVSymbol &Sym) {
+  switch (Sym.kind()) {
+  case SymbolKind::S_CONSTANT:
+  case SymbolKind::S_UDT:
+  case SymbolKind::S_GDATA32:
+  case SymbolKind::S_LDATA32:
+  case SymbolKind::S_PROCREF:
+  case SymbolKind::S_LPROCREF:
+    Builder.addGlobalSymbol(Sym);
+    break;
+  case SymbolKind::S_GPROC32:
+  case SymbolKind::S_LPROC32: {
+    SymbolRecordKind K = SymbolRecordKind::ProcRefSym;
+    if (Sym.kind() == SymbolKind::S_LPROC32)
+      K = SymbolRecordKind::LocalProcRef;
+    ProcRefSym PS(K);
+    PS.Module = ModIndex;
+    // For some reason, MSVC seems to add one to this value.
+    ++PS.Module;
+    PS.Name = getSymbolName(Sym);
+    PS.SumName = 0;
+    PS.SymOffset = SymOffset;
+    Builder.addGlobalSymbol(PS);
+    break;
+  }
+  default:
+    llvm_unreachable("Invalid symbol kind!");
+  }
+}
+/// Merge one symbols subsection of \p File into the PDB.
+///
+/// Every record is realigned if needed, has its type indices remapped through
+/// \p IndexMap, is translated from its S_*_ID form, and is then added to the
+/// globals stream and/or the module's symbol stream.
+///
+/// \param File             object the symbols belong to.
+/// \param IndexMap         type index map of \p File.
+/// \param StringTableRefs  receives pointers to the string table indices found
+///                         in the records, for later rewriting.
+/// \param SymData          raw bytes of the symbols subsection.
+///
+/// If the record lengths are corrupt, a warning is issued and the whole
+/// subsection is ignored.
+void PDBLinker::mergeSymbolRecords(ObjFile *File, const CVIndexMap &IndexMap,
+                                   std::vector<ulittle32_t *> &StringTableRefs,
+                                   BinaryStreamRef SymData) {
+  ArrayRef<uint8_t> SymsBuffer;
+  cantFail(SymData.readBytes(0, SymData.getLength(), SymsBuffer));
+  SmallVector<SymbolScope, 4> Scopes;
+  // Iterate every symbol to check if any need to be realigned, and if so, how
+  // much space we need to allocate for them.
+  bool NeedsRealignment = false;
+  unsigned TotalRealignedSize = 0;
+  auto EC = forEachCodeViewRecord<CVSymbol>(
+      SymsBuffer, [&](CVSymbol Sym) -> llvm::Error {
+        unsigned RealignedSize =
+            alignTo(Sym.length(), alignOf(CodeViewContainer::Pdb));
+        NeedsRealignment |= RealignedSize != Sym.length();
+        TotalRealignedSize += RealignedSize;
+        return Error::success();
+      });
+  // If any of the symbol record lengths was corrupt, ignore them all, warn
+  // about it, and move on.
+  if (EC) {
+    warn("corrupt symbol records in " + File->getName());
+    consumeError(std::move(EC));
+    return;
+  }
+  // If any symbol needed realignment, allocate enough contiguous memory for
+  // them all. Typically symbol subsections are small enough that this will not
+  // cause fragmentation.
+  MutableArrayRef<uint8_t> AlignedSymbolMem;
+  if (NeedsRealignment) {
+    void *AlignedData =
+        Alloc.Allocate(TotalRealignedSize, alignOf(CodeViewContainer::Pdb));
+    AlignedSymbolMem = makeMutableArrayRef(
+        reinterpret_cast<uint8_t *>(AlignedData), TotalRealignedSize);
+  }
+  // Iterate again, this time doing the real work.
+  unsigned CurSymOffset = File->ModuleDBI->getNextSymbolOffset();
+  // Run of contiguous records waiting to be added to the module in one call.
+  ArrayRef<uint8_t> BulkSymbols;
+  cantFail(forEachCodeViewRecord<CVSymbol>(
+      SymsBuffer, [&](CVSymbol Sym) -> llvm::Error {
+        // Align the record if required.
+        MutableArrayRef<uint8_t> RecordBytes;
+        if (NeedsRealignment) {
+          RecordBytes = copyAndAlignSymbol(Sym, AlignedSymbolMem);
+          Sym = CVSymbol(Sym.kind(), RecordBytes);
+        } else {
+          // Otherwise, we can actually mutate the symbol directly, since we
+          // copied it to apply relocations.
+          RecordBytes = makeMutableArrayRef(
+              const_cast<uint8_t *>(Sym.data().data()), Sym.length());
+        }
+        // Discover type index references in the record. Skip it if we don't
+        // know where they are.
+        SmallVector<TiReference, 32> TypeRefs;
+        if (!discoverTypeIndicesInSymbol(Sym, TypeRefs)) {
+          log("ignoring unknown symbol record with kind 0x" +
+              utohexstr(Sym.kind()));
+          return Error::success();
+        }
+        // Re-map all the type index references.
+        remapTypesInSymbolRecord(File, Sym.kind(), RecordBytes, IndexMap,
+                                 TypeRefs);
+        // An object file may have S_xxx_ID symbols, but these get converted to
+        // "real" symbols in a PDB.
+        translateIdSymbols(RecordBytes, getIDTable());
+        Sym = CVSymbol(symbolKind(RecordBytes), RecordBytes);
+        // If this record refers to an offset in the object file's string table,
+        // add that item to the global PDB string table and re-write the index.
+        recordStringTableReferences(Sym.kind(), RecordBytes, StringTableRefs);
+        // Fill in "Parent" and "End" fields by maintaining a stack of scopes.
+        if (symbolOpensScope(Sym.kind()))
+          scopeStackOpen(Scopes, CurSymOffset, Sym);
+        else if (symbolEndsScope(Sym.kind()))
+          scopeStackClose(Scopes, CurSymOffset, File);
+        // Add the symbol to the globals stream if necessary. Do this before
+        // adding the symbol to the module since we may need to get the next
+        // symbol offset, and writing to the module's symbol stream will update
+        // that offset.
+        if (symbolGoesInGlobalsStream(Sym, Scopes.empty()))
+          addGlobalSymbol(Builder.getGsiBuilder(),
+                          File->ModuleDBI->getModuleIndex(), CurSymOffset, Sym);
+        if (symbolGoesInModuleStream(Sym, Scopes.empty())) {
+          // Add symbols to the module in bulk. If this symbol is contiguous
+          // with the previous run of symbols to add, combine the ranges. If
+          // not, close the previous range of symbols and start a new one.
+          if (Sym.data().data() == BulkSymbols.end()) {
+            BulkSymbols = makeArrayRef(BulkSymbols.data(),
+                                       BulkSymbols.size() + Sym.length());
+          } else {
+            File->ModuleDBI->addSymbolsInBulk(BulkSymbols);
+            BulkSymbols = RecordBytes;
+          }
+          CurSymOffset += Sym.length();
+        }
+        return Error::success();
+      }));
+  // Add any remaining symbols we've accumulated.
+  File->ModuleDBI->addSymbolsInBulk(BulkSymbols);
+}
+// Allocate memory for a .debug$S / .debug$F section and relocate it.
+//
+// The buffer comes from Alloc and is DebugChunk.getSize() bytes long; the
+// chunk writes itself into it after reading its relocation targets. Returns
+// the filled buffer.
+static ArrayRef<uint8_t> relocateDebugChunk(BumpPtrAllocator &Alloc,
+                                            SectionChunk &DebugChunk) {
+  uint8_t *Buffer = Alloc.Allocate<uint8_t>(DebugChunk.getSize());
+  assert(DebugChunk.OutputSectionOff == 0 &&
+         "debug sections should not be in output sections");
+  DebugChunk.readRelocTargets();
+  DebugChunk.writeTo(Buffer);
+  return makeArrayRef(Buffer, DebugChunk.getSize());
+}
+/// Build the section contribution record describing chunk \p C.
+///
+/// \param C     chunk placed in an output section.
+/// \param Modi  module index to use when \p C is not a SectionChunk; section
+///              chunks use the module index of their own file.
+/// \returns the record; the relocation CRC is always 0 for now.
+static pdb::SectionContrib createSectionContrib(const Chunk *C, uint32_t Modi) {
+  OutputSection *OS = C->getOutputSection();
+  pdb::SectionContrib SC;
+  memset(&SC, 0, sizeof(SC));
+  SC.ISect = OS->SectionIndex;
+  SC.Off = C->getRVA() - OS->getRVA();
+  SC.Size = C->getSize();
+  if (auto *SecChunk = dyn_cast<SectionChunk>(C)) {
+    SC.Characteristics = SecChunk->Header->Characteristics;
+    SC.Imod = SecChunk->File->ModuleDBI->getModuleIndex();
+    // The data CRC is computed over the section chunk's contents.
+    ArrayRef<uint8_t> Contents = SecChunk->getContents();
+    JamCRC CRC(0);
+    ArrayRef<char> CharContents = makeArrayRef(
+        reinterpret_cast<const char *>(Contents.data()), Contents.size());
+    CRC.update(CharContents);
+    SC.DataCrc = CRC.getCRC();
+  } else {
+    SC.Characteristics = OS->Header.Characteristics;
+    // FIXME: When we start creating DBI for import libraries, use those here.
+    SC.Imod = Modi;
+  }
+  SC.RelocCrc = 0; // FIXME
+  return SC;
+}
+/// Translate a string table index of an object file into an index of the PDB
+/// string table.
+///
+/// \param ObjIndex     index into \p ObjStrTable.
+/// \param ObjStrTable  string table of the object file.
+/// \param PdbStrTable  PDB string table the string is inserted into.
+/// \returns the value returned by inserting the string into \p PdbStrTable,
+///          or 0 (after a warning) when \p ObjIndex is not a valid reference.
+static uint32_t
+translateStringTableIndex(uint32_t ObjIndex,
+                          const DebugStringTableSubsectionRef &ObjStrTable,
+                          DebugStringTableSubsection &PdbStrTable) {
+  auto ExpectedString = ObjStrTable.getString(ObjIndex);
+  if (!ExpectedString) {
+    warn("Invalid string table reference");
+    consumeError(ExpectedString.takeError());
+    return 0;
+  }
+  return PdbStrTable.insert(*ExpectedString);
+}
+/// Process one .debug$S section of the object file.
+///
+/// The section is relocated into freshly allocated memory and split into
+/// subsections. The string table and file checksums are remembered, line
+/// tables are added to the module directly, frame data is saved for finish(),
+/// and symbols are merged right away. Other subsection kinds are skipped.
+void DebugSHandler::handleDebugS(lld::coff::SectionChunk &DebugS) {
+  DebugSubsectionArray Subsections;
+  ArrayRef<uint8_t> RelocatedDebugContents = consumeDebugMagic(
+      relocateDebugChunk(Linker.Alloc, DebugS), DebugS.getSectionName());
+  BinaryStreamReader Reader(RelocatedDebugContents, support::little);
+  ExitOnErr(Reader.readArray(Subsections, RelocatedDebugContents.size()));
+  for (const DebugSubsectionRecord &SS : Subsections) {
+    switch (SS.kind()) {
+    case DebugSubsectionKind::StringTable: {
+      assert(!CVStrTab.valid() &&
+             "Encountered multiple string table subsections!");
+      ExitOnErr(CVStrTab.initialize(SS.getRecordData()));
+      break;
+    }
+    case DebugSubsectionKind::FileChecksums:
+      assert(!Checksums.valid() &&
+             "Encountered multiple checksum subsections!");
+      ExitOnErr(Checksums.initialize(SS.getRecordData()));
+      break;
+    case DebugSubsectionKind::Lines:
+      // We can add the relocated line table directly to the PDB without
+      // modification because the file checksum offsets will stay the same.
+      File.ModuleDBI->addDebugSubsection(SS);
+      break;
+    case DebugSubsectionKind::FrameData: {
+      // We need to re-write string table indices here, so save off all
+      // frame data subsections until we've processed the entire list of
+      // subsections so that we can be sure we have the string table.
+      DebugFrameDataSubsectionRef FDS;
+      ExitOnErr(FDS.initialize(SS.getRecordData()));
+      NewFpoFrames.push_back(std::move(FDS));
+      break;
+    }
+    case DebugSubsectionKind::Symbols: {
+      Linker.mergeSymbolRecords(&File, IndexMap, StringTableReferences,
+                                SS.getRecordData());
+      break;
+    }
+    default:
+      // FIXME: Process the rest of the subsections.
+      break;
+    }
+  }
+}
+/// Finish processing of the object file once all of its .debug$S sections
+/// have been handled.
+///
+/// Rewrites the saved frame data and the recorded string table references to
+/// use the PDB-wide string table, then builds a new file checksum table for
+/// the module. Without a string table subsection nothing is emitted; that is
+/// fatal if a checksums subsection was seen, and a warning if string table
+/// references were recorded.
+void DebugSHandler::finish() {
+  pdb::DbiStreamBuilder &DbiBuilder = Linker.Builder.getDbiBuilder();
+  // We should have seen all debug subsections across the entire object file now
+  // which means that if a StringTable subsection and Checksums subsection were
+  // present, now is the time to handle them.
+  if (!CVStrTab.valid()) {
+    if (Checksums.valid())
+      fatal(".debug$S sections with a checksums subsection must also contain a "
+            "string table subsection");
+    if (!StringTableReferences.empty())
+      warn("No StringTable subsection was encountered, but there are string "
+           "table references");
+    return;
+  }
+  // Rewrite string table indices in the Fpo Data and symbol records to refer to
+  // the global PDB string table instead of the object file string table.
+  for (DebugFrameDataSubsectionRef &FDS : NewFpoFrames) {
+    const ulittle32_t *Reloc = FDS.getRelocPtr();
+    // FD is a copy on purpose: it is adjusted before being handed over.
+    for (codeview::FrameData FD : FDS) {
+      FD.RvaStart += *Reloc;
+      FD.FrameFunc =
+          translateStringTableIndex(FD.FrameFunc, CVStrTab, Linker.PDBStrTab);
+      DbiBuilder.addNewFpoData(FD);
+    }
+  }
+  for (ulittle32_t *Ref : StringTableReferences)
+    *Ref = translateStringTableIndex(*Ref, CVStrTab, Linker.PDBStrTab);
+  // Make a new file checksum table that refers to offsets in the PDB-wide
+  // string table. Generally the string table subsection appears after the
+  // checksum table, so we have to do this after looping over all the
+  // subsections.
+  auto NewChecksums = make_unique<DebugChecksumsSubsection>(Linker.PDBStrTab);
+  for (FileChecksumEntry &FC : Checksums) {
+    SmallString<128> FileName =
+        ExitOnErr(CVStrTab.getString(FC.FileNameOffset));
+    pdbMakeAbsolute(FileName);
+    ExitOnErr(DbiBuilder.addModuleSourceFile(*File.ModuleDBI, FileName));
+    NewChecksums->addChecksum(FileName, FC.Kind, FC.Checksum);
+  }
+  File.ModuleDBI->addDebugSubsection(std::move(NewChecksums));
+}
+/// Add the debug information of one object file to the PDB.
+///
+/// Creates the module descriptor, merges the object's type records and then
+/// processes its live .debug$S and .debug$F sections.
+///
+/// \param File            object to add; does nothing if it was already
+///                        processed for the PDB.
+/// \param ExternIndexMap  optional index map to fill instead of a temporary
+///                        one local to this call.
+void PDBLinker::addObjFile(ObjFile *File, CVIndexMap *ExternIndexMap) {
+  if (File->wasProcessedForPDB())
+    return;
+  // Add a module descriptor for every object file. We need to put an absolute
+  // path to the object into the PDB. If this is a plain object, we make its
+  // path absolute. If it's an object in an archive, we make the archive path
+  // absolute.
+  bool InArchive = !File->ParentName.empty();
+  SmallString<128> Path = InArchive ? File->ParentName : File->getName();
+  pdbMakeAbsolute(Path);
+  StringRef Name = InArchive ? File->getName() : StringRef(Path);
+  pdb::DbiStreamBuilder &DbiBuilder = Builder.getDbiBuilder();
+  File->ModuleDBI = &ExitOnErr(DbiBuilder.addModuleInfo(Name));
+  File->ModuleDBI->setObjFileName(Path);
+  auto Chunks = File->getChunks();
+  uint32_t Modi = File->ModuleDBI->getModuleIndex();
+  // The first live section chunk provides the module's first section
+  // contribution.
+  for (Chunk *C : Chunks) {
+    auto *SecChunk = dyn_cast<SectionChunk>(C);
+    if (!SecChunk || !SecChunk->Live)
+      continue;
+    pdb::SectionContrib SC = createSectionContrib(SecChunk, Modi);
+    File->ModuleDBI->setFirstSectionContrib(SC);
+    break;
+  }
+  // Before we can process symbol substreams from .debug$S, we need to process
+  // type information, file checksums, and the string table. Add type info to
+  // the PDB first, so that we can get the map from object file type and item
+  // indices to PDB type and item indices.
+  CVIndexMap ObjectIndexMap;
+  auto IndexMapResult =
+      mergeDebugT(File, ExternIndexMap ? ExternIndexMap : &ObjectIndexMap);
+  // If the .debug$T sections fail to merge, assume there is no debug info.
+  if (!IndexMapResult) {
+    if (!Config->WarnDebugInfoUnusable) {
+      consumeError(IndexMapResult.takeError());
+      return;
+    }
+    StringRef FileName = sys::path::filename(Path);
+    warn("Cannot use debug info for '" + FileName + "' [LNK4099]\n" +
+         ">>> failed to load reference " +
+         StringRef(toString(IndexMapResult.takeError())));
+    return;
+  }
+  ScopedTimer T(SymbolMergingTimer);
+  DebugSHandler DSH(*this, *File, *IndexMapResult);
+  // Now do all live .debug$S and .debug$F sections.
+  for (SectionChunk *DebugChunk : File->getDebugChunks()) {
+    if (!DebugChunk->Live || DebugChunk->getSize() == 0)
+      continue;
+    if (DebugChunk->getSectionName() == ".debug$S") {
+      DSH.handleDebugS(*DebugChunk);
+      continue;
+    }
+    if (DebugChunk->getSectionName() == ".debug$F") {
+      ArrayRef<uint8_t> RelocatedDebugContents =
+          relocateDebugChunk(Alloc, *DebugChunk);
+      FixedStreamArray<object::FpoData> FpoRecords;
+      BinaryStreamReader Reader(RelocatedDebugContents, support::little);
+      uint32_t Count = RelocatedDebugContents.size() / sizeof(object::FpoData);
+      ExitOnErr(Reader.readArray(FpoRecords, Count));
+      // These are already relocated and don't refer to the string table, so we
+      // can just copy it.
+      for (const object::FpoData &FD : FpoRecords)
+        DbiBuilder.addOldFpoData(FD);
+      continue;
+    }
+  }
+  // Do any post-processing now that all .debug$S sections have been processed.
+  DSH.finish();
+}
+// Builds the S_PUB32 public symbol record for Def. The record carries the
+// symbol's name, the Function flag for function definitions and import
+// thunks, and the symbol's position expressed as an output section index plus
+// the offset from the start of that section.
+static PublicSym32 createPublic(Defined *Def) {
+ PublicSym32 Record(SymbolKind::S_PUB32);
+ Record.Name = Def->getName();
+ // For COFF symbols the object file decides whether this is a function; of
+ // the remaining symbol kinds only import thunks are flagged as functions.
+ bool IsFunction;
+ if (auto *COFFDef = dyn_cast<DefinedCOFF>(Def))
+ IsFunction = COFFDef->getCOFFSymbol().isFunctionDefinition();
+ else
+ IsFunction = isa<DefinedImportThunk>(Def);
+ if (IsFunction)
+ Record.Flags = PublicSymFlags::Function;
+ OutputSection *Sec = Def->getChunk()->getOutputSection();
+ assert(Sec && "all publics should be in final image");
+ Record.Segment = Sec->SectionIndex;
+ Record.Offset = Def->getRVA() - Sec->getRVA();
+ return Record;
+// Add all object files to the PDB. Merge .debug$T sections into IpiData and
+// TpiData. Afterwards lay out the TPI and IPI streams and add one public
+// symbol, sorted by name, for every live defined symbol that has a chunk.
+// Each phase is timed by its own ScopedTimer.
+void PDBLinker::addObjectsToPDB() {
+ ScopedTimer T1(AddObjectsTimer);
+ for (ObjFile *File : ObjFile::Instances)
+ addObjFile(File);
+ // Hand the accumulated string table to the builder once all objects are in.
+ Builder.getStringTableBuilder().setStrings(PDBStrTab);
+ // Stop T1 explicitly so it does not keep running during the next phases.
+ T1.stop();
+ // Construct TPI and IPI stream contents.
+ ScopedTimer T2(TpiStreamLayoutTimer);
+ addTypeInfo(Builder.getTpiBuilder(), getTypeTable());
+ addTypeInfo(Builder.getIpiBuilder(), getIDTable());
+ T2.stop();
+ ScopedTimer T3(GlobalsLayoutTimer);
+ // Compute the public and global symbols.
+ auto &GsiBuilder = Builder.getGsiBuilder();
+ std::vector<PublicSym32> Publics;
+ Symtab->forEachSymbol([&Publics](Symbol *S) {
+ // Only emit defined, live symbols that have a chunk.
+ auto *Def = dyn_cast<Defined>(S);
+ if (Def && Def->isLive() && Def->getChunk())
+ Publics.push_back(createPublic(Def));
+ });
+ if (!Publics.empty()) {
+ // Sort the public symbols and add them to the stream.
+ sort(parallel::par, Publics.begin(), Publics.end(),
+ [](const PublicSym32 &L, const PublicSym32 &R) {
+ return L.Name < R.Name;
+ });
+ for (const PublicSym32 &Pub : Publics)
+ GsiBuilder.addPublicSymbol(Pub);
+ }
+// Embeds every file listed in Config->NatvisFiles into the PDB as an injected
+// source. A file that cannot be read produces a warning and is skipped.
+void PDBLinker::addNatvisFiles() {
+ for (StringRef NatvisPath : Config->NatvisFiles) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> Buffer =
+ MemoryBuffer::getFile(NatvisPath);
+ if (Buffer) {
+ Builder.addInjectedSource(NatvisPath, std::move(*Buffer));
+ continue;
+ }
+ warn("Cannot open input file: " + NatvisPath);
+ }
+// Translates a COFF machine type into the CodeView CPU type that is stored in
+// the linker module's compile symbol. Machine types without a mapping are
+// treated as unreachable.
+static codeview::CPUType toCodeViewMachine(COFF::MachineTypes Machine) {
+ switch (Machine) {
+ // Each return needs its own case label; without the labels every statement
+ // after the first return is unreachable and all machines map to X64.
+ case COFF::IMAGE_FILE_MACHINE_AMD64:
+ return codeview::CPUType::X64;
+ case COFF::IMAGE_FILE_MACHINE_ARM:
+ return codeview::CPUType::ARM7;
+ case COFF::IMAGE_FILE_MACHINE_ARM64:
+ return codeview::CPUType::ARM64;
+ case COFF::IMAGE_FILE_MACHINE_ARMNT:
+ return codeview::CPUType::ARMNT;
+ case COFF::IMAGE_FILE_MACHINE_I386:
+ return codeview::CPUType::Intel80386;
+ default:
+ llvm_unreachable("Unsupported CPU Type");
+ }
+// Joins Args into one space-separated command line the way MSVC does: an
+// argument containing a space or a double quote is wrapped in double quotes,
+// and every embedded double quote is doubled so that the resulting string can
+// be executed again on the cmd-line.
+static std::string quote(ArrayRef<StringRef> Args) {
+ std::string Cmd;
+ Cmd.reserve(256);
+ for (StringRef Arg : Args) {
+ // Separate this argument from whatever has been emitted so far.
+ if (!Cmd.empty())
+ Cmd.push_back(' ');
+ const bool ContainsQuote = Arg.find('"') != StringRef::npos;
+ const bool NeedsQuoting = ContainsQuote || Arg.find(' ') != StringRef::npos;
+ if (NeedsQuoting)
+ Cmd.push_back('"');
+ if (!ContainsQuote) {
+ Cmd.append(Arg);
+ } else {
+ // Splitting on '"' and rejoining with '""' doubles every quote.
+ SmallVector<StringRef, 4> Pieces;
+ Arg.split(Pieces, '"');
+ Cmd.append(join(Pieces, "\"\""));
+ }
+ if (NeedsQuoting)
+ Cmd.push_back('"');
+ }
+ return Cmd;
+// Adds the three symbol records of the "* Linker *" module to Mod: an object
+// name record, a compile record describing the linker, and an environment
+// block holding the working directory, linker executable, PDB path (Path) and
+// command line. The records are serialized using Allocator.
+static void addCommonLinkerModuleSymbols(StringRef Path,
+ pdb::DbiModuleDescriptorBuilder &Mod,
+ BumpPtrAllocator &Allocator) {
+ ObjNameSym ONS(SymbolRecordKind::ObjNameSym);
+ Compile3Sym CS(SymbolRecordKind::Compile3Sym);
+ EnvBlockSym EBS(SymbolRecordKind::EnvBlockSym);
+ ONS.Name = "* Linker *";
+ ONS.Signature = 0;
+ CS.Machine = toCodeViewMachine(Config->Machine);
+ // Interestingly, if we set the string to 0.0.0.0, then when trying to view
+ // local variables WinDbg emits an error that private symbols are not present.
+ // By setting this to a valid MSVC linker version string, local variables are
+ // displayed properly. As such, even though it is not representative of
+ // LLVM's version information, we need this for compatibility.
+ CS.Flags = CompileSym3Flags::None;
+ CS.VersionBackendBuild = 25019;
+ CS.VersionBackendMajor = 14;
+ CS.VersionBackendMinor = 10;
+ CS.VersionBackendQFE = 0;
+ // MSVC also sets the frontend to 0.0.0.0 since this is specifically for the
+ // linker module (which is by definition a backend), so we don't need to do
+ // anything here. Also, it seems we can use "LLVM Linker" for the linker name
+ // without any problems. Only the backend version has to be hardcoded to a
+ // magic number.
+ CS.VersionFrontendBuild = 0;
+ CS.VersionFrontendMajor = 0;
+ CS.VersionFrontendMinor = 0;
+ CS.VersionFrontendQFE = 0;
+ CS.Version = "LLVM Linker";
+ CS.setLanguage(SourceLanguage::Link);
+ // The recorded command line excludes Argv[0]; the executable is stored
+ // separately under the "exe" key below.
+ ArrayRef<StringRef> Args = makeArrayRef(Config->Argv).drop_front();
+ std::string ArgStr = quote(Args);
+ // The environment block is a flat list of alternating keys and values.
+ EBS.Fields.push_back("cwd");
+ SmallString<64> cwd;
+ // An explicitly configured PDB source path takes the place of the real
+ // current directory.
+ if (Config->PDBSourcePath.empty())
+ sys::fs::current_path(cwd);
+ else
+ cwd = Config->PDBSourcePath;
+ EBS.Fields.push_back(cwd);
+ EBS.Fields.push_back("exe");
+ SmallString<64> exe = Config->Argv[0];
+ pdbMakeAbsolute(exe);
+ EBS.Fields.push_back(exe);
+ EBS.Fields.push_back("pdb");
+ EBS.Fields.push_back(Path);
+ EBS.Fields.push_back("cmd");
+ EBS.Fields.push_back(ArgStr);
+ // Serialize the records in PDB container format and attach them to Mod.
+ Mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol(
+ ONS, Allocator, CodeViewContainer::Pdb));
+ Mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol(
+ CS, Allocator, CodeViewContainer::Pdb));
+ Mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol(
+ EBS, Allocator, CodeViewContainer::Pdb));
+// Adds a section symbol record describing the output section OS to Mod. The
+// record copies the section's name, index, RVA, virtual size and
+// characteristics and is serialized using Allocator.
+static void addLinkerModuleSectionSymbol(pdb::DbiModuleDescriptorBuilder &Mod,
+ OutputSection &OS,
+ BumpPtrAllocator &Allocator) {
+ SectionSym SecSym(SymbolRecordKind::SectionSym);
+ SecSym.Name = OS.Name;
+ SecSym.SectionNumber = OS.SectionIndex;
+ SecSym.Rva = OS.getRVA();
+ SecSym.Length = OS.getVirtualSize();
+ SecSym.Characteristics = OS.Header.Characteristics;
+ SecSym.Alignment = 12; // 2^12 = 4KB
+ auto Serialized = codeview::SymbolSerializer::writeOneSymbol(
+ SecSym, Allocator, CodeViewContainer::Pdb);
+ Mod.addSymbol(Serialized);
+// Creates a PDB file.
+// Runs the whole PDB pipeline: initialize the builder, add all object files,
+// add the output sections and section table, add natvis files, then commit to
+// disk. The GUID produced by the commit is copied into BuildId's PDB70
+// signature, so BuildId must not be null.
+void coff::createPDB(SymbolTable *Symtab,
+ ArrayRef<OutputSection *> OutputSections,
+ ArrayRef<uint8_t> SectionTable,
+ llvm::codeview::DebugInfo *BuildId) {
+ ScopedTimer T1(TotalPdbLinkTimer);
+ PDBLinker PDB(Symtab);
+ PDB.initialize(BuildId);
+ PDB.addObjectsToPDB();
+ PDB.addSections(OutputSections, SectionTable);
+ PDB.addNatvisFiles();
+ ScopedTimer T2(DiskCommitTimer);
+ codeview::GUID Guid;
+ PDB.commit(&Guid);
+ // Copy the 16 GUID bytes produced by the commit into the build id.
+ memcpy(&BuildId->PDB70.Signature, &Guid, 16);
+// Prepares the PDB builder and the build id record: initializes the builder
+// with a 4096-byte block size, resets BuildId to a PDB70 record with a zeroed
+// signature and age 1, reserves the predefined streams, and configures the
+// Info and DBI stream builders.
+void PDBLinker::initialize(llvm::codeview::DebugInfo *BuildId) {
+ ExitOnErr(Builder.initialize(4096)); // 4096 is blocksize
+ BuildId->Signature.CVSignature = OMF::Signature::PDB70;
+ // Signature is set to a hash of the PDB contents when the PDB is done.
+ memset(BuildId->PDB70.Signature, 0, 16);
+ BuildId->PDB70.Age = 1;
+ // Create streams in MSF for predefined streams, namely
+ // PDB, TPI, DBI and IPI.
+ for (int I = 0; I < (int)pdb::kSpecialStreamCount; ++I)
+ ExitOnErr(Builder.getMsfBuilder().addStream(0));
+ // Add an Info stream.
+ auto &InfoBuilder = Builder.getInfoBuilder();
+ InfoBuilder.setVersion(pdb::PdbRaw_ImplVer::PdbImplVC70);
+ InfoBuilder.setHashPDBContentsToGUID(true);
+ // Add an empty DBI stream.
+ pdb::DbiStreamBuilder &DbiBuilder = Builder.getDbiBuilder();
+ // The DBI age is taken from the build id record set up above.
+ DbiBuilder.setAge(BuildId->PDB70.Age);
+ DbiBuilder.setVersionHeader(pdb::PdbDbiV70);
+ DbiBuilder.setMachineType(Config->Machine);
+ // Technically we are not link.exe 14.11, but there are known cases where
+ // debugging tools on Windows expect Microsoft-specific version numbers or
+ // they fail to work at all. Since we know we produce PDBs that are
+ // compatible with LINK 14.11, we set that version number here.
+ DbiBuilder.setBuildNumber(14, 11);
+// Creates the "* Linker *" module and records the image layout in the DBI
+// stream: one section symbol per output section, one section contribution per
+// chunk, the section map, and the raw COFF section headers. SectionTable is
+// reinterpreted as an array of object::coff_section.
+void PDBLinker::addSections(ArrayRef<OutputSection *> OutputSections,
+ ArrayRef<uint8_t> SectionTable) {
+ // It's not entirely clear what this is, but the * Linker * module uses it.
+ pdb::DbiStreamBuilder &DbiBuilder = Builder.getDbiBuilder();
+ NativePath = Config->PDBPath;
+ pdbMakeAbsolute(NativePath);
+ uint32_t PdbFilePathNI = DbiBuilder.addECName(NativePath);
+ auto &LinkerModule = ExitOnErr(DbiBuilder.addModuleInfo("* Linker *"));
+ LinkerModule.setPdbFilePathNI(PdbFilePathNI);
+ addCommonLinkerModuleSymbols(NativePath, LinkerModule, Alloc);
+ // Add section contributions. They must be ordered by ascending RVA.
+ for (OutputSection *OS : OutputSections) {
+ addLinkerModuleSectionSymbol(LinkerModule, *OS, Alloc);
+ for (Chunk *C : OS->Chunks) {
+ pdb::SectionContrib SC =
+ createSectionContrib(C, LinkerModule.getModuleIndex());
+ Builder.getDbiBuilder().addSectionContrib(SC);
+ }
+ }
+ // Add Section Map stream.
+ // View the raw section table bytes as an array of COFF section headers.
+ ArrayRef<object::coff_section> Sections = {
+ (const object::coff_section *)SectionTable.data(),
+ SectionTable.size() / sizeof(object::coff_section)};
+ SectionMap = pdb::DbiStreamBuilder::createSectionMap(Sections);
+ DbiBuilder.setSectionMap(SectionMap);
+ // Add COFF section header stream.
+ ExitOnErr(
+ DbiBuilder.addDbgStream(pdb::DbgHeaderType::SectionHdr, SectionTable));
+// Writes the PDB to Config->PDBPath. Guid is passed through to the builder's
+// commit; any error reported by the builder goes through ExitOnErr.
+void PDBLinker::commit(codeview::GUID *Guid) {
+ // Write to a file.
+ ExitOnErr(Builder.commit(Config->PDBPath, Guid));
+// Resolves FileID to a file name: FileID selects an entry of the checksum
+// table, and that entry's FileNameOffset is looked up in Strings. Returns a
+// no_records CodeViewError when the checksum table has no such entry.
+static Expected<StringRef>
+getFileName(const DebugStringTableSubsectionRef &Strings,
+ const DebugChecksumsSubsectionRef &Checksums, uint32_t FileID) {
+ const auto &ChecksumArray = Checksums.getArray();
+ auto Entry = ChecksumArray.at(FileID);
+ if (Entry != ChecksumArray.end())
+ return Strings.getString(Entry->FileNameOffset);
+ return make_error<CodeViewError>(cv_error_code::no_records);
+// Returns the section-relative (SECREL) relocation type used by the machine
+// being linked for (Config->Machine). Other machine types are treated as
+// unreachable.
+static uint32_t getSecrelReloc() {
+ switch (Config->Machine) {
+ // Each case must return its own relocation constant; with the returns
+ // missing, every known machine falls through to llvm_unreachable.
+ case AMD64:
+ return COFF::IMAGE_REL_AMD64_SECREL;
+ case I386:
+ return COFF::IMAGE_REL_I386_SECREL;
+ case ARMNT:
+ return COFF::IMAGE_REL_ARM_SECREL;
+ case ARM64:
+ return COFF::IMAGE_REL_ARM64_SECREL;
+ default:
+ llvm_unreachable("unknown machine type");
+ }
+// Try to find a line table for the given offset Addr into the given chunk C.
+// If a line table was found, the line table, the string and checksum tables
+// that are used to interpret the line table, and the offset of Addr in the line
+// table are stored in the output arguments. Returns whether a line table was
+// found.
+// The search walks every .debug$S chunk of C's file. Malformed debug data is
+// fatal because all reads go through ExitOnErr.
+static bool findLineTable(const SectionChunk *C, uint32_t Addr,
+ DebugStringTableSubsectionRef &CVStrTab,
+ DebugChecksumsSubsectionRef &Checksums,
+ DebugLinesSubsectionRef &Lines,
+ uint32_t &OffsetInLinetable) {
+ ExitOnError ExitOnErr;
+ uint32_t SecrelReloc = getSecrelReloc();
+ for (SectionChunk *DbgC : C->File->getDebugChunks()) {
+ if (DbgC->getSectionName() != ".debug$S")
+ continue;
+ // Build a mapping of SECREL relocations in DbgC that refer to C.
+ // Key: the relocation's address within DbgC. Value: the target symbol's
+ // value.
+ DenseMap<uint32_t, uint32_t> Secrels;
+ for (const coff_relocation &R : DbgC->Relocs) {
+ if (R.Type != SecrelReloc)
+ continue;
+ if (auto *S = dyn_cast_or_null<DefinedRegular>(
+ C->File->getSymbols()[R.SymbolTableIndex]))
+ if (S->getChunk() == C)
+ Secrels[R.VirtualAddress] = S->getValue();
+ }
+ ArrayRef<uint8_t> Contents =
+ consumeDebugMagic(DbgC->getContents(), ".debug$S");
+ DebugSubsectionArray Subsections;
+ BinaryStreamReader Reader(Contents, support::little);
+ ExitOnErr(Reader.readArray(Subsections, Contents.size()));
+ for (const DebugSubsectionRecord &SS : Subsections) {
+ switch (SS.kind()) {
+ case DebugSubsectionKind::StringTable: {
+ assert(!CVStrTab.valid() &&
+ "Encountered multiple string table subsections!");
+ ExitOnErr(CVStrTab.initialize(SS.getRecordData()));
+ break;
+ }
+ case DebugSubsectionKind::FileChecksums:
+ assert(!Checksums.valid() &&
+ "Encountered multiple checksum subsections!");
+ ExitOnErr(Checksums.initialize(SS.getRecordData()));
+ break;
+ case DebugSubsectionKind::Lines: {
+ ArrayRef<uint8_t> Bytes;
+ auto Ref = SS.getRecordData();
+ ExitOnErr(Ref.readLongestContiguousChunk(0, Bytes));
+ // Position of this subsection's data within DbgC, used to match it
+ // against the relocation addresses collected above.
+ size_t OffsetInDbgC = Bytes.data() - DbgC->getContents().data();
+ // Check whether this line table refers to C.
+ auto I = Secrels.find(OffsetInDbgC);
+ if (I == Secrels.end())
+ break;
+ // Check whether this line table covers Addr in C.
+ DebugLinesSubsectionRef LinesTmp;
+ ExitOnErr(LinesTmp.initialize(BinaryStreamReader(Ref)));
+ uint32_t OffsetInC = I->second + LinesTmp.header()->RelocOffset;
+ if (Addr < OffsetInC || Addr >= OffsetInC + LinesTmp.header()->CodeSize)
+ break;
+ assert(!Lines.header() &&
+ "Encountered multiple line tables for function!");
+ ExitOnErr(Lines.initialize(BinaryStreamReader(Ref)));
+ OffsetInLinetable = Addr - OffsetInC;
+ break;
+ }
+ default:
+ break;
+ }
+ // Stop as soon as all three pieces have been found.
+ if (CVStrTab.valid() && Checksums.valid() && Lines.header())
+ return true;
+ }
+ }
+ return false;
+// Use CodeView line tables to resolve a file and line number for the given
+// offset into the given chunk and return them, or {"", 0} if a line table was
+// not found.
+// The result is taken from the last line entry before the first entry whose
+// offset is greater than the requested one. A failing file name lookup is
+// fatal (ExitOnErr).
+std::pair<StringRef, uint32_t> coff::getFileLine(const SectionChunk *C,
+ uint32_t Addr) {
+ ExitOnError ExitOnErr;
+ DebugStringTableSubsectionRef CVStrTab;
+ DebugChecksumsSubsectionRef Checksums;
+ DebugLinesSubsectionRef Lines;
+ // Only read after findLineTable has returned true and filled it in.
+ uint32_t OffsetInLinetable;
+ if (!findLineTable(C, Addr, CVStrTab, Checksums, Lines, OffsetInLinetable))
+ return {"", 0};
+ // File and line of the most recently visited entry.
+ Optional<uint32_t> NameIndex;
+ Optional<uint32_t> LineNumber;
+ for (LineColumnEntry &Entry : Lines) {
+ for (const LineNumberEntry &LN : Entry.LineNumbers) {
+ LineInfo LI(LN.Flags);
+ if (LN.Offset > OffsetInLinetable) {
+ // This entry starts past the target. Report the previous entry, or
+ // this one if nothing has been visited yet.
+ if (!NameIndex) {
+ NameIndex = Entry.NameIndex;
+ LineNumber = LI.getStartLine();
+ }
+ StringRef Filename =
+ ExitOnErr(getFileName(CVStrTab, Checksums, *NameIndex));
+ return {Filename, *LineNumber};
+ }
+ NameIndex = Entry.NameIndex;
+ LineNumber = LI.getStartLine();
+ }
+ }
+ // No entry lies past the target: use the last entry, if there was one.
+ if (!NameIndex)
+ return {"", 0};
+ StringRef Filename = ExitOnErr(getFileName(CVStrTab, Checksums, *NameIndex));
+ return {Filename, *LineNumber};
diff --git a/contrib/llvm/tools/lld/COFF/PDB.h b/contrib/llvm/tools/lld/COFF/PDB.h
new file mode 100644
index 000000000000..ea7a9996f415
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/PDB.h
@@ -0,0 +1,38 @@
+//===- PDB.h ----------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#ifndef LLD_COFF_PDB_H
+#define LLD_COFF_PDB_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+namespace llvm {
+namespace codeview {
+union DebugInfo;
+namespace lld {
+namespace coff {
+class OutputSection;
+class SectionChunk;
+class SymbolTable;
+// Creates a PDB file from the symbol table, the output sections and the raw
+// section table; BuildId is the debug info record associated with the PDB.
+void createPDB(SymbolTable *Symtab,
+ llvm::ArrayRef<OutputSection *> OutputSections,
+ llvm::ArrayRef<uint8_t> SectionTable,
+ llvm::codeview::DebugInfo *BuildId);
+// Resolves the offset Addr into chunk C to a (file name, line number) pair
+// using CodeView line tables; yields {"", 0} if no line table is found.
+std::pair<llvm::StringRef, uint32_t> getFileLine(const SectionChunk *C,
+ uint32_t Addr);
diff --git a/contrib/llvm/tools/lld/COFF/README.md b/contrib/llvm/tools/lld/COFF/README.md
new file mode 100644
index 000000000000..f1bfc9c15263
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/README.md
@@ -0,0 +1 @@
+See docs/NewLLD.rst
diff --git a/contrib/llvm/tools/lld/COFF/SymbolTable.cpp b/contrib/llvm/tools/lld/COFF/SymbolTable.cpp
new file mode 100644
index 000000000000..1a9e0455dc1d
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/SymbolTable.cpp
@@ -0,0 +1,548 @@
+//===- SymbolTable.cpp ----------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "SymbolTable.h"
+#include "Config.h"
+#include "Driver.h"
+#include "LTO.h"
+#include "PDB.h"
+#include "Symbols.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+#include "lld/Common/Timer.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <utility>
+using namespace llvm;
+namespace lld {
+namespace coff {
+static Timer LTOTimer("LTO", Timer::root());
+SymbolTable *Symtab;
+// Parses File and registers it with the link. The first file with a known
+// machine type fixes Config->Machine; a later file with a different known
+// machine type is reported as an error and not registered. Finally, any
+// directives carried by the file are handed to the driver.
+void SymbolTable::addFile(InputFile *File) {
+ log("Reading " + toString(File));
+ File->parse();
+ MachineTypes MT = File->getMachineType();
+ if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) {
+ Config->Machine = MT;
+ } else if (MT != IMAGE_FILE_MACHINE_UNKNOWN && Config->Machine != MT) {
+ error(toString(File) + ": machine type " + machineToStr(MT) +
+ " conflicts with " + machineToStr(Config->Machine));
+ return;
+ }
+ // Record the file in the instance list that matches its kind.
+ if (auto *F = dyn_cast<ObjFile>(File)) {
+ ObjFile::Instances.push_back(F);
+ } else if (auto *F = dyn_cast<BitcodeFile>(File)) {
+ BitcodeFile::Instances.push_back(F);
+ } else if (auto *F = dyn_cast<ImportFile>(File)) {
+ ImportFile::Instances.push_back(F);
+ }
+ StringRef S = File->getDirectives();
+ if (S.empty())
+ return;
+ log("Directives: " + toString(File) + ": " + S);
+ Driver->parseDirectives(S);
+static void errorOrWarn(const Twine &S) {
+ if (Config->ForceUnresolved)
+ warn(S);
+ else
+ error(S);
+// Finds the regular symbol defined in SC whose value is the largest one not
+// exceeding Addr, which is generally the global variable or function whose
+// definition contains Addr. Returns nullptr if SC has no such symbol.
+static Symbol *getSymbol(SectionChunk *SC, uint32_t Addr) {
+ DefinedRegular *Best = nullptr;
+ for (Symbol *S : SC->File->getSymbols()) {
+ auto *D = dyn_cast_or_null<DefinedRegular>(S);
+ // Only regular symbols that live in SC at or before Addr qualify.
+ if (!D || D->getChunk() != SC || D->getValue() > Addr)
+ continue;
+ // Keep the qualifying symbol closest to Addr; on a tie the later symbol
+ // replaces the earlier one.
+ if (!Best || D->getValue() >= Best->getValue())
+ Best = D;
+ }
+ return Best;
+// Builds the ">>> referenced by ..." lines for the symbol at symbol table
+// index SymIndex of File. Each relocation against that symbol yields one
+// entry, provided a source file/line or an enclosing symbol can be found for
+// it. If no entry can be produced, the object file alone is named.
+std::string getSymbolLocations(ObjFile *File, uint32_t SymIndex) {
+ struct Location {
+ Symbol *Sym;
+ std::pair<StringRef, uint32_t> FileLine;
+ };
+ std::vector<Location> Found;
+ for (Chunk *C : File->getChunks()) {
+ if (auto *SC = dyn_cast<SectionChunk>(C)) {
+ for (const coff_relocation &Rel : SC->Relocs) {
+ if (Rel.SymbolTableIndex != SymIndex)
+ continue;
+ std::pair<StringRef, uint32_t> Where =
+ getFileLine(SC, Rel.VirtualAddress);
+ Symbol *Enclosing = getSymbol(SC, Rel.VirtualAddress);
+ // Skip relocations for which nothing useful is known.
+ if (Where.first.empty() && !Enclosing)
+ continue;
+ Found.push_back({Enclosing, Where});
+ }
+ }
+ }
+ if (Found.empty())
+ return "\n>>> referenced by " + toString(File);
+ std::string Text;
+ llvm::raw_string_ostream Stream(Text);
+ for (const Location &Loc : Found) {
+ Stream << "\n>>> referenced by ";
+ // Source position first, when available, then the object file.
+ if (!Loc.FileLine.first.empty())
+ Stream << Loc.FileLine.first << ":" << Loc.FileLine.second
+ << "\n>>> ";
+ Stream << toString(File);
+ if (Loc.Sym)
+ Stream << ":(" << toString(*Loc.Sym) << ')';
+ }
+ return Stream.str();
+// For every undefined symbol <name> that is used in a regular object and is
+// not itself an __imp_ symbol, requests the archive member that provides a
+// lazy __imp_<name> symbol, unless that load is already pending.
+void SymbolTable::loadMinGWAutomaticImports() {
+ for (auto &I : SymMap) {
+ Symbol *Sym = I.second;
+ auto *Undef = dyn_cast<Undefined>(Sym);
+ if (!Undef)
+ continue;
+ if (!Sym->IsUsedInRegularObj)
+ continue;
+ StringRef Name = Undef->getName();
+ if (Name.startswith("__imp_"))
+ continue;
+ // If we have an undefined symbol, but we have a Lazy representing a
+ // symbol we could load from file, make sure to load that.
+ Lazy *L = dyn_cast_or_null<Lazy>(find(("__imp_" + Name).str()));
+ if (!L || L->PendingArchiveLoad)
+ continue;
+ log("Loading lazy " + L->getName() + " from " + L->File->getName() +
+ " for automatic import");
+ // Mark the load as pending before requesting the member so it is only
+ // requested once.
+ L->PendingArchiveLoad = true;
+ L->File->addMember(&L->Sym);
+ }
+bool SymbolTable::handleMinGWAutomaticImport(Symbol *Sym, StringRef Name) {
+ if (Name.startswith("__imp_"))
+ return false;
+ Defined *Imp = dyn_cast_or_null<Defined>(find(("__imp_" + Name).str()));
+ if (!Imp)
+ return false;
+ // Replace the reference directly to a variable with a reference
+ // to the import address table instead. This obviously isn't right,
+ // but we mark the symbol as IsRuntimePseudoReloc, and a later pass
+ // will add runtime pseudo relocations for every relocation against
+ // this Symbol. The runtime pseudo relocation framework expects the
+ // reference itself to point at the IAT entry.
+ size_t ImpSize = 0;
+ if (isa<DefinedImportData>(Imp)) {
+ log("Automatically importing " + Name + " from " +
+ cast<DefinedImportData>(Imp)->getDLLName());
+ ImpSize = sizeof(DefinedImportData);
+ } else if (isa<DefinedRegular>(Imp)) {
+ log("Automatically importing " + Name + " from " +
+ toString(cast<DefinedRegular>(Imp)->File));
+ ImpSize = sizeof(DefinedRegular);
+ } else {
+ warn("unable to automatically import " + Name + " from " + Imp->getName() +
+ " from " + toString(cast<DefinedRegular>(Imp)->File) +
+ "; unexpected symbol type");
+ return false;
+ }
+ Sym->replaceKeepingName(Imp, ImpSize);
+ Sym->IsRuntimePseudoReloc = true;
+ // There may exist symbols named .refptr.<name> which only consist
+ // of a single pointer to <name>. If it turns out <name> is
+ // automatically imported, we don't need to keep the .refptr.<name>
+ // pointer at all, but redirect all accesses to it to the IAT entry
+ // for __imp_<name> instead, and drop the whole .refptr.<name> chunk.
+ DefinedRegular *Refptr =
+ dyn_cast_or_null<DefinedRegular>(find((".refptr." + Name).str()));
+ if (Refptr && Refptr->getChunk()->getSize() == Config->Wordsize) {
+ SectionChunk *SC = dyn_cast_or_null<SectionChunk>(Refptr->getChunk());
+ if (SC && SC->Relocs.size() == 1 && *SC->symbols().begin() == Sym) {
+ log("Replacing .refptr." + Name + " with " + Imp->getName());
+ Refptr->getChunk()->Live = false;
+ Refptr->replaceKeepingName(Imp, ImpSize);
+ }
+ }
+ return true;
+// Makes a last attempt to resolve each undefined symbol used in a regular
+// object (weak alias, local __imp_ import, MinGW automatic import) and then
+// reports those still undefined, together with the locations that reference
+// them. With Config->ForceUnresolved the reports are warnings and the symbols
+// are replaced by absolute symbols with value 0. Locally defined imports are
+// reported as warnings if Config->WarnLocallyDefinedImported is set.
+void SymbolTable::reportRemainingUndefines() {
+ // Symbols that remain undefined after all resolution attempts.
+ SmallPtrSet<Symbol *, 8> Undefs;
+ // Maps each __imp_ symbol resolved locally to the definition it refers to.
+ DenseMap<Symbol *, Symbol *> LocalImports;
+ for (auto &I : SymMap) {
+ Symbol *Sym = I.second;
+ auto *Undef = dyn_cast<Undefined>(Sym);
+ if (!Undef)
+ continue;
+ if (!Sym->IsUsedInRegularObj)
+ continue;
+ StringRef Name = Undef->getName();
+ // A weak alias may have been resolved, so check for that.
+ if (Defined *D = Undef->getWeakAlias()) {
+ // We want to replace Sym with D. However, we can't just blindly
+ // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an
+ // internal symbol, and internal symbols are stored as "unparented"
+ // Symbols. For that reason we need to check which type of symbol we
+ // are dealing with and copy the correct number of bytes.
+ if (isa<DefinedRegular>(D))
+ memcpy(Sym, D, sizeof(DefinedRegular));
+ else if (isa<DefinedAbsolute>(D))
+ memcpy(Sym, D, sizeof(DefinedAbsolute));
+ else
+ memcpy(Sym, D, sizeof(SymbolUnion));
+ continue;
+ }
+ // If we can resolve a symbol by removing __imp_ prefix, do that.
+ // This odd rule is for compatibility with MSVC linker.
+ if (Name.startswith("__imp_")) {
+ Symbol *Imp = find(Name.substr(strlen("__imp_")));
+ if (Imp && isa<Defined>(Imp)) {
+ auto *D = cast<Defined>(Imp);
+ replaceSymbol<DefinedLocalImport>(Sym, Name, D);
+ LocalImportChunks.push_back(cast<DefinedLocalImport>(Sym)->getChunk());
+ LocalImports[Sym] = D;
+ continue;
+ }
+ }
+ // We don't want to report missing Microsoft precompiled headers symbols.
+ // A proper message will be emitted instead in PDBLinker::aquirePrecompObj
+ if (Name.contains("_PchSym_"))
+ continue;
+ if (Config->MinGW && handleMinGWAutomaticImport(Sym, Name))
+ continue;
+ // Remaining undefined symbols are not fatal if /force is specified.
+ // They are replaced with dummy defined symbols.
+ if (Config->ForceUnresolved)
+ replaceSymbol<DefinedAbsolute>(Sym, Name, 0);
+ Undefs.insert(Sym);
+ }
+ if (Undefs.empty() && LocalImports.empty())
+ return;
+ // First report symbols referenced through Config->GCRoot; these are
+ // attributed to "<root>" rather than to an object file.
+ for (Symbol *B : Config->GCRoot) {
+ if (Undefs.count(B))
+ errorOrWarn("<root>: undefined symbol: " + toString(*B));
+ if (Config->WarnLocallyDefinedImported)
+ if (Symbol *Imp = LocalImports.lookup(B))
+ warn("<root>: locally defined symbol imported: " + toString(*Imp) +
+ " (defined in " + toString(Imp->getFile()) + ") [LNK4217]");
+ }
+ // Then report per object file, with the referencing locations.
+ for (ObjFile *File : ObjFile::Instances) {
+ // Starts at -1 so that the pre-increment below yields index 0 for the
+ // first symbol, including when that entry is null.
+ size_t SymIndex = (size_t)-1;
+ for (Symbol *Sym : File->getSymbols()) {
+ ++SymIndex;
+ if (!Sym)
+ continue;
+ if (Undefs.count(Sym))
+ errorOrWarn("undefined symbol: " + toString(*Sym) +
+ getSymbolLocations(File, SymIndex));
+ if (Config->WarnLocallyDefinedImported)
+ if (Symbol *Imp = LocalImports.lookup(Sym))
+ warn(toString(File) +
+ ": locally defined symbol imported: " + toString(*Imp) +
+ " (defined in " + toString(Imp->getFile()) + ") [LNK4217]");
+ }
+ }
+// Looks up the symbol table entry for Name, creating an uninitialized one if
+// none exists yet. Returns the entry and whether it was newly created. A new
+// entry starts with IsUsedInRegularObj and PendingArchiveLoad cleared.
+std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name) {
+ Symbol *&Slot = SymMap[CachedHashStringRef(Name)];
+ if (Slot)
+ return {Slot, false};
+ Slot = reinterpret_cast<Symbol *>(make<SymbolUnion>());
+ Slot->IsUsedInRegularObj = false;
+ Slot->PendingArchiveLoad = false;
+ return {Slot, true};
+// Same as insert(Name), but additionally marks the symbol as used in a regular
+// object unless File is a bitcode file. A null File counts as a regular use.
+std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name, InputFile *File) {
+ std::pair<Symbol *, bool> Entry = insert(Name);
+ const bool FromBitcode = File && isa<BitcodeFile>(File);
+ if (!FromBitcode)
+ Entry.first->IsUsedInRegularObj = true;
+ return Entry;
+Symbol *SymbolTable::addUndefined(StringRef Name, InputFile *F,
+ bool IsWeakAlias) {
+ Symbol *S;
+ bool WasInserted;
+ std::tie(S, WasInserted) = insert(Name, F);
+ if (WasInserted || (isa<Lazy>(S) && IsWeakAlias)) {
+ replaceSymbol<Undefined>(S, Name);
+ return S;
+ }
+ if (auto *L = dyn_cast<Lazy>(S)) {
+ if (!S->PendingArchiveLoad) {
+ S->PendingArchiveLoad = true;
+ L->File->addMember(&L->Sym);
+ }
+ }
+ return S;
+// Registers the archive symbol Sym from archive F. A new entry becomes a Lazy
+// symbol. If the name is already an Undefined symbol without a weak alias and
+// without a pending load, the archive member is requested right away. Any
+// other existing entry is left untouched.
+void SymbolTable::addLazy(ArchiveFile *F, const Archive::Symbol Sym) {
+ StringRef Name = Sym.getName();
+ Symbol *S;
+ bool WasInserted;
+ std::tie(S, WasInserted) = insert(Name);
+ if (WasInserted) {
+ replaceSymbol<Lazy>(S, F, Sym);
+ return;
+ }
+ auto *U = dyn_cast<Undefined>(S);
+ if (!U || U->WeakAlias || S->PendingArchiveLoad)
+ return;
+ S->PendingArchiveLoad = true;
+ F->addMember(&Sym);
+// Reports that Existing is defined both in its own file and in NewFile. The
+// report is an error, or only a warning when Config->ForceMultiple is set.
+void SymbolTable::reportDuplicate(Symbol *Existing, InputFile *NewFile) {
+ std::string Text = "duplicate symbol: " + toString(*Existing) + " in " +
+ toString(Existing->getFile()) + " and in " +
+ toString(NewFile);
+ if (!Config->ForceMultiple) {
+ error(Text);
+ return;
+ }
+ warn(Text);
+// Defines N as an absolute symbol described by the COFF symbol Sym. The
+// definition is installed if the entry is new, undefined or lazy. Otherwise a
+// duplicate is reported unless the existing symbol is a DefinedCOFF. Returns
+// the table entry in every case.
+Symbol *SymbolTable::addAbsolute(StringRef N, COFFSymbolRef Sym) {
+ std::pair<Symbol *, bool> Entry = insert(N, nullptr);
+ Symbol *S = Entry.first;
+ S->IsUsedInRegularObj = true;
+ const bool CanDefine = Entry.second || isa<Undefined>(S) || isa<Lazy>(S);
+ if (CanDefine) {
+ replaceSymbol<DefinedAbsolute>(S, N, Sym);
+ return S;
+ }
+ if (!isa<DefinedCOFF>(S))
+ reportDuplicate(S, nullptr);
+ return S;
+// Defines N as an absolute symbol with the value VA. The definition is
+// installed if the entry is new, undefined or lazy. Otherwise a duplicate is
+// reported unless the existing symbol is a DefinedCOFF. Returns the table
+// entry in every case.
+Symbol *SymbolTable::addAbsolute(StringRef N, uint64_t VA) {
+ std::pair<Symbol *, bool> Entry = insert(N, nullptr);
+ Symbol *S = Entry.first;
+ S->IsUsedInRegularObj = true;
+ const bool CanDefine = Entry.second || isa<Undefined>(S) || isa<Lazy>(S);
+ if (CanDefine) {
+ replaceSymbol<DefinedAbsolute>(S, N, VA);
+ return S;
+ }
+ if (!isa<DefinedCOFF>(S))
+ reportDuplicate(S, nullptr);
+ return S;
+// Defines N as a synthetic symbol backed by chunk C. The definition is
+// installed if the entry is new, undefined or lazy. Otherwise a duplicate is
+// reported unless the existing symbol is a DefinedCOFF. Returns the table
+// entry in every case.
+Symbol *SymbolTable::addSynthetic(StringRef N, Chunk *C) {
+ std::pair<Symbol *, bool> Entry = insert(N, nullptr);
+ Symbol *S = Entry.first;
+ S->IsUsedInRegularObj = true;
+ const bool CanDefine = Entry.second || isa<Undefined>(S) || isa<Lazy>(S);
+ if (CanDefine) {
+ replaceSymbol<DefinedSynthetic>(S, N, C);
+ return S;
+ }
+ if (!isa<DefinedCOFF>(S))
+ reportDuplicate(S, nullptr);
+ return S;
+// Defines N as a regular, external, non-COMDAT symbol from file F located in
+// chunk C. If the name is already a DefinedRegular symbol, a duplicate is
+// reported and the existing definition is kept. Returns the table entry.
+Symbol *SymbolTable::addRegular(InputFile *F, StringRef N,
+ const coff_symbol_generic *Sym,
+ SectionChunk *C) {
+ std::pair<Symbol *, bool> Entry = insert(N, F);
+ Symbol *S = Entry.first;
+ if (!Entry.second && isa<DefinedRegular>(S)) {
+ reportDuplicate(S, F);
+ return S;
+ }
+ replaceSymbol<DefinedRegular>(S, F, N, /*IsCOMDAT*/ false,
+ /*IsExternal*/ true, Sym, C);
+ return S;
+// Defines N as an external COMDAT symbol from file F, with no chunk assigned
+// yet. Returns the table entry and whether this call installed the
+// definition. If the name is already a DefinedRegular symbol it is kept, and a
+// duplicate is reported when that existing symbol is not a COMDAT.
+std::pair<Symbol *, bool>
+SymbolTable::addComdat(InputFile *F, StringRef N,
+ const coff_symbol_generic *Sym) {
+ std::pair<Symbol *, bool> Entry = insert(N, F);
+ Symbol *S = Entry.first;
+ const bool AlreadyRegular = !Entry.second && isa<DefinedRegular>(S);
+ if (AlreadyRegular) {
+ if (!cast<DefinedRegular>(S)->isCOMDAT())
+ reportDuplicate(S, F);
+ return {S, false};
+ }
+ replaceSymbol<DefinedRegular>(S, F, N, /*IsCOMDAT*/ true,
+ /*IsExternal*/ true, Sym, nullptr);
+ return {S, true};
+// Defines N as a common symbol of the given Size from file F. The definition
+// is installed if the entry is new or not a DefinedCOFF. An existing common
+// symbol is replaced only by a strictly larger one. Any other existing
+// DefinedCOFF symbol is kept. Returns the table entry.
+Symbol *SymbolTable::addCommon(InputFile *F, StringRef N, uint64_t Size,
+ const coff_symbol_generic *Sym, CommonChunk *C) {
+ std::pair<Symbol *, bool> Entry = insert(N, F);
+ Symbol *S = Entry.first;
+ if (Entry.second || !isa<DefinedCOFF>(S)) {
+ replaceSymbol<DefinedCommon>(S, F, N, Size, Sym, C);
+ return S;
+ }
+ auto *Existing = dyn_cast<DefinedCommon>(S);
+ if (Existing && Size > Existing->getSize())
+ replaceSymbol<DefinedCommon>(S, F, N, Size, Sym, C);
+ return S;
+// Defines N as import data provided by import file F. Returns the symbol if
+// the entry was new, undefined or lazy. Otherwise reports a duplicate and
+// returns nullptr.
+Symbol *SymbolTable::addImportData(StringRef N, ImportFile *F) {
+ std::pair<Symbol *, bool> Entry = insert(N, nullptr);
+ Symbol *S = Entry.first;
+ S->IsUsedInRegularObj = true;
+ const bool AlreadyDefined =
+ !Entry.second && !isa<Undefined>(S) && !isa<Lazy>(S);
+ if (AlreadyDefined) {
+ reportDuplicate(S, F);
+ return nullptr;
+ }
+ replaceSymbol<DefinedImportData>(S, N, F);
+ return S;
+// Defines Name as an import thunk for the import data symbol ID on the given
+// Machine. Returns the symbol if the entry was new, undefined or lazy.
+// Otherwise reports a duplicate against ID's file and returns nullptr.
+Symbol *SymbolTable::addImportThunk(StringRef Name, DefinedImportData *ID,
+ uint16_t Machine) {
+ std::pair<Symbol *, bool> Entry = insert(Name, nullptr);
+ Symbol *S = Entry.first;
+ S->IsUsedInRegularObj = true;
+ const bool AlreadyDefined =
+ !Entry.second && !isa<Undefined>(S) && !isa<Lazy>(S);
+ if (AlreadyDefined) {
+ reportDuplicate(S, ID->File);
+ return nullptr;
+ }
+ replaceSymbol<DefinedImportThunk>(S, Name, ID, Machine);
+ return S;
+// Collects the chunks of all object files into one vector, in the order of
+// ObjFile::Instances and, within each file, in the order of its chunk list.
+std::vector<Chunk *> SymbolTable::getChunks() {
+ std::vector<Chunk *> All;
+ for (ObjFile *Obj : ObjFile::Instances)
+ for (Chunk *C : Obj->getChunks())
+ All.push_back(C);
+ return All;
+// Returns the symbol stored in SymMap under Name, or a null pointer if there
+// is none. The lookup never creates an entry.
+Symbol *SymbolTable::find(StringRef Name) {
+ return SymMap.lookup(CachedHashStringRef(Name));
+// Looks up Name, prefixed with an underscore when linking for I386 and
+// unchanged for every other machine.
+Symbol *SymbolTable::findUnderscore(StringRef Name) {
+ if (Config->Machine != I386)
+ return find(Name);
+ return find(("_" + Name).str());
+// Returns the name of the first symbol, in SymMap iteration order, whose name
+// starts with Prefix, or an empty string if no name matches.
+StringRef SymbolTable::findByPrefix(StringRef Prefix) {
+ for (const auto &Entry : SymMap) {
+ StringRef Candidate = Entry.first.val();
+ if (Candidate.startswith(Prefix))
+ return Candidate;
+ }
+ return "";
+// Finds the name under which Name is actually present in the symbol table.
+// Returns Name itself if a symbol of that name exists and is not undefined.
+// Otherwise searches for a decorated variant by prefix and returns its name,
+// or an empty string if none is found.
+StringRef SymbolTable::findMangle(StringRef Name) {
+ if (Symbol *Sym = find(Name))
+ if (!isa<Undefined>(Sym))
+ return Name;
+ // Outside I386 only the "?<name>@@Y" prefix is tried.
+ if (Config->Machine != I386)
+ return findByPrefix(("?" + Name + "@@Y").str());
+ // The I386 searches below all assume a leading underscore on Name.
+ if (!Name.startswith("_"))
+ return "";
+ // Search for x86 stdcall function.
+ StringRef S = findByPrefix((Name + "@").str());
+ if (!S.empty())
+ return S;
+ // Search for x86 fastcall function.
+ S = findByPrefix(("@" + Name.substr(1) + "@").str());
+ if (!S.empty())
+ return S;
+ // Search for x86 vectorcall function.
+ S = findByPrefix((Name.substr(1) + "@@").str());
+ if (!S.empty())
+ return S;
+ // Search for x86 C++ non-member function.
+ return findByPrefix(("?" + Name.substr(1) + "@@Y").str());
+void SymbolTable::mangleMaybe(Symbol *B) {
+ auto *U = dyn_cast<Undefined>(B);
+ if (!U || U->WeakAlias)
+ return;
+ StringRef Alias = findMangle(U->getName());
+ if (!Alias.empty()) {
+ log(U->getName() + " aliased to " + Alias);
+ U->WeakAlias = addUndefined(Alias);
+ }
+// Convenience overload: registers an undefined reference to Name that comes
+// from no input file and is not a weak alias.
+Symbol *SymbolTable::addUndefined(StringRef Name) {
+ return addUndefined(Name, nullptr, false);
+// Creates a fresh BitcodeCompiler, feeds it every registered bitcode file and
+// returns the result of its compile() call.
+std::vector<StringRef> SymbolTable::compileBitcodeFiles() {
+ LTO.reset(new BitcodeCompiler);
+ for (BitcodeFile *F : BitcodeFile::Instances)
+ LTO->add(*F);
+ return LTO->compile();
+// Compiles all bitcode files and adds each resulting buffer to the link as an
+// object file named "lto.tmp". Does nothing when there are no bitcode files.
+void SymbolTable::addCombinedLTOObjects() {
+ if (BitcodeFile::Instances.empty())
+ return;
+ ScopedTimer T(LTOTimer);
+ for (StringRef Object : compileBitcodeFiles()) {
+ auto *Obj = make<ObjFile>(MemoryBufferRef(Object, "lto.tmp"));
+ // Parse right away and register the object directly in the instance list.
+ Obj->parse();
+ ObjFile::Instances.push_back(Obj);
+ }
+} // namespace coff
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/COFF/SymbolTable.h b/contrib/llvm/tools/lld/COFF/SymbolTable.h
new file mode 100644
index 000000000000..00e55dbb7a02
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/SymbolTable.h
@@ -0,0 +1,131 @@
+//===- SymbolTable.h --------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "InputFiles.h"
+#include "LTO.h"
+#include "llvm/ADT/CachedHashString.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/Support/raw_ostream.h"
+namespace llvm {
+struct LTOCodeGenerator;
+namespace lld {
+namespace coff {
+class Chunk;
+class CommonChunk;
+class Defined;
+class DefinedAbsolute;
+class DefinedRelative;
+class Lazy;
+class SectionChunk;
+class Symbol;
+// SymbolTable is a bucket of all known symbols, including defined,
+// undefined, or lazy symbols (the last one is symbols in archive
+// files whose archive members are not yet loaded).
+// We put all symbols of all files to a SymbolTable, and the
+// SymbolTable selects the "best" symbols if there are name
+// conflicts. For example, obviously, a defined symbol is better than
+// an undefined symbol. Or, if there's a conflict between a lazy and an
+// undefined symbol, it'll read an archive member to read a real definition
+// to replace the lazy symbol. The logic is implemented in the
+// add*() functions, which are called by input files as they are parsed.
+// There is one add* function per symbol type.
+class SymbolTable {
+ void addFile(InputFile *File);
+ // Try to resolve any undefined symbols and update the symbol table
+ // accordingly, then print an error message for any remaining undefined
+ // symbols.
+ void reportRemainingUndefines();
+ void loadMinGWAutomaticImports();
+ bool handleMinGWAutomaticImport(Symbol *Sym, StringRef Name);
+ // Returns a list of chunks of selected symbols.
+ std::vector<Chunk *> getChunks();
+ // Returns a symbol for a given name. Returns a nullptr if not found.
+ Symbol *find(StringRef Name);
+ Symbol *findUnderscore(StringRef Name);
+ // Occasionally we have to resolve an undefined symbol to its
+ // mangled symbol. This function tries to find a mangled name
+ // for U from the symbol table, and if found, set the symbol as
+ // a weak alias for U.
+ void mangleMaybe(Symbol *B);
+ StringRef findMangle(StringRef Name);
+ // Build a set of COFF objects representing the combined contents of
+ // BitcodeFiles and add them to the symbol table. Called after all files are
+ // added and before the writer writes results to a file.
+ void addCombinedLTOObjects();
+ std::vector<StringRef> compileBitcodeFiles();
+ // Creates an Undefined symbol for a given name.
+ Symbol *addUndefined(StringRef Name);
+ Symbol *addSynthetic(StringRef N, Chunk *C);
+ Symbol *addAbsolute(StringRef N, uint64_t VA);
+ Symbol *addUndefined(StringRef Name, InputFile *F, bool IsWeakAlias);
+ void addLazy(ArchiveFile *F, const Archive::Symbol Sym);
+ Symbol *addAbsolute(StringRef N, COFFSymbolRef S);
+ Symbol *addRegular(InputFile *F, StringRef N,
+ const llvm::object::coff_symbol_generic *S = nullptr,
+ SectionChunk *C = nullptr);
+ std::pair<Symbol *, bool>
+ addComdat(InputFile *F, StringRef N,
+ const llvm::object::coff_symbol_generic *S = nullptr);
+ Symbol *addCommon(InputFile *F, StringRef N, uint64_t Size,
+ const llvm::object::coff_symbol_generic *S = nullptr,
+ CommonChunk *C = nullptr);
+ Symbol *addImportData(StringRef N, ImportFile *F);
+ Symbol *addImportThunk(StringRef Name, DefinedImportData *S,
+ uint16_t Machine);
+ void reportDuplicate(Symbol *Existing, InputFile *NewFile);
+ // A list of chunks to be added to .rdata.
+ std::vector<Chunk *> LocalImportChunks;
+ // Invokes Callback once for every symbol in the table. Iteration follows the
+ // hash table's order, which is non-deterministic.
+ template <typename T> void forEachSymbol(T Callback) {
+ for (auto &Pair : SymMap)
+ Callback(Pair.second);
+ }
+ /// Inserts symbol if not already present.
+ std::pair<Symbol *, bool> insert(StringRef Name);
+ /// Same as insert(Name), but also sets IsUsedInRegularObj.
+ std::pair<Symbol *, bool> insert(StringRef Name, InputFile *F);
+ StringRef findByPrefix(StringRef Prefix);
+ llvm::DenseMap<llvm::CachedHashStringRef, Symbol *> SymMap;
+ std::unique_ptr<BitcodeCompiler> LTO;
+extern SymbolTable *Symtab;
+std::string getSymbolLocations(ObjFile *File, uint32_t SymIndex);
+} // namespace coff
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/COFF/Symbols.cpp b/contrib/llvm/tools/lld/COFF/Symbols.cpp
new file mode 100644
index 000000000000..ccaf86417f10
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/Symbols.cpp
@@ -0,0 +1,107 @@
+//===- Symbols.cpp --------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Symbols.h"
+#include "InputFiles.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+#include "lld/Common/Strings.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace llvm::object;
+// Returns a symbol name for an error message.
+std::string lld::toString(coff::Symbol &B) {
+ if (Optional<std::string> S = lld::demangleMSVC(B.getName()))
+ return ("\"" + *S + "\" (" + B.getName() + ")").str();
+ return B.getName();
+namespace lld {
+namespace coff {
+// Returns the symbol's name, reading it from the owning object file on first
+// use when it has not been cached in Name yet.
+StringRef Symbol::getName() {
+ // COFF symbol names are read lazily for a performance reason.
+ // Non-external symbol names are never used by the linker except for logging
+ // or debugging. Their internal references are resolved not by name but by
+ // symbol index. And because they are not external, no one can refer to them
+ // by name. Object files contain lots of non-external symbols, and creating
+ // StringRefs for them (which involves lots of strlen() on the string table)
+ // is a waste of time.
+ if (Name.empty()) {
+ // An empty Name is taken to mean this is a DefinedCOFF backed by an
+ // ObjFile; both casts assert otherwise. Whatever getSymbolName() returns
+ // is discarded here; only the Name out-parameter is used.
+ auto *D = cast<DefinedCOFF>(this);
+ cast<ObjFile>(D->File)->getCOFFObj()->getSymbolName(D->Sym, Name);
+ }
+ return Name;
+// Returns the file this symbol came from: the File member for DefinedCOFF and
+// Lazy symbols, and null for every other kind.
+InputFile *Symbol::getFile() {
+  InputFile *Owner = nullptr;
+  if (auto *COFFSym = dyn_cast<DefinedCOFF>(this))
+    Owner = COFFSym->File;
+  else if (auto *LazySym = dyn_cast<Lazy>(this))
+    Owner = LazySym->File;
+  return Owner;
+}
+// Reports whether this symbol is considered live. Regular symbols defer to
+// their chunk's Live flag, import data symbols to their file's Live flag, and
+// import thunks to the ThunkLive flag of the wrapped symbol's file.
+bool Symbol::isLive() const {
+ // getChunk() dereferences the symbol's Data pointer, so this path requires
+ // the DefinedRegular to have a chunk.
+ if (auto *R = dyn_cast<DefinedRegular>(this))
+ return R->getChunk()->Live;
+ if (auto *Imp = dyn_cast<DefinedImportData>(this))
+ return Imp->File->Live;
+ if (auto *Imp = dyn_cast<DefinedImportThunk>(this))
+ return Imp->WrappedSym->File->ThunkLive;
+ // Assume any other kind of symbol is live.
+ return true;
+// MinGW specific.
+// Overwrites this symbol with the first Size bytes of Other, then restores the
+// name this symbol had before the copy.
+void Symbol::replaceKeepingName(Symbol *Other, size_t Size) {
+ StringRef OrigName = Name;
+ // Raw byte copy over *this; Name is put back on the next line.
+ memcpy(this, Other, Size);
+ Name = OrigName;
+// Wraps Sym in a COFFSymbolRef, interpreting it as a coff_symbol16 or a
+// coff_symbol32 depending on the symbol table entry size of the owning object.
+COFFSymbolRef DefinedCOFF::getCOFFSymbol() {
+  size_t EntrySize =
+      cast<ObjFile>(File)->getCOFFObj()->getSymbolTableEntrySize();
+  if (EntrySize != sizeof(coff_symbol16)) {
+    assert(EntrySize == sizeof(coff_symbol32));
+    return COFFSymbolRef(reinterpret_cast<const coff_symbol32 *>(Sym));
+  }
+  return COFFSymbolRef(reinterpret_cast<const coff_symbol16 *>(Sym));
+}
+uint16_t DefinedAbsolute::NumOutputSections;
+// Allocates the import thunk chunk matching Machine for the import symbol S.
+// Anything other than AMD64, I386 and ARM64 is asserted to be ARMNT.
+static Chunk *makeImportThunk(DefinedImportData *S, uint16_t Machine) {
+  switch (Machine) {
+  case AMD64:
+    return make<ImportThunkChunkX64>(S);
+  case I386:
+    return make<ImportThunkChunkX86>(S);
+  case ARM64:
+    return make<ImportThunkChunkARM64>(S);
+  default:
+    assert(Machine == ARMNT);
+    return make<ImportThunkChunkARM>(S);
+  }
+}
+// Builds a thunk symbol called Name around the import data symbol S. The thunk
+// chunk itself is created by makeImportThunk() for the given Machine and kept
+// in Data.
+DefinedImportThunk::DefinedImportThunk(StringRef Name, DefinedImportData *S,
+ uint16_t Machine)
+ : Defined(DefinedImportThunkKind, Name), WrappedSym(S),
+ Data(makeImportThunk(S, Machine)) {}
+// Follows the chain of weak aliases starting at WeakAlias and returns the
+// first Defined symbol on it, or null when the chain ends without one.
+//
+// The walk stops (returning null) in two situations the plain loop did not
+// handle: a chain element that is neither Defined nor Undefined, on which
+// cast<Undefined>() would be invalid, and a chain that loops back on itself,
+// which would otherwise never terminate.
+Defined *Undefined::getWeakAlias() {
+  // A weak alias may be a weak alias to another symbol, so check recursively.
+  // Trailing follows the same chain at half speed; if the chain is cyclic the
+  // two cursors eventually land on the same node.
+  Symbol *Trailing = WeakAlias;
+  bool AdvanceTrailing = false;
+  for (Symbol *A = WeakAlias; A;) {
+    if (auto *D = dyn_cast<Defined>(A))
+      return D;
+    auto *U = dyn_cast<Undefined>(A);
+    if (!U)
+      return nullptr; // Not something that can carry a further alias.
+    A = U->WeakAlias;
+    if (AdvanceTrailing) {
+      // Every node Trailing visits was already passed by A, so it is known to
+      // be an Undefined.
+      Trailing = cast<Undefined>(Trailing)->WeakAlias;
+      if (A && A == Trailing)
+        return nullptr; // Alias cycle without any definition.
+    }
+    AdvanceTrailing = !AdvanceTrailing;
+  }
+  return nullptr;
+}
+} // namespace coff
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/COFF/Symbols.h b/contrib/llvm/tools/lld/COFF/Symbols.h
new file mode 100644
index 000000000000..4a8693e22e3c
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/Symbols.h
@@ -0,0 +1,436 @@
+//===- Symbols.h ------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Chunks.h"
+#include "Config.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Common/Memory.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/COFF.h"
+#include <atomic>
+#include <memory>
+#include <vector>
+namespace lld {
+namespace coff {
+using llvm::object::Archive;
+using llvm::object::COFFSymbolRef;
+using llvm::object::coff_import_header;
+using llvm::object::coff_symbol_generic;
+class ArchiveFile;
+class InputFile;
+class ObjFile;
+class SymbolTable;
+// The base class for real symbol classes.
+class Symbol {
+ enum Kind {
+ // The order of these is significant. We start with the regular defined
+ // symbols as those are the most prevalent and the zero tag is the cheapest
+ // to set. Among the defined kinds, the lower the kind is preferred over
+ // the higher kind when testing whether one symbol should take precedence
+ // over another.
+ DefinedRegularKind = 0,
+ DefinedCommonKind,
+ DefinedLocalImportKind,
+ DefinedImportThunkKind,
+ DefinedImportDataKind,
+ DefinedAbsoluteKind,
+ DefinedSyntheticKind,
+ UndefinedKind,
+ LazyKind,
+ LastDefinedCOFFKind = DefinedCommonKind,
+ LastDefinedKind = DefinedSyntheticKind,
+ };
+ Kind kind() const { return static_cast<Kind>(SymbolKind); }
+ // Returns true if this is an external symbol.
+ bool isExternal() { return IsExternal; }
+ // Returns the symbol name.
+ StringRef getName();
+ void replaceKeepingName(Symbol *Other, size_t Size);
+ // Returns the file from which this symbol was created.
+ InputFile *getFile();
+ // Indicates that this symbol will be included in the final image. Only valid
+ // after calling markLive.
+ bool isLive() const;
+ friend SymbolTable;
+ explicit Symbol(Kind K, StringRef N = "")
+ : SymbolKind(K), IsExternal(true), IsCOMDAT(false),
+ WrittenToSymtab(false), PendingArchiveLoad(false), IsGCRoot(false),
+ IsRuntimePseudoReloc(false), Name(N) {}
+ const unsigned SymbolKind : 8;
+ unsigned IsExternal : 1;
+ // This bit is used by the \c DefinedRegular subclass.
+ unsigned IsCOMDAT : 1;
+ // This bit is used by Writer::createSymbolAndStringTable() to prevent
+ // symbols from being written to the symbol table more than once.
+ unsigned WrittenToSymtab : 1;
+ // True if this symbol was referenced by a regular (non-bitcode) object.
+ unsigned IsUsedInRegularObj : 1;
+ // True if we've seen both a lazy and an undefined symbol with this symbol
+ // name, which means that we have enqueued an archive member load and should
+ // not load any more archive members to resolve the same symbol.
+ unsigned PendingArchiveLoad : 1;
+ /// True if we've already added this symbol to the list of GC roots.
+ unsigned IsGCRoot : 1;
+ unsigned IsRuntimePseudoReloc : 1;
+ StringRef Name;
+// The base class for any defined symbols, including absolute symbols,
+// etc.
+class Defined : public Symbol {
+ Defined(Kind K, StringRef N) : Symbol(K, N) {}
+ // Every kind up to and including LastDefinedKind counts as a Defined.
+ static bool classof(const Symbol *S) { return S->kind() <= LastDefinedKind; }
+ // Returns the RVA (relative virtual address) of this symbol. The
+ // writer sets and uses RVAs.
+ uint64_t getRVA();
+ // Returns the chunk containing this symbol. Absolute symbols and __ImageBase
+ // do not have chunks, so this may return null.
+ Chunk *getChunk();
+// Symbols defined via a COFF object file or bitcode file. For COFF files, this
+// stores a coff_symbol_generic*, and names of internal symbols are lazily
+// loaded through that. For bitcode files, Sym is nullptr and the name is stored
+// as a StringRef.
+class DefinedCOFF : public Defined {
+ friend Symbol;
+ // K is the concrete kind, F the originating file, N the name (may be empty
+ // and filled in later by Symbol::getName()), and S the raw symbol record.
+ DefinedCOFF(Kind K, InputFile *F, StringRef N, const coff_symbol_generic *S)
+ : Defined(K, N), File(F), Sym(S) {}
+ // Kinds up to and including LastDefinedCOFFKind are DefinedCOFF symbols.
+ static bool classof(const Symbol *S) {
+ return S->kind() <= LastDefinedCOFFKind;
+ }
+ InputFile *getFile() { return File; }
+ // Returns Sym wrapped as a COFFSymbolRef; requires File to be an ObjFile.
+ COFFSymbolRef getCOFFSymbol();
+ InputFile *File;
+ const coff_symbol_generic *Sym;
+// Regular defined symbols read from object file symbol tables.
+class DefinedRegular : public DefinedCOFF {
+ // F is the originating file, N the name, S the raw symbol record and C the
+ // section chunk holding the symbol. Data is pointed at C's Repl member (or
+ // left null when no chunk is given) rather than at C itself.
+ DefinedRegular(InputFile *F, StringRef N, bool IsCOMDAT,
+ bool IsExternal = false,
+ const coff_symbol_generic *S = nullptr,
+ SectionChunk *C = nullptr)
+ : DefinedCOFF(DefinedRegularKind, F, N, S), Data(C ? &C->Repl : nullptr) {
+ this->IsExternal = IsExternal;
+ this->IsCOMDAT = IsCOMDAT;
+ }
+ static bool classof(const Symbol *S) {
+ return S->kind() == DefinedRegularKind;
+ }
+ // RVA of the chunk reached through Data plus the symbol record's Value.
+ // Dereferences both Data and Sym, so neither may be null here.
+ uint64_t getRVA() const { return (*Data)->getRVA() + Sym->Value; }
+ bool isCOMDAT() const { return IsCOMDAT; }
+ // Returns the chunk Data currently refers to; Data must not be null.
+ SectionChunk *getChunk() const { return *Data; }
+ uint32_t getValue() const { return Sym->Value; }
+ // Address of a SectionChunk pointer (the Repl member of the chunk passed to
+ // the constructor), or null.
+ SectionChunk **Data;
+// Symbols of kind DefinedCommonKind: a DefinedCOFF that additionally carries a
+// size and the CommonChunk it is placed in.
+class DefinedCommon : public DefinedCOFF {
+ // Always marked external, regardless of what the symbol record says.
+ DefinedCommon(InputFile *F, StringRef N, uint64_t Size,
+ const coff_symbol_generic *S = nullptr,
+ CommonChunk *C = nullptr)
+ : DefinedCOFF(DefinedCommonKind, F, N, S), Data(C), Size(Size) {
+ this->IsExternal = true;
+ }
+ static bool classof(const Symbol *S) {
+ return S->kind() == DefinedCommonKind;
+ }
+ // Dereferences Data, so a chunk must have been supplied.
+ uint64_t getRVA() { return Data->getRVA(); }
+ CommonChunk *getChunk() { return Data; }
+ friend SymbolTable;
+ uint64_t getSize() const { return Size; }
+ CommonChunk *Data;
+ uint64_t Size;
+// Absolute symbols.
+class DefinedAbsolute : public Defined {
+ // Takes both the address and the external flag from the symbol record S.
+ DefinedAbsolute(StringRef N, COFFSymbolRef S)
+ : Defined(DefinedAbsoluteKind, N), VA(S.getValue()) {
+ IsExternal = S.isExternal();
+ }
+ // Creates an absolute symbol at address V; IsExternal keeps the default set
+ // by the Symbol constructor.
+ DefinedAbsolute(StringRef N, uint64_t V)
+ : Defined(DefinedAbsoluteKind, N), VA(V) {}
+ static bool classof(const Symbol *S) {
+ return S->kind() == DefinedAbsoluteKind;
+ }
+ // The stored address is a VA; the RVA is obtained by subtracting the
+ // configured image base.
+ uint64_t getRVA() { return VA - Config->ImageBase; }
+ void setVA(uint64_t V) { VA = V; }
+ // Section index relocations against absolute symbols resolve to
+ // this 16 bit number, and it is the largest valid section index
+ // plus one. This variable keeps it.
+ static uint16_t NumOutputSections;
+ uint64_t VA;
+// This symbol is used for linker-synthesized symbols like __ImageBase and
+// __safe_se_handler_table.
+class DefinedSynthetic : public Defined {
+ // C is the chunk backing the symbol and may be null.
+ explicit DefinedSynthetic(StringRef Name, Chunk *C)
+ : Defined(DefinedSyntheticKind, Name), C(C) {}
+ static bool classof(const Symbol *S) {
+ return S->kind() == DefinedSyntheticKind;
+ }
+ // A null chunk indicates that this is __ImageBase. Otherwise, this is some
+ // other synthesized chunk, like SEHTableChunk.
+ // Note that this returns a 32-bit value, unlike the uint64_t getRVA() of the
+ // other Defined subclasses in this header.
+ uint32_t getRVA() { return C ? C->getRVA() : 0; }
+ Chunk *getChunk() { return C; }
+ Chunk *C;
+// This class represents a symbol defined in an archive file. It is
+// created from an archive file header, and it knows how to load an
+// object file from an archive to replace itself with a defined
+// symbol. If the resolver finds both Undefined and Lazy for
+// the same name, it will ask the Lazy to load a file.
+class Lazy : public Symbol {
+ // F is the archive the symbol was found in and S its archive symbol entry;
+ // the symbol's name is taken from S.
+ Lazy(ArchiveFile *F, const Archive::Symbol S)
+ : Symbol(LazyKind, S.getName()), File(F), Sym(S) {}
+ static bool classof(const Symbol *S) { return S->kind() == LazyKind; }
+ ArchiveFile *File;
+ friend SymbolTable;
+ const Archive::Symbol Sym;
+// Undefined symbols.
+class Undefined : public Symbol {
+ explicit Undefined(StringRef N) : Symbol(UndefinedKind, N) {}
+ static bool classof(const Symbol *S) { return S->kind() == UndefinedKind; }
+ // An undefined symbol can have a fallback symbol which gives an
+ // undefined symbol a second chance if it would remain undefined.
+ // If it remains undefined, it'll be replaced with whatever the
+ // Alias pointer points to.
+ Symbol *WeakAlias = nullptr;
+ // If this symbol is external weak, try to resolve it to a defined
+ // symbol by searching the chain of fallback symbols. Returns the symbol if
+ // successful, otherwise returns null.
+ Defined *getWeakAlias();
+// Windows-specific classes.
+// This class represents a symbol imported from a DLL. This has two
+// names for internal use and external use. The former is used for
+// name resolution, and the latter is used for the import descriptor
+// table in an output. The former has "__imp_" prefix.
+class DefinedImportData : public Defined {
+ // N is the symbol name and F the import file describing the imported entity.
+ // Every accessor below forwards to a member of F.
+ DefinedImportData(StringRef N, ImportFile *F)
+ : Defined(DefinedImportDataKind, N), File(F) {
+ }
+ static bool classof(const Symbol *S) {
+ return S->kind() == DefinedImportDataKind;
+ }
+ // Dereferences File->Location, so setLocation() must have supplied a chunk
+ // before this is called.
+ uint64_t getRVA() { return File->Location->getRVA(); }
+ Chunk *getChunk() { return File->Location; }
+ // Stores the chunk on the import file, not on this symbol.
+ void setLocation(Chunk *AddressTable) { File->Location = AddressTable; }
+ StringRef getDLLName() { return File->DLLName; }
+ StringRef getExternalName() { return File->ExternalName; }
+ uint16_t getOrdinal() { return File->Hdr->OrdinalHint; }
+ ImportFile *File;
+// This class represents a symbol for a jump table entry which jumps
+// to a function in a DLL. Linkers are supposed to create such symbols
+// without "__imp_" prefix for all function symbols exported from
+// DLLs, so that you can call DLL functions as regular functions with
+// a regular name. A function pointer is given as a DefinedImportData.
+class DefinedImportThunk : public Defined {
+ // Name is the thunk's symbol name, S the import data symbol it wraps, and
+ // Machine selects which thunk chunk is created (defined out of line).
+ DefinedImportThunk(StringRef Name, DefinedImportData *S, uint16_t Machine);
+ static bool classof(const Symbol *S) {
+ return S->kind() == DefinedImportThunkKind;
+ }
+ uint64_t getRVA() { return Data->getRVA(); }
+ Chunk *getChunk() { return Data; }
+ // The import data symbol this thunk was created for.
+ DefinedImportData *WrappedSym;
+ // The thunk chunk; its RVA is this symbol's RVA.
+ Chunk *Data;
+// If you have a symbol "foo" in your object file, a symbol name
+// "__imp_foo" becomes automatically available as a pointer to "foo".
+// This class is for such automatically-created symbols.
+// Yes, this is an odd feature. We didn't intend to implement that.
+// This is here just for compatibility with MSVC.
+class DefinedLocalImport : public Defined {
+ // N is the symbol name and S the defined symbol it stands for. A
+ // LocalImportChunk is allocated for S and stored in Data.
+ DefinedLocalImport(StringRef N, Defined *S)
+ : Defined(DefinedLocalImportKind, N), Data(make<LocalImportChunk>(S)) {}
+ static bool classof(const Symbol *S) {
+ return S->kind() == DefinedLocalImportKind;
+ }
+ uint64_t getRVA() { return Data->getRVA(); }
+ Chunk *getChunk() { return Data; }
+ LocalImportChunk *Data;
+// Dispatches on the symbol kind to the getRVA() of the matching subclass.
+// Calling this on a Lazy or Undefined symbol is a programming error.
+inline uint64_t Defined::getRVA() {
+  switch (kind()) {
+  case DefinedRegularKind:
+    return cast<DefinedRegular>(this)->getRVA();
+  case DefinedCommonKind:
+    return cast<DefinedCommon>(this)->getRVA();
+  case DefinedLocalImportKind:
+    return cast<DefinedLocalImport>(this)->getRVA();
+  case DefinedImportThunkKind:
+    return cast<DefinedImportThunk>(this)->getRVA();
+  case DefinedImportDataKind:
+    return cast<DefinedImportData>(this)->getRVA();
+  case DefinedAbsoluteKind:
+    return cast<DefinedAbsolute>(this)->getRVA();
+  case DefinedSyntheticKind:
+    return cast<DefinedSynthetic>(this)->getRVA();
+  case UndefinedKind:
+  case LazyKind:
+    llvm_unreachable("Cannot get the address for an undefined symbol.");
+  }
+  llvm_unreachable("unknown symbol kind");
+}
+// Dispatches on the symbol kind to the getChunk() of the matching subclass.
+// Absolute symbols have no chunk and yield null; calling this on a Lazy or
+// Undefined symbol is a programming error.
+inline Chunk *Defined::getChunk() {
+  switch (kind()) {
+  case DefinedAbsoluteKind:
+    return nullptr;
+  case DefinedRegularKind:
+    return cast<DefinedRegular>(this)->getChunk();
+  case DefinedCommonKind:
+    return cast<DefinedCommon>(this)->getChunk();
+  case DefinedLocalImportKind:
+    return cast<DefinedLocalImport>(this)->getChunk();
+  case DefinedImportThunkKind:
+    return cast<DefinedImportThunk>(this)->getChunk();
+  case DefinedImportDataKind:
+    return cast<DefinedImportData>(this)->getChunk();
+  case DefinedSyntheticKind:
+    return cast<DefinedSynthetic>(this)->getChunk();
+  case UndefinedKind:
+  case LazyKind:
+    llvm_unreachable("Cannot get the chunk of an undefined symbol.");
+  }
+  llvm_unreachable("unknown symbol kind");
+}
+// A buffer class that is large enough to hold any Symbol-derived
+// object. We allocate memory using this class and instantiate a symbol
+// using the placement new.
+// Each member is a raw char array with the size and alignment of one concrete
+// symbol class; the members exist only to size and align the union.
+union SymbolUnion {
+ alignas(DefinedRegular) char A[sizeof(DefinedRegular)];
+ alignas(DefinedCommon) char B[sizeof(DefinedCommon)];
+ alignas(DefinedAbsolute) char C[sizeof(DefinedAbsolute)];
+ alignas(DefinedSynthetic) char D[sizeof(DefinedSynthetic)];
+ alignas(Lazy) char E[sizeof(Lazy)];
+ alignas(Undefined) char F[sizeof(Undefined)];
+ alignas(DefinedImportData) char G[sizeof(DefinedImportData)];
+ alignas(DefinedImportThunk) char H[sizeof(DefinedImportThunk)];
+ alignas(DefinedLocalImport) char I[sizeof(DefinedLocalImport)];
+// Constructs a symbol of type T in place over the storage S points to,
+// forwarding Arg to T's constructor. The previous object is not destroyed,
+// which is why T is required to be trivially destructible. S must point to
+// storage at least as large and as aligned as SymbolUnion.
+template <typename T, typename... ArgT>
+void replaceSymbol(Symbol *S, ArgT &&... Arg) {
+  static_assert(std::is_trivially_destructible<T>(),
+                "Symbol types must be trivially destructible");
+  // If this fires, T is missing from SymbolUnion: the union, not T, is what
+  // needs to grow.
+  static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
+  static_assert(alignof(T) <= alignof(SymbolUnion),
+                "SymbolUnion not aligned enough");
+  // Only compiles when T derives from Symbol, and checks that the Symbol base
+  // sits at offset zero of T.
+  assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
+         "Not a Symbol");
+  new (S) T(std::forward<ArgT>(Arg)...);
+}
+} // namespace coff
+std::string toString(coff::Symbol &B);
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/COFF/Writer.cpp b/contrib/llvm/tools/lld/COFF/Writer.cpp
new file mode 100644
index 000000000000..6acfaf9a4454
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/Writer.cpp
@@ -0,0 +1,1745 @@
+//===- Writer.cpp ---------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Writer.h"
+#include "Config.h"
+#include "DLL.h"
+#include "InputFiles.h"
+#include "MapFile.h"
+#include "PDB.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+#include "lld/Common/Timer.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/Parallel.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/RandomNumberGenerator.h"
+#include "llvm/Support/xxhash.h"
+#include <algorithm>
+#include <cstdio>
+#include <map>
+#include <memory>
+#include <utility>
+using namespace llvm;
+using namespace llvm::COFF;
+using namespace llvm::object;
+using namespace llvm::support;
+using namespace llvm::support::endian;
+using namespace lld;
+using namespace lld::coff;
+/* To re-generate DOSProgram:
+$ cat > /tmp/DOSProgram.asm
+org 0
+ ; Copy cs to ds.
+ push cs
+ pop ds
+ ; Point ds:dx at the $-terminated string.
+ mov dx, str
+ ; Int 21/AH=09h: Write string to standard output.
+ mov ah, 0x9
+ int 0x21
+ ; Int 21/AH=4Ch: Exit with return code (in AL).
+ mov ax, 0x4C01
+ int 0x21
+ db 'This program cannot be run in DOS mode.$'
+align 8, db 0
+$ nasm -fbin /tmp/DOSProgram.asm -o /tmp/DOSProgram.bin
+$ xxd -i /tmp/DOSProgram.bin
+static unsigned char DOSProgram[] = {
+ 0x0e, 0x1f, 0xba, 0x0e, 0x00, 0xb4, 0x09, 0xcd, 0x21, 0xb8, 0x01, 0x4c,
+ 0xcd, 0x21, 0x54, 0x68, 0x69, 0x73, 0x20, 0x70, 0x72, 0x6f, 0x67, 0x72,
+ 0x61, 0x6d, 0x20, 0x63, 0x61, 0x6e, 0x6e, 0x6f, 0x74, 0x20, 0x62, 0x65,
+ 0x20, 0x72, 0x75, 0x6e, 0x20, 0x69, 0x6e, 0x20, 0x44, 0x4f, 0x53, 0x20,
+ 0x6d, 0x6f, 0x64, 0x65, 0x2e, 0x24, 0x00, 0x00
+static_assert(sizeof(DOSProgram) % 8 == 0,
+ "DOSProgram size must be multiple of 8");
+static const int SectorSize = 512;
+static const int DOSStubSize = sizeof(dos_header) + sizeof(DOSProgram);
+static_assert(DOSStubSize % 8 == 0, "DOSStub size must be multiple of 8");
+static const int NumberOfDataDirectory = 16;
+namespace {
+// Chunk holding one debug_directory entry per record in Records, plus one
+// extra entry of type IMAGE_DEBUG_TYPE_REPRO when WriteRepro is set.
+class DebugDirectoryChunk : public Chunk {
+ // R is kept by reference (see Records), so the vector must outlive this
+ // chunk.
+ DebugDirectoryChunk(const std::vector<Chunk *> &R, bool WriteRepro)
+ : Records(R), WriteRepro(WriteRepro) {}
+ size_t getSize() const override {
+ return (Records.size() + int(WriteRepro)) * sizeof(debug_directory);
+ }
+ // Fills in the directory entries at B + OutputSectionOff. Each entry written
+ // also has the address of its TimeDateStamp field remembered so that
+ // setTimeDateStamp() can patch it afterwards.
+ void writeTo(uint8_t *B) const override {
+ auto *D = reinterpret_cast<debug_directory *>(B + OutputSectionOff);
+ for (const Chunk *Record : Records) {
+ OutputSection *OS = Record->getOutputSection();
+ // File offset of the record: the section's file offset plus the record's
+ // distance from the start of the section.
+ uint64_t Offs = OS->getFileOff() + (Record->getRVA() - OS->getRVA());
+ fillEntry(D, COFF::IMAGE_DEBUG_TYPE_CODEVIEW, Record->getSize(),
+ Record->getRVA(), Offs);
+ ++D;
+ }
+ if (WriteRepro) {
+ // FIXME: The COFF spec allows either a 0-sized entry to just say
+ // "the timestamp field is really a hash", or a 4-byte size field
+ // followed by that many bytes containing a longer hash (with the
+ // lowest 4 bytes usually being the timestamp in little-endian order).
+ // Consider storing the full 8 bytes computed by xxHash64 here.
+ fillEntry(D, COFF::IMAGE_DEBUG_TYPE_REPRO, 0, 0, 0);
+ }
+ }
+ // Writes TimeDateStamp into every entry recorded by writeTo(); it has no
+ // effect on entries that have not been written yet.
+ void setTimeDateStamp(uint32_t TimeDateStamp) {
+ for (support::ulittle32_t *TDS : TimeDateStamps)
+ *TDS = TimeDateStamp;
+ }
+ // Initializes one directory entry and records the location of its
+ // TimeDateStamp field (initially zero).
+ void fillEntry(debug_directory *D, COFF::DebugType DebugType, size_t Size,
+ uint64_t RVA, uint64_t Offs) const {
+ D->Characteristics = 0;
+ D->TimeDateStamp = 0;
+ D->MajorVersion = 0;
+ D->MinorVersion = 0;
+ D->Type = DebugType;
+ D->SizeOfData = Size;
+ D->AddressOfRawData = RVA;
+ D->PointerToRawData = Offs;
+ TimeDateStamps.push_back(&D->TimeDateStamp);
+ }
+ // Mutable because it is appended to from the const writeTo() path. It grows
+ // on every writeTo() call and is never cleared in this class.
+ mutable std::vector<support::ulittle32_t *> TimeDateStamps;
+ const std::vector<Chunk *> &Records;
+ bool WriteRepro;
+// Chunk holding a codeview::DebugInfo record followed by the NUL-terminated
+// Config->PDBAltPath string.
+class CVDebugRecordChunk : public Chunk {
+ // Fixed-size record, the path bytes, and one byte for the terminator.
+ size_t getSize() const override {
+ return sizeof(codeview::DebugInfo) + Config->PDBAltPath.size() + 1;
+ }
+ // Writes only the path; the DebugInfo record itself is left for later and
+ // its location is remembered in BuildId.
+ void writeTo(uint8_t *B) const override {
+ // Save off the DebugInfo entry to backfill the file signature (build id)
+ // in Writer::writeBuildId
+ BuildId = reinterpret_cast<codeview::DebugInfo *>(B + OutputSectionOff);
+ // variable sized field (PDB Path)
+ char *P = reinterpret_cast<char *>(B + OutputSectionOff + sizeof(*BuildId));
+ if (!Config->PDBAltPath.empty())
+ memcpy(P, Config->PDBAltPath.data(), Config->PDBAltPath.size());
+ P[Config->PDBAltPath.size()] = '\0';
+ }
+ // Points into the output buffer once writeTo() has run; null before that.
+ // Mutable because writeTo() is const.
+ mutable codeview::DebugInfo *BuildId = nullptr;
+// The writer writes a SymbolTable result to a file.
+class Writer {
+ Writer() : Buffer(errorHandler().OutputBuffer) {}
+ void run();
+ void createSections();
+ void createMiscChunks();
+ void createImportTables();
+ void appendImportThunks();
+ void locateImportTables(
+ std::map<std::pair<StringRef, uint32_t>, std::vector<Chunk *>> &Map);
+ void createExportTable();
+ void mergeSections();
+ void readRelocTargets();
+ void removeUnusedSections();
+ void assignAddresses();
+ void finalizeAddresses();
+ void removeEmptySections();
+ void createSymbolAndStringTable();
+ void openFile(StringRef OutputPath);
+ template <typename PEHeaderTy> void writeHeader();
+ void createSEHTable();
+ void createRuntimePseudoRelocs();
+ void insertCtorDtorSymbols();
+ void createGuardCFTables();
+ void markSymbolsForRVATable(ObjFile *File,
+ ArrayRef<SectionChunk *> SymIdxChunks,
+ SymbolRVASet &TableSymbols);
+ void maybeAddRVATable(SymbolRVASet TableSymbols, StringRef TableSym,
+ StringRef CountSym);
+ void setSectionPermissions();
+ void writeSections();
+ void writeBuildId();
+ void sortExceptionTable();
+ void sortCRTSectionChunks(std::vector<Chunk *> &Chunks);
+ llvm::Optional<coff_symbol16> createSymbol(Defined *D);
+ size_t addEntryToStringTable(StringRef Str);
+ OutputSection *findSection(StringRef Name);
+ void addBaserels();
+ void addBaserelBlocks(std::vector<Baserel> &V);
+ uint32_t getSizeOfInitializedData();
+ std::map<StringRef, std::vector<DefinedImportData *>> binImports();
+ std::unique_ptr<FileOutputBuffer> &Buffer;
+ std::vector<OutputSection *> OutputSections;
+ std::vector<char> Strtab;
+ std::vector<llvm::object::coff_symbol16> OutputSymtab;
+ IdataContents Idata;
+ Chunk *ImportTableStart = nullptr;
+ uint64_t ImportTableSize = 0;
+ Chunk *IATStart = nullptr;
+ uint64_t IATSize = 0;
+ DelayLoadContents DelayIdata;
+ EdataContents Edata;
+ bool SetNoSEHCharacteristic = false;
+ DebugDirectoryChunk *DebugDirectory = nullptr;
+ std::vector<Chunk *> DebugRecords;
+ CVDebugRecordChunk *BuildId = nullptr;
+ ArrayRef<uint8_t> SectionTable;
+ uint64_t FileSize;
+ uint32_t PointerToSymbolTable = 0;
+ uint64_t SizeOfImage;
+ uint64_t SizeOfHeaders;
+ OutputSection *TextSec;
+ OutputSection *RdataSec;
+ OutputSection *BuildidSec;
+ OutputSection *DataSec;
+ OutputSection *PdataSec;
+ OutputSection *IdataSec;
+ OutputSection *EdataSec;
+ OutputSection *DidatSec;
+ OutputSection *RsrcSec;
+ OutputSection *RelocSec;
+ OutputSection *CtorsSec;
+ OutputSection *DtorsSec;
+ // The first and last .pdata sections in the output file.
+ //
+ // We need to keep track of the location of .pdata in whichever section it
+ // gets merged into so that we can sort its contents and emit a correct data
+ // directory entry for the exception table. This is also the case for some
+ // other sections (such as .edata) but because the contents of those sections
+ // are entirely linker-generated we can keep track of their locations using
+ // the chunks that the linker creates. All .pdata chunks come from input
+ // files, so we need to keep track of them separately.
+ Chunk *FirstPdata = nullptr;
+ Chunk *LastPdata;
+} // anonymous namespace
+namespace lld {
+namespace coff {
+static Timer CodeLayoutTimer("Code Layout", Timer::root());
+static Timer DiskCommitTimer("Commit Output File", Timer::root());
+// Entry point of this file: constructs a temporary Writer and runs it.
+void writeResult() { Writer().run(); }
+// Appends C to this section's chunk list and records this section as C's
+// output section.
+void OutputSection::addChunk(Chunk *C) {
+ Chunks.push_back(C);
+ C->setOutputSection(this);
+// Same as addChunk(), except that C is placed in front of all existing chunks.
+void OutputSection::insertChunkAtStart(Chunk *C) {
+ Chunks.insert(Chunks.begin(), C);
+ C->setOutputSection(this);
+// Replaces the permission bits of the section characteristics: the bits
+// covered by PermMask are cleared, then C is ORed in. C is not masked, so any
+// bit set in it ends up in the header.
+void OutputSection::setPermissions(uint32_t C) {
+ Header.Characteristics &= ~PermMask;
+ Header.Characteristics |= C;
+// Moves every chunk of Other to the end of this section, re-pointing each
+// chunk's output section at this one, and leaves Other without chunks.
+void OutputSection::merge(OutputSection *Other) {
+  for (Chunk *Moved : Other->Chunks) {
+    Moved->setOutputSection(this);
+    Chunks.push_back(Moved);
+  }
+  Other->Chunks.clear();
+}
+// Write the section header to a given buffer.
+//
+// Buf must have room for one coff_section. The stored header is copied
+// verbatim and its Name field is then filled in: with "/<offset>" when
+// StringTableOff is set, otherwise with the section name itself, cut to
+// COFF::NameSize bytes.
+void OutputSection::writeHeaderTo(uint8_t *Buf) {
+  auto *Hdr = reinterpret_cast<coff_section *>(Buf);
+  *Hdr = Header;
+  if (StringTableOff) {
+    // If name is too long, write offset into the string table as a name.
+    // The text is formatted into a scratch buffer first: Name holds only
+    // COFF::NameSize bytes, so printing "/<offset>" plus a terminator straight
+    // into it runs past the field once the offset has seven or more digits.
+    char Encoded[16];
+    int Len = snprintf(Encoded, sizeof(Encoded), "/%u",
+                       static_cast<unsigned>(StringTableOff));
+    if (Len < 0 || static_cast<size_t>(Len) > COFF::NameSize) {
+      error("string table offset " +
+            Twine(static_cast<unsigned>(StringTableOff)) +
+            " is too large to encode as a section name");
+      return;
+    }
+    // Copy the terminator as well whenever the field has room for it.
+    memcpy(Hdr->Name, Encoded,
+           std::min(static_cast<size_t>(Len) + 1, (size_t)COFF::NameSize));
+  } else {
+    assert(!Config->Debug || Name.size() <= COFF::NameSize ||
+           (Hdr->Characteristics & IMAGE_SCN_MEM_DISCARDABLE) == 0);
+    strncpy(Hdr->Name, Name.data(),
+            std::min(Name.size(), (size_t)COFF::NameSize));
+  }
+}
+} // namespace coff
+} // namespace lld
+// Check whether the target address S is in range from a relocation
+// of type RelType at address P.
+//
+// Margin is added to the distance before it is compared against the reach of
+// the branch encoding. Relocation types that are not branches are always
+// reported as in range. Only ARMNT and ARM64 are supported.
+static bool isInRange(uint16_t RelType, uint64_t S, uint64_t P, int Margin) {
+  if (Config->Machine == ARMNT) {
+    // The distance is measured from P + 4 on ARMNT.
+    int64_t Diff = AbsoluteDifference(S, P + 4) + Margin;
+    switch (RelType) {
+    case IMAGE_REL_ARM_BRANCH20T:
+      return isInt<21>(Diff);
+    case IMAGE_REL_ARM_BRANCH24T:
+    case IMAGE_REL_ARM_BLX23T:
+      return isInt<25>(Diff);
+    default:
+      return true;
+    }
+  } else if (Config->Machine == ARM64) {
+    int64_t Diff = AbsoluteDifference(S, P) + Margin;
+    switch (RelType) {
+    case IMAGE_REL_ARM64_BRANCH26:
+      return isInt<28>(Diff);
+    case IMAGE_REL_ARM64_BRANCH19:
+      return isInt<21>(Diff);
+    case IMAGE_REL_ARM64_BRANCH14:
+      return isInt<16>(Diff);
+    default:
+      return true;
+    }
+  } else {
+    llvm_unreachable("Unexpected architecture");
+  }
+}
+// Return the last thunk for the given target if it is in range,
+// or create a new one.
+// LastThunks maps a target's RVA to the most recent thunk made for it, P is
+// the address of the relocation, Type its relocation type and Margin the
+// extra slack passed on to isInRange(). The bool in the result is true when a
+// new thunk was created and false when an existing one is reused.
+static std::pair<Defined *, bool>
+getThunk(DenseMap<uint64_t, Defined *> &LastThunks, Defined *Target, uint64_t P,
+ uint16_t Type, int Margin) {
+ // Reference into the map (an entry is created if absent), so assigning to
+ // LastThunk below updates the map.
+ Defined *&LastThunk = LastThunks[Target->getRVA()];
+ if (LastThunk && isInRange(Type, LastThunk->getRVA(), P, Margin))
+ return {LastThunk, false};
+ Chunk *C;
+ switch (Config->Machine) {
+ case ARMNT:
+ C = make<RangeExtensionThunkARM>(Target);
+ break;
+ case ARM64:
+ C = make<RangeExtensionThunkARM64>(Target);
+ break;
+ default:
+ llvm_unreachable("Unexpected architecture");
+ }
+ // The thunk is represented by an unnamed synthetic symbol wrapping C.
+ Defined *D = make<DefinedSynthetic>("", C);
+ LastThunk = D;
+ return {D, true};
+// This checks all relocations, and for any relocation which isn't in range
+// it adds a thunk after the section chunk that contains the relocation.
+// If the latest thunk for the specific target is in range, that is used
+// instead of creating a new thunk. All range checks are done with the
+// specified margin, to make sure that relocations that originally are in
+// range, but only barely, also get thunks - in case other added thunks makes
+// the target go out of range.
+// After adding thunks, we verify that all relocations are in range (with
+// no extra margin requirements). If this failed, we restart (throwing away
+// the previously created thunks) and retry with a wider margin.
+static bool createThunks(OutputSection *OS, int Margin) { // Returns true if at least one new thunk chunk was inserted into OS.
+ bool AddressesChanged = false;
+ DenseMap<uint64_t, Defined *> LastThunks; // Keyed by target RVA; holds the latest thunk made for that target.
+ size_t ThunksSize = 0; // Total size of the thunks inserted so far in this call.
+ // Recheck Chunks.size() each iteration, since we can insert more
+ // elements into it.
+ for (size_t I = 0; I != OS->Chunks.size(); ++I) {
+ SectionChunk *SC = dyn_cast_or_null<SectionChunk>(OS->Chunks[I]);
+ if (!SC)
+ continue;
+ size_t ThunkInsertionSpot = I + 1;
+ // Try to get a good enough estimate of where new thunks will be placed.
+ // Offset this by the size of the new thunks added so far, to make the
+ // estimate slightly better.
+ size_t ThunkInsertionRVA = SC->getRVA() + SC->getSize() + ThunksSize;
+ for (size_t J = 0, E = SC->Relocs.size(); J < E; ++J) {
+ const coff_relocation &Rel = SC->Relocs[J];
+ Symbol *&RelocTarget = SC->RelocTargets[J];
+ // The estimate of the source address P should be pretty accurate,
+ // but we don't know whether the target Symbol address should be
+ // offset by ThunkSize or not (or by some of ThunksSize but not all of
+ // it), giving us some uncertainty once we have added one thunk.
+ uint64_t P = SC->getRVA() + Rel.VirtualAddress + ThunksSize;
+ Defined *Sym = dyn_cast_or_null<Defined>(RelocTarget);
+ if (!Sym)
+ continue; // Targets that are null or not a Defined symbol are left untouched.
+ uint64_t S = Sym->getRVA();
+ if (isInRange(Rel.Type, S, P, Margin))
+ continue;
+ // If the target isn't in range, hook it up to an existing or new
+ // thunk.
+ Defined *Thunk;
+ bool WasNew;
+ std::tie(Thunk, WasNew) = getThunk(LastThunks, Sym, P, Rel.Type, Margin);
+ if (WasNew) {
+ Chunk *ThunkChunk = Thunk->getChunk();
+ ThunkChunk->setRVA(
+ ThunkInsertionRVA); // Estimate of where it will be located.
+ ThunkChunk->setOutputSection(OS);
+ OS->Chunks.insert(OS->Chunks.begin() + ThunkInsertionSpot, ThunkChunk);
+ ThunkInsertionSpot++;
+ ThunksSize += ThunkChunk->getSize();
+ ThunkInsertionRVA += ThunkChunk->getSize();
+ AddressesChanged = true;
+ }
+ RelocTarget = Thunk; // Redirect this relocation to the (new or reused) thunk.
+ }
+ }
+ return AddressesChanged;
+// Verify that all relocations are in range, with no extra margin requirements.
+// Returns false as soon as one relocation of a SectionChunk fails isInRange();
+// chunks that are not SectionChunks and targets that are not Defined symbols
+// are skipped. The list is taken by const reference so that the per-section
+// calls made on every thunk pass do not copy the whole chunk vector.
+static bool verifyRanges(const std::vector<Chunk *> &Chunks) {
+  for (Chunk *C : Chunks) {
+    SectionChunk *SC = dyn_cast_or_null<SectionChunk>(C);
+    if (!SC)
+      continue;
+    for (size_t J = 0, E = SC->Relocs.size(); J < E; ++J) {
+      const coff_relocation &Rel = SC->Relocs[J];
+      Symbol *RelocTarget = SC->RelocTargets[J];
+      Defined *Sym = dyn_cast_or_null<Defined>(RelocTarget);
+      if (!Sym)
+        continue;
+      uint64_t P = SC->getRVA() + Rel.VirtualAddress;
+      uint64_t S = Sym->getRVA();
+      if (!isInRange(Rel.Type, S, P, 0))
+        return false;
+    }
+  }
+  return true;
+}
+// Assign addresses and add thunks if necessary.
+void Writer::finalizeAddresses() {
+ assignAddresses();
+ if (Config->Machine != ARMNT && Config->Machine != ARM64)
+ return; // Only ARMNT and ARM64 go through the thunk passes below.
+ size_t OrigNumChunks = 0;
+ for (OutputSection *Sec : OutputSections) {
+ Sec->OrigChunks = Sec->Chunks; // Snapshot that is restored before each retry.
+ OrigNumChunks += Sec->Chunks.size();
+ }
+ int Pass = 0;
+ int Margin = 1024 * 100; // Starting margin; doubled before every retry.
+ while (true) {
+ // First check whether we need thunks at all, or if the previous pass of
+ // adding them turned out ok.
+ bool RangesOk = true;
+ size_t NumChunks = 0;
+ for (OutputSection *Sec : OutputSections) {
+ if (!verifyRanges(Sec->Chunks)) {
+ RangesOk = false;
+ break;
+ }
+ NumChunks += Sec->Chunks.size();
+ }
+ if (RangesOk) {
+ if (Pass > 0)
+ log("Added " + Twine(NumChunks - OrigNumChunks) + " thunks with " +
+ "margin " + Twine(Margin) + " in " + Twine(Pass) + " passes");
+ return;
+ }
+ if (Pass >= 10) // Give up after ten thunk passes.
+ fatal("adding thunks hasn't converged after " + Twine(Pass) + " passes");
+ if (Pass > 0) {
+ // If the previous pass didn't work out, reset everything back to the
+ // original conditions before retrying with a wider margin. This should
+ // ideally never happen under real circumstances.
+ for (OutputSection *Sec : OutputSections) {
+ Sec->Chunks = Sec->OrigChunks;
+ for (Chunk *C : Sec->Chunks)
+ C->resetRelocTargets();
+ }
+ Margin *= 2;
+ }
+ // Try adding thunks everywhere where it is needed, with a margin
+ // to avoid things going out of range due to the added thunks.
+ bool AddressesChanged = false;
+ for (OutputSection *Sec : OutputSections)
+ AddressesChanged |= createThunks(Sec, Margin);
+ // If the verification above thought we needed thunks, we should have
+ // added some.
+ assert(AddressesChanged);
+ // Recalculate the layout for the whole image (and verify the ranges at
+ // the start of the next round).
+ assignAddresses();
+ Pass++;
+ }
+// The main function of the writer. It runs the layout steps in order, opens the output file, writes headers and sections, and commits the buffer.
+void Writer::run() {
+ ScopedTimer T1(CodeLayoutTimer);
+ createImportTables();
+ createSections();
+ createMiscChunks();
+ appendImportThunks();
+ createExportTable();
+ mergeSections();
+ readRelocTargets();
+ removeUnusedSections();
+ finalizeAddresses();
+ removeEmptySections();
+ setSectionPermissions();
+ createSymbolAndStringTable();
+ if (FileSize > UINT32_MAX) // Checked before the output file is opened.
+ fatal("image size (" + Twine(FileSize) + ") " +
+ "exceeds maximum allowable size (" + Twine(UINT32_MAX) + ")");
+ openFile(Config->OutputFile);
+ if (Config->is64()) {
+ writeHeader<pe32plus_header>();
+ } else {
+ writeHeader<pe32_header>();
+ }
+ writeSections();
+ sortExceptionTable();
+ T1.stop(); // Stops the CodeLayoutTimer measurement started at the top.
+ if (!Config->PDBPath.empty() && Config->Debug) {
+ assert(BuildId);
+ createPDB(Symtab, OutputSections, SectionTable, BuildId->BuildId);
+ }
+ writeBuildId();
+ writeMapFile(OutputSections);
+ ScopedTimer T2(DiskCommitTimer);
+ if (auto E = Buffer->commit()) // A failed commit is fatal.
+ fatal("failed to write the output file: " + toString(std::move(E)));
+// Derives the output section name from an input section name: everything
+// from the first '$' onwards is dropped, and so is everything from the first
+// '.' found at index 1 or later.
+static StringRef getOutputSectionName(StringRef Name) {
+  StringRef Base = Name.split('$').first;
+  // Treat a later period as a separator for MinGW, for sections like
+  // ".ctors.01234". The search starts at index 1 so that the leading period
+  // of the name itself is kept.
+  size_t DotPos = Base.find('.', 1);
+  return Base.substr(0, DotPos);
+}
+// For /order. Stable-sorts Chunks by the priority that Config->Order assigns
+// to each section chunk's symbol name; chunks that are not SectionChunks, or
+// that have no symbol, get priority 0.
+static void sortBySectionOrder(std::vector<Chunk *> &Chunks) {
+  auto PriorityOf = [](const Chunk *C) -> int {
+    auto *Sec = dyn_cast<SectionChunk>(C);
+    if (!Sec || !Sec->Sym)
+      return 0;
+    return Config->Order.lookup(Sec->Sym->getName());
+  };
+  auto ByPriority = [&PriorityOf](const Chunk *Lhs, const Chunk *Rhs) {
+    return PriorityOf(Lhs) < PriorityOf(Rhs);
+  };
+  std::stable_sort(Chunks.begin(), Chunks.end(), ByPriority);
+}
+// Sort concrete section chunks from GNU import libraries.
+// GNU binutils doesn't use short import files, but instead produces import
+// libraries that consist of object files, with section chunks for the .idata$*
+// sections. These are linked just as regular static libraries. Each import
+// library consists of one header object, one object file for every imported
+// symbol, and one trailer object. In order for the .idata tables/lists to
+// be formed correctly, the section chunks within each .idata$* section need
+// to be grouped by library, and sorted alphabetically within each library
+// (which makes sure the header comes first and the trailer last).
+static bool fixGnuImportChunks(
+ std::map<std::pair<StringRef, uint32_t>, std::vector<Chunk *>> &Map) {
+ // Make sure all .idata$* section chunks are mapped as RDATA in order to
+ // be sorted into the same sections as our own synthesized .idata chunks.
+ for (auto &Pair : Map) {
+ StringRef SectionName = Pair.first.first;
+ uint32_t OutChars = Pair.first.second;
+ if (!SectionName.startswith(".idata"))
+ continue;
+ if (OutChars == RDATA)
+ continue;
+ std::vector<Chunk *> &SrcVect = Pair.second;
+ std::vector<Chunk *> &DestVect = Map[{SectionName, RDATA}]; // May add a new entry to Map while it is being iterated.
+ DestVect.insert(DestVect.end(), SrcVect.begin(), SrcVect.end());
+ SrcVect.clear(); // The emptied entry stays in Map.
+ }
+ bool HasIdata = false;
+ // Sort all .idata$* chunks, grouping chunks from the same library,
+ // with alphabetical ordering of the object files within a library.
+ for (auto &Pair : Map) {
+ StringRef SectionName = Pair.first.first;
+ if (!SectionName.startswith(".idata"))
+ continue;
+ std::vector<Chunk *> &Chunks = Pair.second;
+ if (!Chunks.empty())
+ HasIdata = true;
+ std::stable_sort(Chunks.begin(), Chunks.end(), [&](Chunk *S, Chunk *T) {
+ SectionChunk *SC1 = dyn_cast_or_null<SectionChunk>(S);
+ SectionChunk *SC2 = dyn_cast_or_null<SectionChunk>(T);
+ if (!SC1 || !SC2) {
+ // if SC1, order them ascending. If SC2 or both null,
+ // S is not less than T.
+ return SC1 != nullptr;
+ }
+ // Make a string with "libraryname/objectfile" for sorting, achieving
+ // both grouping by library and sorting of objects within a library,
+ // at once.
+ std::string Key1 =
+ (SC1->File->ParentName + "/" + SC1->File->getName()).str();
+ std::string Key2 =
+ (SC2->File->ParentName + "/" + SC2->File->getName()).str();
+ return Key1 < Key2;
+ });
+ }
+ return HasIdata; // True if at least one .idata* entry was non-empty when visited by the sorting loop.
+// Lets Idata create its chunks (imported symbols, DLLs and the terminator in
+// .idata$2) and appends them to the matching RDATA ".idata$N" entries of Map.
+static void addSyntheticIdata(
+    IdataContents &Idata,
+    std::map<std::pair<StringRef, uint32_t>, std::vector<Chunk *>> &Map) {
+  Idata.create();
+  // Add the .idata content in the right section groups, to allow
+  // chunks from other linked in object files to be grouped together.
+  // See Microsoft PE/COFF spec 5.4 for details.
+  // The loader assumes a specific order of data, so the groups are listed
+  // and added in that order.
+  const std::pair<StringRef, std::vector<Chunk *> *> Groups[] = {
+      {".idata$2", &Idata.Dirs},      {".idata$4", &Idata.Lookups},
+      {".idata$5", &Idata.Addresses}, {".idata$6", &Idata.Hints},
+      {".idata$7", &Idata.DLLNames},
+  };
+  for (const auto &Group : Groups) {
+    std::vector<Chunk *> &Dest = Map[{Group.first, RDATA}];
+    Dest.insert(Dest.end(), Group.second->begin(), Group.second->end());
+  }
+}
+// Records the first chunk and the accumulated chunk size of the import
+// directory list (RDATA ".idata$2") and of the IAT (RDATA ".idata$5").
+// The start pointers are only set when the corresponding list is non-empty.
+void Writer::locateImportTables(
+    std::map<std::pair<StringRef, uint32_t>, std::vector<Chunk *>> &Map) {
+  std::vector<Chunk *> &DirChunks = Map[{".idata$2", RDATA}];
+  for (Chunk *C : DirChunks)
+    ImportTableSize += C->getSize();
+  if (!DirChunks.empty())
+    ImportTableStart = DirChunks.front();
+  std::vector<Chunk *> &IATChunks = Map[{".idata$5", RDATA}];
+  for (Chunk *C : IATChunks)
+    IATSize += C->getSize();
+  if (!IATChunks.empty())
+    IATStart = IATChunks.front();
+}
+// Create output section objects and add them to OutputSections.
+void Writer::createSections() {
+ // First, create the builtin sections.
+ const uint32_t CODE = IMAGE_SCN_CNT_CODE;
+ const uint32_t R = IMAGE_SCN_MEM_READ;
+ const uint32_t W = IMAGE_SCN_MEM_WRITE;
+ const uint32_t X = IMAGE_SCN_MEM_EXECUTE;
+ SmallDenseMap<std::pair<StringRef, uint32_t>, OutputSection *> Sections;
+ auto CreateSection = [&](StringRef Name, uint32_t OutChars) {
+ OutputSection *&Sec = Sections[{Name, OutChars}];
+ if (!Sec) {
+ Sec = make<OutputSection>(Name, OutChars);
+ OutputSections.push_back(Sec);
+ }
+ return Sec; // An existing section is reused for the same (name, characteristics) pair.
+ };
+ // Try to match the section order used by link.exe.
+ TextSec = CreateSection(".text", CODE | R | X);
+ CreateSection(".bss", BSS | R | W); // NOTE(review): BSS, DATA, DISCARDABLE and RDATA are not defined in the visible lines; presumably declared elsewhere.
+ RdataSec = CreateSection(".rdata", DATA | R);
+ BuildidSec = CreateSection(".buildid", DATA | R);
+ DataSec = CreateSection(".data", DATA | R | W);
+ PdataSec = CreateSection(".pdata", DATA | R);
+ IdataSec = CreateSection(".idata", DATA | R);
+ EdataSec = CreateSection(".edata", DATA | R);
+ DidatSec = CreateSection(".didat", DATA | R);
+ RsrcSec = CreateSection(".rsrc", DATA | R);
+ RelocSec = CreateSection(".reloc", DATA | DISCARDABLE | R);
+ CtorsSec = CreateSection(".ctors", DATA | R | W);
+ DtorsSec = CreateSection(".dtors", DATA | R | W);
+ // Then bin chunks by name and output characteristics.
+ std::map<std::pair<StringRef, uint32_t>, std::vector<Chunk *>> Map;
+ for (Chunk *C : Symtab->getChunks()) {
+ auto *SC = dyn_cast<SectionChunk>(C);
+ if (SC && !SC->Live) { // Section chunks that are not live are dropped here.
+ if (Config->Verbose)
+ SC->printDiscardedMessage();
+ continue;
+ }
+ Map[{C->getSectionName(), C->getOutputCharacteristics()}].push_back(C);
+ }
+ // Even in non MinGW cases, we might need to link against GNU import
+ // libraries.
+ bool HasIdata = fixGnuImportChunks(Map);
+ if (!Idata.empty())
+ HasIdata = true;
+ if (HasIdata)
+ addSyntheticIdata(Idata, Map);
+ // Process an /order option.
+ if (!Config->Order.empty())
+ for (auto &Pair : Map)
+ sortBySectionOrder(Pair.second);
+ if (HasIdata)
+ locateImportTables(Map);
+ // Then create an OutputSection for each section.
+ // '$' and all following characters in input section names are
+ // discarded when determining output section. So, .text$foo
+ // contributes to .text, for example. See PE/COFF spec 3.2.
+ for (auto &Pair : Map) {
+ StringRef Name = getOutputSectionName(Pair.first.first);
+ uint32_t OutChars = Pair.first.second;
+ if (Name == ".CRT") {
+ // In link.exe, there is a special case for the I386 target where .CRT
+ // sections are treated as if they have output characteristics DATA | R if
+ // their characteristics are DATA | R | W. This implements the same
+ // special case for all architectures.
+ OutChars = DATA | R;
+ log("Processing section " + Pair.first.first + " -> " + Name);
+ sortCRTSectionChunks(Pair.second);
+ }
+ OutputSection *Sec = CreateSection(Name, OutChars);
+ std::vector<Chunk *> &Chunks = Pair.second;
+ for (Chunk *C : Chunks)
+ Sec->addChunk(C);
+ }
+ // Finally, move some output sections to the end.
+ auto SectionOrder = [&](OutputSection *S) {
+ // Move DISCARDABLE (or non-memory-mapped) sections to the end of file because
+ // the loader cannot handle holes. Stripping can remove other discardable ones
+ // than .reloc, which is first of them (created early).
+ if (S->Header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE)
+ return 2; // Highest rank: sorted after everything else.
+ // .rsrc should come at the end of the non-discardable sections because its
+ // size may change by the Win32 UpdateResources() function, causing
+ // subsequent sections to move (see https://crbug.com/827082).
+ if (S == RsrcSec)
+ return 1;
+ return 0; // All other sections keep their relative order (stable sort).
+ };
+ std::stable_sort(OutputSections.begin(), OutputSections.end(),
+ [&](OutputSection *S, OutputSection *T) {
+ return SectionOrder(S) < SectionOrder(T);
+ });
+// Adds linker-generated chunks to their output sections: merge chunks and
+// local-import chunks go to .rdata, debug chunks go to .buildid (MinGW) or
+// .rdata, and the SEH, /guard:cf and MinGW helpers are invoked as configured.
+void Writer::createMiscChunks() {
+  for (auto &Entry : MergeChunk::Instances)
+    RdataSec->addChunk(Entry.second);
+  // Create thunks for locally-dllimported symbols.
+  for (Chunk *C : Symtab->LocalImportChunks)
+    RdataSec->addChunk(C);
+  // Create Debug Information Chunks
+  OutputSection *DebugInfoSec = RdataSec;
+  if (Config->MinGW)
+    DebugInfoSec = BuildidSec;
+  const bool WantDebugDirectory = Config->Debug || Config->Repro;
+  if (WantDebugDirectory) {
+    DebugDirectory = make<DebugDirectoryChunk>(DebugRecords, Config->Repro);
+    DebugInfoSec->addChunk(DebugDirectory);
+  }
+  if (Config->Debug) {
+    // Make a CVDebugRecordChunk even when /DEBUG:CV is not specified. We
+    // output a PDB no matter what, and this chunk provides the only means of
+    // allowing a debugger to match a PDB and an executable. So we need it even
+    // if we're ultimately not going to write CodeView data to the PDB.
+    BuildId = make<CVDebugRecordChunk>();
+    DebugRecords.push_back(BuildId);
+    for (Chunk *Record : DebugRecords)
+      DebugInfoSec->addChunk(Record);
+  }
+  // Create SEH table. x86-only.
+  if (Config->Machine == I386)
+    createSEHTable();
+  // Create /guard:cf tables if requested.
+  if (Config->GuardCF != GuardCFLevel::Off)
+    createGuardCFTables();
+  if (Config->MinGW) {
+    createRuntimePseudoRelocs();
+    insertCtorDtorSymbols();
+  }
+}
+// Create .idata section for the DLL-imported symbol table.
+// The format of this section is inherently Windows-specific.
+// IdataContents class abstracted away the details for us,
+// so we just let it create chunks and add them to the section.
+// Every live import file is registered in Config->DLLOrder and its import
+// symbol is handed to DelayIdata (for delay-loaded DLLs) or to Idata.
+void Writer::createImportTables() {
+  // Initialize DLLOrder so that import entries are ordered in
+  // the same order as in the command line. (That affects DLL
+  // initialization order, and this ordering is MSVC-compatible.)
+  for (ImportFile *File : ImportFile::Instances) {
+    if (!File->Live)
+      continue;
+    std::string DLL = StringRef(File->DLLName).lower();
+    if (Config->DLLOrder.count(DLL) == 0) {
+      // Read the size before operator[] inserts the key. In a single
+      // `Map[K] = Map.size()` expression the order of the two is unspecified
+      // before C++17, so the stored index could differ between compilers.
+      auto NextIndex = Config->DLLOrder.size();
+      Config->DLLOrder[DLL] = NextIndex;
+    }
+    if (File->ImpSym && !isa<DefinedImportData>(File->ImpSym))
+      fatal(toString(*File->ImpSym) + " was replaced");
+    DefinedImportData *ImpSym = cast_or_null<DefinedImportData>(File->ImpSym);
+    // Reuse the lower-cased name computed above for the delay-load lookup.
+    if (Config->DelayLoads.count(DLL)) {
+      if (!File->ThunkSym)
+        fatal("cannot delay-load " + toString(File) +
+              " due to import of data: " + toString(*ImpSym));
+      DelayIdata.add(ImpSym);
+    } else {
+      Idata.add(ImpSym);
+    }
+  }
+}
+// Adds the chunks of live import thunks to .text and, when there are
+// delay-loaded imports, creates the delay-import chunks and distributes them
+// over .didat, .data and .text. Does nothing when there are no import files.
+void Writer::appendImportThunks() {
+  if (ImportFile::Instances.empty())
+    return;
+  for (ImportFile *File : ImportFile::Instances) {
+    if (!File->Live || !File->ThunkSym)
+      continue;
+    auto *Thunk = dyn_cast<DefinedImportThunk>(File->ThunkSym);
+    if (!Thunk)
+      fatal(toString(*File->ThunkSym) + " was replaced");
+    if (File->ThunkLive)
+      TextSec->addChunk(Thunk->getChunk());
+  }
+  if (DelayIdata.empty())
+    return;
+  Defined *Helper = cast<Defined>(Config->DelayLoadHelper);
+  DelayIdata.create(Helper);
+  for (Chunk *C : DelayIdata.getChunks())
+    DidatSec->addChunk(C);
+  for (Chunk *C : DelayIdata.getDataChunks())
+    DataSec->addChunk(C);
+  for (Chunk *C : DelayIdata.getCodeChunks())
+    TextSec->addChunk(C);
+}
+// Moves the export-table chunks held by Edata into the .edata output section.
+// Nothing is added when no exports are configured.
+void Writer::createExportTable() {
+  if (!Config->Exports.empty()) {
+    for (Chunk *ExportChunk : Edata.Chunks)
+      EdataSec->addChunk(ExportChunk);
+  }
+}
+// Remove sections that we can be sure won't get content, to avoid
+// allocating space for their section headers.
+void Writer::removeUnusedSections() {
+  // RelocSec is always kept because it is populated later. For the others,
+  // MergeChunks have zero size at this point, as their size is finalized
+  // later, so only sections that have no Chunks at all are removed.
+  auto CanDrop = [this](OutputSection *S) {
+    return S != RelocSec && S->Chunks.empty();
+  };
+  auto NewEnd =
+      std::remove_if(OutputSections.begin(), OutputSections.end(), CanDrop);
+  OutputSections.erase(NewEnd, OutputSections.end());
+}
+// The Windows loader doesn't seem to like empty sections,
+// so we remove them if any. The remaining sections are then numbered
+// consecutively, starting at 1.
+void Writer::removeEmptySections() {
+  auto HasNoContents = [](OutputSection *S) {
+    return S->getVirtualSize() == 0;
+  };
+  auto NewEnd = std::remove_if(OutputSections.begin(), OutputSections.end(),
+                               HasNoContents);
+  OutputSections.erase(NewEnd, OutputSections.end());
+  uint32_t LastIndex = 0;
+  for (OutputSection *Sec : OutputSections)
+    Sec->SectionIndex = ++LastIndex;
+}
+// Appends Str plus a terminating NUL to Strtab and returns the offset at
+// which the entry starts. Only names longer than COFF::NameSize are expected
+// here (asserted).
+size_t Writer::addEntryToStringTable(StringRef Str) {
+  assert(Str.size() > COFF::NameSize);
+  // Offsets are counted from the start of the table, which begins with a
+  // 4-byte size field that is not stored in Strtab itself.
+  const size_t EntryOffset = 4 + Strtab.size();
+  Strtab.insert(Strtab.end(), Str.begin(), Str.end());
+  Strtab.push_back('\0');
+  return EntryOffset;
+}
+Optional<coff_symbol16> Writer::createSymbol(Defined *Def) { // Builds a symbol table record for Def; returns None when Def is not emitted.
+ coff_symbol16 Sym;
+ switch (Def->kind()) {
+ case Symbol::DefinedAbsoluteKind:
+ Sym.Value = Def->getRVA();
+ Sym.SectionNumber = IMAGE_SYM_ABSOLUTE;
+ break;
+ case Symbol::DefinedSyntheticKind:
+ // Relative symbols are unrepresentable in a COFF symbol table.
+ return None;
+ default: {
+ // Don't write symbols that won't be written to the output to the symbol
+ // table.
+ Chunk *C = Def->getChunk();
+ if (!C)
+ return None;
+ OutputSection *OS = C->getOutputSection();
+ if (!OS)
+ return None;
+ Sym.Value = Def->getRVA() - OS->getRVA(); // Value is stored relative to the start of the output section.
+ Sym.SectionNumber = OS->SectionIndex;
+ break;
+ }
+ }
+ StringRef Name = Def->getName();
+ if (Name.size() > COFF::NameSize) { // Long names are stored in the string table and referenced by offset.
+ Sym.Name.Offset.Zeroes = 0;
+ Sym.Name.Offset.Offset = addEntryToStringTable(Name);
+ } else {
+ memset(Sym.Name.ShortName, 0, COFF::NameSize);
+ memcpy(Sym.Name.ShortName, Name.data(), Name.size());
+ }
+ if (auto *D = dyn_cast<DefinedCOFF>(Def)) {
+ COFFSymbolRef Ref = D->getCOFFSymbol();
+ Sym.Type = Ref.getType();
+ Sym.StorageClass = Ref.getStorageClass();
+ } else { // NOTE(review): this branch is empty in this listing, so Sym.Type and Sym.StorageClass are not assigned here; lines look missing.
+ }
+ Sym.NumberOfAuxSymbols = 0;
+ return Sym;
+// Registers long names of discardable sections in the string table, collects
+// the symbols to emit when DWARF or symbol-table output is requested, and
+// reserves file space for the symbol table and string table after the last
+// section.
+void Writer::createSymbolAndStringTable() {
+  // PE/COFF images are limited to 8 byte section names. Longer names can be
+  // supported by writing a non-standard string table, but this string table is
+  // not mapped at runtime and the long names will therefore be inaccessible.
+  // link.exe always truncates section names to 8 bytes, whereas binutils always
+  // preserves long section names via the string table. LLD adopts a hybrid
+  // solution where discardable sections have long names preserved and
+  // non-discardable sections have their names truncated, to ensure that any
+  // section which is mapped at runtime also has its name mapped at runtime.
+  for (OutputSection *Sec : OutputSections) {
+    bool IsLongName = Sec->Name.size() > COFF::NameSize;
+    bool IsDiscardable =
+        (Sec->Header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) != 0;
+    if (IsLongName && IsDiscardable)
+      Sec->setStringTableOff(addEntryToStringTable(Sec->Name));
+  }
+  const bool WantSymtab = Config->DebugDwarf || Config->DebugSymtab;
+  if (WantSymtab) {
+    for (ObjFile *File : ObjFile::Instances) {
+      for (Symbol *B : File->getSymbols()) {
+        auto *D = dyn_cast_or_null<Defined>(B);
+        if (!D || D->WrittenToSymtab)
+          continue;
+        // Mark first so that each symbol is considered only once.
+        D->WrittenToSymtab = true;
+        Optional<coff_symbol16> Sym = createSymbol(D);
+        if (Sym)
+          OutputSymtab.push_back(*Sym);
+      }
+    }
+  }
+  if (OutputSymtab.empty() && Strtab.empty())
+    return;
+  // We position the symbol table to be adjacent to the end of the last section.
+  uint64_t TableEnd = FileSize;
+  PointerToSymbolTable = TableEnd;
+  TableEnd += OutputSymtab.size() * sizeof(coff_symbol16);
+  // The string table is preceded by its 4-byte size field.
+  TableEnd += 4 + Strtab.size();
+  FileSize = alignTo(TableEnd, SectorSize);
+}
+// Remembers the first and last .pdata chunks, then applies every
+// Config->Merge rule: chained rules are followed to their final destination
+// (a cycle is fatal), and the source section is merged into the destination
+// or, when no such destination section exists, simply renamed.
+void Writer::mergeSections() {
+  if (!PdataSec->Chunks.empty()) {
+    FirstPdata = PdataSec->Chunks.front();
+    LastPdata = PdataSec->Chunks.back();
+  }
+  for (auto &Rule : Config->Merge) {
+    StringRef Dest = Rule.second;
+    if (Rule.first == Dest)
+      continue;
+    // Follow the chain of rules until a name with no further rule is reached.
+    StringSet<> Visited;
+    for (;;) {
+      if (!Visited.insert(Dest).second)
+        fatal("/merge: cycle found for section '" + Rule.first + "'");
+      auto Next = Config->Merge.find(Dest);
+      if (Next == Config->Merge.end())
+        break;
+      Dest = Next->second;
+    }
+    OutputSection *From = findSection(Rule.first);
+    OutputSection *To = findSection(Dest);
+    if (!From)
+      continue;
+    if (To)
+      To->merge(From);
+    else
+      From->Name = Dest;
+  }
+}
+// Visits all sections to initialize their relocation targets. The chunks of
+// each section are handed to for_each with the parallel::par policy.
+void Writer::readRelocTargets() {
+  auto InitTargets = [&](Chunk *C) { C->readRelocTargets(); };
+  for (OutputSection *Sec : OutputSections) {
+    auto &SecChunks = Sec->Chunks;
+    for_each(parallel::par, SecChunks.begin(), SecChunks.end(), InitTargets);
+  }
+}
+// Visits all sections to assign incremental, non-overlapping RVAs and
+// file offsets. Also recomputes SizeOfHeaders, FileSize and SizeOfImage.
+void Writer::assignAddresses() {
+ SizeOfHeaders = DOSStubSize + sizeof(PEMagic) + sizeof(coff_file_header) +
+ sizeof(data_directory) * NumberOfDataDirectory +
+ sizeof(coff_section) * OutputSections.size();
+ SizeOfHeaders +=
+ Config->is64() ? sizeof(pe32plus_header) : sizeof(pe32_header);
+ SizeOfHeaders = alignTo(SizeOfHeaders, SectorSize);
+ uint64_t RVA = PageSize; // The first page is kept unmapped.
+ FileSize = SizeOfHeaders;
+ for (OutputSection *Sec : OutputSections) {
+ if (Sec == RelocSec)
+ addBaserels(); // Called just before .reloc itself is laid out.
+ uint64_t RawSize = 0, VirtualSize = 0;
+ Sec->Header.VirtualAddress = RVA;
+ for (Chunk *C : Sec->Chunks) {
+ VirtualSize = alignTo(VirtualSize, C->Alignment);
+ C->setRVA(RVA + VirtualSize);
+ C->OutputSectionOff = VirtualSize;
+ C->finalizeContents();
+ VirtualSize += C->getSize();
+ if (C->hasData())
+ RawSize = alignTo(VirtualSize, SectorSize); // Raw size extends through the last chunk that has data.
+ }
+ if (VirtualSize > UINT32_MAX)
+ error("section larger than 4 GiB: " + Sec->Name);
+ Sec->Header.VirtualSize = VirtualSize;
+ Sec->Header.SizeOfRawData = RawSize;
+ if (RawSize != 0) // PointerToRawData is left untouched for sections without raw data.
+ Sec->Header.PointerToRawData = FileSize;
+ RVA += alignTo(VirtualSize, PageSize);
+ FileSize += alignTo(RawSize, SectorSize);
+ }
+ SizeOfImage = alignTo(RVA, PageSize);
+template <typename PEHeaderTy> void Writer::writeHeader() { // Writes, in order: DOS header and stub, PE magic, COFF header, PE header, data directories, section table, then symbol and string tables.
+ // Write DOS header. For backwards compatibility, the first part of a PE/COFF
+ // executable consists of an MS-DOS MZ executable. If the executable is run
+ // under DOS, that program gets run (usually to just print an error message).
+ // When run under Windows, the loader looks at AddressOfNewExeHeader and uses
+ // the PE header instead.
+ uint8_t *Buf = Buffer->getBufferStart();
+ auto *DOS = reinterpret_cast<dos_header *>(Buf);
+ Buf += sizeof(dos_header);
+ DOS->Magic[0] = 'M';
+ DOS->Magic[1] = 'Z';
+ DOS->UsedBytesInTheLastPage = DOSStubSize % 512;
+ DOS->FileSizeInPages = divideCeil(DOSStubSize, 512);
+ DOS->HeaderSizeInParagraphs = sizeof(dos_header) / 16;
+ DOS->AddressOfRelocationTable = sizeof(dos_header);
+ DOS->AddressOfNewExeHeader = DOSStubSize;
+ // Write DOS program.
+ memcpy(Buf, DOSProgram, sizeof(DOSProgram));
+ Buf += sizeof(DOSProgram);
+ // Write PE magic
+ memcpy(Buf, PEMagic, sizeof(PEMagic));
+ Buf += sizeof(PEMagic);
+ // Write COFF header
+ auto *COFF = reinterpret_cast<coff_file_header *>(Buf);
+ Buf += sizeof(*COFF);
+ COFF->Machine = Config->Machine;
+ COFF->NumberOfSections = OutputSections.size();
+ if (Config->LargeAddressAware) // NOTE(review): the statement guarded by this condition is not visible; as listed it guards the next `if`. Lines look missing.
+ if (!Config->is64())
+ COFF->Characteristics |= IMAGE_FILE_32BIT_MACHINE;
+ if (Config->DLL)
+ COFF->Characteristics |= IMAGE_FILE_DLL;
+ if (!Config->Relocatable) // NOTE(review): as listed, SizeOfOptionalHeader is only set when !Relocatable; a line may be missing between these two.
+ COFF->SizeOfOptionalHeader =
+ sizeof(PEHeaderTy) + sizeof(data_directory) * NumberOfDataDirectory;
+ // Write PE header
+ auto *PE = reinterpret_cast<PEHeaderTy *>(Buf);
+ Buf += sizeof(*PE);
+ PE->Magic = Config->is64() ? PE32Header::PE32_PLUS : PE32Header::PE32;
+ // If {Major,Minor}LinkerVersion is left at 0.0, then for some
+ // reason signing the resulting PE file with Authenticode produces a
+ // signature that fails to validate on Windows 7 (but is OK on 10).
+ // Set it to 14.0, which is what VS2015 outputs, and which avoids
+ // that problem.
+ PE->MajorLinkerVersion = 14;
+ PE->MinorLinkerVersion = 0;
+ PE->ImageBase = Config->ImageBase;
+ PE->SectionAlignment = PageSize;
+ PE->FileAlignment = SectorSize;
+ PE->MajorImageVersion = Config->MajorImageVersion;
+ PE->MinorImageVersion = Config->MinorImageVersion;
+ PE->MajorOperatingSystemVersion = Config->MajorOSVersion;
+ PE->MinorOperatingSystemVersion = Config->MinorOSVersion;
+ PE->MajorSubsystemVersion = Config->MajorOSVersion; // The subsystem version fields reuse the OS version values.
+ PE->MinorSubsystemVersion = Config->MinorOSVersion;
+ PE->Subsystem = Config->Subsystem;
+ PE->SizeOfImage = SizeOfImage;
+ PE->SizeOfHeaders = SizeOfHeaders;
+ if (!Config->NoEntry) {
+ Defined *Entry = cast<Defined>(Config->Entry);
+ PE->AddressOfEntryPoint = Entry->getRVA();
+ // Pointer to thumb code must have the LSB set, so adjust it.
+ if (Config->Machine == ARMNT)
+ PE->AddressOfEntryPoint |= 1;
+ }
+ PE->SizeOfStackReserve = Config->StackReserve;
+ PE->SizeOfStackCommit = Config->StackCommit;
+ PE->SizeOfHeapReserve = Config->HeapReserve;
+ PE->SizeOfHeapCommit = Config->HeapCommit;
+ if (Config->AppContainer) // NOTE(review): the statements guarded by this run of conditions are not visible; as listed they nest into one another. Lines look missing.
+ if (Config->DynamicBase)
+ if (Config->HighEntropyVA)
+ if (!Config->AllowBind)
+ if (Config->NxCompat)
+ if (!Config->AllowIsolation)
+ if (Config->GuardCF != GuardCFLevel::Off)
+ if (Config->IntegrityCheck)
+ if (SetNoSEHCharacteristic)
+ if (Config->TerminalServerAware)
+ PE->NumberOfRvaAndSize = NumberOfDataDirectory;
+ if (TextSec->getVirtualSize()) {
+ PE->BaseOfCode = TextSec->getRVA();
+ PE->SizeOfCode = TextSec->getRawSize();
+ }
+ PE->SizeOfInitializedData = getSizeOfInitializedData();
+ // Write data directory
+ auto *Dir = reinterpret_cast<data_directory *>(Buf);
+ Buf += sizeof(*Dir) * NumberOfDataDirectory;
+ if (!Config->Exports.empty()) {
+ Dir[EXPORT_TABLE].RelativeVirtualAddress = Edata.getRVA();
+ Dir[EXPORT_TABLE].Size = Edata.getSize();
+ }
+ if (ImportTableStart) {
+ Dir[IMPORT_TABLE].RelativeVirtualAddress = ImportTableStart->getRVA();
+ Dir[IMPORT_TABLE].Size = ImportTableSize;
+ }
+ if (IATStart) {
+ Dir[IAT].RelativeVirtualAddress = IATStart->getRVA();
+ Dir[IAT].Size = IATSize;
+ }
+ if (RsrcSec->getVirtualSize()) {
+ Dir[RESOURCE_TABLE].RelativeVirtualAddress = RsrcSec->getRVA();
+ Dir[RESOURCE_TABLE].Size = RsrcSec->getVirtualSize();
+ }
+ if (FirstPdata) {
+ Dir[EXCEPTION_TABLE].RelativeVirtualAddress = FirstPdata->getRVA();
+ LastPdata->getRVA() + LastPdata->getSize() - FirstPdata->getRVA(); // NOTE(review): this value is not stored anywhere as listed; an assignment target line looks missing.
+ }
+ if (RelocSec->getVirtualSize()) {
+ Dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = RelocSec->getRVA();
+ Dir[BASE_RELOCATION_TABLE].Size = RelocSec->getVirtualSize();
+ }
+ if (Symbol *Sym = Symtab->findUnderscore("_tls_used")) {
+ if (Defined *B = dyn_cast<Defined>(Sym)) {
+ Dir[TLS_TABLE].RelativeVirtualAddress = B->getRVA();
+ Dir[TLS_TABLE].Size = Config->is64()
+ ? sizeof(object::coff_tls_directory64)
+ : sizeof(object::coff_tls_directory32);
+ }
+ }
+ if (DebugDirectory) {
+ Dir[DEBUG_DIRECTORY].RelativeVirtualAddress = DebugDirectory->getRVA();
+ Dir[DEBUG_DIRECTORY].Size = DebugDirectory->getSize();
+ }
+ if (Symbol *Sym = Symtab->findUnderscore("_load_config_used")) {
+ if (auto *B = dyn_cast<DefinedRegular>(Sym)) {
+ SectionChunk *SC = B->getChunk();
+ assert(B->getRVA() >= SC->getRVA());
+ uint64_t OffsetInChunk = B->getRVA() - SC->getRVA();
+ if (!SC->hasData() || OffsetInChunk + 4 > SC->getSize())
+ fatal("_load_config_used is malformed");
+ ArrayRef<uint8_t> SecContents = SC->getContents();
+ uint32_t LoadConfigSize = // Read as a little-endian 32-bit value at the symbol's offset in the chunk.
+ *reinterpret_cast<const ulittle32_t *>(&SecContents[OffsetInChunk]);
+ if (OffsetInChunk + LoadConfigSize > SC->getSize())
+ fatal("_load_config_used is too large");
+ Dir[LOAD_CONFIG_TABLE].RelativeVirtualAddress = B->getRVA();
+ Dir[LOAD_CONFIG_TABLE].Size = LoadConfigSize;
+ }
+ }
+ if (!DelayIdata.empty()) {
+ Dir[DELAY_IMPORT_DESCRIPTOR].RelativeVirtualAddress =
+ DelayIdata.getDirRVA();
+ Dir[DELAY_IMPORT_DESCRIPTOR].Size = DelayIdata.getDirSize();
+ }
+ // Write section table
+ for (OutputSection *Sec : OutputSections) {
+ Sec->writeHeaderTo(Buf);
+ Buf += sizeof(coff_section);
+ }
+ SectionTable = ArrayRef<uint8_t>( // Covers exactly the section headers written by the loop above.
+ Buf - OutputSections.size() * sizeof(coff_section), Buf);
+ if (OutputSymtab.empty() && Strtab.empty())
+ return;
+ COFF->PointerToSymbolTable = PointerToSymbolTable;
+ uint32_t NumberOfSymbols = OutputSymtab.size();
+ COFF->NumberOfSymbols = NumberOfSymbols;
+ auto *SymbolTable = reinterpret_cast<coff_symbol16 *>(
+ Buffer->getBufferStart() + COFF->PointerToSymbolTable);
+ for (size_t I = 0; I != NumberOfSymbols; ++I)
+ SymbolTable[I] = OutputSymtab[I];
+ // Create the string table, it follows immediately after the symbol table.
+ // The first 4 bytes is length including itself.
+ Buf = reinterpret_cast<uint8_t *>(&SymbolTable[NumberOfSymbols]);
+ write32le(Buf, Strtab.size() + 4);
+ if (!Strtab.empty())
+ memcpy(Buf + 4, Strtab.data(), Strtab.size());
+void Writer::openFile(StringRef Path) { // Creates the output buffer of FileSize bytes at Path and stores it in Buffer.
+ Buffer = CHECK( // NOTE(review): CHECK presumably reports the message and aborts on failure; confirm against its definition.
+ FileOutputBuffer::create(Path, FileSize, FileOutputBuffer::F_executable),
+ "failed to open " + Path);
+// Collects the exception handlers of all object files and passes them to
+// maybeAddRVATable() as the safe SEH handler table. Also decides whether the
+// "no SEH" characteristic stays set.
+void Writer::createSEHTable() {
+  // Set the no SEH characteristic on x86 binaries unless we find exception
+  // handlers.
+  SetNoSEHCharacteristic = true;
+  SymbolRVASet Handlers;
+  for (ObjFile *File : ObjFile::Instances) {
+    // FIXME: We should error here instead of earlier unless /safeseh:no was
+    // passed.
+    if (!File->hasSafeSEH())
+      return;
+    markSymbolsForRVATable(File, File->getSXDataChunks(), Handlers);
+  }
+  // Remove the "no SEH" characteristic if all object files were built with
+  // safeseh, we found some exception handlers, and there is a load config in
+  // the object.
+  if (!Handlers.empty() && Symtab->findUnderscore("_load_config_used"))
+    SetNoSEHCharacteristic = false;
+  maybeAddRVATable(std::move(Handlers), "__safe_se_handler_table",
+                   "__safe_se_handler_count");
+}
+// Add a symbol to an RVA set. Two symbols may have the same RVA, but an RVA set
+// cannot contain duplicates. Therefore, the set is uniqued by Chunk and the
+// symbol's offset into that Chunk. A symbol without a chunk is recorded with a
+// null chunk and its full RVA as the offset.
+static void addSymbolToRVASet(SymbolRVASet &RVASet, Defined *S) {
+  Chunk *C = S->getChunk();
+  // The offset computation below allows for C being null, so the cast must
+  // tolerate null as well; plain dyn_cast<> is not allowed on a null pointer.
+  if (auto *SC = dyn_cast_or_null<SectionChunk>(C))
+    C = SC->Repl; // Look through ICF replacement.
+  uint32_t Off = S->getRVA() - (C ? C->getRVA() : 0);
+  RVASet.insert({C, Off});
+}
+// Adds S to AddressTakenSyms (the GFIDs table input) when it is a defined
+// COFF function symbol whose chunk lives in an executable output section.
+// Anything else is silently ignored.
+static void maybeAddAddressTakenFunction(SymbolRVASet &AddressTakenSyms,
+                                         Symbol *S) {
+  // Undefined (or null) symbols are not candidates.
+  auto *Def = dyn_cast_or_null<DefinedCOFF>(S);
+  if (!Def)
+    return;
+  // Neither are references to non-functions (e.g. globals and labels).
+  if (Def->getCOFFSymbol().getComplexType() != COFF::IMAGE_SYM_DTYPE_FUNCTION)
+    return;
+  // The symbol must sit in a chunk that was placed in an output section.
+  Chunk *Home = Def->getChunk();
+  if (!Home)
+    return;
+  OutputSection *Sec = Home->getOutputSection();
+  if (!Sec)
+    return;
+  // Only code in executable sections is marked as address taken.
+  if ((Sec->Header.Characteristics & IMAGE_SCN_MEM_EXECUTE) == 0)
+    return;
+  addSymbolToRVASet(AddressTakenSyms, Def);
+}
+// Walks every relocation of every live section chunk in File and offers the
+// relocation target to maybeAddAddressTakenFunction, which records it in
+// UsedSymbols when it qualifies.
+static void markSymbolsWithRelocations(ObjFile *File,
+                                       SymbolRVASet &UsedSymbols) {
+  for (Chunk *C : File->getChunks()) {
+    // We only care about live section chunks. Common chunks and other chunks
+    // don't generally contain relocations.
+    auto *Sec = dyn_cast<SectionChunk>(C);
+    bool IsLiveSection = Sec && Sec->Live;
+    if (!IsLiveSection)
+      continue;
+    for (const coff_relocation &R : Sec->Relocs) {
+      // Relative relocations are ignored on x86. On x86_64 they can't be
+      // ignored since they're also used to compute absolute addresses.
+      bool IsX86Rel32 =
+          Config->Machine == I386 && R.Type == COFF::IMAGE_REL_I386_REL32;
+      if (!IsX86Rel32)
+        maybeAddAddressTakenFunction(
+            UsedSymbols, Sec->File->getSymbol(R.SymbolTableIndex));
+    }
+  }
+}
+// Creates the Control Flow Guard tables: the guard function id table (RVAs of
+// all address-taken functions, sorted and uniqued just like the safe SEH
+// table), optionally the longjmp target table, and the __guard_flags value.
+void Writer::createGuardCFTables() {
+  SymbolRVASet FidTargets;
+  SymbolRVASet LJmpTargets;
+  for (ObjFile *Obj : ObjFile::Instances) {
+    // Objects not compiled with /guard:cf: assume there were no setjmp
+    // targets and that all code symbols with relocations are possibly
+    // address-taken.
+    if (!Obj->hasGuardCF()) {
+      markSymbolsWithRelocations(Obj, FidTargets);
+      continue;
+    }
+    // Objects compiled with /guard:cf list address-taken symbols in .gfids$y
+    // sections and longjmp targets in .gljmp$y sections.
+    markSymbolsForRVATable(Obj, Obj->getGuardFidChunks(), FidTargets);
+    markSymbolsForRVATable(Obj, Obj->getGuardLJmpChunks(), LJmpTargets);
+  }
+  // Mark the image entry as address-taken.
+  if (Config->Entry)
+    maybeAddAddressTakenFunction(FidTargets, Config->Entry);
+  // Mark exported symbols in executable sections as address-taken.
+  for (Export &Exp : Config->Exports)
+    maybeAddAddressTakenFunction(FidTargets, Exp.Sym);
+  // Ensure sections referenced in the gfid table are 16-byte aligned.
+  for (const ChunkAndOffset &Ref : FidTargets) {
+    if (Ref.InputChunk->Alignment >= 16)
+      continue;
+    Ref.InputChunk->Alignment = 16;
+  }
+  maybeAddRVATable(std::move(FidTargets), "__guard_fids_table",
+                   "__guard_fids_count");
+  // __guard_flags is used in the load config to indicate that /guard:cf was
+  // enabled.
+  uint32_t GuardFlags = uint32_t(coff_guard_flags::CFInstrumented) |
+                        uint32_t(coff_guard_flags::HasFidTable);
+  // Add the longjmp target table, and advertise it in the flags, unless the
+  // user told us not to.
+  if (Config->GuardCF == GuardCFLevel::Full) {
+    maybeAddRVATable(std::move(LJmpTargets), "__guard_longjmp_table",
+                     "__guard_longjmp_count");
+    GuardFlags |= uint32_t(coff_guard_flags::HasLongJmpTable);
+  }
+  Symbol *FlagSym = Symtab->findUnderscore("__guard_flags");
+  cast<DefinedAbsolute>(FlagSym)->setVA(GuardFlags);
+}
+// Reads the symbol table indices stored in SymIdxChunks and adds the live
+// symbols they name to TableSymbols. Symbol RVAs are not known yet and depend
+// on the table size, so symbols are collected instead of integers.
+void Writer::markSymbolsForRVATable(ObjFile *File,
+                                    ArrayRef<SectionChunk *> SymIdxChunks,
+                                    SymbolRVASet &TableSymbols) {
+  for (SectionChunk *C : SymIdxChunks) {
+    // Skip sections discarded by linker GC. This comes up when a .gfids
+    // section is associated with something like a vtable and the vtable is
+    // discarded: the associated gfids section is discarded too, and the
+    // virtual member functions are not marked as address-taken by it.
+    if (!C->Live)
+      continue;
+    // The contents must be a whole number of 4-byte indices.
+    ArrayRef<uint8_t> Raw = C->getContents();
+    if (Raw.size() % 4 != 0) {
+      warn("ignoring " + C->getSectionName() +
+           " symbol table index section in object " + toString(File));
+      continue;
+    }
+    // View the bytes as little-endian 32-bit indices and look each one up in
+    // this object's symbol list.
+    const auto *Indices = reinterpret_cast<const ulittle32_t *>(Raw.data());
+    size_t NumIndices = Raw.size() / 4;
+    ArrayRef<Symbol *> ObjSymbols = File->getSymbols();
+    for (size_t I = 0; I != NumIndices; ++I) {
+      uint32_t SymIndex = Indices[I];
+      if (SymIndex >= ObjSymbols.size()) {
+        warn("ignoring invalid symbol table index in section " +
+             C->getSectionName() + " in object " + toString(File));
+        continue;
+      }
+      // Only symbols that made it into the final link are recorded.
+      Symbol *Sym = ObjSymbols[SymIndex];
+      if (Sym && Sym->isLive())
+        addSymbolToRVASet(TableSymbols, cast<Defined>(Sym));
+    }
+  }
+}
+// Emits an RVA table for TableSymbols into .rdata, unless the set is empty.
+// The symbol named TableSym is replaced with a synthetic symbol pointing at
+// the new chunk so that base relocations can be emitted for it and section
+// relative relocations resolved; the symbol named CountSym receives the
+// chunk size divided by 4.
+void Writer::maybeAddRVATable(SymbolRVASet TableSymbols, StringRef TableSym,
+                              StringRef CountSym) {
+  if (TableSymbols.empty())
+    return;
+  auto *Table = make<RVATableChunk>(std::move(TableSymbols));
+  RdataSec->addChunk(Table);
+  Symbol *TableDef = Symtab->findUnderscore(TableSym);
+  Symbol *CountDef = Symtab->findUnderscore(CountSym);
+  replaceSymbol<DefinedSynthetic>(TableDef, TableDef->getName(), Table);
+  auto *Count = cast<DefinedAbsolute>(CountDef);
+  Count->setVA(Table->getSize() / 4);
+}
+// MinGW specific. Collects the relocations that turned out to be imported
+// from a DLL even though the code didn't expect it, emits the table the
+// runtime uses to fix them up, and defines the synthetic symbols the runtime
+// uses to find that table.
+void Writer::createRuntimePseudoRelocs() {
+  std::vector<RuntimePseudoReloc> Relocs;
+  // Only live section chunks contribute.
+  for (Chunk *C : Symtab->getChunks())
+    if (auto *Sec = dyn_cast<SectionChunk>(C))
+      if (Sec->Live)
+        Sec->getRuntimePseudoRelocs(Relocs);
+  if (!Relocs.empty())
+    log("Writing " + Twine(Relocs.size()) + " runtime pseudo relocations");
+  // The table chunk and the end marker are added even when Relocs is empty.
+  auto *Table = make<PseudoRelocTableChunk>(Relocs);
+  RdataSec->addChunk(Table);
+  auto *EndMarker = make<EmptyChunk>();
+  RdataSec->addChunk(EndMarker);
+  Symbol *ListHead = Symtab->findUnderscore("__RUNTIME_PSEUDO_RELOC_LIST__");
+  Symbol *ListEnd =
+      Symtab->findUnderscore("__RUNTIME_PSEUDO_RELOC_LIST_END__");
+  replaceSymbol<DefinedSynthetic>(ListHead, ListHead->getName(), Table);
+  replaceSymbol<DefinedSynthetic>(ListEnd, ListEnd->getName(), EndMarker);
+}
+// MinGW specific.
+// Brackets the .ctors and .dtors lists with their sentinels: a (uintptr_t)-1
+// pointer at the start and a (uintptr_t)0 pointer at the end. __CTOR_LIST__
+// and __DTOR_LIST__ are then pointed at the respective start sentinel.
+void Writer::insertCtorDtorSymbols() {
+  auto *CtorHead = make<AbsolutePointerChunk>(-1);
+  auto *CtorTail = make<AbsolutePointerChunk>(0);
+  auto *DtorHead = make<AbsolutePointerChunk>(-1);
+  auto *DtorTail = make<AbsolutePointerChunk>(0);
+  // Head sentinel goes first, tail sentinel is appended.
+  CtorsSec->insertChunkAtStart(CtorHead);
+  CtorsSec->addChunk(CtorTail);
+  DtorsSec->insertChunkAtStart(DtorHead);
+  DtorsSec->addChunk(DtorTail);
+  Symbol *CtorSym = Symtab->findUnderscore("__CTOR_LIST__");
+  Symbol *DtorSym = Symtab->findUnderscore("__DTOR_LIST__");
+  replaceSymbol<DefinedSynthetic>(CtorSym, CtorSym->getName(), CtorHead);
+  replaceSymbol<DefinedSynthetic>(DtorSym, DtorSym->getName(), DtorHead);
+}
+// Applies the /section options, which let users overwrite section
+// attributes: every output section whose name matches an entry in
+// Config->Section gets that entry's permissions.
+void Writer::setSectionPermissions() {
+  for (auto &Override : Config->Section) {
+    for (OutputSection *Sec : OutputSections) {
+      if (Sec->Name != Override.first)
+        continue;
+      Sec->setPermissions(Override.second);
+    }
+  }
+}
+// Write section contents to a mmap'ed file.
+void Writer::writeSections() {
+  // Record the number of sections to apply section index relocations
+  // against absolute symbols. See applySecIdx in Chunks.cpp.
+  DefinedAbsolute::NumOutputSections = OutputSections.size();
+  uint8_t *FileStart = Buffer->getBufferStart();
+  for (OutputSection *Sec : OutputSections) {
+    uint8_t *Base = FileStart + Sec->getFileOff();
+    // Pre-fill code sections with INT3 (0xCC) so that the gaps between
+    // functions are not left as NUL bytes, which can be interpreted as ADD
+    // instructions.
+    const bool IsCode =
+        (Sec->Header.Characteristics & IMAGE_SCN_CNT_CODE) != 0;
+    if (IsCode)
+      memset(Base, 0xCC, Sec->getRawSize());
+    // Chunks of one section are written in parallel.
+    auto Emit = [&](Chunk *C) { C->writeTo(Base); };
+    for_each(parallel::par, Sec->Chunks.begin(), Sec->Chunks.end(), Emit);
+  }
+}
+// Fills in the timestamp fields of the image and, for MinGW debug builds
+// without a PDB, a synthetic build id.
+//
+// There are two important parts to the build ID.
+// 1) If building with debug info, the COFF debug directory contains a
+//    timestamp as well as a Guid and Age of the PDB.
+// 2) In all cases, the PE COFF file header also contains a timestamp.
+// For reproducibility, instead of a timestamp we want to use a hash of the
+// PE contents.
+void Writer::writeBuildId() {
+  if (Config->Debug) {
+    // BuildId->BuildId was filled in when the PDB was written.
+    assert(BuildId && "BuildId is not set!");
+  }
+  const bool SyntheticBuildId =
+      Config->MinGW && Config->Debug && Config->PDBPath.empty();
+  // At this point the only fields in the COFF file which remain unset are the
+  // "timestamp" in the COFF file header, and the ones in the coff debug
+  // directory, so the file can be hashed now and the hash written to the
+  // various timestamp fields afterwards.
+  uint64_t Hash = 0;
+  if (Config->Repro || SyntheticBuildId) {
+    StringRef Image(reinterpret_cast<const char *>(Buffer->getBufferStart()),
+                    Buffer->getBufferSize());
+    Hash = xxHash64(Image);
+  }
+  // Reproducible builds use the truncated hash as the timestamp.
+  uint32_t Timestamp =
+      Config->Repro ? static_cast<uint32_t>(Hash) : Config->Timestamp;
+  if (SyntheticBuildId) {
+    // For MinGW builds without a PDB file, we still generate a build id
+    // to allow associating a crash dump to the executable.
+    BuildId->BuildId->PDB70.CVSignature = OMF::Signature::PDB70;
+    BuildId->BuildId->PDB70.Age = 1;
+    memcpy(BuildId->BuildId->PDB70.Signature, &Hash, 8);
+    // xxhash only gives us 8 bytes, so put some fixed data in the other half.
+    memcpy(&BuildId->BuildId->PDB70.Signature[8], "LLD PDB.", 8);
+  }
+  if (DebugDirectory)
+    DebugDirectory->setTimeDateStamp(Timestamp);
+  // The COFF file header follows the DOS stub and the PE magic.
+  uint8_t *HeaderPos =
+      Buffer->getBufferStart() + DOSStubSize + sizeof(PEMagic);
+  auto *CoffHeader = reinterpret_cast<coff_file_header *>(HeaderPos);
+  CoffHeader->TimeDateStamp = Timestamp;
+}
+// Sort .pdata section contents according to PE/COFF spec 5.5.
+// The bytes between the first and the last .pdata chunk in the output buffer
+// are sorted in place by each entry's Begin field. Does nothing when there
+// is no .pdata chunk.
+void Writer::sortExceptionTable() {
+  if (!FirstPdata)
+    return;
+  // Maps a chunk to its address inside the output buffer.
+  // We assume .pdata contains function table entries only.
+  auto AddrInBuffer = [&](Chunk *C) {
+    OutputSection *OS = C->getOutputSection();
+    return Buffer->getBufferStart() + OS->getFileOff() + C->getRVA() -
+           OS->getRVA();
+  };
+  uint8_t *First = AddrInBuffer(FirstPdata);
+  uint8_t *Last = AddrInBuffer(LastPdata) + LastPdata->getSize();
+  if (Config->Machine == AMD64) {
+    // Three 32-bit fields per entry.
+    struct X64Entry { ulittle32_t Begin, End, Unwind; };
+    auto ByBegin = [](const X64Entry &L, const X64Entry &R) {
+      return L.Begin < R.Begin;
+    };
+    sort(parallel::par, reinterpret_cast<X64Entry *>(First),
+         reinterpret_cast<X64Entry *>(Last), ByBegin);
+  } else if (Config->Machine == ARMNT || Config->Machine == ARM64) {
+    // Two 32-bit fields per entry.
+    struct ARMEntry { ulittle32_t Begin, Unwind; };
+    auto ByBegin = [](const ARMEntry &L, const ARMEntry &R) {
+      return L.Begin < R.Begin;
+    };
+    sort(parallel::par, reinterpret_cast<ARMEntry *>(First),
+         reinterpret_cast<ARMEntry *>(Last), ByBegin);
+  } else {
+    errs() << "warning: don't know how to handle .pdata.\n";
+  }
+}
+// The CRT section contains, among other things, the array of function
+// pointers that initialize every global variable that is not trivially
+// constructed. The CRT calls them one after the other prior to invoking
+// main().
+// As per C++ spec, 3.6.2/2.3,
+// "Variables with ordered initialization defined within a single
+// translation unit shall be initialized in the order of their definitions
+// in the translation unit"
+// It is therefore critical to sort the chunks containing the function
+// pointers in the order that they are listed in the object file (top to
+// bottom), otherwise global objects might not be initialized in the
+// correct order.
+//
+// Chunks is sorted in place with a stable sort; when Config->Verbose is set
+// the resulting order is logged.
+void Writer::sortCRTSectionChunks(std::vector<Chunk *> &Chunks) {
+  // A chunk is ordered before another one only when both come from the same
+  // object file (same buffer identifier) and it has the lower section number.
+  auto SectionChunkOrder = [](const Chunk *A, const Chunk *B) {
+    auto SA = dyn_cast<SectionChunk>(A);
+    auto SB = dyn_cast<SectionChunk>(B);
+    assert(SA && SB && "Non-section chunks in CRT section!");
+    StringRef SAObj = SA->File->MB.getBufferIdentifier();
+    StringRef SBObj = SB->File->MB.getBufferIdentifier();
+    return SAObj == SBObj && SA->getSectionNumber() < SB->getSectionNumber();
+  };
+  std::stable_sort(Chunks.begin(), Chunks.end(), SectionChunkOrder);
+  if (Config->Verbose) {
+    for (Chunk *C : Chunks) {
+      // CRT sections are expected to hold SectionChunks only, so use cast<>
+      // rather than dereferencing an unchecked dyn_cast<> result.
+      auto *SC = cast<SectionChunk>(C);
+      log(" " + SC->File->MB.getBufferIdentifier().str() +
+          ", SectionID: " + Twine(SC->getSectionNumber()));
+    }
+  }
+}
+// Returns the first output section called Name, or nullptr when none
+// matches.
+OutputSection *Writer::findSection(StringRef Name) {
+  OutputSection *Found = nullptr;
+  for (OutputSection *Sec : OutputSections) {
+    if (Sec->Name == Name) {
+      Found = Sec;
+      break;
+    }
+  }
+  return Found;
+}
+// Sums the raw sizes of all output sections flagged as containing
+// initialized data.
+uint32_t Writer::getSizeOfInitializedData() {
+  uint32_t Total = 0;
+  for (OutputSection *Sec : OutputSections) {
+    const bool HasInitData =
+        (Sec->Header.Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA) != 0;
+    if (!HasInitData)
+      continue;
+    Total += Sec->getRawSize();
+  }
+  return Total;
+}
+// Add base relocations to .reloc section.
+// Does nothing unless the output is relocatable. Existing .reloc chunks are
+// dropped first, then the table is rebuilt one output section at a time.
+void Writer::addBaserels() {
+  if (!Config->Relocatable)
+    return;
+  RelocSec->Chunks.clear();
+  std::vector<Baserel> Locs;
+  for (OutputSection *Sec : OutputSections) {
+    // Start every section with an empty list.
+    Locs.clear();
+    // Discardable sections do not contribute base relocations.
+    const bool Discardable =
+        (Sec->Header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) != 0;
+    if (Discardable)
+      continue;
+    // Collect all locations for base relocations.
+    for (Chunk *C : Sec->Chunks)
+      C->getBaserels(&Locs);
+    // Add the addresses to .reloc section.
+    if (Locs.empty())
+      continue;
+    addBaserelBlocks(Locs);
+  }
+}
+// Add addresses to .reloc section. Note that addresses are grouped by page:
+// every maximal run of consecutive entries of V whose RVAs fall on the same
+// page becomes one BaserelChunk. An empty V adds nothing.
+void Writer::addBaserelBlocks(std::vector<Baserel> &V) {
+  // V[0] is read below, so an empty list must be rejected first.
+  if (V.empty())
+    return;
+  const uint32_t Mask = ~uint32_t(PageSize - 1);
+  uint32_t Page = V[0].RVA & Mask;
+  // [I, J) is the run of entries on the current page.
+  size_t I = 0, J = 1;
+  for (size_t E = V.size(); J < E; ++J) {
+    uint32_t P = V[J].RVA & Mask;
+    if (P == Page)
+      continue;
+    // Page changed: flush the finished run and start a new one at J.
+    RelocSec->addChunk(make<BaserelChunk>(Page, &V[I], &V[0] + J));
+    I = J;
+    Page = P;
+  }
+  // Flush the last run. It is never empty: I is only ever set to a value of
+  // J that is below V.size(), and J equals V.size() here.
+  RelocSec->addChunk(make<BaserelChunk>(Page, &V[I], &V[0] + J));
+}
diff --git a/contrib/llvm/tools/lld/COFF/Writer.h b/contrib/llvm/tools/lld/COFF/Writer.h
new file mode 100644
index 000000000000..727582480c91
--- /dev/null
+++ b/contrib/llvm/tools/lld/COFF/Writer.h
@@ -0,0 +1,75 @@
+//===- Writer.h -------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Chunks.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/COFF.h"
+#include <chrono>
+#include <cstdint>
+#include <vector>
+namespace lld {
+namespace coff {
+static const int PageSize = 4096; // Page size in bytes; Writer.cpp groups base relocations by this granularity.
+void writeResult();
+// OutputSection represents a section in an output file. It is a
+// container of chunks; OutputSection and Chunk have a 1:N relationship.
+// A chunk cannot belong to more than one OutputSection. The writer
+// creates multiple OutputSections and assigns them unique,
+// non-overlapping file offsets and RVAs.
+class OutputSection {
+ OutputSection(llvm::StringRef N, uint32_t Chars) : Name(N) {
+ Header.Characteristics = Chars;
+ }
+ void addChunk(Chunk *C);
+ void insertChunkAtStart(Chunk *C);
+ void merge(OutputSection *Other);
+ void addPermissions(uint32_t C);
+ void setPermissions(uint32_t C);
+ uint64_t getRVA() { return Header.VirtualAddress; } // RVA as stored in Header.
+ uint64_t getFileOff() { return Header.PointerToRawData; } // File offset as stored in Header.
+ void writeHeaderTo(uint8_t *Buf);
+ // Returns the size of this section in an executable memory image.
+ // This may be smaller than the raw size (the raw size is a multiple
+ // of the disk sector size, so there may be padding at the end), or may be
+ // larger (in that case, the loader reserves space after the end
+ // of the raw data).
+ uint64_t getVirtualSize() { return Header.VirtualSize; }
+ // Returns the size of the section in the output file.
+ uint64_t getRawSize() { return Header.SizeOfRawData; }
+ // Sets the offset into the string table where this section's name is stored.
+ // Used only when the name is longer than 8 bytes.
+ void setStringTableOff(uint32_t V) { StringTableOff = V; }
+ // N.B. The section index is one-based.
+ uint32_t SectionIndex = 0;
+ llvm::StringRef Name;
+ llvm::object::coff_section Header = {};
+ std::vector<Chunk *> Chunks;
+ std::vector<Chunk *> OrigChunks;
+ uint32_t StringTableOff = 0; // Written by setStringTableOff().
diff --git a/contrib/llvm/tools/lld/Common/Args.cpp b/contrib/llvm/tools/lld/Common/Args.cpp
new file mode 100644
index 000000000000..3f0671d72a66
--- /dev/null
+++ b/contrib/llvm/tools/lld/Common/Args.cpp
@@ -0,0 +1,73 @@
+//===- Args.cpp -----------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/Args.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Support/Path.h"
+using namespace llvm;
+using namespace lld;
+// Returns the value of the last occurrence of option Key parsed as a decimal
+// integer. Returns Default when the option is absent; reports an error and
+// returns 0 when the value is not a number.
+int lld::args::getInteger(opt::InputArgList &Args, unsigned Key, int Default) {
+  auto *Last = Args.getLastArg(Key);
+  if (Last == nullptr)
+    return Default;
+  StringRef Text = Last->getValue();
+  int Parsed;
+  if (!to_integer(Text, Parsed, 10)) {
+    StringRef Spelling = Args.getArgString(Last->getIndex());
+    error(Spelling + ": number expected, but got '" + Text + "'");
+    return 0;
+  }
+  return Parsed;
+}
+// Collects the values of every occurrence of option Id, in the order
+// Args.filtered() yields them.
+std::vector<StringRef> lld::args::getStrings(opt::InputArgList &Args, int Id) {
+  std::vector<StringRef> Values;
+  for (auto *Occurrence : Args.filtered(Id))
+    Values.emplace_back(Occurrence->getValue());
+  return Values;
+}
+// Looks for an occurrence of option Id whose value has the form "Key=<n>",
+// scanning occurrences in reverse order, and returns <n>. When <n> does not
+// parse, an error is reported and Default is returned. Default is also
+// returned when no occurrence names Key.
+uint64_t lld::args::getZOptionValue(opt::InputArgList &Args, int Id,
+                                    StringRef Key, uint64_t Default) {
+  for (auto *Occurrence : Args.filtered_reverse(Id)) {
+    std::pair<StringRef, StringRef> Parts =
+        StringRef(Occurrence->getValue()).split('=');
+    if (Parts.first != Key)
+      continue;
+    // Parsed keeps Default when the number cannot be read.
+    uint64_t Parsed = Default;
+    bool Ok = to_integer(Parts.second, Parsed);
+    if (!Ok)
+      error("invalid " + Key + ": " + Parts.second);
+    return Parsed;
+  }
+  return Default;
+}
+// Splits the buffer at '\n', trims each piece, and returns the pieces that
+// are neither empty nor start with '#'.
+std::vector<StringRef> lld::args::getLines(MemoryBufferRef MB) {
+  SmallVector<StringRef, 0> Pieces;
+  MB.getBuffer().split(Pieces, '\n');
+  std::vector<StringRef> Lines;
+  for (StringRef Piece : Pieces) {
+    StringRef Line = Piece.trim();
+    if (Line.empty() || Line[0] == '#')
+      continue;
+    Lines.push_back(Line);
+  }
+  return Lines;
+}
+// Returns the stem of Path when it ends in ".exe" (compared
+// case-insensitively), otherwise its filename component.
+StringRef lld::args::getFilenameWithoutExe(StringRef Path) {
+  const bool HasExeSuffix = Path.endswith_lower(".exe");
+  return HasExeSuffix ? sys::path::stem(Path) : sys::path::filename(Path);
+}
diff --git a/contrib/llvm/tools/lld/Common/CMakeLists.txt b/contrib/llvm/tools/lld/Common/CMakeLists.txt
new file mode 100644
index 000000000000..a45fe209f06f
--- /dev/null
+++ b/contrib/llvm/tools/lld/Common/CMakeLists.txt
@@ -0,0 +1,33 @@
+ set(tablegen_deps intrinsics_gen)
+ Args.cpp
+ ErrorHandler.cpp
+ Memory.cpp
+ Reproduce.cpp
+ Strings.cpp
+ TargetOptionsCommandFlags.cpp
+ Threads.cpp
+ Timer.cpp
+ Version.cpp
+ ${LLD_INCLUDE_DIR}/lld/Common
+ Codegen
+ Core
+ Demangle
+ MC
+ Option
+ Support
+ Target
+ ${tablegen_deps}
+ )
diff --git a/contrib/llvm/tools/lld/Common/ErrorHandler.cpp b/contrib/llvm/tools/lld/Common/ErrorHandler.cpp
new file mode 100644
index 000000000000..c059516daf94
--- /dev/null
+++ b/contrib/llvm/tools/lld/Common/ErrorHandler.cpp
@@ -0,0 +1,144 @@
+//===- ErrorHandler.cpp ---------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Threads.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include <mutex>
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+using namespace llvm;
+using namespace lld;
+// The functions defined in this file can be called from multiple threads,
+// but outs() or errs() are not thread-safe. We protect them using a mutex.
+static std::mutex Mu;
+// Prints "\n" to ErrorOS when the message passed to the previous call
+// contained a newline, then remembers whether Msg contains one. This
+// separates multi-line error messages from what follows them.
+static void newline(raw_ostream *ErrorOS, const Twine &Msg) {
+  // True if the previous message contained "\n".
+  static bool PrevWasMultiline;
+  if (PrevWasMultiline)
+    *ErrorOS << "\n";
+  std::string Text = Msg.str();
+  PrevWasMultiline = Text.find('\n') != std::string::npos;
+}
+// Returns the ErrorHandler instance, a function-local static.
+ErrorHandler &lld::errorHandler() {
+  static ErrorHandler Instance;
+  return Instance;
+}
+// Terminates the process with exit code Val via _exit(), after discarding
+// the pending output buffer (if any), shutting LLVM down and flushing the
+// standard streams.
+void lld::exitLld(int Val) {
+  // Delete any temporary file, while keeping the memory mapping open.
+  auto &Pending = errorHandler().OutputBuffer;
+  if (Pending)
+    Pending->discard();
+  // Dealloc/destroy ManagedStatic variables before calling
+  // _exit(). In a non-LTO build, this is a nop. In an LTO
+  // build allows us to get the output of -time-passes.
+  llvm_shutdown();
+  outs().flush();
+  errs().flush();
+  _exit(Val);
+}
+// Renders DI to text and forwards it according to its severity: errors go to
+// error(), warnings to warn(), remarks and notes to message().
+void lld::diagnosticHandler(const DiagnosticInfo &DI) {
+  SmallString<128> Text;
+  raw_svector_ostream Stream(Text);
+  DiagnosticPrinterRawOStream Printer(Stream);
+  DI.print(Printer);
+  switch (DI.getSeverity()) {
+  case DS_Error:
+    error(Text);
+    return;
+  case DS_Warning:
+    warn(Text);
+    return;
+  case DS_Remark:
+  case DS_Note:
+    message(Text);
+    return;
+  }
+}
+void lld::checkError(Error E) {
+ handleAllErrors(std::move(E),
+ [&](ErrorInfoBase &EIB) { error(EIB.message()); });
+// Writes "<LogName>: " followed by S to ErrorOS. With ColorDiagnostics set,
+// S is written in color C (bold) and the color is reset afterwards.
+void ErrorHandler::print(StringRef S, raw_ostream::Colors C) {
+  raw_ostream &OS = *ErrorOS;
+  OS << LogName << ": ";
+  if (!ColorDiagnostics) {
+    OS << S;
+    return;
+  }
+  OS.changeColor(C, true);
+  OS << S;
+  OS.resetColor();
+}
+// Writes "<LogName>: <Msg>" to ErrorOS, but only when Verbose is set. The
+// write happens under the file's mutex.
+void ErrorHandler::log(const Twine &Msg) {
+  if (!Verbose)
+    return;
+  std::lock_guard<std::mutex> Lock(Mu);
+  *ErrorOS << LogName << ": " << Msg << "\n";
+}
+// Writes Msg and a newline to outs() and flushes it, under the file's mutex.
+void ErrorHandler::message(const Twine &Msg) {
+  std::lock_guard<std::mutex> Lock(Mu);
+  raw_ostream &Out = outs();
+  Out << Msg << "\n";
+  Out.flush();
+}
+// Reports Msg as a warning on ErrorOS. With FatalWarnings set, the message
+// is routed through error() instead.
+void ErrorHandler::warn(const Twine &Msg) {
+  if (!FatalWarnings) {
+    std::lock_guard<std::mutex> Lock(Mu);
+    newline(ErrorOS, Msg);
+    print("warning: ", raw_ostream::MAGENTA);
+    *ErrorOS << Msg << "\n";
+    return;
+  }
+  error(Msg);
+}
+void ErrorHandler::error(const Twine &Msg) {
+ std::lock_guard<std::mutex> Lock(Mu);
+ newline(ErrorOS, Msg);
+ if (ErrorLimit == 0 || ErrorCount < ErrorLimit) {
+ print("error: ", raw_ostream::RED);
+ *ErrorOS << Msg << "\n";
+ } else if (ErrorCount == ErrorLimit) {
+ print("error: ", raw_ostream::RED);
+ *ErrorOS << ErrorLimitExceededMsg << "\n";
+ if (ExitEarly)
+ exitLld(1);
+ }
+ ++ErrorCount;
+void ErrorHandler::fatal(const Twine &Msg) {
+ error(Msg);
+ exitLld(1);
diff --git a/contrib/llvm/tools/lld/Common/Memory.cpp b/contrib/llvm/tools/lld/Common/Memory.cpp
new file mode 100644
index 000000000000..efc5bcc2218b
--- /dev/null
+++ b/contrib/llvm/tools/lld/Common/Memory.cpp
@@ -0,0 +1,23 @@
+//===- Memory.cpp ---------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/Memory.h"
+using namespace llvm;
+using namespace lld;
+BumpPtrAllocator lld::BAlloc; // Reset by freeArena().
+StringSaver lld::Saver{BAlloc}; // Constructed over BAlloc.
+std::vector<SpecificAllocBase *> lld::SpecificAllocBase::Instances; // freeArena() calls reset() on each entry.
+// Resets every allocator listed in SpecificAllocBase::Instances, then
+// resets BAlloc.
+void lld::freeArena() {
+  auto &Allocators = SpecificAllocBase::Instances;
+  for (size_t I = 0, E = Allocators.size(); I != E; ++I)
+    Allocators[I]->reset();
+  BAlloc.Reset();
+}
diff --git a/contrib/llvm/tools/lld/Common/Reproduce.cpp b/contrib/llvm/tools/lld/Common/Reproduce.cpp
new file mode 100644
index 000000000000..7be4ea6bb98b
--- /dev/null
+++ b/contrib/llvm/tools/lld/Common/Reproduce.cpp
@@ -0,0 +1,66 @@
+//===- Reproduce.cpp - Utilities for creating reproducers -----------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/Reproduce.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+using namespace lld;
+using namespace llvm;
+using namespace llvm::sys;
+// Makes Path absolute and then strips the leading "/". For example,
+// "../foo.o" is converted to "home/john/foo.o", assuming that the current
+// directory is "/home/john/bar". The result is separated by forward slashes
+// even on Windows, to avoid a mess with backslash-as-escape and
+// backslash-as-path-separator. Path is returned unchanged when it cannot be
+// made absolute.
+std::string lld::relativeToRoot(StringRef Path) {
+  SmallString<128> Abs = Path;
+  if (fs::make_absolute(Abs))
+    return Path.str();
+  path::remove_dots(Abs, /*remove_dot_dot=*/true);
+  // This is Windows specific. root_name() returns a drive letter
+  // (e.g. "c:") or a UNC name (//net). We want to keep it as part
+  // of the result, without the trailing ':' or the leading "//".
+  StringRef Root = path::root_name(Abs);
+  StringRef KeptRoot;
+  if (Root.endswith(":"))
+    KeptRoot = Root.drop_back();
+  else if (Root.startswith("//"))
+    KeptRoot = Root.substr(2);
+  SmallString<128> Res = KeptRoot;
+  path::append(Res, path::relative_path(Abs));
+  return path::convert_to_slash(Res);
+}
+// Wraps S in double quotes when it contains a space character; otherwise
+// returns it as is.
+std::string lld::quote(StringRef S) {
+  if (!S.contains(' '))
+    return S.str();
+  return ("\"" + S + "\"").str();
+}
+// Returns relativeToRoot(S) when S names an existing file system entry,
+// otherwise S itself.
+std::string lld::rewritePath(StringRef S) {
+  return fs::exists(S) ? relativeToRoot(S) : S.str();
+}
+// Renders Arg as text: its spelling alone when it has no values, otherwise
+// the spelling followed by its quoted value. Joined-style options get no
+// space between the two; every other style gets one.
+std::string lld::toString(const opt::Arg &Arg) {
+  std::string Out = Arg.getSpelling();
+  if (Arg.getNumValues() == 0)
+    return Out;
+  if (Arg.getOption().getRenderStyle() != opt::Option::RenderJoinedStyle)
+    Out += " ";
+  Out += quote(Arg.getValue());
+  return Out;
+}
diff --git a/contrib/llvm/tools/lld/Common/Strings.cpp b/contrib/llvm/tools/lld/Common/Strings.cpp
new file mode 100644
index 000000000000..6f74865b7f42
--- /dev/null
+++ b/contrib/llvm/tools/lld/Common/Strings.cpp
@@ -0,0 +1,104 @@
+//===- Strings.cpp -------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/Strings.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/LLVM.h"
+#include "llvm/Demangle/Demangle.h"
+#include "llvm/Support/GlobPattern.h"
+#include <algorithm>
+#include <mutex>
+#include <vector>
+using namespace llvm;
+using namespace lld;
+// Returns the demangled C++ symbol name for Name, or None when Name does not
+// start with "_Z" or cannot be demangled.
+Optional<std::string> lld::demangleItanium(StringRef Name) {
+  // itaniumDemangle can be used to demangle strings other than symbol
+  // names which do not necessarily start with "_Z". Name can be
+  // either a C or C++ symbol. Don't call itaniumDemangle if the name
+  // does not look like a C++ symbol name to avoid getting unexpected
+  // result for a C symbol that happens to match a mangled type name.
+  if (!Name.startswith("_Z"))
+    return None;
+  std::string Mangled = Name.str();
+  char *Demangled =
+      itaniumDemangle(Mangled.c_str(), nullptr, nullptr, nullptr);
+  if (Demangled == nullptr)
+    return None;
+  // Copy the text out, then release the buffer with free().
+  std::string Result(Demangled);
+  free(Demangled);
+  return Result;
+}
+// Returns the demangled form of an MSVC-mangled C++ name, or None when Name
+// (after an optional "__imp_" prefix) does not start with "?" or cannot be
+// demangled. Names that carried "__imp_" get "__declspec(dllimport) "
+// prepended to the result.
+Optional<std::string> lld::demangleMSVC(StringRef Name) {
+  const bool IsImport = Name.consume_front("__imp_");
+  // Demangle only C++ names.
+  if (!Name.startswith("?"))
+    return None;
+  std::string Mangled = Name.str();
+  char *Demangled =
+      microsoftDemangle(Mangled.c_str(), nullptr, nullptr, nullptr);
+  if (Demangled == nullptr)
+    return None;
+  // Copy the text out, then release the buffer with free().
+  std::string Result(Demangled);
+  free(Demangled);
+  std::string Prefix;
+  if (IsImport)
+    Prefix = "__declspec(dllimport) ";
+  return Prefix + Result;
+}
+// Compiles every string in Pat into a GlobPattern. Strings that fail to
+// compile are reported through error() and left out.
+StringMatcher::StringMatcher(ArrayRef<StringRef> Pat) {
+  for (StringRef Glob : Pat) {
+    Expected<GlobPattern> Compiled = GlobPattern::create(Glob);
+    if (Compiled) {
+      Patterns.push_back(*Compiled);
+      continue;
+    }
+    error(toString(Compiled.takeError()));
+  }
+}
+// Returns true when at least one stored pattern matches S.
+bool StringMatcher::match(StringRef S) const {
+  bool Matched = false;
+  for (const GlobPattern &Candidate : Patterns) {
+    if (Candidate.match(S)) {
+      Matched = true;
+      break;
+    }
+  }
+  return Matched;
+}
+// Converts a hex string (e.g. "deadbeef") to a vector of bytes, consuming
+// the input up to two characters at a time. On the first group that is not
+// hexadecimal, an error is reported and an empty vector is returned.
+std::vector<uint8_t> lld::parseHex(StringRef S) {
+  std::vector<uint8_t> Bytes;
+  for (StringRef Rest = S; !Rest.empty(); Rest = Rest.substr(2)) {
+    StringRef Pair = Rest.substr(0, 2);
+    uint8_t Byte;
+    if (!to_integer(Pair, Byte, 16)) {
+      error("not a hexadecimal value: " + Pair);
+      return {};
+    }
+    Bytes.push_back(Byte);
+  }
+  return Bytes;
+}
+// Returns true if S is valid as a C language identifier: non-empty, starting
+// with a letter or '_', and continuing with letters, digits or '_' only.
+bool lld::isValidCIdentifier(StringRef S) {
+  if (S.empty())
+    return false;
+  if (!isAlpha(S[0]) && S[0] != '_')
+    return false;
+  for (char C : S.drop_front())
+    if (C != '_' && !isAlnum(C))
+      return false;
+  return true;
+}
+// Writes the contents of Buffer to the file at Path. When the file cannot be
+// created, an error is reported and nothing is written.
+void lld::saveBuffer(StringRef Buffer, const Twine &Path) {
+  std::error_code EC;
+  raw_fd_ostream OS(Path.str(), EC, sys::fs::OpenFlags::F_None);
+  if (EC) {
+    error("cannot create " + Path + ": " + EC.message());
+    // The stream was not opened, so there is nothing valid to write to;
+    // streaming into it would only produce a second failure.
+    return;
+  }
+  OS << Buffer;
+}
diff --git a/contrib/llvm/tools/lld/Common/TargetOptionsCommandFlags.cpp b/contrib/llvm/tools/lld/Common/TargetOptionsCommandFlags.cpp
new file mode 100644
index 000000000000..7a3fc510704f
--- /dev/null
+++ b/contrib/llvm/tools/lld/Common/TargetOptionsCommandFlags.cpp
@@ -0,0 +1,35 @@
+//===-- TargetOptionsCommandFlags.cpp ---------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file exists as a place for global variables defined in LLVM's
+// CodeGen/CommandFlags.inc. By putting the resulting object file in
+// an archive and linking with it, the definitions will automatically be
+// included when needed and skipped when already present.
+#include "lld/Common/TargetOptionsCommandFlags.h"
+#include "llvm/CodeGen/CommandFlags.inc"
+#include "llvm/Target/TargetOptions.h"
+// Externally visible wrappers around the helpers and flags that come from
+// llvm/CodeGen/CommandFlags.inc. They let other code use that functionality
+// without including CommandFlags.inc itself, which would lead to multiple
+// definitions of the command line flags.
+
+// Forwards to the global InitTargetOptionsFromCodeGenFlags().
+llvm::TargetOptions lld::InitTargetOptionsFromCodeGenFlags() {
+  return ::InitTargetOptionsFromCodeGenFlags();
+}
+// Forwards to getCodeModel().
+llvm::Optional<llvm::CodeModel::Model> lld::GetCodeModelFromCMModel() {
+  return getCodeModel();
+}
+// Forwards to the global getCPUStr().
+std::string lld::GetCPUStr() {
+  return ::getCPUStr();
+}
+// Returns the contents of the global MAttrs flag.
+std::vector<std::string> lld::GetMAttrs() {
+  return ::MAttrs;
+}
diff --git a/contrib/llvm/tools/lld/Common/Threads.cpp b/contrib/llvm/tools/lld/Common/Threads.cpp
new file mode 100644
index 000000000000..c64b8c38b909
--- /dev/null
+++ b/contrib/llvm/tools/lld/Common/Threads.cpp
@@ -0,0 +1,12 @@
+//===- Threads.cpp --------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/Threads.h"
+bool lld::ThreadsEnabled = true;
diff --git a/contrib/llvm/tools/lld/Common/Timer.cpp b/contrib/llvm/tools/lld/Common/Timer.cpp
new file mode 100644
index 000000000000..89f9829b47cf
--- /dev/null
+++ b/contrib/llvm/tools/lld/Common/Timer.cpp
@@ -0,0 +1,80 @@
+//===- Timer.cpp ----------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/Timer.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/Support/Format.h"
+using namespace lld;
+using namespace llvm;
+ScopedTimer::ScopedTimer(Timer &T) : T(&T) { T.start(); }
+void ScopedTimer::stop() {
+ if (!T) // Already stopped; nothing to do.
+ return;
+ T->stop();
+ T = nullptr; // Later stop() calls, including the destructor's, are no-ops.
+ScopedTimer::~ScopedTimer() { stop(); }
+Timer::Timer(llvm::StringRef Name) : Name(Name), Parent(nullptr) {}
+Timer::Timer(llvm::StringRef Name, Timer &Parent)
+ : Name(Name), Parent(&Parent) {}
+void Timer::start() {
+ if (Parent && Total.count() == 0) // No time accumulated yet.
+ Parent->Children.push_back(this); // NOTE(review): repeats if restarted while Total is still 0 - confirm.
+ StartTime = std::chrono::high_resolution_clock::now();
+void Timer::stop() {
+ Total += (std::chrono::high_resolution_clock::now() - StartTime);
+Timer &Timer::root() {
+ static Timer RootTimer("Total Link Time");
+ return RootTimer;
+void Timer::print() {
+ double TotalDuration = static_cast<double>(root().millis());
+ // We want to print the grand total under all the intermediate phases, so we
+ // print all children first, then print the total under that.
+ for (const auto &Child : Children)
+ Child->print(1, TotalDuration);
+ message(std::string(49, '-'));
+ root().print(0, root().millis(), false);
+double Timer::millis() const {
+ return std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(
+ Total)
+ .count();
+void Timer::print(int Depth, double TotalDuration, bool Recurse) const {
+ double P = 100.0 * millis() / TotalDuration; // This timer's share of TotalDuration, in percent.
+ SmallString<32> Str;
+ llvm::raw_svector_ostream Stream(Str);
+ std::string S = std::string(Depth * 2, ' ') + Name + std::string(":"); // Two spaces of indent per nesting level.
+ Stream << format("%-30s%5d ms (%5.1f%%)", S.c_str(), (int)millis(), P);
+ message(Str);
+ if (Recurse) { // Children are printed one level deeper against the same total.
+ for (const auto &Child : Children)
+ Child->print(Depth + 1, TotalDuration);
+ }
diff --git a/contrib/llvm/tools/lld/Common/Version.cpp b/contrib/llvm/tools/lld/Common/Version.cpp
new file mode 100644
index 000000000000..6226c9a2fac6
--- /dev/null
+++ b/contrib/llvm/tools/lld/Common/Version.cpp
@@ -0,0 +1,43 @@
+//===- lib/Common/Version.cpp - LLD Version Number ---------------*- C++-=====//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file defines several version-related utility functions for LLD.
+#include "lld/Common/Version.h"
+using namespace llvm;
+// Returns an SVN repository path, which is usually "trunk".
+static std::string getRepositoryPath() {
+ size_t Pos = S.find("lld/");
+ if (Pos != StringRef::npos)
+ return S.substr(Pos + 4);
+ return S;
+// Returns an SVN repository name, e.g., " (trunk 284614)"
+// or an empty string if no repository info is available.
+static std::string getRepository() {
+ std::string Repo = getRepositoryPath();
+ std::string Rev = LLD_REVISION_STRING;
+ if (Repo.empty() && Rev.empty())
+ return "";
+ if (!Repo.empty() && !Rev.empty())
+ return " (" + Repo + " " + Rev + ")";
+ return " (" + Repo + Rev + ")"; // Exactly one of the two is non-empty here, so no separator is needed.
+// Returns a version string, e.g., "LLD 4.0 (trunk 284614)".
+std::string lld::getLLDVersion() {
+ return "LLD " + std::string(LLD_VERSION_STRING) + getRepository();
diff --git a/contrib/llvm/tools/lld/ELF/AArch64ErrataFix.cpp b/contrib/llvm/tools/lld/ELF/AArch64ErrataFix.cpp
new file mode 100644
index 000000000000..ac753cb58265
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/AArch64ErrataFix.cpp
@@ -0,0 +1,652 @@
+//===- AArch64ErrataFix.cpp -----------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file implements Section Patching for the purpose of working around
+// errata in CPUs. The general principle is that an erratum sequence of one or
+// more instructions is detected in the instruction stream, one of the
+// instructions in the sequence is replaced with a branch to a patch sequence
+// of replacement instructions. At the end of the replacement sequence the
+// patch branches back to the instruction stream.
+// This technique is only suitable for fixing an erratum when:
+// - There is a set of necessary conditions required to trigger the erratum that
+// can be detected at static link time.
+// - There is a set of replacement instructions that can be used to remove at
+// least one of the necessary conditions that trigger the erratum.
+// - We can overwrite an instruction in the erratum sequence with a branch to
+// the replacement sequence.
+// - We can place the replacement sequence within range of the branch.
+// FIXME:
+// - The implementation here only supports one patch, the AArch64 Cortex-A53
+// erratum 843419 that affects r0p0, r0p1, r0p2 and r0p4 versions of the core.
+// To keep the initial version simple there is no support for multiple
+// architectures or selection of different patches.
+#include "AArch64ErrataFix.h"
+#include "Config.h"
+#include "LinkerScript.h"
+#include "OutputSections.h"
+#include "Relocations.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+#include "lld/Common/Memory.h"
+#include "lld/Common/Strings.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::object;
+using namespace llvm::support;
+using namespace llvm::support::endian;
+using namespace lld;
+using namespace lld::elf;
+// Helper functions to identify instructions and conditions needed to trigger
+// the Cortex-A53-843419 erratum.
+// ADRP
+// | 1 | immlo (2) | 1 | 0 0 0 0 | immhi (19) | Rd (5) |
+static bool isADRP(uint32_t Instr) {
+ return (Instr & 0x9f000000) == 0x90000000;
+// Load and store bit patterns from ARMv8-A ARM ARM.
+// Instructions appear in order of appearance starting from table in
+// C4.1.3 Loads and Stores.
+// All loads and stores have 1 at bit position 27 and 0 at bit position 25.
+// | op0 x op1 (2) | 1 op2 0 op3 (2) | x | op4 (5) | xxxx | op5 (2) | x (10) |
+static bool isLoadStoreClass(uint32_t Instr) {
+ return (Instr & 0x0a000000) == 0x08000000;
+// LDN/STN multiple no offset
+// | 0 Q 00 | 1100 | 0 L 00 | 0000 | opcode (4) | size (2) | Rn (5) | Rt (5) |
+// LDN/STN multiple post-indexed
+// | 0 Q 00 | 1100 | 1 L 0 | Rm (5)| opcode (4) | size (2) | Rn (5) | Rt (5) |
+// L == 0 for stores.
+// Utility routine to decode opcode field of LDN/STN multiple structure
+// instructions to find the ST1 instructions.
+// opcode == 0010 ST1 4 registers.
+// opcode == 0110 ST1 3 registers.
+// opcode == 0111 ST1 1 register.
+// opcode == 1010 ST1 2 registers.
+static bool isST1MultipleOpcode(uint32_t Instr) {
+ return (Instr & 0x0000f000) == 0x00002000 ||
+ (Instr & 0x0000f000) == 0x00006000 ||
+ (Instr & 0x0000f000) == 0x00007000 ||
+ (Instr & 0x0000f000) == 0x0000a000;
+static bool isST1Multiple(uint32_t Instr) {
+ return (Instr & 0xbfff0000) == 0x0c000000 && isST1MultipleOpcode(Instr);
+// Writes to Rn (writeback).
+static bool isST1MultiplePost(uint32_t Instr) {
+ return (Instr & 0xbfe00000) == 0x0c800000 && isST1MultipleOpcode(Instr);
+// LDN/STN single no offset
+// | 0 Q 00 | 1101 | 0 L R 0 | 0000 | opc (3) S | size (2) | Rn (5) | Rt (5)|
+// LDN/STN single post-indexed
+// | 0 Q 00 | 1101 | 1 L R | Rm (5) | opc (3) S | size (2) | Rn (5) | Rt (5)|
+// L == 0 for stores
+// Utility routine to decode opcode field of LDN/STN single structure
+// instructions to find the ST1 instructions.
+// R == 0 for ST1 and ST3, R == 1 for ST2 and ST4.
+// opcode == 000 ST1 8-bit.
+// opcode == 010 ST1 16-bit.
+// opcode == 100 ST1 32 or 64-bit (Size determines which).
+static bool isST1SingleOpcode(uint32_t Instr) {
+ return (Instr & 0x0040e000) == 0x00000000 ||
+ (Instr & 0x0040e000) == 0x00004000 ||
+ (Instr & 0x0040e000) == 0x00008000;
+static bool isST1Single(uint32_t Instr) {
+ return (Instr & 0xbfff0000) == 0x0d000000 && isST1SingleOpcode(Instr);
+// Writes to Rn (writeback).
+static bool isST1SinglePost(uint32_t Instr) {
+ return (Instr & 0xbfe00000) == 0x0d800000 && isST1SingleOpcode(Instr);
+static bool isST1(uint32_t Instr) {
+ return isST1Multiple(Instr) || isST1MultiplePost(Instr) ||
+ isST1Single(Instr) || isST1SinglePost(Instr);
+// Load/store exclusive
+// | size (2) 00 | 1000 | o2 L o1 | Rs (5) | o0 | Rt2 (5) | Rn (5) | Rt (5) |
+// L == 0 for Stores.
+static bool isLoadStoreExclusive(uint32_t Instr) {
+ return (Instr & 0x3f000000) == 0x08000000;
+static bool isLoadExclusive(uint32_t Instr) {
+ return (Instr & 0x3f400000) == 0x08400000;
+// Load register literal
+// | opc (2) 01 | 1 V 00 | imm19 | Rt (5) |
+static bool isLoadLiteral(uint32_t Instr) {
+ return (Instr & 0x3b000000) == 0x18000000;
+// Load/store no-allocate pair
+// (offset)
+// | opc (2) 10 | 1 V 00 | 0 L | imm7 | Rt2 (5) | Rn (5) | Rt (5) |
+// L == 0 for stores.
+// Never writes to register
+static bool isSTNP(uint32_t Instr) {
+ return (Instr & 0x3bc00000) == 0x28000000;
+// Load/store register pair
+// (post-indexed)
+// | opc (2) 10 | 1 V 00 | 1 L | imm7 | Rt2 (5) | Rn (5) | Rt (5) |
+// L == 0 for stores, V == 0 for Scalar, V == 1 for Simd/FP
+// Writes to Rn.
+static bool isSTPPost(uint32_t Instr) {
+ return (Instr & 0x3bc00000) == 0x28800000;
+// (offset)
+// | opc (2) 10 | 1 V 01 | 0 L | imm7 | Rt2 (5) | Rn (5) | Rt (5) |
+static bool isSTPOffset(uint32_t Instr) {
+ return (Instr & 0x3bc00000) == 0x29000000;
+// (pre-index)
+// | opc (2) 10 | 1 V 01 | 1 L | imm7 | Rt2 (5) | Rn (5) | Rt (5) |
+// Writes to Rn.
+static bool isSTPPre(uint32_t Instr) {
+ return (Instr & 0x3bc00000) == 0x29800000;
+static bool isSTP(uint32_t Instr) {
+ return isSTPPost(Instr) || isSTPOffset(Instr) || isSTPPre(Instr);
+// Load/store register (unscaled immediate)
+// | size (2) 11 | 1 V 00 | opc (2) 0 | imm9 | 00 | Rn (5) | Rt (5) |
+// V == 0 for Scalar, V == 1 for Simd/FP.
+static bool isLoadStoreUnscaled(uint32_t Instr) {
+ return (Instr & 0x3b000c00) == 0x38000000;
+// Load/store register (immediate post-indexed)
+// | size (2) 11 | 1 V 00 | opc (2) 0 | imm9 | 01 | Rn (5) | Rt (5) |
+static bool isLoadStoreImmediatePost(uint32_t Instr) {
+ return (Instr & 0x3b200c00) == 0x38000400;
+// Load/store register (unprivileged)
+// | size (2) 11 | 1 V 00 | opc (2) 0 | imm9 | 10 | Rn (5) | Rt (5) |
+static bool isLoadStoreUnpriv(uint32_t Instr) {
+ return (Instr & 0x3b200c00) == 0x38000800;
+// Load/store register (immediate pre-indexed)
+// | size (2) 11 | 1 V 00 | opc (2) 0 | imm9 | 11 | Rn (5) | Rt (5) |
+static bool isLoadStoreImmediatePre(uint32_t Instr) {
+ return (Instr & 0x3b200c00) == 0x38000c00;
+// Load/store register (register offset)
+// | size (2) 11 | 1 V 00 | opc (2) 1 | Rm (5) | option (3) S | 10 | Rn | Rt |
+static bool isLoadStoreRegisterOff(uint32_t Instr) {
+ return (Instr & 0x3b200c00) == 0x38200800;
+// Load/store register (unsigned immediate)
+// | size (2) 11 | 1 V 01 | opc (2) | imm12 | Rn (5) | Rt (5) |
+static bool isLoadStoreRegisterUnsigned(uint32_t Instr) {
+ return (Instr & 0x3b000000) == 0x39000000;
+// Rt is always in bit position 0 - 4.
+static uint32_t getRt(uint32_t Instr) { return (Instr & 0x1f); }
+// Rn is always in bit position 5 - 9.
+static uint32_t getRn(uint32_t Instr) { return (Instr >> 5) & 0x1f; }
+// C4.1.2 Branches, Exception Generating and System instructions
+// | op0 (3) 1 | 01 op1 (4) | x (22) |
+// op0 == 010 101 op1 == 0xxx Conditional Branch.
+// op0 == 110 101 op1 == 1xxx Unconditional Branch Register.
+// op0 == x00 101 op1 == xxxx Unconditional Branch immediate.
+// op0 == x01 101 op1 == 0xxx Compare and branch immediate.
+// op0 == x01 101 op1 == 1xxx Test and branch immediate.
+static bool isBranch(uint32_t Instr) {
+ return ((Instr & 0xfe000000) == 0xd6000000) || // Uncond branch reg.
+ ((Instr & 0xfe000000) == 0x54000000) || // Cond branch.
+ ((Instr & 0x7c000000) == 0x14000000) || // Uncond branch imm.
+ ((Instr & 0x7c000000) == 0x34000000); // Compare and test branch.
+static bool isV8SingleRegisterNonStructureLoadStore(uint32_t Instr) {
+ return isLoadStoreUnscaled(Instr) || isLoadStoreImmediatePost(Instr) ||
+ isLoadStoreUnpriv(Instr) || isLoadStoreImmediatePre(Instr) ||
+ isLoadStoreRegisterOff(Instr) || isLoadStoreRegisterUnsigned(Instr);
+// Note that this function refers to v8.0 only and does not include the
+// additional load and store instructions added in later revisions of
+// the architecture such as the Atomic memory operations introduced
+// in v8.1.
+static bool isV8NonStructureLoad(uint32_t Instr) {
+ if (isLoadExclusive(Instr))
+ return true;
+ if (isLoadLiteral(Instr))
+ return true;
+ else if (isV8SingleRegisterNonStructureLoadStore(Instr)) {
+ // For Load and Store single register, Loads are derived from a
+ // combination of the Size, V and Opc fields.
+ uint32_t Size = (Instr >> 30) & 0xff; // Only bits 31:30 survive the shift, so Size is 0..3.
+ uint32_t V = (Instr >> 26) & 0x1;
+ uint32_t Opc = (Instr >> 22) & 0x3;
+ // For the load and store instructions that we are decoding.
+ // Opc == 0 are all stores.
+ // Opc != 0 with a couple of exceptions are loads. The exceptions are:
+ // Size == 00 (0), V == 1, Opc == 10 (2) which is a store and
+ // Size == 11 (3), V == 0, Opc == 10 (2) which is a prefetch.
+ return Opc != 0 && !(Size == 0 && V == 1 && Opc == 2) &&
+ !(Size == 3 && V == 0 && Opc == 2);
+ }
+ return false;
+// The following decode instructions are only complete up to the instructions
+// needed for errata 843419.
+// Instruction with writeback updates the index register after the load/store.
+static bool hasWriteback(uint32_t Instr) {
+ return isLoadStoreImmediatePre(Instr) || isLoadStoreImmediatePost(Instr) ||
+ isSTPPre(Instr) || isSTPPost(Instr) || isST1SinglePost(Instr) ||
+ isST1MultiplePost(Instr);
+// For the load and store class of instructions, a load can write to the
+// destination register, a load and a store can write to the base register when
+// the instruction has writeback.
+static bool doesLoadStoreWriteToReg(uint32_t Instr, uint32_t Reg) {
+ return (isV8NonStructureLoad(Instr) && getRt(Instr) == Reg) ||
+ (hasWriteback(Instr) && getRn(Instr) == Reg);
+// Scanner for Cortex-A53 errata 843419
+// Full details are available in the Cortex A53 MPCore revision 0 Software
+// Developers Errata Notice (ARM-EPM-048406).
+// The instruction sequence that triggers the erratum is common in compiled
+// AArch64 code, however it is sensitive to the offset of the sequence within
+// a 4k page. This means that by scanning and fixing the patch after we have
+// assigned addresses we only need to disassemble and fix instances of the
+// sequence in the range of affected offsets.
+// In summary the erratum conditions are a series of 4 instructions:
+// 1.) An ADRP instruction that writes to register Rn with low 12 bits of
+// address of instruction either 0xff8 or 0xffc.
+// 2.) A load or store instruction that can be:
+// - A single register load or store, of either integer or vector registers.
+// - An STP or STNP, of either integer or vector registers.
+// - An Advanced SIMD ST1 store instruction.
+// - Must not write to Rn, but may optionally read from it.
+// 3.) An optional instruction that is not a branch and does not write to Rn.
+// 4.) A load or store from the Load/store register (unsigned immediate) class
+// that uses Rn as the base address register.
+// Note that we do not attempt to scan for Sequence 2 as described in the
+// Software Developers Errata Notice as this has been assessed to be extremely
+// unlikely to occur in compiled code. This matches gold and ld.bfd behavior.
+// Return true if the Instruction sequence Adrp, Instr2, and Instr4 match
+// the erratum sequence. The Adrp, Instr2 and Instr4 correspond to 1.), 2.),
+// and 4.) in the Scanner for Cortex-A53 errata comment above.
+static bool is843419ErratumSequence(uint32_t Instr1, uint32_t Instr2,
+ uint32_t Instr4) {
+ if (!isADRP(Instr1))
+ return false;
+ uint32_t Rn = getRt(Instr1); // ADRP's destination Rd occupies bits 0-4, the field getRt() extracts.
+ return isLoadStoreClass(Instr2) &&
+ (isLoadStoreExclusive(Instr2) || isLoadLiteral(Instr2) ||
+ isV8SingleRegisterNonStructureLoadStore(Instr2) || isSTP(Instr2) ||
+ isSTNP(Instr2) || isST1(Instr2)) &&
+ !doesLoadStoreWriteToReg(Instr2, Rn) && // Instr2 must leave the ADRP result intact.
+ isLoadStoreRegisterUnsigned(Instr4) && getRn(Instr4) == Rn; // Instr4 uses the ADRP result as its base.
+// Scan the instruction sequence starting at Offset Off from the base of
+// InputSection IS. We update Off in this function rather than in the caller as
+// we can skip ahead much further into the section when we know how many
+// instructions we've scanned.
+// Return the offset of the load or store instruction in IS that we want to
+// patch or 0 if no patch required.
+static uint64_t scanCortexA53Errata843419(InputSection *IS, uint64_t &Off,
+ uint64_t Limit) {
+ uint64_t ISAddr = IS->getVA(0);
+ // Advance Off so that (ISAddr + Off) modulo 0x1000 is at least 0xff8.
+ uint64_t InitialPageOff = (ISAddr + Off) & 0xfff;
+ if (InitialPageOff < 0xff8)
+ Off += 0xff8 - InitialPageOff;
+ bool OptionalAllowed = Limit - Off > 12; // True when a 4th instruction fits; only used after the range check below.
+ if (Off >= Limit || Limit - Off < 12) {
+ // Need at least 3 4-byte sized instructions to trigger erratum.
+ Off = Limit;
+ return 0;
+ }
+ uint64_t PatchOff = 0;
+ const uint8_t *Buf = IS->data().begin();
+ const ulittle32_t *InstBuf = reinterpret_cast<const ulittle32_t *>(Buf + Off);
+ uint32_t Instr1 = *InstBuf++;
+ uint32_t Instr2 = *InstBuf++;
+ uint32_t Instr3 = *InstBuf++;
+ if (is843419ErratumSequence(Instr1, Instr2, Instr3)) {
+ PatchOff = Off + 8; // No optional instruction: patch the third instruction.
+ } else if (OptionalAllowed && !isBranch(Instr3)) {
+ uint32_t Instr4 = *InstBuf++;
+ if (is843419ErratumSequence(Instr1, Instr2, Instr4))
+ PatchOff = Off + 12; // Optional instruction present: patch the fourth instruction.
+ }
+ if (((ISAddr + Off) & 0xfff) == 0xff8)
+ Off += 4; // Scanned from page offset 0xff8: try 0xffc next.
+ else
+ Off += 0xffc; // Otherwise (0xffc for 4-byte aligned code): skip to 0xff8 in the next page.
+ return PatchOff;
+class lld::elf::Patch843419Section : public SyntheticSection {
+ Patch843419Section(InputSection *P, uint64_t Off);
+ void writeTo(uint8_t *Buf) override;
+ size_t getSize() const override { return 8; }
+ uint64_t getLDSTAddr() const;
+ // The Section we are patching.
+ const InputSection *Patchee;
+ // The offset of the instruction in the Patchee section we are patching.
+ uint64_t PatcheeOffset;
+ // A label for the start of the Patch that we can use as a relocation target.
+ Symbol *PatchSym;
+lld::elf::Patch843419Section::Patch843419Section(InputSection *P, uint64_t Off)
+ ".text.patch"),
+ Patchee(P), PatcheeOffset(Off) {
+ this->Parent = P->getParent();
+ PatchSym = addSyntheticLocal(
+ Saver.save("__CortexA53843419_" + utohexstr(getLDSTAddr())), STT_FUNC, 0,
+ getSize(), *this);
+ addSyntheticLocal(Saver.save("$x"), STT_NOTYPE, 0, 0, *this);
+uint64_t lld::elf::Patch843419Section::getLDSTAddr() const {
+ return Patchee->getVA(PatcheeOffset);
+void lld::elf::Patch843419Section::writeTo(uint8_t *Buf) {
+ // Copy the instruction that we will be replacing with a branch in the
+ // Patchee Section.
+ write32le(Buf, read32le(Patchee->data().begin() + PatcheeOffset));
+ // Apply any relocation transferred from the original PatcheeSection.
+ // For a SyntheticSection Buf already has OutSecOff added, but relocateAlloc
+ // also adds OutSecOff so we need to subtract to avoid double counting.
+ this->relocateAlloc(Buf - OutSecOff, Buf - OutSecOff + getSize());
+ // Return address is the next instruction after the one we have just copied.
+ uint64_t S = getLDSTAddr() + 4; // Branch target: the instruction after the patched one.
+ uint64_t P = PatchSym->getVA() + 4; // Branch location: the second word of this patch.
+ Target->relocateOne(Buf + 4, R_AARCH64_JUMP26, S - P); // Encode the displacement S - P at Buf + 4.
+void AArch64Err843419Patcher::init() {
+ // The AArch64 ABI permits data in executable sections. We must avoid scanning
+ // this data as if it were instructions to avoid false matches. We use the
+ // mapping symbols in the InputObjects to identify this data, caching the
+ // results in SectionMap so we don't have to recalculate it each pass.
+ // The ABI Section 4.5.4 Mapping symbols; defines local symbols that describe
+ // half open intervals [Symbol Value, Next Symbol Value) of code and data
+ // within sections. If there is no next symbol then the half open interval is
+ // [Symbol Value, End of section). The type, code or data, is determined by
+ // the mapping symbol name, $x for code, $d for data.
+ auto IsCodeMapSymbol = [](const Symbol *B) {
+ return B->getName() == "$x" || B->getName().startswith("$x.");
+ };
+ auto IsDataMapSymbol = [](const Symbol *B) {
+ return B->getName() == "$d" || B->getName().startswith("$d.");
+ };
+ // Collect mapping symbols for every executable InputSection.
+ for (InputFile *File : ObjectFiles) {
+ auto *F = cast<ObjFile<ELF64LE>>(File);
+ for (Symbol *B : F->getLocalSymbols()) {
+ auto *Def = dyn_cast<Defined>(B);
+ if (!Def)
+ continue;
+ if (!IsCodeMapSymbol(Def) && !IsDataMapSymbol(Def))
+ continue;
+ if (auto *Sec = dyn_cast_or_null<InputSection>(Def->Section))
+ if (Sec->Flags & SHF_EXECINSTR)
+ SectionMap[Sec].push_back(Def);
+ }
+ }
+ // For each InputSection make sure the mapping symbols are sorted in
+ // ascending order and free from consecutive runs of mapping symbols with
+ // the same type. For example we must remove the redundant $d.1 from $x.0
+ // $d.0 $d.1 $x.1.
+ for (auto &KV : SectionMap) {
+ std::vector<const Defined *> &MapSyms = KV.second;
+ if (MapSyms.size() <= 1)
+ continue;
+ std::stable_sort(
+ MapSyms.begin(), MapSyms.end(),
+ [](const Defined *A, const Defined *B) { return A->Value < B->Value; });
+ MapSyms.erase(
+ std::unique(MapSyms.begin(), MapSyms.end(),
+ [=](const Defined *A, const Defined *B) {
+ return (IsCodeMapSymbol(A) && IsCodeMapSymbol(B)) ||
+ (IsDataMapSymbol(A) && IsDataMapSymbol(B));
+ }),
+ MapSyms.end());
+ }
+ Initialized = true;
+// Insert the PatchSections we have created back into the
+// InputSectionDescription. As inserting patches alters the addresses of
+// InputSections that follow them, we try and place the patches after all the
+// executable sections, although we may need to insert them earlier if the
+// InputSectionDescription is larger than the maximum branch range.
+void AArch64Err843419Patcher::insertPatches(
+ InputSectionDescription &ISD, std::vector<Patch843419Section *> &Patches) {
+ uint64_t ISLimit; // End offset of the section currently being visited.
+ uint64_t PrevISLimit = ISD.Sections.front()->OutSecOff;
+ uint64_t PatchUpperBound = PrevISLimit + Target->getThunkSectionSpacing();
+ uint64_t OutSecAddr = ISD.Sections.front()->getParent()->Addr;
+ // Set the OutSecOff of patches to the place where we want to insert them.
+ // We use a similar strategy to Thunk placement. Place patches roughly
+ // every multiple of maximum branch range.
+ auto PatchIt = Patches.begin();
+ auto PatchEnd = Patches.end();
+ for (const InputSection *IS : ISD.Sections) {
+ ISLimit = IS->OutSecOff + IS->getSize();
+ if (ISLimit > PatchUpperBound) {
+ while (PatchIt != PatchEnd) {
+ if ((*PatchIt)->getLDSTAddr() - OutSecAddr >= PrevISLimit) // Patched instruction lies at or beyond this point.
+ break;
+ (*PatchIt)->OutSecOff = PrevISLimit;
+ ++PatchIt;
+ }
+ PatchUpperBound = PrevISLimit + Target->getThunkSectionSpacing();
+ }
+ PrevISLimit = ISLimit;
+ }
+ for (; PatchIt != PatchEnd; ++PatchIt) { // Remaining patches go after the last section.
+ (*PatchIt)->OutSecOff = ISLimit;
+ }
+ // Merge all patch sections. We use the OutSecOff assigned above to
+ // determine the insertion point. This is ok as we only merge into an
+ // InputSectionDescription once per pass, and at the end of the pass
+ // assignAddresses() will recalculate all the OutSecOff values.
+ std::vector<InputSection *> Tmp;
+ Tmp.reserve(ISD.Sections.size() + Patches.size());
+ auto MergeCmp = [](const InputSection *A, const InputSection *B) {
+ if (A->OutSecOff < B->OutSecOff)
+ return true;
+ if (A->OutSecOff == B->OutSecOff && isa<Patch843419Section>(A) &&
+ !isa<Patch843419Section>(B))
+ return true; // On equal offsets a patch orders before a non-patch section.
+ return false;
+ };
+ std::merge(ISD.Sections.begin(), ISD.Sections.end(), Patches.begin(),
+ Patches.end(), std::back_inserter(Tmp), MergeCmp);
+ ISD.Sections = std::move(Tmp);
+// Given an erratum sequence that starts at address AdrpAddr, with an
+// instruction that we need to patch at PatcheeOffset from the start of
+// InputSection IS, create a Patch843419 Section and add it to the
+// Patches that we need to insert.
+static void implementPatch(uint64_t AdrpAddr, uint64_t PatcheeOffset,
+ InputSection *IS,
+ std::vector<Patch843419Section *> &Patches) {
+ // There may be a relocation at the same offset that we are patching. There
+ // are four cases that we need to consider.
+ // Case 1: R_AARCH64_JUMP26 branch relocation. We have already patched this
+ // instance of the erratum on a previous pass and altered the relocation. We
+ // have nothing more to do.
+ // Case 2: A TLS Relaxation R_RELAX_TLS_IE_TO_LE. In this case the ADRP that
+ // we read will be transformed into a MOVZ later so we actually don't match
+ // the sequence and have nothing more to do.
+ // Case 3: A load/store register (unsigned immediate) class relocation. There
+ // are two of these, R_AARCH64_LDST64_ABS_LO12_NC and
+ // R_AARCH64_LD64_GOT_LO12_NC, and they are both absolute. We need to add the
+ // same relocation to the patch, and replace the original relocation with an
+ // R_AARCH64_JUMP26 branch relocation.
+ // Case 4: No relocation. We must create a new R_AARCH64_JUMP26 branch
+ // relocation at the offset.
+ auto RelIt = std::find_if(
+ IS->Relocations.begin(), IS->Relocations.end(),
+ [=](const Relocation &R) { return R.Offset == PatcheeOffset; });
+ if (RelIt != IS->Relocations.end() &&
+ (RelIt->Type == R_AARCH64_JUMP26 || RelIt->Expr == R_RELAX_TLS_IE_TO_LE))
+ return;
+ log("detected cortex-a53-843419 erratum sequence starting at " +
+ utohexstr(AdrpAddr) + " in unpatched output.");
+ auto *PS = make<Patch843419Section>(IS, PatcheeOffset);
+ Patches.push_back(PS);
+ auto MakeRelToPatch = [](uint64_t Offset, Symbol *PatchSym) {
+ return Relocation{R_PC, R_AARCH64_JUMP26, Offset, 0, PatchSym};
+ };
+ if (RelIt != IS->Relocations.end()) {
+ PS->Relocations.push_back(
+ {RelIt->Expr, RelIt->Type, 0, RelIt->Addend, RelIt->Sym});
+ *RelIt = MakeRelToPatch(PatcheeOffset, PS->PatchSym);
+ } else
+ IS->Relocations.push_back(MakeRelToPatch(PatcheeOffset, PS->PatchSym));
+// Scan all the instructions in InputSectionDescription, for each instance of
+// the erratum sequence create a Patch843419Section. We return the list of
+// Patch843419Sections that need to be applied to ISD.
+std::vector<Patch843419Section *>
+ InputSectionDescription &ISD) {
+ std::vector<Patch843419Section *> Patches;
+ for (InputSection *IS : ISD.Sections) {
+ // LLD doesn't use the erratum sequence in SyntheticSections.
+ if (isa<SyntheticSection>(IS))
+ continue;
+ // Use SectionMap to make sure we only scan code and not inline data.
+ // We have already sorted MapSyms in ascending order and removed consecutive
+ // mapping symbols of the same type. Our range of executable instructions to
+ // scan is therefore [CodeSym->Value, DataSym->Value) or [CodeSym->Value,
+ // section size).
+ std::vector<const Defined *> &MapSyms = SectionMap[IS];
+ auto CodeSym = llvm::find_if(MapSyms, [&](const Defined *MS) {
+ return MS->getName().startswith("$x");
+ });
+ while (CodeSym != MapSyms.end()) {
+ auto DataSym = std::next(CodeSym);
+ uint64_t Off = (*CodeSym)->Value;
+ uint64_t Limit =
+ (DataSym == MapSyms.end()) ? IS->data().size() : (*DataSym)->Value;
+ while (Off < Limit) {
+ uint64_t StartAddr = IS->getVA(Off);
+ if (uint64_t PatcheeOffset = scanCortexA53Errata843419(IS, Off, Limit))
+ implementPatch(StartAddr, PatcheeOffset, IS, Patches);
+ }
+ if (DataSym == MapSyms.end())
+ break;
+ CodeSym = std::next(DataSym);
+ }
+ }
+ return Patches;
+// For each InputSectionDescription make one pass over the executable sections
+// looking for the erratum sequence; creating a synthetic Patch843419Section
+// for each instance found. We insert these synthetic patch sections after the
+// executable code in each InputSectionDescription.
+// PreConditions:
+// The Output and Input Sections have had their final addresses assigned.
+// PostConditions:
+// Returns true if at least one patch was added. The addresses of the
+// Output and Input Sections may have been changed.
+// Returns false if no patches were required and no changes were made.
+bool AArch64Err843419Patcher::createFixes() {
+ if (Initialized == false) // Build the mapping-symbol cache on the first call only.
+ init();
+ bool AddressesChanged = false;
+ for (OutputSection *OS : OutputSections) {
+ if (!(OS->Flags & SHF_ALLOC) || !(OS->Flags & SHF_EXECINSTR)) // Only allocated, executable output sections are scanned.
+ continue;
+ for (BaseCommand *BC : OS->SectionCommands)
+ if (auto *ISD = dyn_cast<InputSectionDescription>(BC)) {
+ std::vector<Patch843419Section *> Patches =
+ patchInputSectionDescription(*ISD);
+ if (!Patches.empty()) {
+ insertPatches(*ISD, Patches);
+ AddressesChanged = true; // Inserted patches shift the sections that follow them.
+ }
+ }
+ }
+ return AddressesChanged;
diff --git a/contrib/llvm/tools/lld/ELF/AArch64ErrataFix.h b/contrib/llvm/tools/lld/ELF/AArch64ErrataFix.h
new file mode 100644
index 000000000000..edd154d4cab3
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/AArch64ErrataFix.h
@@ -0,0 +1,51 @@
+//===- AArch64ErrataFix.h ---------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/LLVM.h"
+#include <map>
+#include <vector>
+namespace lld {
+namespace elf {
+class Defined;
+class InputSection;
+struct InputSectionDescription;
+class OutputSection;
+class Patch843419Section;
+class AArch64Err843419Patcher {
+ // return true if Patches have been added to the OutputSections.
+ bool createFixes();
+ std::vector<Patch843419Section *>
+ patchInputSectionDescription(InputSectionDescription &ISD);
+ void insertPatches(InputSectionDescription &ISD,
+ std::vector<Patch843419Section *> &Patches);
+ void init();
+ // A cache of the mapping symbols defined by the InputSection sorted in order
+ // of ascending value with redundant symbols removed. These describe
+ // the ranges of code and data in an executable InputSection.
+ std::map<InputSection *, std::vector<const Defined *>> SectionMap;
+ bool Initialized = false;
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/Arch/AArch64.cpp b/contrib/llvm/tools/lld/ELF/Arch/AArch64.cpp
new file mode 100644
index 000000000000..08ffe2a08c0f
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Arch/AArch64.cpp
@@ -0,0 +1,440 @@
+//===- AArch64.cpp --------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+#include "Thunks.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Support/Endian.h"
+using namespace llvm;
+using namespace llvm::support::endian;
+using namespace llvm::ELF;
+using namespace lld;
+using namespace lld::elf;
+// Page(Expr) is the page address of the expression Expr, defined
+// as (Expr & ~0xFFF). (This applies even if the machine page size
+// supported by the platform has a different value.)
+uint64_t elf::getAArch64Page(uint64_t Expr) {
+ return Expr & ~static_cast<uint64_t>(0xFFF);
+namespace {
+class AArch64 final : public TargetInfo {
+ AArch64();
+ RelExpr getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const override;
+ RelType getDynRel(RelType Type) const override;
+ void writeGotPlt(uint8_t *Buf, const Symbol &S) const override;
+ void writePltHeader(uint8_t *Buf) const override;
+ void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
+ int32_t Index, unsigned RelOff) const override;
+ bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
+ uint64_t BranchAddr, const Symbol &S) const override;
+ uint32_t getThunkSectionSpacing() const override;
+ bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
+ bool usesOnlyLowPageBits(RelType Type) const override;
+ void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+ RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
+ RelExpr Expr) const override;
+ void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+ void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+ void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+} // namespace
+AArch64::AArch64() {
+ CopyRel = R_AARCH64_COPY;
+ RelativeRel = R_AARCH64_RELATIVE;
+ IRelativeRel = R_AARCH64_IRELATIVE;
+ GotRel = R_AARCH64_GLOB_DAT;
+ NoneRel = R_AARCH64_NONE;
+ TlsDescRel = R_AARCH64_TLSDESC;
+ TlsGotRel = R_AARCH64_TLS_TPREL64;
+ GotEntrySize = 8;
+ GotPltEntrySize = 8;
+ PltEntrySize = 16;
+ PltHeaderSize = 32;
+ DefaultMaxPageSize = 65536;
+ // Align to the 2 MiB page size (known as a superpage or huge page).
+ // FreeBSD automatically promotes 2 MiB-aligned allocations.
+ DefaultImageBase = 0x200000;
+ NeedsThunks = true;
+RelExpr AArch64::getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const {
+ switch (Type) {
+ case R_AARCH64_TLSDESC_LD64_LO12:
+ return R_TLSDESC;
+ return R_TLSDESC_CALL;
+ return R_TLS;
+ case R_AARCH64_CALL26:
+ case R_AARCH64_CONDBR19:
+ case R_AARCH64_JUMP26:
+ case R_AARCH64_TSTBR14:
+ return R_PLT_PC;
+ case R_AARCH64_PREL16:
+ case R_AARCH64_PREL32:
+ case R_AARCH64_PREL64:
+ case R_AARCH64_ADR_PREL_LO21:
+ case R_AARCH64_LD_PREL_LO19:
+ return R_PC;
+ case R_AARCH64_ADR_PREL_PG_HI21:
+ return R_AARCH64_PAGE_PC;
+ case R_AARCH64_LD64_GOT_LO12_NC:
+ return R_GOT;
+ return R_AARCH64_GOT_PAGE_PC;
+ case R_AARCH64_NONE:
+ return R_NONE;
+ default:
+ return R_ABS;
+ }
+RelExpr AArch64::adjustRelaxExpr(RelType Type, const uint8_t *Data,
+ RelExpr Expr) const {
+ if (Expr == R_RELAX_TLS_GD_TO_IE) {
+ if (Type == R_AARCH64_TLSDESC_ADR_PAGE21)
+ }
+ return Expr;
+bool AArch64::usesOnlyLowPageBits(RelType Type) const {
+ switch (Type) {
+ default:
+ return false;
+ case R_AARCH64_ADD_ABS_LO12_NC:
+ case R_AARCH64_LD64_GOT_LO12_NC:
+ case R_AARCH64_LDST128_ABS_LO12_NC:
+ case R_AARCH64_LDST16_ABS_LO12_NC:
+ case R_AARCH64_LDST32_ABS_LO12_NC:
+ case R_AARCH64_LDST64_ABS_LO12_NC:
+ case R_AARCH64_LDST8_ABS_LO12_NC:
+ case R_AARCH64_TLSDESC_LD64_LO12:
+ return true;
+ }
+RelType AArch64::getDynRel(RelType Type) const {
+ if (Type == R_AARCH64_ABS32 || Type == R_AARCH64_ABS64)
+ return Type;
+ return R_AARCH64_NONE;
+void AArch64::writeGotPlt(uint8_t *Buf, const Symbol &) const {
+ write64le(Buf, In.Plt->getVA());
+void AArch64::writePltHeader(uint8_t *Buf) const {
+ const uint8_t PltData[] = {
+ 0xf0, 0x7b, 0xbf, 0xa9, // stp x16, x30, [sp,#-16]!
+ 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.plt.got[2]))
+ 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.plt.got[2]))]
+ 0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.plt.got[2]))
+ 0x20, 0x02, 0x1f, 0xd6, // br x17
+ 0x1f, 0x20, 0x03, 0xd5, // nop
+ 0x1f, 0x20, 0x03, 0xd5, // nop
+ 0x1f, 0x20, 0x03, 0xd5 // nop
+ };
+ memcpy(Buf, PltData, sizeof(PltData));
+ uint64_t Got = In.GotPlt->getVA();
+ uint64_t Plt = In.Plt->getVA();
+ relocateOne(Buf + 4, R_AARCH64_ADR_PREL_PG_HI21,
+ getAArch64Page(Got + 16) - getAArch64Page(Plt + 4));
+ relocateOne(Buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, Got + 16);
+ relocateOne(Buf + 12, R_AARCH64_ADD_ABS_LO12_NC, Got + 16);
+void AArch64::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
+ uint64_t PltEntryAddr, int32_t Index,
+ unsigned RelOff) const {
+ const uint8_t Inst[] = {
+ 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.plt.got[n]))
+ 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.plt.got[n]))]
+ 0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.plt.got[n]))
+ 0x20, 0x02, 0x1f, 0xd6 // br x17
+ };
+ memcpy(Buf, Inst, sizeof(Inst));
+ relocateOne(Buf, R_AARCH64_ADR_PREL_PG_HI21,
+ getAArch64Page(GotPltEntryAddr) - getAArch64Page(PltEntryAddr));
+ relocateOne(Buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, GotPltEntryAddr);
+ relocateOne(Buf + 8, R_AARCH64_ADD_ABS_LO12_NC, GotPltEntryAddr);
+bool AArch64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
+ uint64_t BranchAddr, const Symbol &S) const {
+ // ELF for the ARM 64-bit architecture, section Call and Jump relocations
+ // only permits range extension thunks for R_AARCH64_CALL26 and
+ // R_AARCH64_JUMP26 relocation types.
+ if (Type != R_AARCH64_CALL26 && Type != R_AARCH64_JUMP26)
+ return false;
+ uint64_t Dst = (Expr == R_PLT_PC) ? S.getPltVA() : S.getVA();
+ return !inBranchRange(Type, BranchAddr, Dst);
+uint32_t AArch64::getThunkSectionSpacing() const {
+ // See comment in Arch/ARM.cpp for a more detailed explanation of
+ // getThunkSectionSpacing(). For AArch64 the only branches we are permitted to
+ // Thunk have a range of +/- 128 MiB
+ return (128 * 1024 * 1024) - 0x30000;
+bool AArch64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
+ if (Type != R_AARCH64_CALL26 && Type != R_AARCH64_JUMP26)
+ return true;
+ // The AArch64 call and unconditional branch instructions have a range of
+ // +/- 128 MiB.
+ uint64_t Range = 128 * 1024 * 1024;
+ if (Dst > Src) {
+ // Immediate of branch is signed.
+ Range -= 4;
+ return Dst - Src <= Range;
+ }
+ return Src - Dst <= Range;
+// Patch the 21-bit immediate of the ADR/ADRP instruction at L with Imm.
+// Bits [1:0] of Imm are placed in instruction bits [30:29] and bits [20:2] of
+// Imm are placed in instruction bits [23:5]; every other instruction bit is
+// preserved.
+static void write32AArch64Addr(uint8_t *L, uint64_t Imm) {
+ uint32_t ImmLo = (Imm & 0x3) << 29;
+ uint32_t ImmHi = (Imm & 0x1FFFFC) << 3;
+ uint64_t Mask = (0x3 << 29) | (0x1FFFFC << 3);
+ // Clear any immediate already present before merging in the new one.
+ write32le(L, (read32le(L) & ~Mask) | ImmLo | ImmHi);
+// Return bits Start..End (both inclusive) of Val, shifted right so that bit
+// Start ends up in bit 0.
+// For instance, getBits(0xF0, 4, 8) returns 0xF.
+// The field width (End + 1 - Start) must be smaller than 64, because it is
+// used directly as a shift amount when building the mask.
+static uint64_t getBits(uint64_t Val, int Start, int End) {
+ uint64_t Mask = ((uint64_t)1 << (End + 1 - Start)) - 1;
+ return (Val >> Start) & Mask;
+static void or32le(uint8_t *P, int32_t V) { write32le(P, read32le(P) | V); }
+// Update the immediate field in a AARCH64 ldr, str, and add instruction.
+static void or32AArch64Imm(uint8_t *L, uint64_t Imm) {
+ or32le(L, (Imm & 0xFFF) << 10);
+void AArch64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
+ switch (Type) {
+ case R_AARCH64_ABS16:
+ case R_AARCH64_PREL16:
+ checkIntUInt(Loc, Val, 16, Type);
+ write16le(Loc, Val);
+ break;
+ case R_AARCH64_ABS32:
+ case R_AARCH64_PREL32:
+ checkIntUInt(Loc, Val, 32, Type);
+ write32le(Loc, Val);
+ break;
+ case R_AARCH64_ABS64:
+ case R_AARCH64_GLOB_DAT:
+ case R_AARCH64_PREL64:
+ write64le(Loc, Val);
+ break;
+ case R_AARCH64_ADD_ABS_LO12_NC:
+ or32AArch64Imm(Loc, Val);
+ break;
+ case R_AARCH64_ADR_PREL_PG_HI21:
+ checkInt(Loc, Val, 33, Type);
+ write32AArch64Addr(Loc, Val >> 12);
+ break;
+ case R_AARCH64_ADR_PREL_LO21:
+ checkInt(Loc, Val, 21, Type);
+ write32AArch64Addr(Loc, Val);
+ break;
+ case R_AARCH64_JUMP26:
+ // Normally we would just write the bits of the immediate field, however
+ // when patching instructions for the cpu errata fix -fix-cortex-a53-843419
+ // we want to replace a non-branch instruction with a branch immediate
+ // instruction. By writing all the bits of the instruction including the
+ // opcode and the immediate (0 001 | 01 imm26) we can do this
+ // transformation by placing a R_AARCH64_JUMP26 relocation at the offset of
+ // the instruction we want to patch.
+ write32le(Loc, 0x14000000);
+ case R_AARCH64_CALL26:
+ checkInt(Loc, Val, 28, Type);
+ or32le(Loc, (Val & 0x0FFFFFFC) >> 2);
+ break;
+ case R_AARCH64_CONDBR19:
+ case R_AARCH64_LD_PREL_LO19:
+ checkAlignment(Loc, Val, 4, Type);
+ checkInt(Loc, Val, 21, Type);
+ or32le(Loc, (Val & 0x1FFFFC) << 3);
+ break;
+ case R_AARCH64_LDST8_ABS_LO12_NC:
+ or32AArch64Imm(Loc, getBits(Val, 0, 11));
+ break;
+ case R_AARCH64_LDST16_ABS_LO12_NC:
+ checkAlignment(Loc, Val, 2, Type);
+ or32AArch64Imm(Loc, getBits(Val, 1, 11));
+ break;
+ case R_AARCH64_LDST32_ABS_LO12_NC:
+ checkAlignment(Loc, Val, 4, Type);
+ or32AArch64Imm(Loc, getBits(Val, 2, 11));
+ break;
+ case R_AARCH64_LDST64_ABS_LO12_NC:
+ case R_AARCH64_LD64_GOT_LO12_NC:
+ case R_AARCH64_TLSDESC_LD64_LO12:
+ checkAlignment(Loc, Val, 8, Type);
+ or32AArch64Imm(Loc, getBits(Val, 3, 11));
+ break;
+ case R_AARCH64_LDST128_ABS_LO12_NC:
+ checkAlignment(Loc, Val, 16, Type);
+ or32AArch64Imm(Loc, getBits(Val, 4, 11));
+ break;
+ or32le(Loc, (Val & 0xFFFF) << 5);
+ break;
+ or32le(Loc, (Val & 0xFFFF0000) >> 11);
+ break;
+ or32le(Loc, (Val & 0xFFFF00000000) >> 27);
+ break;
+ case R_AARCH64_MOVW_UABS_G3:
+ or32le(Loc, (Val & 0xFFFF000000000000) >> 43);
+ break;
+ case R_AARCH64_TSTBR14:
+ checkInt(Loc, Val, 16, Type);
+ or32le(Loc, (Val & 0xFFFC) << 3);
+ break;
+ checkUInt(Loc, Val, 24, Type);
+ or32AArch64Imm(Loc, Val >> 12);
+ break;
+ or32AArch64Imm(Loc, Val);
+ break;
+ default:
+ error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type));
+ }
+void AArch64::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
+ // TLSDESC Global-Dynamic relocations are in the form:
+ // adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21]
+ // ldr x1, [x0, #:tlsdesc_lo12:v] [R_AARCH64_TLSDESC_LD64_LO12]
+ // add x0, x0, :tlsdesc_lo12:v [R_AARCH64_TLSDESC_ADD_LO12]
+ // .tlsdesccall [R_AARCH64_TLSDESC_CALL]
+ // blr x1
+ // And it can be optimized to:
+ // movz x0, #0x0, lsl #16
+ // movk x0, #0x10
+ // nop
+ // nop
+ // The movz/movk pair below only materializes the low 32 bits of Val (two
+ // 16-bit halves), so anything wider cannot be encoded.
+ checkUInt(Loc, Val, 32, Type);
+ switch (Type) {
+ write32le(Loc, 0xd503201f); // nop
+ return;
+ write32le(Loc, 0xd2a00000 | (((Val >> 16) & 0xffff) << 5)); // movz
+ return;
+ case R_AARCH64_TLSDESC_LD64_LO12:
+ write32le(Loc, 0xf2800000 | ((Val & 0xffff) << 5)); // movk
+ return;
+ default:
+ llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
+ }
+// Rewrite one instruction of a TLS descriptor (Global-Dynamic) sequence into
+// the corresponding instruction of the Initial-Exec sequence. Loc points at
+// the instruction being rewritten, Type is the original TLSDESC relocation
+// and Val is the value handed on to the replacement GOTTPREL relocation.
+void AArch64::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const {
+ // TLSDESC Global-Dynamic relocations are in the form:
+ // adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21]
+ // ldr x1, [x0, #:tlsdesc_lo12:v] [R_AARCH64_TLSDESC_LD64_LO12]
+ // add x0, x0, :tlsdesc_lo12:v [R_AARCH64_TLSDESC_ADD_LO12]
+ // .tlsdesccall [R_AARCH64_TLSDESC_CALL]
+ // blr x1
+ // And it can be optimized to:
+ // adrp x0, :gottprel:v
+ // ldr x0, [x0, :gottprel_lo12:v]
+ // nop
+ // nop
+ switch (Type) {
+ write32le(Loc, 0xd503201f); // nop
+ break;
+ write32le(Loc, 0x90000000); // adrp
+ relocateOne(Loc, R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, Val);
+ break;
+ case R_AARCH64_TLSDESC_LD64_LO12:
+ write32le(Loc, 0xf9400000); // ldr
+ relocateOne(Loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, Val);
+ break;
+ default:
+ // This is the GD -> IE path; the message previously named the GD -> LE
+ // relaxation, which pointed at the wrong function when it fired.
+ llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
+ }
+void AArch64::relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
+ checkUInt(Loc, Val, 32, Type);
+ // Generate MOVZ.
+ uint32_t RegNo = read32le(Loc) & 0x1f;
+ write32le(Loc, (0xd2a00000 | RegNo) | (((Val >> 16) & 0xffff) << 5));
+ return;
+ }
+ if (Type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) {
+ // Generate MOVK.
+ uint32_t RegNo = read32le(Loc) & 0x1f;
+ write32le(Loc, (0xf2800000 | RegNo) | ((Val & 0xffff) << 5));
+ return;
+ }
+ llvm_unreachable("invalid relocation for TLS IE to LE relaxation");
+TargetInfo *elf::getAArch64TargetInfo() {
+ static AArch64 Target;
+ return &Target;
diff --git a/contrib/llvm/tools/lld/ELF/Arch/AMDGPU.cpp b/contrib/llvm/tools/lld/ELF/Arch/AMDGPU.cpp
new file mode 100644
index 000000000000..a7c6c84ceecd
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Arch/AMDGPU.cpp
@@ -0,0 +1,105 @@
+//===- AMDGPU.cpp ---------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "InputFiles.h"
+#include "Symbols.h"
+#include "Target.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Support/Endian.h"
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::support::endian;
+using namespace llvm::ELF;
+using namespace lld;
+using namespace lld::elf;
+namespace {
+class AMDGPU final : public TargetInfo {
+ uint32_t calcEFlags() const override;
+ void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+ RelExpr getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const override;
+} // namespace
+ RelativeRel = R_AMDGPU_RELATIVE64;
+ GotRel = R_AMDGPU_ABS64;
+ NoneRel = R_AMDGPU_NONE;
+ GotEntrySize = 8;
+static uint32_t getEFlags(InputFile *File) {
+ return cast<ObjFile<ELF64LE>>(File)->getObj().getHeader()->e_flags;
+// Return the e_flags value shared by all input object files. If any file's
+// e_flags differ from those of the first file, report an error naming that
+// file and return 0.
+uint32_t AMDGPU::calcEFlags() const {
+ assert(!ObjectFiles.empty());
+ uint32_t Ret = getEFlags(ObjectFiles[0]);
+ // Verify that all input files have the same e_flags.
+ for (InputFile *F : makeArrayRef(ObjectFiles).slice(1)) {
+ if (Ret == getEFlags(F))
+ continue;
+ error("incompatible e_flags: " + toString(F));
+ return 0;
+ }
+ return Ret;
+void AMDGPU::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
+ switch (Type) {
+ case R_AMDGPU_ABS32:
+ case R_AMDGPU_REL32:
+ case R_AMDGPU_REL32_LO:
+ write32le(Loc, Val);
+ break;
+ case R_AMDGPU_ABS64:
+ case R_AMDGPU_REL64:
+ write64le(Loc, Val);
+ break;
+ case R_AMDGPU_REL32_HI:
+ write32le(Loc, Val >> 32);
+ break;
+ default:
+ error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type));
+ }
+RelExpr AMDGPU::getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const {
+ switch (Type) {
+ case R_AMDGPU_ABS32:
+ case R_AMDGPU_ABS64:
+ return R_ABS;
+ case R_AMDGPU_REL32:
+ case R_AMDGPU_REL32_LO:
+ case R_AMDGPU_REL32_HI:
+ case R_AMDGPU_REL64:
+ return R_PC;
+ return R_GOT_PC;
+ default:
+ return R_INVALID;
+ }
+TargetInfo *elf::getAMDGPUTargetInfo() {
+ static AMDGPU Target;
+ return &Target;
diff --git a/contrib/llvm/tools/lld/ELF/Arch/ARM.cpp b/contrib/llvm/tools/lld/ELF/Arch/ARM.cpp
new file mode 100644
index 000000000000..120caca671af
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Arch/ARM.cpp
@@ -0,0 +1,612 @@
+//===- ARM.cpp ------------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "InputFiles.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+#include "Thunks.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Support/Endian.h"
+using namespace llvm;
+using namespace llvm::support::endian;
+using namespace llvm::ELF;
+using namespace lld;
+using namespace lld::elf;
+namespace {
+class ARM final : public TargetInfo {
+ ARM();
+ uint32_t calcEFlags() const override;
+ RelExpr getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const override;
+ RelType getDynRel(RelType Type) const override;
+ int64_t getImplicitAddend(const uint8_t *Buf, RelType Type) const override;
+ void writeGotPlt(uint8_t *Buf, const Symbol &S) const override;
+ void writeIgotPlt(uint8_t *Buf, const Symbol &S) const override;
+ void writePltHeader(uint8_t *Buf) const override;
+ void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
+ int32_t Index, unsigned RelOff) const override;
+ void addPltSymbols(InputSection &IS, uint64_t Off) const override;
+ void addPltHeaderSymbols(InputSection &ISD) const override;
+ bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
+ uint64_t BranchAddr, const Symbol &S) const override;
+ uint32_t getThunkSectionSpacing() const override;
+ bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
+ void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+} // namespace
+ARM::ARM() {
+ CopyRel = R_ARM_COPY;
+ RelativeRel = R_ARM_RELATIVE;
+ IRelativeRel = R_ARM_IRELATIVE;
+ GotRel = R_ARM_GLOB_DAT;
+ NoneRel = R_ARM_NONE;
+ TlsGotRel = R_ARM_TLS_TPOFF32;
+ TlsModuleIndexRel = R_ARM_TLS_DTPMOD32;
+ TlsOffsetRel = R_ARM_TLS_DTPOFF32;
+ GotBaseSymInGotPlt = false;
+ GotEntrySize = 4;
+ GotPltEntrySize = 4;
+ PltEntrySize = 16;
+ PltHeaderSize = 32;
+ TrapInstr = {0xd4, 0xd4, 0xd4, 0xd4};
+ NeedsThunks = true;
+uint32_t ARM::calcEFlags() const {
+ // The ABIFloatType is used by loaders to detect the floating point calling
+ // convention.
+ uint32_t ABIFloatType = 0;
+ if (Config->ARMVFPArgs == ARMVFPArgKind::Base ||
+ Config->ARMVFPArgs == ARMVFPArgKind::Default)
+ else if (Config->ARMVFPArgs == ARMVFPArgKind::VFP)
+ // We don't currently use any features incompatible with EF_ARM_EABI_VER5,
+ // but we don't have any firm guarantees of conformance. Linux AArch64
+ // kernels (as of 2016) require an EABI version to be set.
+ return EF_ARM_EABI_VER5 | ABIFloatType;
+RelExpr ARM::getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const {
+ switch (Type) {
+ case R_ARM_THM_JUMP11:
+ return R_PC;
+ case R_ARM_CALL:
+ case R_ARM_JUMP24:
+ case R_ARM_PC24:
+ case R_ARM_PLT32:
+ case R_ARM_PREL31:
+ case R_ARM_THM_JUMP19:
+ case R_ARM_THM_JUMP24:
+ case R_ARM_THM_CALL:
+ return R_PLT_PC;
+ case R_ARM_GOTOFF32:
+ // (S + A) - GOT_ORG
+ return R_GOTREL;
+ case R_ARM_GOT_BREL:
+ // GOT(S) + A - GOT_ORG
+ return R_GOT_OFF;
+ case R_ARM_GOT_PREL:
+ case R_ARM_TLS_IE32:
+ // GOT(S) + A - P
+ return R_GOT_PC;
+ case R_ARM_SBREL32:
+ return R_ARM_SBREL;
+ case R_ARM_TARGET1:
+ return Config->Target1Rel ? R_PC : R_ABS;
+ case R_ARM_TARGET2:
+ if (Config->Target2 == Target2Policy::Rel)
+ return R_PC;
+ if (Config->Target2 == Target2Policy::Abs)
+ return R_ABS;
+ return R_GOT_PC;
+ case R_ARM_TLS_GD32:
+ return R_TLSGD_PC;
+ case R_ARM_TLS_LDM32:
+ return R_TLSLD_PC;
+ // B(S) + A - P
+ // FIXME: currently B(S) assumed to be .got, this may not hold for all
+ // platforms.
+ return R_GOTONLY_PC;
+ case R_ARM_REL32:
+ return R_PC;
+ case R_ARM_NONE:
+ return R_NONE;
+ case R_ARM_TLS_LE32:
+ return R_TLS;
+ case R_ARM_V4BX:
+ // V4BX is just a marker to indicate there's a "bx rN" instruction at the
+ // given address. It can be used to implement a special linker mode which
+ // rewrites ARMv4T inputs to ARMv4. Since we support only ARMv4 input and
+ // not ARMv4 output, we can just ignore it.
+ return R_HINT;
+ default:
+ return R_ABS;
+ }
+RelType ARM::getDynRel(RelType Type) const {
+ if ((Type == R_ARM_ABS32) || (Type == R_ARM_TARGET1 && !Config->Target1Rel))
+ return R_ARM_ABS32;
+ return R_ARM_NONE;
+void ARM::writeGotPlt(uint8_t *Buf, const Symbol &) const {
+ write32le(Buf, In.Plt->getVA());
+void ARM::writeIgotPlt(uint8_t *Buf, const Symbol &S) const {
+ // An ARM entry is the address of the ifunc resolver function.
+ write32le(Buf, S.getVA());
+// Long form PLT Header that does not have any restrictions on the displacement
+// of the .plt from the .plt.got.
+static void writePltHeaderLong(uint8_t *Buf) {
+ const uint8_t PltData[] = {
+ 0x04, 0xe0, 0x2d, 0xe5, // str lr, [sp,#-4]!
+ 0x04, 0xe0, 0x9f, 0xe5, // ldr lr, L2
+ 0x0e, 0xe0, 0x8f, 0xe0, // L1: add lr, pc, lr
+ 0x08, 0xf0, 0xbe, 0xe5, // ldr pc, [lr, #8]
+ 0x00, 0x00, 0x00, 0x00, // L2: .word &(.got.plt) - L1 - 8
+ 0xd4, 0xd4, 0xd4, 0xd4, // Pad to 32-byte boundary
+ 0xd4, 0xd4, 0xd4, 0xd4, // Pad to 32-byte boundary
+ 0xd4, 0xd4, 0xd4, 0xd4};
+ memcpy(Buf, PltData, sizeof(PltData));
+ uint64_t GotPlt = In.GotPlt->getVA();
+ uint64_t L1 = In.Plt->getVA() + 8;
+ write32le(Buf + 16, GotPlt - L1 - 8);
+// The default PLT header requires the .got.plt to be within 128 MiB of the
+// .plt in the positive direction.
+void ARM::writePltHeader(uint8_t *Buf) const {
+ // Use a similar sequence to that in writePlt(), the difference is the calling
+ // conventions mean we use lr instead of ip. The PLT entry is responsible for
+ // saving lr on the stack, the dynamic loader is responsible for reloading
+ // it.
+ const uint32_t PltData[] = {
+ 0xe52de004, // L1: str lr, [sp,#-4]!
+ 0xe28fe600, // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4)
+ 0xe28eea00, // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4)
+ 0xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)
+ };
+ uint64_t Offset = In.GotPlt->getVA() - In.Plt->getVA() - 4;
+ if (!llvm::isUInt<27>(Offset)) {
+ // We cannot encode the Offset, use the long form.
+ writePltHeaderLong(Buf);
+ return;
+ }
+ write32le(Buf + 0, PltData[0]);
+ write32le(Buf + 4, PltData[1] | ((Offset >> 20) & 0xff));
+ write32le(Buf + 8, PltData[2] | ((Offset >> 12) & 0xff));
+ write32le(Buf + 12, PltData[3] | (Offset & 0xfff));
+ memcpy(Buf + 16, TrapInstr.data(), 4); // Pad to 32-byte boundary
+ memcpy(Buf + 20, TrapInstr.data(), 4);
+ memcpy(Buf + 24, TrapInstr.data(), 4);
+ memcpy(Buf + 28, TrapInstr.data(), 4);
+void ARM::addPltHeaderSymbols(InputSection &IS) const {
+ addSyntheticLocal("$a", STT_NOTYPE, 0, 0, IS);
+ addSyntheticLocal("$d", STT_NOTYPE, 16, 0, IS);
+// Long form PLT entries that do not have any restrictions on the displacement
+// of the .plt from the .plt.got.
+static void writePltLong(uint8_t *Buf, uint64_t GotPltEntryAddr,
+ uint64_t PltEntryAddr, int32_t Index,
+ unsigned RelOff) {
+ const uint8_t PltData[] = {
+ 0x04, 0xc0, 0x9f, 0xe5, // ldr ip, L2
+ 0x0f, 0xc0, 0x8c, 0xe0, // L1: add ip, ip, pc
+ 0x00, 0xf0, 0x9c, 0xe5, // ldr pc, [ip]
+ 0x00, 0x00, 0x00, 0x00, // L2: .word Offset(&(.plt.got) - L1 - 8
+ };
+ memcpy(Buf, PltData, sizeof(PltData));
+ uint64_t L1 = PltEntryAddr + 4;
+ write32le(Buf + 12, GotPltEntryAddr - L1 - 8);
+// The default PLT entries require the .got.plt to be within 128 MiB of the
+// .plt in the positive direction.
+void ARM::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
+ uint64_t PltEntryAddr, int32_t Index,
+ unsigned RelOff) const {
+ // The PLT entry is similar to the example given in Appendix A of ELF for
+ // the Arm Architecture. Instead of using the Group Relocations to find the
+ // optimal rotation for the 8-bit immediate used in the add instructions we
+ // hard code the most compact rotations for simplicity. This saves a load
+ // instruction over the long plt sequences.
+ const uint32_t PltData[] = {
+ 0xe28fc600, // L1: add ip, pc, #0x0NN00000 Offset(&(.plt.got) - L1 - 8
+ 0xe28cca00, // add ip, ip, #0x000NN000 Offset(&(.plt.got) - L1 - 8
+ 0xe5bcf000, // ldr pc, [ip, #0x00000NNN] Offset(&(.plt.got) - L1 - 8
+ };
+ uint64_t Offset = GotPltEntryAddr - PltEntryAddr - 8;
+ if (!llvm::isUInt<27>(Offset)) {
+ // We cannot encode the Offset, use the long form.
+ writePltLong(Buf, GotPltEntryAddr, PltEntryAddr, Index, RelOff);
+ return;
+ }
+ write32le(Buf + 0, PltData[0] | ((Offset >> 20) & 0xff));
+ write32le(Buf + 4, PltData[1] | ((Offset >> 12) & 0xff));
+ write32le(Buf + 8, PltData[2] | (Offset & 0xfff));
+ memcpy(Buf + 12, TrapInstr.data(), 4); // Pad to 16-byte boundary
+void ARM::addPltSymbols(InputSection &IS, uint64_t Off) const {
+ addSyntheticLocal("$a", STT_NOTYPE, Off, 0, IS);
+ addSyntheticLocal("$d", STT_NOTYPE, Off + 12, 0, IS);
+// Decide whether the branch relocation Type at BranchAddr targeting S must be
+// routed through a thunk, either because an ARM/Thumb state change cannot be
+// expressed by the instruction itself or because the destination is out of
+// branch range.
+bool ARM::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
+ uint64_t BranchAddr, const Symbol &S) const {
+ // If S is an undefined weak symbol and does not have a PLT entry then it
+ // will be resolved as a branch to the next instruction.
+ if (S.isUndefWeak() && !S.isInPlt())
+ return false;
+ // A state change from ARM to Thumb and vice versa must go through an
+ // interworking thunk if the relocation type is not R_ARM_CALL or
+ // R_ARM_THM_CALL.
+ switch (Type) {
+ case R_ARM_PC24:
+ case R_ARM_PLT32:
+ case R_ARM_JUMP24:
+ // Source is ARM, all PLT entries are ARM so no interworking required.
+ // Otherwise we need to interwork if Symbol has bit 0 set (Thumb).
+ if (Expr == R_PC && ((S.getVA() & 1) == 1))
+ return true;
+ // No state change needed: continue into the range check shared with
+ // R_ARM_CALL.
+ case R_ARM_CALL: {
+ uint64_t Dst = (Expr == R_PLT_PC) ? S.getPltVA() : S.getVA();
+ return !inBranchRange(Type, BranchAddr, Dst);
+ }
+ case R_ARM_THM_JUMP19:
+ case R_ARM_THM_JUMP24:
+ // Source is Thumb, all PLT entries are ARM so interworking is required.
+ // Otherwise we need to interwork if Symbol has bit 0 clear (ARM).
+ if (Expr == R_PLT_PC || ((S.getVA() & 1) == 0))
+ return true;
+ // No state change needed: continue into the range check shared with
+ // R_ARM_THM_CALL.
+ case R_ARM_THM_CALL: {
+ uint64_t Dst = (Expr == R_PLT_PC) ? S.getPltVA() : S.getVA();
+ return !inBranchRange(Type, BranchAddr, Dst);
+ }
+ }
+ return false;
+uint32_t ARM::getThunkSectionSpacing() const {
+ // The placing of pre-created ThunkSections is controlled by the value
+ // ThunkSectionSpacing returned by getThunkSectionSpacing(). The aim is to
+ // place the ThunkSection such that all branches from the InputSections
+ // prior to the ThunkSection can reach a Thunk placed at the end of the
+ // ThunkSection. Graphically:
+ // | up to ThunkSectionSpacing .text input sections |
+ // | ThunkSection |
+ // | up to ThunkSectionSpacing .text input sections |
+ // | ThunkSection |
+ // Pre-created ThunkSections are spaced roughly 16MiB apart on ARMv7. This
+ // is to match the most common expected case of a Thumb 2 encoded BL, BLX or
+ // B.W:
+ // ARM B, BL, BLX range +/- 32MiB
+ // Thumb B.W, BL, BLX range +/- 16MiB
+ // Thumb B<cc>.W range +/- 1MiB
+ // If a branch cannot reach a pre-created ThunkSection a new one will be
+ // created so we can handle the rare cases of a Thumb 2 conditional branch.
+ // We intentionally use a lower size for ThunkSectionSpacing than the maximum
+ // branch range so the end of the ThunkSection is more likely to be within
+ // range of the branch instruction that is furthest away. The value we shorten
+ // ThunkSectionSpacing by is set conservatively to allow us to create 16,384
+ // 12 byte Thunks at any offset in a ThunkSection without risk of a branch to
+ // one of the Thunks going out of range.
+ // On Arm the ThunkSectionSpacing depends on the range of the Thumb Branch
+ // range. On earlier Architectures such as ARMv4, ARMv5 and ARMv6 (except
+ // ARMv6T2) the range is +/- 4MiB.
+ return (Config->ARMJ1J2BranchEncoding) ? 0x1000000 - 0x30000
+ : 0x400000 - 0x7500;
+bool ARM::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
+ uint64_t Range;
+ uint64_t InstrSize;
+ switch (Type) {
+ case R_ARM_PC24:
+ case R_ARM_PLT32:
+ case R_ARM_JUMP24:
+ case R_ARM_CALL:
+ Range = 0x2000000;
+ InstrSize = 4;
+ break;
+ case R_ARM_THM_JUMP19:
+ Range = 0x100000;
+ InstrSize = 2;
+ break;
+ case R_ARM_THM_JUMP24:
+ case R_ARM_THM_CALL:
+ Range = Config->ARMJ1J2BranchEncoding ? 0x1000000 : 0x400000;
+ InstrSize = 2;
+ break;
+ default:
+ return true;
+ }
+ // PC at Src is 2 instructions ahead, immediate of branch is signed
+ if (Src > Dst)
+ Range -= 2 * InstrSize;
+ else
+ Range += InstrSize;
+ if ((Dst & 0x1) == 0)
+ // Destination is ARM, if ARM caller then Src is already 4-byte aligned.
+ // If Thumb Caller (BLX) the Src address has bottom 2 bits cleared to ensure
+ // destination will be 4 byte aligned.
+ Src &= ~0x3;
+ else
+ // Bit 0 == 1 denotes Thumb state, it is not part of the range
+ Dst &= ~0x1;
+ uint64_t Distance = (Src > Dst) ? Src - Dst : Dst - Src;
+ return Distance <= Range;
+// Apply the relocation Type with the already computed value Val to the bytes
+// at Loc. Data relocations overwrite a 32-bit word; branch and MOVW/MOVT
+// relocations patch only the immediate fields of the ARM or Thumb instruction
+// found at Loc. An unknown relocation type is reported as an error.
+void ARM::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
+ switch (Type) {
+ case R_ARM_ABS32:
+ case R_ARM_GLOB_DAT:
+ case R_ARM_GOTOFF32:
+ case R_ARM_GOT_BREL:
+ case R_ARM_GOT_PREL:
+ case R_ARM_REL32:
+ case R_ARM_SBREL32:
+ case R_ARM_TARGET1:
+ case R_ARM_TARGET2:
+ case R_ARM_TLS_GD32:
+ case R_ARM_TLS_IE32:
+ case R_ARM_TLS_LDM32:
+ case R_ARM_TLS_LDO32:
+ case R_ARM_TLS_LE32:
+ case R_ARM_TLS_TPOFF32:
+ case R_ARM_TLS_DTPOFF32:
+ write32le(Loc, Val);
+ break;
+ case R_ARM_TLS_DTPMOD32:
+ write32le(Loc, 1);
+ break;
+ case R_ARM_PREL31:
+ checkInt(Loc, Val, 31, Type);
+ // Bit 31 of the existing word is kept; only the low 31 bits are replaced.
+ write32le(Loc, (read32le(Loc) & 0x80000000) | (Val & ~0x80000000));
+ break;
+ case R_ARM_CALL:
+ // R_ARM_CALL is used for BL and BLX instructions, depending on the
+ // value of bit 0 of Val, we must select a BL or BLX instruction
+ if (Val & 1) {
+ // If bit 0 of Val is 1 the target is Thumb, we must select a BLX.
+ // The BLX encoding is 0xfa:H:imm24 where Val = imm24:H:'1'
+ checkInt(Loc, Val, 26, Type);
+ write32le(Loc, 0xfa000000 | // opcode
+ ((Val & 2) << 23) | // H
+ ((Val >> 2) & 0x00ffffff)); // imm24
+ break;
+ }
+ if ((read32le(Loc) & 0xfe000000) == 0xfa000000)
+ // BLX (always unconditional) instruction to an ARM Target, select an
+ // unconditional BL.
+ write32le(Loc, 0xeb000000 | (read32le(Loc) & 0x00ffffff));
+ // fall through as BL encoding is shared with B
+ case R_ARM_JUMP24:
+ case R_ARM_PC24:
+ case R_ARM_PLT32:
+ checkInt(Loc, Val, 26, Type);
+ write32le(Loc, (read32le(Loc) & ~0x00ffffff) | ((Val >> 2) & 0x00ffffff));
+ break;
+ case R_ARM_THM_JUMP11:
+ checkInt(Loc, Val, 12, Type);
+ // This is a 16-bit Thumb instruction, so only read the two bytes that
+ // belong to it; a 32-bit read could run past the end of the buffer.
+ write16le(Loc, (read16le(Loc) & 0xf800) | ((Val >> 1) & 0x07ff));
+ break;
+ case R_ARM_THM_JUMP19:
+ // Encoding T3: Val = S:J2:J1:imm6:imm11:0
+ checkInt(Loc, Val, 21, Type);
+ write16le(Loc,
+ (read16le(Loc) & 0xfbc0) | // opcode cond
+ ((Val >> 10) & 0x0400) | // S
+ ((Val >> 12) & 0x003f)); // imm6
+ write16le(Loc + 2,
+ 0x8000 | // opcode
+ ((Val >> 8) & 0x0800) | // J2
+ ((Val >> 5) & 0x2000) | // J1
+ ((Val >> 1) & 0x07ff)); // imm11
+ break;
+ case R_ARM_THM_CALL:
+ // R_ARM_THM_CALL is used for BL and BLX instructions, depending on the
+ // value of bit 0 of Val, we must select a BL or BLX instruction
+ if ((Val & 1) == 0) {
+ // Ensure BLX destination is 4-byte aligned. As BLX instruction may
+ // only be two byte aligned. This must be done before overflow check
+ Val = alignTo(Val, 4);
+ }
+ // Bit 12 is 0 for BLX, 1 for BL
+ write16le(Loc + 2, (read16le(Loc + 2) & ~0x1000) | (Val & 1) << 12);
+ if (!Config->ARMJ1J2BranchEncoding) {
+ // Older Arm architectures do not support R_ARM_THM_JUMP24 and have
+ // different encoding rules and range due to J1 and J2 always being 1.
+ checkInt(Loc, Val, 23, Type);
+ write16le(Loc,
+ 0xf000 | // opcode
+ ((Val >> 12) & 0x07ff)); // imm11
+ write16le(Loc + 2,
+ (read16le(Loc + 2) & 0xd000) | // opcode
+ 0x2800 | // J1 == J2 == 1
+ ((Val >> 1) & 0x07ff)); // imm11
+ break;
+ }
+ // Fall through as rest of encoding is the same as B.W
+ case R_ARM_THM_JUMP24:
+ // Encoding B T4, BL T1, BLX T2: Val = S:I1:I2:imm10:imm11:0
+ checkInt(Loc, Val, 25, Type);
+ write16le(Loc,
+ 0xf000 | // opcode
+ ((Val >> 14) & 0x0400) | // S
+ ((Val >> 12) & 0x03ff)); // imm10
+ write16le(Loc + 2,
+ (read16le(Loc + 2) & 0xd000) | // opcode
+ (((~(Val >> 10)) ^ (Val >> 11)) & 0x2000) | // J1
+ (((~(Val >> 11)) ^ (Val >> 13)) & 0x0800) | // J2
+ ((Val >> 1) & 0x07ff)); // imm11
+ break;
+ write32le(Loc, (read32le(Loc) & ~0x000f0fff) | ((Val & 0xf000) << 4) |
+ (Val & 0x0fff));
+ break;
+ case R_ARM_MOVT_ABS:
+ write32le(Loc, (read32le(Loc) & ~0x000f0fff) |
+ (((Val >> 16) & 0xf000) << 4) | ((Val >> 16) & 0xfff));
+ break;
+ // Encoding T1: A = imm4:i:imm3:imm8
+ write16le(Loc,
+ 0xf2c0 | // opcode
+ ((Val >> 17) & 0x0400) | // i
+ ((Val >> 28) & 0x000f)); // imm4
+ write16le(Loc + 2,
+ (read16le(Loc + 2) & 0x8f00) | // opcode
+ ((Val >> 12) & 0x7000) | // imm3
+ ((Val >> 16) & 0x00ff)); // imm8
+ break;
+ // Encoding T3: A = imm4:i:imm3:imm8
+ write16le(Loc,
+ 0xf240 | // opcode
+ ((Val >> 1) & 0x0400) | // i
+ ((Val >> 12) & 0x000f)); // imm4
+ write16le(Loc + 2,
+ (read16le(Loc + 2) & 0x8f00) | // opcode
+ ((Val << 4) & 0x7000) | // imm3
+ (Val & 0x00ff)); // imm8
+ break;
+ default:
+ error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type));
+ }
+// Extract the addend stored in the relocated place at Buf for a REL-style
+// relocation of the given Type. The immediate fields are reassembled exactly
+// as relocateOne() lays them out and then sign-extended from their top bit.
+// Relocation types with no encoded addend yield 0.
+int64_t ARM::getImplicitAddend(const uint8_t *Buf, RelType Type) const {
+ switch (Type) {
+ default:
+ return 0;
+ case R_ARM_ABS32:
+ case R_ARM_GOTOFF32:
+ case R_ARM_GOT_BREL:
+ case R_ARM_GOT_PREL:
+ case R_ARM_REL32:
+ case R_ARM_TARGET1:
+ case R_ARM_TARGET2:
+ case R_ARM_TLS_GD32:
+ case R_ARM_TLS_LDM32:
+ case R_ARM_TLS_LDO32:
+ case R_ARM_TLS_IE32:
+ case R_ARM_TLS_LE32:
+ return SignExtend64<32>(read32le(Buf));
+ case R_ARM_PREL31:
+ return SignExtend64<31>(read32le(Buf));
+ case R_ARM_CALL:
+ case R_ARM_JUMP24:
+ case R_ARM_PC24:
+ case R_ARM_PLT32:
+ return SignExtend64<26>(read32le(Buf) << 2);
+ case R_ARM_THM_JUMP11:
+ return SignExtend64<12>(read16le(Buf) << 1);
+ case R_ARM_THM_JUMP19: {
+ // Encoding T3: A = S:J2:J1:imm6:imm11:0
+ uint16_t Hi = read16le(Buf);
+ uint16_t Lo = read16le(Buf + 2);
+ // S lands in bit 20, so the addend is 21 bits wide (relocateOne() checks
+ // the same relocation with checkInt(..., 21, ...)).
+ return SignExtend64<21>(((Hi & 0x0400) << 10) | // S
+ ((Lo & 0x0800) << 8) | // J2
+ ((Lo & 0x2000) << 5) | // J1
+ ((Hi & 0x003f) << 12) | // imm6
+ ((Lo & 0x07ff) << 1)); // imm11:0
+ }
+ case R_ARM_THM_CALL:
+ if (!Config->ARMJ1J2BranchEncoding) {
+ // Older Arm architectures do not support R_ARM_THM_JUMP24 and have
+ // different encoding rules and range due to J1 and J2 always being 1.
+ uint16_t Hi = read16le(Buf);
+ uint16_t Lo = read16le(Buf + 2);
+ // The upper imm11 occupies bits [22:12], so the addend is 23 bits wide.
+ return SignExtend64<23>(((Hi & 0x7ff) << 12) | // imm11
+ ((Lo & 0x7ff) << 1)); // imm11:0
+ }
+ // Otherwise the encoding is the same as B.W; continue into the
+ // R_ARM_THM_JUMP24 case.
+ case R_ARM_THM_JUMP24: {
+ // Encoding B T4, BL T1, BLX T2: A = S:I1:I2:imm10:imm11:0
+ // I1 = NOT(J1 EOR S), I2 = NOT(J2 EOR S)
+ uint16_t Hi = read16le(Buf);
+ uint16_t Lo = read16le(Buf + 2);
+ // S lands in bit 24, so the addend is 25 bits wide.
+ return SignExtend64<25>(((Hi & 0x0400) << 14) | // S
+ (~((Lo ^ (Hi << 3)) << 10) & 0x00800000) | // I1
+ (~((Lo ^ (Hi << 1)) << 11) & 0x00400000) | // I2
+ ((Hi & 0x003ff) << 12) | // imm10
+ ((Lo & 0x007ff) << 1)); // imm11:0
+ }
+ // ELF for the ARM Architecture the implicit addend for MOVW and
+ // MOVT is in the range -32768 <= A < 32768
+ case R_ARM_MOVT_ABS:
+ case R_ARM_MOVT_PREL: {
+ uint64_t Val = read32le(Buf) & 0x000f0fff;
+ return SignExtend64<16>(((Val & 0x000f0000) >> 4) | (Val & 0x00fff));
+ }
+ // Encoding T3: A = imm4:i:imm3:imm8
+ uint16_t Hi = read16le(Buf);
+ uint16_t Lo = read16le(Buf + 2);
+ return SignExtend64<16>(((Hi & 0x000f) << 12) | // imm4
+ ((Hi & 0x0400) << 1) | // i
+ ((Lo & 0x7000) >> 4) | // imm3
+ (Lo & 0x00ff)); // imm8
+ }
+ }
+TargetInfo *elf::getARMTargetInfo() {
+ static ARM Target;
+ return &Target;
diff --git a/contrib/llvm/tools/lld/ELF/Arch/AVR.cpp b/contrib/llvm/tools/lld/ELF/Arch/AVR.cpp
new file mode 100644
index 000000000000..637da3778bd2
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Arch/AVR.cpp
@@ -0,0 +1,77 @@
+//===- AVR.cpp ------------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// AVR is a Harvard-architecture 8-bit microcontroller designed for small
+// baremetal programs. All AVR-family processors have 32 8-bit registers.
+// The tiniest AVR has 32 byte RAM and 1 KiB program memory, and the largest
+// one supports up to 2^24 data address space and 2^22 code address space.
+// Since this is bare-metal programming, there's usually no loader to load
+// ELF files on AVRs. You are expected to link your program against address
+// 0 and pull out a .text section from the result using objcopy, so that you
+// can write the linked code to on-chip flash memory. You can do that with
+// the following commands:
+// ld.lld -Ttext=0 -o foo foo.o
+// objcopy -O binary --only-section=.text foo output.bin
+// Note that the current AVR support is very preliminary so you can't
+// link any useful program yet, though.
+#include "InputFiles.h"
+#include "Symbols.h"
+#include "Target.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Support/Endian.h"
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::support::endian;
+using namespace llvm::ELF;
+using namespace lld;
+using namespace lld::elf;
+namespace {
+class AVR final : public TargetInfo {
+ AVR();
+ RelExpr getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const override;
+ void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+} // namespace
+AVR::AVR() { NoneRel = R_AVR_NONE; }
+RelExpr AVR::getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const {
+ return R_ABS;
+void AVR::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
+ switch (Type) {
+ case R_AVR_CALL: {
+ uint16_t Hi = Val >> 17;
+ uint16_t Lo = Val >> 1;
+ write16le(Loc, read16le(Loc) | ((Hi >> 1) << 4) | (Hi & 1));
+ write16le(Loc + 2, Lo);
+ break;
+ }
+ default:
+ error(getErrorLocation(Loc) + "unrecognized reloc " + toString(Type));
+ }
+TargetInfo *elf::getAVRTargetInfo() {
+ static AVR Target;
+ return &Target;
diff --git a/contrib/llvm/tools/lld/ELF/Arch/Hexagon.cpp b/contrib/llvm/tools/lld/ELF/Arch/Hexagon.cpp
new file mode 100644
index 000000000000..b4d33be2ad39
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Arch/Hexagon.cpp
@@ -0,0 +1,292 @@
+//===-- Hexagon.cpp -------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "InputFiles.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Support/Endian.h"
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::support::endian;
+using namespace llvm::ELF;
+using namespace lld;
+using namespace lld::elf;
+namespace {
+class Hexagon final : public TargetInfo {
+ Hexagon();
+ uint32_t calcEFlags() const override;
+ RelExpr getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const override;
+ void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+ void writePltHeader(uint8_t *Buf) const override;
+ void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
+ int32_t Index, unsigned RelOff) const override;
+} // namespace
+Hexagon::Hexagon() {
+ PltRel = R_HEX_JMP_SLOT;
+ RelativeRel = R_HEX_RELATIVE;
+ GotRel = R_HEX_GLOB_DAT;
+ GotEntrySize = 4;
+ // The zero'th GOT entry is reserved for the address of _DYNAMIC. The
+ // next 3 are reserved for the dynamic loader.
+ GotPltHeaderEntriesNum = 4;
+ GotPltEntrySize = 4;
+ PltEntrySize = 16;
+ PltHeaderSize = 32;
+ // Hexagon Linux uses 64K pages by default.
+ DefaultMaxPageSize = 0x10000;
+ NoneRel = R_HEX_NONE;
+uint32_t Hexagon::calcEFlags() const {
+ assert(!ObjectFiles.empty());
+ // The architecture revision must always be equal to or greater than the
+ // greatest revision in the list of inputs.
+ uint32_t Ret = 0;
+ for (InputFile *F : ObjectFiles) {
+ uint32_t EFlags = cast<ObjFile<ELF32LE>>(F)->getObj().getHeader()->e_flags;
+ if (EFlags > Ret)
+ Ret = EFlags;
+ }
+ return Ret;
+static uint32_t applyMask(uint32_t Mask, uint32_t Data) {
+ uint32_t Result = 0;
+ size_t Off = 0;
+ for (size_t Bit = 0; Bit != 32; ++Bit) {
+ uint32_t ValBit = (Data >> Off) & 1;
+ uint32_t MaskBit = (Mask >> Bit) & 1;
+ if (MaskBit) {
+ Result |= (ValBit << Bit);
+ ++Off;
+ }
+ }
+ return Result;
+RelExpr Hexagon::getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const {
+ switch (Type) {
+ case R_HEX_B9_PCREL:
+ case R_HEX_B9_PCREL_X:
+ case R_HEX_B13_PCREL:
+ case R_HEX_B15_PCREL:
+ case R_HEX_B15_PCREL_X:
+ case R_HEX_6_PCREL_X:
+ case R_HEX_32_PCREL:
+ return R_PC;
+ case R_HEX_B22_PCREL:
+ case R_HEX_PLT_B22_PCREL:
+ case R_HEX_B22_PCREL_X:
+ case R_HEX_B32_PCREL_X:
+ return R_PLT_PC;
+ case R_HEX_GOT_11_X:
+ case R_HEX_GOT_16_X:
+ case R_HEX_GOT_32_6_X:
+ return R_HEXAGON_GOT;
+ default:
+ return R_ABS;
+ }
+static uint32_t findMaskR6(uint32_t Insn) {
+ // There are (arguably too) many relocation masks for the DSP's
+ // R_HEX_6_X type. The table below is used to select the correct mask
+ // for the given instruction.
+ struct InstructionMask {
+ uint32_t CmpMask;
+ uint32_t RelocMask;
+ };
+ static const InstructionMask R6[] = {
+ {0x38000000, 0x0000201f}, {0x39000000, 0x0000201f},
+ {0x3e000000, 0x00001f80}, {0x3f000000, 0x00001f80},
+ {0x40000000, 0x000020f8}, {0x41000000, 0x000007e0},
+ {0x42000000, 0x000020f8}, {0x43000000, 0x000007e0},
+ {0x44000000, 0x000020f8}, {0x45000000, 0x000007e0},
+ {0x46000000, 0x000020f8}, {0x47000000, 0x000007e0},
+ {0x6a000000, 0x00001f80}, {0x7c000000, 0x001f2000},
+ {0x9a000000, 0x00000f60}, {0x9b000000, 0x00000f60},
+ {0x9c000000, 0x00000f60}, {0x9d000000, 0x00000f60},
+ {0x9f000000, 0x001f0100}, {0xab000000, 0x0000003f},
+ {0xad000000, 0x0000003f}, {0xaf000000, 0x00030078},
+ {0xd7000000, 0x006020e0}, {0xd8000000, 0x006020e0},
+ {0xdb000000, 0x006020e0}, {0xdf000000, 0x006020e0}};
+ // Duplex forms have a fixed mask and parse bits 15:14 are always
+ // zero. Non-duplex insns will always have at least one bit set in the
+ // parse field.
+ if ((0xC000 & Insn) == 0x0)
+ return 0x03f00000;
+ for (InstructionMask I : R6)
+ if ((0xff000000 & Insn) == I.CmpMask)
+ return I.RelocMask;
+ error("unrecognized instruction for R_HEX_6 relocation: 0x" +
+ utohexstr(Insn));
+ return 0;
+static uint32_t findMaskR8(uint32_t Insn) {
+ if ((0xff000000 & Insn) == 0xde000000)
+ return 0x00e020e8;
+ if ((0xff000000 & Insn) == 0x3c000000)
+ return 0x0000207f;
+ return 0x00001fe0;
+static uint32_t findMaskR11(uint32_t Insn) {
+ if ((0xff000000 & Insn) == 0xa1000000)
+ return 0x060020ff;
+ return 0x06003fe0;
+static uint32_t findMaskR16(uint32_t Insn) {
+ if ((0xff000000 & Insn) == 0x48000000)
+ return 0x061f20ff;
+ if ((0xff000000 & Insn) == 0x49000000)
+ return 0x061f3fe0;
+ if ((0xff000000 & Insn) == 0x78000000)
+ return 0x00df3fe0;
+ if ((0xff000000 & Insn) == 0xb0000000)
+ return 0x0fe03fe0;
+ error("unrecognized instruction for R_HEX_16_X relocation: 0x" +
+ utohexstr(Insn));
+ return 0;
+static void or32le(uint8_t *P, int32_t V) { write32le(P, read32le(P) | V); }
+void Hexagon::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
+ switch (Type) {
+ case R_HEX_NONE:
+ break;
+ case R_HEX_6_PCREL_X:
+ case R_HEX_6_X:
+ or32le(Loc, applyMask(findMaskR6(read32le(Loc)), Val));
+ break;
+ case R_HEX_8_X:
+ or32le(Loc, applyMask(findMaskR8(read32le(Loc)), Val));
+ break;
+ case R_HEX_9_X:
+ or32le(Loc, applyMask(0x00003fe0, Val & 0x3f));
+ break;
+ case R_HEX_10_X:
+ or32le(Loc, applyMask(0x00203fe0, Val & 0x3f));
+ break;
+ case R_HEX_11_X:
+ case R_HEX_GOT_11_X:
+ or32le(Loc, applyMask(findMaskR11(read32le(Loc)), Val & 0x3f));
+ break;
+ case R_HEX_12_X:
+ or32le(Loc, applyMask(0x000007e0, Val));
+ break;
+ case R_HEX_16_X: // These relocs only have 6 effective bits.
+ case R_HEX_GOT_16_X:
+ or32le(Loc, applyMask(findMaskR16(read32le(Loc)), Val & 0x3f));
+ break;
+ case R_HEX_32:
+ case R_HEX_32_PCREL:
+ or32le(Loc, Val);
+ break;
+ case R_HEX_32_6_X:
+ case R_HEX_GOT_32_6_X:
+ or32le(Loc, applyMask(0x0fff3fff, Val >> 6));
+ break;
+ case R_HEX_B9_PCREL:
+ or32le(Loc, applyMask(0x003000fe, Val >> 2));
+ break;
+ case R_HEX_B9_PCREL_X:
+ or32le(Loc, applyMask(0x003000fe, Val & 0x3f));
+ break;
+ case R_HEX_B13_PCREL:
+ or32le(Loc, applyMask(0x00202ffe, Val >> 2));
+ break;
+ case R_HEX_B15_PCREL:
+ or32le(Loc, applyMask(0x00df20fe, Val >> 2));
+ break;
+ case R_HEX_B15_PCREL_X:
+ or32le(Loc, applyMask(0x00df20fe, Val & 0x3f));
+ break;
+ case R_HEX_B22_PCREL:
+ case R_HEX_PLT_B22_PCREL:
+ or32le(Loc, applyMask(0x1ff3ffe, Val >> 2));
+ break;
+ case R_HEX_B22_PCREL_X:
+ or32le(Loc, applyMask(0x1ff3ffe, Val & 0x3f));
+ break;
+ case R_HEX_B32_PCREL_X:
+ or32le(Loc, applyMask(0x0fff3fff, Val >> 6));
+ break;
+ case R_HEX_HI16:
+ or32le(Loc, applyMask(0x00c03fff, Val >> 16));
+ break;
+ case R_HEX_LO16:
+ or32le(Loc, applyMask(0x00c03fff, Val));
+ break;
+ default:
+ error(getErrorLocation(Loc) + "unrecognized reloc " + toString(Type));
+ break;
+ }
+void Hexagon::writePltHeader(uint8_t *Buf) const {
+ const uint8_t PltData[] = {
+ 0x00, 0x40, 0x00, 0x00, // { immext (#0)
+ 0x1c, 0xc0, 0x49, 0x6a, // r28 = add (pc, ##GOT0@PCREL) } # @GOT0
+ 0x0e, 0x42, 0x9c, 0xe2, // { r14 -= add (r28, #16) # offset of GOTn
+ 0x4f, 0x40, 0x9c, 0x91, // r15 = memw (r28 + #8) # object ID at GOT2
+ 0x3c, 0xc0, 0x9c, 0x91, // r28 = memw (r28 + #4) }# dynamic link at GOT1
+ 0x0e, 0x42, 0x0e, 0x8c, // { r14 = asr (r14, #2) # index of PLTn
+ 0x00, 0xc0, 0x9c, 0x52, // jumpr r28 } # call dynamic linker
+ 0x0c, 0xdb, 0x00, 0x54, // trap0(#0xdb) # bring plt0 into 16byte alignment
+ };
+ memcpy(Buf, PltData, sizeof(PltData));
+ // Offset from PLT0 to the GOT.
+ uint64_t Off = In.GotPlt->getVA() - In.Plt->getVA();
+ relocateOne(Buf, R_HEX_B32_PCREL_X, Off);
+ relocateOne(Buf + 4, R_HEX_6_PCREL_X, Off);
+void Hexagon::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
+ uint64_t PltEntryAddr, int32_t Index,
+ unsigned RelOff) const {
+ const uint8_t Inst[] = {
+ 0x00, 0x40, 0x00, 0x00, // { immext (#0)
+ 0x0e, 0xc0, 0x49, 0x6a, // r14 = add (pc, ##GOTn@PCREL) }
+ 0x1c, 0xc0, 0x8e, 0x91, // r28 = memw (r14)
+ 0x00, 0xc0, 0x9c, 0x52, // jumpr r28
+ };
+ memcpy(Buf, Inst, sizeof(Inst));
+ relocateOne(Buf, R_HEX_B32_PCREL_X, GotPltEntryAddr - PltEntryAddr);
+ relocateOne(Buf + 4, R_HEX_6_PCREL_X, GotPltEntryAddr - PltEntryAddr);
+TargetInfo *elf::getHexagonTargetInfo() {
+ static Hexagon Target;
+ return &Target;
diff --git a/contrib/llvm/tools/lld/ELF/Arch/MSP430.cpp b/contrib/llvm/tools/lld/ELF/Arch/MSP430.cpp
new file mode 100644
index 000000000000..fe0c0fe64daf
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Arch/MSP430.cpp
@@ -0,0 +1,94 @@
+//===- MSP430.cpp ---------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// The MSP430 is a 16-bit microcontroller RISC architecture. The instruction set
+// has only 27 core instructions orthogonally augmented with a variety
+// of addressing modes for source and destination operands. The entire address
+// space of MSP430 is 64KB (the extended MSP430X architecture is not considered
+// here). A typical MSP430 MCU has several kilobytes of RAM and ROM, plenty
+// of peripherals, and is generally optimized for low power consumption.
+#include "InputFiles.h"
+#include "Symbols.h"
+#include "Target.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Support/Endian.h"
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::support::endian;
+using namespace llvm::ELF;
+using namespace lld;
+using namespace lld::elf;
+namespace {
+class MSP430 final : public TargetInfo {
+ MSP430();
+ RelExpr getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const override;
+ void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+} // namespace
+MSP430::MSP430() {
+ // mov.b #0, r3
+ TrapInstr = {0x43, 0x43, 0x43, 0x43};
+RelExpr MSP430::getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const {
+ switch (Type) {
+ case R_MSP430_10_PCREL:
+ case R_MSP430_16_PCREL:
+ case R_MSP430_16_PCREL_BYTE:
+ case R_MSP430_2X_PCREL:
+ case R_MSP430_RL_PCREL:
+ case R_MSP430_SYM_DIFF:
+ return R_PC;
+ default:
+ return R_ABS;
+ }
+void MSP430::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
+ switch (Type) {
+ case R_MSP430_8:
+ checkIntUInt(Loc, Val, 8, Type);
+ *Loc = Val;
+ break;
+ case R_MSP430_16:
+ case R_MSP430_16_PCREL:
+ case R_MSP430_16_BYTE:
+ case R_MSP430_16_PCREL_BYTE:
+ checkIntUInt(Loc, Val, 16, Type);
+ write16le(Loc, Val);
+ break;
+ case R_MSP430_32:
+ checkIntUInt(Loc, Val, 32, Type);
+ write32le(Loc, Val);
+ break;
+ case R_MSP430_10_PCREL: {
+ int16_t Offset = ((int16_t)Val >> 1) - 1;
+ checkInt(Loc, Offset, 10, Type);
+ write16le(Loc, (read16le(Loc) & 0xFC00) | (Offset & 0x3FF));
+ break;
+ }
+ default:
+ error(getErrorLocation(Loc) + "unrecognized reloc " + toString(Type));
+ }
+TargetInfo *elf::getMSP430TargetInfo() {
+ static MSP430 Target;
+ return &Target;
diff --git a/contrib/llvm/tools/lld/ELF/Arch/Mips.cpp b/contrib/llvm/tools/lld/ELF/Arch/Mips.cpp
new file mode 100644
index 000000000000..23b0c1dd8a2d
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Arch/Mips.cpp
@@ -0,0 +1,676 @@
+//===- MIPS.cpp -----------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "InputFiles.h"
+#include "OutputSections.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+#include "Thunks.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Support/Endian.h"
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::support::endian;
+using namespace llvm::ELF;
+using namespace lld;
+using namespace lld::elf;
+namespace {
+template <class ELFT> class MIPS final : public TargetInfo {
+ MIPS();
+ uint32_t calcEFlags() const override;
+ RelExpr getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const override;
+ int64_t getImplicitAddend(const uint8_t *Buf, RelType Type) const override;
+ RelType getDynRel(RelType Type) const override;
+ void writeGotPlt(uint8_t *Buf, const Symbol &S) const override;
+ void writePltHeader(uint8_t *Buf) const override;
+ void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
+ int32_t Index, unsigned RelOff) const override;
+ bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
+ uint64_t BranchAddr, const Symbol &S) const override;
+ void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+ bool usesOnlyLowPageBits(RelType Type) const override;
+} // namespace
+template <class ELFT> MIPS<ELFT>::MIPS() {
+ GotPltHeaderEntriesNum = 2;
+ DefaultMaxPageSize = 65536;
+ GotEntrySize = sizeof(typename ELFT::uint);
+ GotPltEntrySize = sizeof(typename ELFT::uint);
+ GotBaseSymInGotPlt = false;
+ PltEntrySize = 16;
+ PltHeaderSize = 32;
+ CopyRel = R_MIPS_COPY;
+ NoneRel = R_MIPS_NONE;
+ NeedsThunks = true;
+ // Set `sigrie 1` as a trap instruction.
+ write32(TrapInstr.data(), 0x04170001);
+ if (ELFT::Is64Bits) {
+ RelativeRel = (R_MIPS_64 << 8) | R_MIPS_REL32;
+ TlsGotRel = R_MIPS_TLS_TPREL64;
+ TlsModuleIndexRel = R_MIPS_TLS_DTPMOD64;
+ TlsOffsetRel = R_MIPS_TLS_DTPREL64;
+ } else {
+ RelativeRel = R_MIPS_REL32;
+ TlsGotRel = R_MIPS_TLS_TPREL32;
+ TlsModuleIndexRel = R_MIPS_TLS_DTPMOD32;
+ TlsOffsetRel = R_MIPS_TLS_DTPREL32;
+ }
+template <class ELFT> uint32_t MIPS<ELFT>::calcEFlags() const {
+ return calcMipsEFlags<ELFT>();
+template <class ELFT>
+RelExpr MIPS<ELFT>::getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const {
+ // See comment in the calculateMipsRelChain.
+ if (ELFT::Is64Bits || Config->MipsN32Abi)
+ Type &= 0xff;
+ switch (Type) {
+ case R_MIPS_JALR:
+ return R_HINT;
+ case R_MIPS_GPREL16:
+ case R_MIPS_GPREL32:
+ return R_MIPS_GOTREL;
+ case R_MIPS_26:
+ case R_MICROMIPS_26_S1:
+ return R_PLT;
+ case R_MICROMIPS_PC26_S1:
+ return R_PLT_PC;
+ case R_MIPS_HI16:
+ case R_MIPS_LO16:
+ case R_MICROMIPS_HI16:
+ case R_MICROMIPS_LO16:
+ // R_MIPS_HI16/R_MIPS_LO16 relocations against _gp_disp calculate the
+ // offset between the start of the function and the 'gp' value, which by
+ // default is equal to the start of the .got section. In that case we
+ // consider these relocations as relative.
+ if (&S == ElfSym::MipsGpDisp)
+ return R_MIPS_GOT_GP_PC;
+ if (&S == ElfSym::MipsLocalGp)
+ return R_MIPS_GOT_GP;
+ case R_MIPS_32:
+ case R_MIPS_64:
+ case R_MIPS_SUB:
+ case R_MIPS_TLS_TPREL32:
+ case R_MIPS_TLS_TPREL64:
+ return R_ABS;
+ case R_MIPS_PC32:
+ case R_MIPS_PC16:
+ case R_MIPS_PC19_S2:
+ case R_MIPS_PC21_S2:
+ case R_MIPS_PC26_S2:
+ case R_MIPS_PCHI16:
+ case R_MIPS_PCLO16:
+ case R_MICROMIPS_PC7_S1:
+ case R_MICROMIPS_PC10_S1:
+ case R_MICROMIPS_PC16_S1:
+ case R_MICROMIPS_PC18_S3:
+ case R_MICROMIPS_PC19_S2:
+ case R_MICROMIPS_PC23_S2:
+ case R_MICROMIPS_PC21_S1:
+ return R_PC;
+ case R_MIPS_GOT16:
+ if (S.isLocal())
+ case R_MIPS_CALL16:
+ return R_MIPS_GOT_OFF;
+ case R_MIPS_CALL_HI16:
+ case R_MIPS_CALL_LO16:
+ case R_MIPS_GOT_HI16:
+ case R_MIPS_GOT_LO16:
+ return R_MIPS_GOT_OFF32;
+ case R_MIPS_TLS_GD:
+ return R_MIPS_TLSGD;
+ case R_MIPS_TLS_LDM:
+ return R_MIPS_TLSLD;
+ case R_MIPS_NONE:
+ return R_NONE;
+ default:
+ return R_INVALID;
+ }
+template <class ELFT> RelType MIPS<ELFT>::getDynRel(RelType Type) const {
+ if (Type == R_MIPS_32 || Type == R_MIPS_64)
+ return RelativeRel;
+ return R_MIPS_NONE;
+template <class ELFT>
+void MIPS<ELFT>::writeGotPlt(uint8_t *Buf, const Symbol &) const {
+ uint64_t VA = In.Plt->getVA();
+ if (isMicroMips())
+ VA |= 1;
+ write32<ELFT::TargetEndianness>(Buf, VA);
+template <endianness E> static uint32_t readShuffle(const uint8_t *Loc) {
+ // The major opcode of a microMIPS instruction needs to appear
+ // in the first 16-bit word (lowest address) for efficient hardware
+ // decode so that it knows if the instruction is 16-bit or 32-bit
+ // as early as possible. To do so, little-endian binaries keep 16-bit
+ // words in a big-endian order. That is why we have to swap these
+ // words to get a correct value.
+ uint32_t V = read32<E>(Loc);
+ if (E == support::little)
+ return (V << 16) | (V >> 16);
+ return V;
+template <endianness E>
+static void writeValue(uint8_t *Loc, uint64_t V, uint8_t BitsSize,
+ uint8_t Shift) {
+ uint32_t Instr = read32<E>(Loc);
+ uint32_t Mask = 0xffffffff >> (32 - BitsSize);
+ uint32_t Data = (Instr & ~Mask) | ((V >> Shift) & Mask);
+ write32<E>(Loc, Data);
+template <endianness E>
+static void writeShuffleValue(uint8_t *Loc, uint64_t V, uint8_t BitsSize,
+ uint8_t Shift) {
+ // See comments in readShuffle for purpose of this code.
+ uint16_t *Words = (uint16_t *)Loc;
+ if (E == support::little)
+ std::swap(Words[0], Words[1]);
+ writeValue<E>(Loc, V, BitsSize, Shift);
+ if (E == support::little)
+ std::swap(Words[0], Words[1]);
+template <endianness E>
+static void writeMicroRelocation16(uint8_t *Loc, uint64_t V, uint8_t BitsSize,
+ uint8_t Shift) {
+ uint16_t Instr = read16<E>(Loc);
+ uint16_t Mask = 0xffff >> (16 - BitsSize);
+ uint16_t Data = (Instr & ~Mask) | ((V >> Shift) & Mask);
+ write16<E>(Loc, Data);
+template <class ELFT> void MIPS<ELFT>::writePltHeader(uint8_t *Buf) const {
+ const endianness E = ELFT::TargetEndianness;
+ if (isMicroMips()) {
+ uint64_t GotPlt = In.GotPlt->getVA();
+ uint64_t Plt = In.Plt->getVA();
+ // Overwrite trap instructions written by Writer::writeTrapInstr.
+ memset(Buf, 0, PltHeaderSize);
+ write16<E>(Buf, isMipsR6() ? 0x7860 : 0x7980); // addiupc v1, (GOTPLT) - .
+ write16<E>(Buf + 4, 0xff23); // lw $25, 0($3)
+ write16<E>(Buf + 8, 0x0535); // subu16 $2, $2, $3
+ write16<E>(Buf + 10, 0x2525); // srl16 $2, $2, 2
+ write16<E>(Buf + 12, 0x3302); // addiu $24, $2, -2
+ write16<E>(Buf + 14, 0xfffe);
+ write16<E>(Buf + 16, 0x0dff); // move $15, $31
+ if (isMipsR6()) {
+ write16<E>(Buf + 18, 0x0f83); // move $28, $3
+ write16<E>(Buf + 20, 0x472b); // jalrc $25
+ write16<E>(Buf + 22, 0x0c00); // nop
+ relocateOne(Buf, R_MICROMIPS_PC19_S2, GotPlt - Plt);
+ } else {
+ write16<E>(Buf + 18, 0x45f9); // jalrc $25
+ write16<E>(Buf + 20, 0x0f83); // move $28, $3
+ write16<E>(Buf + 22, 0x0c00); // nop
+ relocateOne(Buf, R_MICROMIPS_PC23_S2, GotPlt - Plt);
+ }
+ return;
+ }
+ if (Config->MipsN32Abi) {
+ write32<E>(Buf, 0x3c0e0000); // lui $14, %hi(&GOTPLT[0])
+ write32<E>(Buf + 4, 0x8dd90000); // lw $25, %lo(&GOTPLT[0])($14)
+ write32<E>(Buf + 8, 0x25ce0000); // addiu $14, $14, %lo(&GOTPLT[0])
+ write32<E>(Buf + 12, 0x030ec023); // subu $24, $24, $14
+ write32<E>(Buf + 16, 0x03e07825); // move $15, $31
+ write32<E>(Buf + 20, 0x0018c082); // srl $24, $24, 2
+ } else if (ELFT::Is64Bits) {
+ write32<E>(Buf, 0x3c0e0000); // lui $14, %hi(&GOTPLT[0])
+ write32<E>(Buf + 4, 0xddd90000); // ld $25, %lo(&GOTPLT[0])($14)
+ write32<E>(Buf + 8, 0x25ce0000); // addiu $14, $14, %lo(&GOTPLT[0])
+ write32<E>(Buf + 12, 0x030ec023); // subu $24, $24, $14
+ write32<E>(Buf + 16, 0x03e07825); // move $15, $31
+ write32<E>(Buf + 20, 0x0018c0c2); // srl $24, $24, 3
+ } else {
+ write32<E>(Buf, 0x3c1c0000); // lui $28, %hi(&GOTPLT[0])
+ write32<E>(Buf + 4, 0x8f990000); // lw $25, %lo(&GOTPLT[0])($28)
+ write32<E>(Buf + 8, 0x279c0000); // addiu $28, $28, %lo(&GOTPLT[0])
+ write32<E>(Buf + 12, 0x031cc023); // subu $24, $24, $28
+ write32<E>(Buf + 16, 0x03e07825); // move $15, $31
+ write32<E>(Buf + 20, 0x0018c082); // srl $24, $24, 2
+ }
+ uint32_t JalrInst = Config->ZHazardplt ? 0x0320fc09 : 0x0320f809;
+ write32<E>(Buf + 24, JalrInst); // jalr.hb $25 or jalr $25
+ write32<E>(Buf + 28, 0x2718fffe); // subu $24, $24, 2
+ uint64_t GotPlt = In.GotPlt->getVA();
+ writeValue<E>(Buf, GotPlt + 0x8000, 16, 16);
+ writeValue<E>(Buf + 4, GotPlt, 16, 0);
+ writeValue<E>(Buf + 8, GotPlt, 16, 0);
+template <class ELFT>
+void MIPS<ELFT>::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
+ uint64_t PltEntryAddr, int32_t Index,
+ unsigned RelOff) const {
+ const endianness E = ELFT::TargetEndianness;
+ if (isMicroMips()) {
+ // Overwrite trap instructions written by Writer::writeTrapInstr.
+ memset(Buf, 0, PltEntrySize);
+ if (isMipsR6()) {
+ write16<E>(Buf, 0x7840); // addiupc $2, (GOTPLT) - .
+ write16<E>(Buf + 4, 0xff22); // lw $25, 0($2)
+ write16<E>(Buf + 8, 0x0f02); // move $24, $2
+ write16<E>(Buf + 10, 0x4723); // jrc $25 / jr16 $25
+ relocateOne(Buf, R_MICROMIPS_PC19_S2, GotPltEntryAddr - PltEntryAddr);
+ } else {
+ write16<E>(Buf, 0x7900); // addiupc $2, (GOTPLT) - .
+ write16<E>(Buf + 4, 0xff22); // lw $25, 0($2)
+ write16<E>(Buf + 8, 0x4599); // jrc $25 / jr16 $25
+ write16<E>(Buf + 10, 0x0f02); // move $24, $2
+ relocateOne(Buf, R_MICROMIPS_PC23_S2, GotPltEntryAddr - PltEntryAddr);
+ }
+ return;
+ }
+ uint32_t JrInst = isMipsR6() ? (Config->ZHazardplt ? 0x03200409 : 0x03200009)
+ : (Config->ZHazardplt ? 0x03200408 : 0x03200008);
+ write32<E>(Buf, 0x3c0f0000); // lui $15, %hi(.got.plt entry)
+ write32<E>(Buf + 4, 0x8df90000); // l[wd] $25, %lo(.got.plt entry)($15)
+ write32<E>(Buf + 8, JrInst); // jr $25 / jr.hb $25
+ write32<E>(Buf + 12, 0x25f80000); // addiu $24, $15, %lo(.got.plt entry)
+ writeValue<E>(Buf, GotPltEntryAddr + 0x8000, 16, 16);
+ writeValue<E>(Buf + 4, GotPltEntryAddr, 16, 0);
+ writeValue<E>(Buf + 12, GotPltEntryAddr, 16, 0);
+template <class ELFT>
+bool MIPS<ELFT>::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
+ uint64_t BranchAddr, const Symbol &S) const {
+ // Any MIPS PIC code function is invoked with its address in register $t9.
+ // So if we have a branch instruction from non-PIC code to PIC code,
+ // we cannot make the jump directly and need to create a small stub
+ // to save the target function address.
+ // See page 3-38 ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
+ if (Type != R_MIPS_26 && Type != R_MICROMIPS_26_S1 &&
+ Type != R_MICROMIPS_PC26_S1)
+ return false;
+ auto *F = dyn_cast_or_null<ELFFileBase<ELFT>>(File);
+ if (!F)
+ return false;
+ // If current file has PIC code, LA25 stub is not required.
+ if (F->getObj().getHeader()->e_flags & EF_MIPS_PIC)
+ return false;
+ auto *D = dyn_cast<Defined>(&S);
+ // LA25 is required if target file has PIC code
+ // or target symbol is a PIC symbol.
+ return D && isMipsPIC<ELFT>(D);
+template <class ELFT>
+int64_t MIPS<ELFT>::getImplicitAddend(const uint8_t *Buf, RelType Type) const {
+ const endianness E = ELFT::TargetEndianness;
+ switch (Type) {
+ case R_MIPS_32:
+ case R_MIPS_GPREL32:
+ case R_MIPS_TLS_TPREL32:
+ return SignExtend64<32>(read32<E>(Buf));
+ case R_MIPS_26:
+ // FIXME (simon): If the relocation target symbol is not a PLT entry
+ // we should use another expression for calculation:
+ // ((A << 2) | (P & 0xf0000000)) >> 2
+ return SignExtend64<28>(read32<E>(Buf) << 2);
+ case R_MIPS_GOT16:
+ case R_MIPS_HI16:
+ case R_MIPS_PCHI16:
+ return SignExtend64<16>(read32<E>(Buf)) << 16;
+ case R_MIPS_GPREL16:
+ case R_MIPS_LO16:
+ case R_MIPS_PCLO16:
+ return SignExtend64<16>(read32<E>(Buf));
+ case R_MICROMIPS_HI16:
+ return SignExtend64<16>(readShuffle<E>(Buf)) << 16;
+ case R_MICROMIPS_LO16:
+ return SignExtend64<16>(readShuffle<E>(Buf));
+ return SignExtend64<9>(readShuffle<E>(Buf) << 2);
+ case R_MIPS_PC16:
+ return SignExtend64<18>(read32<E>(Buf) << 2);
+ case R_MIPS_PC19_S2:
+ return SignExtend64<21>(read32<E>(Buf) << 2);
+ case R_MIPS_PC21_S2:
+ return SignExtend64<23>(read32<E>(Buf) << 2);
+ case R_MIPS_PC26_S2:
+ return SignExtend64<28>(read32<E>(Buf) << 2);
+ case R_MIPS_PC32:
+ return SignExtend64<32>(read32<E>(Buf));
+ case R_MICROMIPS_26_S1:
+ return SignExtend64<27>(readShuffle<E>(Buf) << 1);
+ case R_MICROMIPS_PC7_S1:
+ return SignExtend64<8>(read16<E>(Buf) << 1);
+ case R_MICROMIPS_PC10_S1:
+ return SignExtend64<11>(read16<E>(Buf) << 1);
+ case R_MICROMIPS_PC16_S1:
+ return SignExtend64<17>(readShuffle<E>(Buf) << 1);
+ case R_MICROMIPS_PC18_S3:
+ return SignExtend64<21>(readShuffle<E>(Buf) << 3);
+ case R_MICROMIPS_PC19_S2:
+ return SignExtend64<21>(readShuffle<E>(Buf) << 2);
+ case R_MICROMIPS_PC21_S1:
+ return SignExtend64<22>(readShuffle<E>(Buf) << 1);
+ case R_MICROMIPS_PC23_S2:
+ return SignExtend64<25>(readShuffle<E>(Buf) << 2);
+ case R_MICROMIPS_PC26_S1:
+ return SignExtend64<27>(readShuffle<E>(Buf) << 1);
+ default:
+ return 0;
+ }
+static std::pair<uint32_t, uint64_t>
+calculateMipsRelChain(uint8_t *Loc, RelType Type, uint64_t Val) {
+ // The MIPS N64 ABI packs multiple relocations into a single relocation
+ // record. In general, each of the up to three relocations can have an
+ // arbitrary type. In practice, Clang and GCC use only a few combinations.
+ // For now, we support two of them, which is enough to pass at least all
+ // LLVM test suite cases.
+ // <any relocation> / R_MIPS_SUB / R_MIPS_HI16 | R_MIPS_LO16
+ // <any relocation> / R_MIPS_64 / R_MIPS_NONE
+ // The first relocation is a 'real' relocation which is calculated
+ // using the corresponding symbol's value. The second and the third
+ // relocations are used to modify the result of the first one: extend it to
+ // 64 bits, extract the high or low part, etc. For details, see part 2.9
+ // Relocation at https://dmz-portal.mips.com/mw/images/8/82/007-4658-001.pdf
+ RelType Type2 = (Type >> 8) & 0xff;
+ RelType Type3 = (Type >> 16) & 0xff;
+ if (Type2 == R_MIPS_NONE && Type3 == R_MIPS_NONE)
+ return std::make_pair(Type, Val);
+ if (Type2 == R_MIPS_64 && Type3 == R_MIPS_NONE)
+ return std::make_pair(Type2, Val);
+ if (Type2 == R_MIPS_SUB && (Type3 == R_MIPS_HI16 || Type3 == R_MIPS_LO16))
+ return std::make_pair(Type3, -Val);
+ error(getErrorLocation(Loc) + "unsupported relocations combination " +
+ Twine(Type));
+ return std::make_pair(Type & 0xff, Val);
+template <class ELFT>
+void MIPS<ELFT>::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
+ const endianness E = ELFT::TargetEndianness;
+ if (ELFT::Is64Bits || Config->MipsN32Abi)
+ std::tie(Type, Val) = calculateMipsRelChain(Loc, Type, Val);
+ // Thread pointer and DTP offsets from the start of TLS data area.
+ // https://www.linux-mips.org/wiki/NPTL
+ if (Type == R_MIPS_TLS_DTPREL_HI16 || Type == R_MIPS_TLS_DTPREL_LO16 ||
+ Type == R_MIPS_TLS_DTPREL32 || Type == R_MIPS_TLS_DTPREL64 ||
+ Val -= 0x8000;
+ } else if (Type == R_MIPS_TLS_TPREL_HI16 || Type == R_MIPS_TLS_TPREL_LO16 ||
+ Type == R_MIPS_TLS_TPREL32 || Type == R_MIPS_TLS_TPREL64 ||
+ Val -= 0x7000;
+ }
+ switch (Type) {
+ case R_MIPS_32:
+ case R_MIPS_GPREL32:
+ case R_MIPS_TLS_TPREL32:
+ write32<E>(Loc, Val);
+ break;
+ case R_MIPS_64:
+ case R_MIPS_TLS_TPREL64:
+ write64<E>(Loc, Val);
+ break;
+ case R_MIPS_26:
+ writeValue<E>(Loc, Val, 26, 2);
+ break;
+ case R_MIPS_GOT16:
+ // The R_MIPS_GOT16 relocation's value in "relocatable" linking mode
+ // is updated addend (not a GOT index). In that case write high 16 bits
+ // to store a correct addend value.
+ if (Config->Relocatable) {
+ writeValue<E>(Loc, Val + 0x8000, 16, 16);
+ } else {
+ checkInt(Loc, Val, 16, Type);
+ writeValue<E>(Loc, Val, 16, 0);
+ }
+ break;
+ if (Config->Relocatable) {
+ writeShuffleValue<E>(Loc, Val + 0x8000, 16, 16);
+ } else {
+ checkInt(Loc, Val, 16, Type);
+ writeShuffleValue<E>(Loc, Val, 16, 0);
+ }
+ break;
+ case R_MIPS_CALL16:
+ case R_MIPS_GPREL16:
+ case R_MIPS_TLS_GD:
+ case R_MIPS_TLS_LDM:
+ checkInt(Loc, Val, 16, Type);
+ case R_MIPS_CALL_LO16:
+ case R_MIPS_GOT_LO16:
+ case R_MIPS_LO16:
+ case R_MIPS_PCLO16:
+ writeValue<E>(Loc, Val, 16, 0);
+ break;
+ checkInt(Loc, Val, 16, Type);
+ writeShuffleValue<E>(Loc, Val, 16, 0);
+ break;
+ case R_MICROMIPS_LO16:
+ writeShuffleValue<E>(Loc, Val, 16, 0);
+ break;
+ checkInt(Loc, Val, 7, Type);
+ writeShuffleValue<E>(Loc, Val, 7, 2);
+ break;
+ case R_MIPS_CALL_HI16:
+ case R_MIPS_GOT_HI16:
+ case R_MIPS_HI16:
+ case R_MIPS_PCHI16:
+ writeValue<E>(Loc, Val + 0x8000, 16, 16);
+ break;
+ case R_MICROMIPS_HI16:
+ writeShuffleValue<E>(Loc, Val + 0x8000, 16, 16);
+ break;
+ writeValue<E>(Loc, Val + 0x80008000, 16, 32);
+ break;
+ writeValue<E>(Loc, Val + 0x800080008000, 16, 48);
+ break;
+ case R_MIPS_JALR:
+ // Ignore this optimization relocation for now
+ break;
+ case R_MIPS_PC16:
+ checkAlignment(Loc, Val, 4, Type);
+ checkInt(Loc, Val, 18, Type);
+ writeValue<E>(Loc, Val, 16, 2);
+ break;
+ case R_MIPS_PC19_S2:
+ checkAlignment(Loc, Val, 4, Type);
+ checkInt(Loc, Val, 21, Type);
+ writeValue<E>(Loc, Val, 19, 2);
+ break;
+ case R_MIPS_PC21_S2:
+ checkAlignment(Loc, Val, 4, Type);
+ checkInt(Loc, Val, 23, Type);
+ writeValue<E>(Loc, Val, 21, 2);
+ break;
+ case R_MIPS_PC26_S2:
+ checkAlignment(Loc, Val, 4, Type);
+ checkInt(Loc, Val, 28, Type);
+ writeValue<E>(Loc, Val, 26, 2);
+ break;
+ case R_MIPS_PC32:
+ writeValue<E>(Loc, Val, 32, 0);
+ break;
+ case R_MICROMIPS_26_S1:
+ case R_MICROMIPS_PC26_S1:
+ checkInt(Loc, Val, 27, Type);
+ writeShuffleValue<E>(Loc, Val, 26, 1);
+ break;
+ case R_MICROMIPS_PC7_S1:
+ checkInt(Loc, Val, 8, Type);
+ writeMicroRelocation16<E>(Loc, Val, 7, 1);
+ break;
+ case R_MICROMIPS_PC10_S1:
+ checkInt(Loc, Val, 11, Type);
+ writeMicroRelocation16<E>(Loc, Val, 10, 1);
+ break;
+ case R_MICROMIPS_PC16_S1:
+ checkInt(Loc, Val, 17, Type);
+ writeShuffleValue<E>(Loc, Val, 16, 1);
+ break;
+ case R_MICROMIPS_PC18_S3:
+ checkInt(Loc, Val, 21, Type);
+ writeShuffleValue<E>(Loc, Val, 18, 3);
+ break;
+ case R_MICROMIPS_PC19_S2:
+ checkInt(Loc, Val, 21, Type);
+ writeShuffleValue<E>(Loc, Val, 19, 2);
+ break;
+ case R_MICROMIPS_PC21_S1:
+ checkInt(Loc, Val, 22, Type);
+ writeShuffleValue<E>(Loc, Val, 21, 1);
+ break;
+ case R_MICROMIPS_PC23_S2:
+ checkInt(Loc, Val, 25, Type);
+ writeShuffleValue<E>(Loc, Val, 23, 2);
+ break;
+ default:
+ error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type));
+ }
+template <class ELFT> bool MIPS<ELFT>::usesOnlyLowPageBits(RelType Type) const {
+ return Type == R_MIPS_LO16 || Type == R_MIPS_GOT_OFST ||
+ Type == R_MICROMIPS_LO16;
+// Return true if the symbol is a PIC function.
+template <class ELFT> bool elf::isMipsPIC(const Defined *Sym) {
+ if (!Sym->isFunc())
+ return false;
+ if (Sym->StOther & STO_MIPS_PIC)
+ return true;
+ if (!Sym->Section)
+ return false;
+ ObjFile<ELFT> *File =
+ cast<InputSectionBase>(Sym->Section)->template getFile<ELFT>();
+ if (!File)
+ return false;
+ return File->getObj().getHeader()->e_flags & EF_MIPS_PIC;
+template <class ELFT> TargetInfo *elf::getMipsTargetInfo() {
+ static MIPS<ELFT> Target;
+ return &Target;
+template TargetInfo *elf::getMipsTargetInfo<ELF32LE>();
+template TargetInfo *elf::getMipsTargetInfo<ELF32BE>();
+template TargetInfo *elf::getMipsTargetInfo<ELF64LE>();
+template TargetInfo *elf::getMipsTargetInfo<ELF64BE>();
+template bool elf::isMipsPIC<ELF32LE>(const Defined *);
+template bool elf::isMipsPIC<ELF32BE>(const Defined *);
+template bool elf::isMipsPIC<ELF64LE>(const Defined *);
+template bool elf::isMipsPIC<ELF64BE>(const Defined *);
diff --git a/contrib/llvm/tools/lld/ELF/Arch/MipsArchTree.cpp b/contrib/llvm/tools/lld/ELF/Arch/MipsArchTree.cpp
new file mode 100644
index 000000000000..98ceac3075e0
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Arch/MipsArchTree.cpp
@@ -0,0 +1,390 @@
+//===- MipsArchTree.cpp --------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file contains a helper function for the Writer.
+#include "InputFiles.h"
+#include "SymbolTable.h"
+#include "Writer.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Support/MipsABIFlags.h"
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::ELF;
+using namespace lld;
+using namespace lld::elf;
+namespace {
+struct ArchTreeEdge { // One Child -> Parent link of the ISA forest walked by isArchMatched().
+ uint32_t Child;
+ uint32_t Parent;
+struct FileFlags { // An input file together with the e_flags read from its ELF header (see calcMipsEFlags).
+ InputFile *File;
+ uint32_t Flags;
+} // namespace
+static StringRef getAbiName(uint32_t Flags) { // Maps ABI flag bits to a printable ABI name for diagnostics.
+ switch (Flags) { // Compared without masking: checkFlags() passes already-masked ABI bits.
+ case 0:
+ return "n64";
+ case EF_MIPS_ABI2:
+ return "n32";
+ case EF_MIPS_ABI_O32:
+ return "o32";
+ case EF_MIPS_ABI_O64:
+ return "o64";
+ case EF_MIPS_ABI_EABI32:
+ return "eabi32";
+ case EF_MIPS_ABI_EABI64:
+ return "eabi64";
+ default:
+ return "unknown";
+ }
+static StringRef getNanName(bool IsNan2008) { // Text appended after "-mnan=" in checkFlags() diagnostics.
+ return IsNan2008 ? "2008" : "legacy";
+static StringRef getFpName(bool IsFp64) { return IsFp64 ? "64" : "32"; } // Text appended after "-mfp" in checkFlags() diagnostics.
+static void checkFlags(ArrayRef<FileFlags> Files) { // Reports an error for every input whose ABI / NaN / FP flags disagree with the first file.
+ assert(!Files.empty() && "expected non-empty file list");
+ uint32_t ABI = Files[0].Flags & (EF_MIPS_ABI | EF_MIPS_ABI2); // The first file defines the reference ("target") values.
+ bool Nan = Files[0].Flags & EF_MIPS_NAN2008;
+ bool Fp = Files[0].Flags & EF_MIPS_FP64;
+ for (const FileFlags &F : Files) { // Includes Files[0], so the microMIPS check below covers it as well.
+ if (Config->Is64 && F.Flags & EF_MIPS_MICROMIPS)
+ error(toString(F.File) + ": microMIPS 64-bit is not supported");
+ uint32_t ABI2 = F.Flags & (EF_MIPS_ABI | EF_MIPS_ABI2);
+ if (ABI != ABI2)
+ error(toString(F.File) + ": ABI '" + getAbiName(ABI2) +
+ "' is incompatible with target ABI '" + getAbiName(ABI) + "'");
+ bool Nan2 = F.Flags & EF_MIPS_NAN2008;
+ if (Nan != Nan2)
+ error(toString(F.File) + ": -mnan=" + getNanName(Nan2) +
+ " is incompatible with target -mnan=" + getNanName(Nan));
+ bool Fp2 = F.Flags & EF_MIPS_FP64;
+ if (Fp != Fp2)
+ error(toString(F.File) + ": -mfp" + getFpName(Fp2) +
+ " is incompatible with target -mfp" + getFpName(Fp));
+ }
+static uint32_t getMiscFlags(ArrayRef<FileFlags> Files) { // ORs together a masked subset of every file's flags.
+ uint32_t Ret = 0;
+ for (const FileFlags &F : Files)
+ Ret |= F.Flags & // NOTE(review): the mask operand is missing from this excerpt; restore it from the upstream file.
+ return Ret;
+static uint32_t getPicFlags(ArrayRef<FileFlags> Files) { // Warns on PIC/non-PIC mixes and returns the PIC/CPIC bits common to all inputs.
+ // Check PIC/non-PIC compatibility.
+ bool IsPic = Files[0].Flags & (EF_MIPS_PIC | EF_MIPS_CPIC); // The first file is the reference; callers must pass a non-empty list.
+ for (const FileFlags &F : Files.slice(1)) {
+ bool IsPic2 = F.Flags & (EF_MIPS_PIC | EF_MIPS_CPIC);
+ if (IsPic && !IsPic2)
+ warn(toString(F.File) +
+ ": linking non-abicalls code with abicalls code " +
+ toString(Files[0].File));
+ if (!IsPic && IsPic2)
+ warn(toString(F.File) +
+ ": linking abicalls code with non-abicalls code " +
+ toString(Files[0].File));
+ }
+ // Compute the result PIC/non-PIC flag.
+ uint32_t Ret = Files[0].Flags & (EF_MIPS_PIC | EF_MIPS_CPIC);
+ for (const FileFlags &F : Files.slice(1))
+ Ret &= F.Flags & (EF_MIPS_PIC | EF_MIPS_CPIC); // Intersection: a bit survives only if every file has it.
+ // PIC code is inherently CPIC and may not set CPIC flag explicitly.
+ if (Ret & EF_MIPS_PIC)
+ Ret |= EF_MIPS_CPIC;
+ return Ret;
+static ArchTreeEdge ArchTree[] = { // Child -> Parent edges scanned by isArchMatched(). NOTE(review): the table entries are missing from this excerpt.
+ // MIPS32R6 and MIPS64R6 are not compatible with other extensions
+ // MIPS64R2 extensions.
+ // MIPS64 extensions.
+ // MIPS V extensions.
+ // R5000 extensions.
+ // MIPS IV extensions.
+ // VR4100 extensions.
+ // MIPS III extensions.
+ // MIPS32 extensions.
+ // MIPS II extensions.
+ // MIPS I extensions.
+static bool isArchMatched(uint32_t New, uint32_t Res) { // True if New equals Res or is reached by following ArchTree edges upward from Res.
+ if (New == Res)
+ return true;
+ if (New == EF_MIPS_ARCH_32 && isArchMatched(EF_MIPS_ARCH_64, Res)) // mips32 also matches wherever mips64 matches.
+ return true;
+ if (New == EF_MIPS_ARCH_32R2 && isArchMatched(EF_MIPS_ARCH_64R2, Res)) // Likewise mips32r2 via mips64r2.
+ return true;
+ for (const auto &Edge : ArchTree) { // Single forward pass over the table.
+ if (Res == Edge.Child) {
+ Res = Edge.Parent; // Step up one level and keep scanning the remaining edges.
+ if (Res == New)
+ return true;
+ }
+ }
+ return false;
+static StringRef getMachName(uint32_t Flags) { // Printable name for the EF_MIPS_MACH field of Flags.
+ switch (Flags & EF_MIPS_MACH) {
+ return ""; // NOTE(review): the case label for this return is missing from this excerpt; getFullArchName() treats "" as "no machine".
+ case EF_MIPS_MACH_3900:
+ return "r3900";
+ case EF_MIPS_MACH_4010:
+ return "r4010";
+ case EF_MIPS_MACH_4100:
+ return "r4100";
+ case EF_MIPS_MACH_4650:
+ return "r4650";
+ case EF_MIPS_MACH_4120:
+ return "r4120";
+ case EF_MIPS_MACH_4111:
+ return "r4111";
+ case EF_MIPS_MACH_5400:
+ return "vr5400";
+ case EF_MIPS_MACH_5900:
+ return "vr5900";
+ case EF_MIPS_MACH_5500:
+ return "vr5500";
+ case EF_MIPS_MACH_9000:
+ return "rm9000";
+ return "loongson2e"; // NOTE(review): this and the following bare returns have lost their case labels in this excerpt.
+ return "loongson2f";
+ return "loongson3a";
+ return "octeon";
+ return "octeon2";
+ return "octeon3";
+ case EF_MIPS_MACH_SB1:
+ return "sb1";
+ return "xlr"; // NOTE(review): case label missing here as well.
+ default:
+ return "unknown machine";
+ }
+static StringRef getArchName(uint32_t Flags) { // Printable ISA name for the EF_MIPS_ARCH field of Flags.
+ switch (Flags & EF_MIPS_ARCH) { // Other bits of Flags are ignored.
+ case EF_MIPS_ARCH_1:
+ return "mips1";
+ case EF_MIPS_ARCH_2:
+ return "mips2";
+ case EF_MIPS_ARCH_3:
+ return "mips3";
+ case EF_MIPS_ARCH_4:
+ return "mips4";
+ case EF_MIPS_ARCH_5:
+ return "mips5";
+ case EF_MIPS_ARCH_32:
+ return "mips32";
+ case EF_MIPS_ARCH_64:
+ return "mips64";
+ case EF_MIPS_ARCH_32R2:
+ return "mips32r2";
+ case EF_MIPS_ARCH_64R2:
+ return "mips64r2";
+ case EF_MIPS_ARCH_32R6:
+ return "mips32r6";
+ case EF_MIPS_ARCH_64R6:
+ return "mips64r6";
+ default:
+ return "unknown arch";
+ }
+static std::string getFullArchName(uint32_t Flags) { // Returns "<arch>" or "<arch> (<mach>)" for diagnostics.
+ StringRef Arch = getArchName(Flags);
+ StringRef Mach = getMachName(Flags);
+ if (Mach.empty()) // No machine name: print the ISA alone.
+ return Arch.str();
+ return (Arch + " (" + Mach + ")").str();
+// There are (arguably too) many MIPS ISAs out there. Their relationships
+// can be represented as a forest. If all input files have ISAs which are
+// reachable by repeatedly proceeding from a single child to its parent,
+// these input files are compatible. In that case we need to return the
+// "highest" ISA. If there are incompatible input files, we show an error.
+// For example, mips1 is a "parent" of mips2 and such files are compatible.
+// The output file gets the EF_MIPS_ARCH_2 flag. On the other hand, mips3 and
+// mips32 are incompatible because neither mips3 is a parent of mips32, nor
+// mips32 is a parent of mips3.
+static uint32_t getArchFlags(ArrayRef<FileFlags> Files) { // Folds the ARCH|MACH bits of all inputs into one value; returns 0 after reporting an incompatibility.
+ uint32_t Ret = Files[0].Flags & (EF_MIPS_ARCH | EF_MIPS_MACH); // Start from the first file; callers must pass a non-empty list.
+ for (const FileFlags &F : Files.slice(1)) {
+ uint32_t New = F.Flags & (EF_MIPS_ARCH | EF_MIPS_MACH);
+ // Check ISA compatibility.
+ if (isArchMatched(New, Ret)) // New is already covered by the current result; keep Ret.
+ continue;
+ if (!isArchMatched(Ret, New)) { // Neither direction matches: the ISAs are unrelated.
+ error("incompatible target ISA:\n>>> " + toString(Files[0].File) + ": " +
+ getFullArchName(Ret) + "\n>>> " + toString(F.File) + ": " +
+ getFullArchName(New));
+ return 0;
+ }
+ Ret = New; // Ret is covered by New, so New becomes the result.
+ }
+ return Ret;
+template <class ELFT> uint32_t elf::calcMipsEFlags() { // Combines the e_flags of all object files into one value; 0 when there are no inputs.
+ std::vector<FileFlags> V;
+ for (InputFile *F : ObjectFiles)
+ V.push_back({F, cast<ObjFile<ELFT>>(F)->getObj().getHeader()->e_flags});
+ if (V.empty()) // The helpers below index V[0], so bail out first.
+ return 0;
+ checkFlags(V); // Reports errors only; does not contribute to the result.
+ return getMiscFlags(V) | getPicFlags(V) | getArchFlags(V);
+static int compareMipsFpAbi(uint8_t FpA, uint8_t FpB) { // Returns 0 if equal, 1 if FpA may take the place of FpB (see getMipsFpAbiFlag), otherwise -1.
+ if (FpA == FpB)
+ return 0;
+ if (FpB == Mips::Val_GNU_MIPS_ABI_FP_ANY) // Any value may replace FP_ANY.
+ return 1;
+ if (FpB == Mips::Val_GNU_MIPS_ABI_FP_64A &&
+ FpA == Mips::Val_GNU_MIPS_ABI_FP_64) // FP_64 may replace FP_64A.
+ return 1;
+ if (FpB != Mips::Val_GNU_MIPS_ABI_FP_XX) // From here on, only FP_XX can be replaced.
+ return -1;
+ if (FpA == Mips::Val_GNU_MIPS_ABI_FP_DOUBLE ||
+ FpA == Mips::Val_GNU_MIPS_ABI_FP_64 ||
+ FpA == Mips::Val_GNU_MIPS_ABI_FP_64A)
+ return 1;
+ return -1;
+static StringRef getMipsFpAbiName(uint8_t FpAbi) { // Text used for an FP ABI value in getMipsFpAbiFlag() diagnostics.
+ switch (FpAbi) {
+ case Mips::Val_GNU_MIPS_ABI_FP_ANY:
+ return "any";
+ case Mips::Val_GNU_MIPS_ABI_FP_DOUBLE:
+ return "-mdouble-float";
+ case Mips::Val_GNU_MIPS_ABI_FP_SINGLE:
+ return "-msingle-float";
+ case Mips::Val_GNU_MIPS_ABI_FP_SOFT:
+ return "-msoft-float";
+ case Mips::Val_GNU_MIPS_ABI_FP_OLD_64:
+ return "-mgp32 -mfp64 (old)";
+ case Mips::Val_GNU_MIPS_ABI_FP_XX:
+ return "-mfpxx";
+ case Mips::Val_GNU_MIPS_ABI_FP_64:
+ return "-mgp32 -mfp64";
+ case Mips::Val_GNU_MIPS_ABI_FP_64A:
+ return "-mgp32 -mfp64 -mno-odd-spreg";
+ default:
+ return "unknown";
+ }
+uint8_t elf::getMipsFpAbiFlag(uint8_t OldFlag, uint8_t NewFlag, // Merges NewFlag (from FileName) into the accumulated OldFlag.
+ StringRef FileName) {
+ if (compareMipsFpAbi(NewFlag, OldFlag) >= 0) // Equal, or NewFlag may replace OldFlag.
+ return NewFlag;
+ if (compareMipsFpAbi(OldFlag, NewFlag) < 0) // Neither may replace the other: report it.
+ error(FileName + ": floating point ABI '" + getMipsFpAbiName(NewFlag) +
+ "' is incompatible with target floating point ABI '" +
+ getMipsFpAbiName(OldFlag) + "'");
+ return OldFlag; // OldFlag is kept, including after an error.
+template <class ELFT> static bool isN32Abi(const InputFile *F) { // True if F is an ELF file whose e_flags has EF_MIPS_ABI2 set.
+ if (auto *EF = dyn_cast<ELFFileBase<ELFT>>(F))
+ return EF->getObj().getHeader()->e_flags & EF_MIPS_ABI2;
+ return false; // F is not an ELFFileBase<ELFT>.
+bool elf::isMipsN32Abi(const InputFile *F) { // Dispatches to isN32Abi<ELFT> for the ELF kind in Config->EKind.
+ switch (Config->EKind) {
+ case ELF32LEKind:
+ return isN32Abi<ELF32LE>(F);
+ case ELF32BEKind:
+ return isN32Abi<ELF32BE>(F);
+ case ELF64LEKind:
+ return isN32Abi<ELF64LE>(F);
+ case ELF64BEKind:
+ return isN32Abi<ELF64BE>(F);
+ default:
+ llvm_unreachable("unknown Config->EKind");
+ }
+bool elf::isMicroMips() { return Config->EFlags & EF_MIPS_MICROMIPS; } // True if Config->EFlags has the microMIPS bit set.
+bool elf::isMipsR6() { // True if the ARCH field of Config->EFlags is mips32r6 or mips64r6.
+ uint32_t Arch = Config->EFlags & EF_MIPS_ARCH;
+ return Arch == EF_MIPS_ARCH_32R6 || Arch == EF_MIPS_ARCH_64R6;
+template uint32_t elf::calcMipsEFlags<ELF32LE>(); // Explicit instantiations for all four ELF kinds (32/64-bit, LE/BE).
+template uint32_t elf::calcMipsEFlags<ELF32BE>();
+template uint32_t elf::calcMipsEFlags<ELF64LE>();
+template uint32_t elf::calcMipsEFlags<ELF64BE>();
diff --git a/contrib/llvm/tools/lld/ELF/Arch/PPC.cpp b/contrib/llvm/tools/lld/ELF/Arch/PPC.cpp
new file mode 100644
index 000000000000..767378067341
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Arch/PPC.cpp
@@ -0,0 +1,81 @@
+//===- PPC.cpp ------------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Symbols.h"
+#include "Target.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/Support/Endian.h"
+using namespace llvm;
+using namespace llvm::support::endian;
+using namespace llvm::ELF;
+using namespace lld;
+using namespace lld::elf;
+namespace {
+class PPC final : public TargetInfo { // 32-bit PowerPC target; only the hooks declared below are overridden.
+ PPC();
+ void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override; // Writes the relocated value at Loc.
+ RelExpr getRelExpr(RelType Type, const Symbol &S, // Maps a relocation type to a RelExpr.
+ const uint8_t *Loc) const override;
+} // namespace
+PPC::PPC() { // Sets the target parameters that differ from the TargetInfo defaults.
+ NoneRel = R_PPC_NONE;
+ GotBaseSymOff = 0x8000;
+ GotBaseSymInGotPlt = false;
+RelExpr PPC::getRelExpr(RelType Type, const Symbol &S, // Classifies Type; S and Loc are not used by this implementation.
+ const uint8_t *Loc) const {
+ switch (Type) {
+ case R_PPC_REL14:
+ case R_PPC_REL24:
+ case R_PPC_REL32:
+ return R_PC;
+ case R_PPC_PLTREL24:
+ return R_PLT_PC;
+ default:
+ return R_ABS; // Every other type, including unknown ones, is treated as absolute.
+ }
+void PPC::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const { // Applies Val at Loc; every write is big-endian.
+ switch (Type) {
+ case R_PPC_ADDR16_HA:
+ write16be(Loc, (Val + 0x8000) >> 16); // High 16 bits, adjusted by 0x8000 before shifting.
+ break;
+ case R_PPC_ADDR16_HI:
+ write16be(Loc, Val >> 16);
+ break;
+ case R_PPC_ADDR16_LO:
+ write16be(Loc, Val);
+ break;
+ case R_PPC_ADDR32:
+ case R_PPC_REL32:
+ write32be(Loc, Val);
+ break;
+ case R_PPC_REL14:
+ write32be(Loc, read32be(Loc) | (Val & 0xFFFC)); // ORs the masked value into the existing word; no range check is done here.
+ break;
+ case R_PPC_PLTREL24:
+ case R_PPC_REL24:
+ write32be(Loc, read32be(Loc) | (Val & 0x3FFFFFC)); // Same, with a wider mask; no range check is done here.
+ break;
+ default:
+ error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type));
+ }
+TargetInfo *elf::getPPCTargetInfo() { // Returns the address of a function-local static PPC instance.
+ static PPC Target;
+ return &Target;
diff --git a/contrib/llvm/tools/lld/ELF/Arch/PPC64.cpp b/contrib/llvm/tools/lld/ELF/Arch/PPC64.cpp
new file mode 100644
index 000000000000..8a320c9a4e9e
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Arch/PPC64.cpp
@@ -0,0 +1,931 @@
+//===- PPC64.cpp ----------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/Support/Endian.h"
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::support::endian;
+using namespace llvm::ELF;
+using namespace lld;
+using namespace lld::elf;
+static uint64_t PPC64TocOffset = 0x8000; // Added to the .got address in getPPC64TocBase(); subtracted again in toAddr16Rel().
+static uint64_t DynamicThreadPointerOffset = 0x8000; // Subtracted from DTPREL values in toAddr16Rel().
+// The instruction encoding of bits 21-30 from the ISA for the Xform and Dform
+// instructions that can be used as part of the initial exec TLS sequence.
+enum XFormOpcd { // Matched in getDFormOp() against the secondary opcode extracted by relaxTlsIeToLe().
+ LBZX = 87,
+ LHZX = 279,
+ LWZX = 23,
+ LDX = 21,
+ STBX = 215,
+ STHX = 407,
+ STWX = 151,
+ STDX = 149,
+ ADD = 266,
+enum DFormOpcd { // Values are compared against getPrimaryOpCode() results and shifted into bits 26-31 by relaxTlsIeToLe().
+ LBZ = 34,
+ LBZU = 35,
+ LHZ = 40,
+ LHZU = 41,
+ LHAU = 43,
+ LWZ = 32,
+ LWZU = 33,
+ LFSU = 49,
+ LD = 58,
+ LFDU = 51,
+ STB = 38,
+ STBU = 39,
+ STH = 44,
+ STHU = 45,
+ STW = 36,
+ STWU = 37,
+ STFSU = 53,
+ STFDU = 55,
+ STD = 62,
+ ADDI = 14
+uint64_t elf::getPPC64TocBase() { // Returns the virtual address of .got plus PPC64TocOffset.
+ // The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The
+ // TOC starts where the first of these sections starts. We always create a
+ // .got when we see a relocation that uses it, so for us the start is always
+ // the .got.
+ uint64_t TocVA = In.Got->getVA();
+ // Per the ppc64-elf-linux ABI, the TOC base is the TOC value plus 0x8000,
+ // thus permitting a full 64 Kbytes segment. Note that the glibc startup
+ // code (crt1.o) assumes that you can get from the TOC base to the
+ // start of the .toc section with only a single (signed) 16-bit relocation.
+ return TocVA + PPC64TocOffset;
+unsigned elf::getPPC64GlobalEntryToLocalEntryOffset(uint8_t StOther) { // Decodes the byte offset between global and local entry points from st_other.
+ // The offset is encoded into the 3 most significant bits of the st_other
+ // field, with some special values described in section 3.4.1 of the ABI:
+ // 0 --> Zero offset between the GEP and LEP, and the function does NOT use
+ // the TOC pointer (r2). r2 will hold the same value on returning from
+ // the function as it did on entering the function.
+ // 1 --> Zero offset between the GEP and LEP, and r2 should be treated as a
+ // caller-saved register for all callers.
+ // 2-6 --> The binary logarithm of the offset eg:
+ // 2 --> 2^2 = 4 bytes --> 1 instruction.
+ // 6 --> 2^6 = 64 bytes --> 16 instructions.
+ // 7 --> Reserved.
+ uint8_t GepToLep = (StOther >> 5) & 7;
+ if (GepToLep < 2)
+ return 0;
+ // The value encoded in the st_other bits is the
+ // log-base-2(offset).
+ if (GepToLep < 7)
+ return 1 << GepToLep;
+ error("reserved value of 7 in the 3 most-significant-bits of st_other");
+ return 0; // Reached only for the reserved value 7, after the error above.
+namespace {
+class PPC64 final : public TargetInfo { // 64-bit PowerPC target; calcEFlags() accepts only ABI v2 (or unspecified) inputs.
+ PPC64();
+ uint32_t calcEFlags() const override;
+ RelExpr getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const override;
+ void writePltHeader(uint8_t *Buf) const override;
+ void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
+ int32_t Index, unsigned RelOff) const override;
+ void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+ void writeGotHeader(uint8_t *Buf) const override;
+ bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
+ uint64_t BranchAddr, const Symbol &S) const override;
+ bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
+ RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
+ RelExpr Expr) const override;
+ void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override; // TLS relaxations: general-dynamic to initial-exec,
+ void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; // general-dynamic to local-exec,
+ void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; // local-dynamic to local-exec,
+ void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; // initial-exec to local-exec.
+ bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
+ uint8_t StOther) const override;
+} // namespace
+// Relocation masks following the #lo(value), #hi(value), #ha(value),
+// #higher(value), #highera(value), #highest(value), and #highesta(value)
+// macros defined in section 4.5.1. Relocation Types of the PPC-elf64abi
+// document. Each helper truncates its result to 16 bits via the uint16_t return type.
+static uint16_t lo(uint64_t V) { return V; } // Bits 0-15.
+static uint16_t hi(uint64_t V) { return V >> 16; } // Bits 16-31.
+static uint16_t ha(uint64_t V) { return (V + 0x8000) >> 16; } // Bits 16-31 after adding 0x8000.
+static uint16_t higher(uint64_t V) { return V >> 32; } // Bits 32-47.
+static uint16_t highera(uint64_t V) { return (V + 0x8000) >> 32; } // Bits 32-47 after adding 0x8000.
+static uint16_t highest(uint64_t V) { return V >> 48; } // Bits 48-63.
+static uint16_t highesta(uint64_t V) { return (V + 0x8000) >> 48; } // Bits 48-63 after adding 0x8000.
+// Extracts the 'PO' field of an instruction encoding, i.e. the top 6 bits of the 32-bit word.
+static uint8_t getPrimaryOpCode(uint32_t Encoding) { return (Encoding >> 26); }
+static bool isDQFormInstruction(uint32_t Encoding) { // Callers use the result to pick a 4-bit (DQ) or 2-bit (DS) mask of preserved low bits.
+ switch (getPrimaryOpCode(Encoding)) {
+ default:
+ return false;
+ case 56:
+ // The only instruction with a primary opcode of 56 is `lq`.
+ return true;
+ case 61:
+ // There are both DS and DQ instruction forms with this primary opcode.
+ // Namely `lxv` and `stxv` are the DQ-forms that use it.
+ // The DS 'XO' bits being set to 01 is restricted to DQ form.
+ return (Encoding & 3) == 0x1;
+ }
+static bool isInstructionUpdateForm(uint32_t Encoding) { // True for the update-form loads/stores listed below; relocateOne() refuses to toc-optimize these.
+ switch (getPrimaryOpCode(Encoding)) {
+ default:
+ return false;
+ case LBZU:
+ case LHAU:
+ case LHZU:
+ case LWZU:
+ case LFSU:
+ case LFDU:
+ case STBU:
+ case STHU:
+ case STWU:
+ case STFSU:
+ case STFDU:
+ return true;
+ // LWA has the same opcode as LD, and the DS bits is what differentiates
+ // between LD/LDU/LWA
+ case LD:
+ case STD:
+ return (Encoding & 3) == 1; // Low two bits equal to 1 select the update form.
+ }
+// There are a number of places where we either want to read or write an
+// instruction when handling a half16 relocation type. On big-endian the buffer
+// pointer is pointing into the middle of the word we want to extract, and on
+// little-endian it is pointing to the start of the word. These 2 helpers are to
+// simplify reading and writing in that context.
+static void writeInstrFromHalf16(uint8_t *Loc, uint32_t Instr) { // Writes the 32-bit instruction that contains the half16 field at Loc.
+ write32(Loc - (Config->EKind == ELF64BEKind ? 2 : 0), Instr); // On big-endian, step back 2 bytes to the start of the word.
+static uint32_t readInstrFromHalf16(const uint8_t *Loc) { // Reads the 32-bit instruction that contains the half16 field at Loc.
+ return read32(Loc - (Config->EKind == ELF64BEKind ? 2 : 0)); // On big-endian, step back 2 bytes to the start of the word.
+PPC64::PPC64() { // Sets relocation types, table sizes and layout defaults for the PPC64 target.
+ GotRel = R_PPC64_GLOB_DAT;
+ NoneRel = R_PPC64_NONE;
+ PltRel = R_PPC64_JMP_SLOT;
+ RelativeRel = R_PPC64_RELATIVE;
+ IRelativeRel = R_PPC64_IRELATIVE;
+ GotEntrySize = 8;
+ PltEntrySize = 4; // writePlt() emits a single 32-bit instruction per entry.
+ GotPltEntrySize = 8;
+ GotBaseSymInGotPlt = false;
+ GotBaseSymOff = 0x8000;
+ GotHeaderEntriesNum = 1;
+ GotPltHeaderEntriesNum = 2;
+ PltHeaderSize = 60; // writePltHeader() emits 13 instructions (52 bytes) followed by an 8-byte offset.
+ NeedsThunks = true;
+ TlsModuleIndexRel = R_PPC64_DTPMOD64;
+ TlsOffsetRel = R_PPC64_DTPREL64;
+ TlsGotRel = R_PPC64_TPREL64;
+ NeedsMoreStackNonSplit = false;
+ // We need 64K pages (at least under glibc/Linux, the loader won't
+ // set different permissions on a finer granularity than that).
+ DefaultMaxPageSize = 65536;
+ // The PPC64 ELF ABI v1 spec says:
+ //
+ // It is normally desirable to put segments with different characteristics
+ // in separate 256 Mbyte portions of the address space, to give the
+ // operating system full paging flexibility in the 64-bit address space.
+ //
+ // And because the lowest non-zero 256M boundary is 0x10000000, PPC64 linkers
+ // use 0x10000000 as the starting address.
+ DefaultImageBase = 0x10000000;
+ write32(TrapInstr.data(), 0x7fe00008); // Stores the encoding used as the trap instruction; presumably `tw 31,0,0` -- confirm against the ISA.
+static uint32_t getEFlags(InputFile *File) { // Reads e_flags from File's ELF header, casting according to Config->EKind.
+ if (Config->EKind == ELF64BEKind)
+ return cast<ObjFile<ELF64BE>>(File)->getObj().getHeader()->e_flags;
+ return cast<ObjFile<ELF64LE>>(File)->getObj().getHeader()->e_flags; // Any other kind is treated as ELF64LE.
+// This file implements the v2 ABI. This function makes sure that all
+// object files have v2 or an unspecified version as their ABI version.
+uint32_t PPC64::calcEFlags() const {
+ for (InputFile *F : ObjectFiles) {
+ uint32_t Flag = getEFlags(F); // The whole e_flags value is compared, not a masked field.
+ if (Flag == 1)
+ error(toString(F) + ": ABI version 1 is not supported");
+ else if (Flag > 2)
+ error(toString(F) + ": unrecognized e_flags: " + Twine(Flag));
+ }
+ return 2; // The output is always marked as ABI v2, even when errors were reported.
+void PPC64::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { // Rewrites one instruction of a general-dynamic TLS sequence into its local-exec form.
+ // Reference: the 64-bit ELF V2 abi supplement. NOTE(review): the section number seems to be missing here (likely 3.7.4.2) -- confirm.
+ // The general dynamic code sequence for a global `x` will look like:
+ // Instruction Relocation Symbol
+ // addis r3, r2, x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x
+ // addi r3, r3, x@got@tlsgd@l R_PPC64_GOT_TLSGD16_LO x
+ // bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x
+ // R_PPC64_REL24 __tls_get_addr
+ // nop None None
+ // Relaxing to local exec entails converting:
+ // addis r3, r2, x@got@tlsgd@ha into nop
+ // addi r3, r3, x@got@tlsgd@l into addis r3, r13, x@tprel@ha
+ // bl __tls_get_addr(x@tlsgd) into nop
+ // nop into addi r3, r3, x@tprel@l
+ switch (Type) {
+ case R_PPC64_GOT_TLSGD16_HA:
+ writeInstrFromHalf16(Loc, 0x60000000); // nop
+ break;
+ case R_PPC64_GOT_TLSGD16:
+ case R_PPC64_GOT_TLSGD16_LO:
+ writeInstrFromHalf16(Loc, 0x3c6d0000); // addis r3, r13
+ relocateOne(Loc, R_PPC64_TPREL16_HA, Val);
+ break;
+ case R_PPC64_TLSGD:
+ write32(Loc, 0x60000000); // nop
+ write32(Loc + 4, 0x38630000); // addi r3, r3
+ // Since we are relocating a half16 type relocation and Loc + 4 points to
+ // the start of an instruction we need to advance the buffer by an extra
+ // 2 bytes on BE.
+ relocateOne(Loc + 4 + (Config->EKind == ELF64BEKind ? 2 : 0),
+ R_PPC64_TPREL16_LO, Val);
+ break;
+ default:
+ llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
+ }
+void PPC64::relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { // Rewrites one instruction of a local-dynamic TLS sequence into its local-exec form.
+ // Reference: the 64-bit ELF V2 abi supplement. NOTE(review): the section number seems to be missing here (likely 3.7.4.3) -- confirm.
+ // The local dynamic code sequence for a global `x` will look like:
+ // Instruction Relocation Symbol
+ // addis r3, r2, x@got@tlsld@ha R_PPC64_GOT_TLSLD16_HA x
+ // addi r3, r3, x@got@tlsld@l R_PPC64_GOT_TLSLD16_LO x
+ // bl __tls_get_addr(x@tlsgd) R_PPC64_TLSLD x
+ // R_PPC64_REL24 __tls_get_addr
+ // nop None None
+ // Relaxing to local exec entails converting:
+ // addis r3, r2, x@got@tlsld@ha into nop
+ // addi r3, r3, x@got@tlsld@l into addis r3, r13, 0
+ // bl __tls_get_addr(x@tlsgd) into nop
+ // nop into addi r3, r3, 4096
+ switch (Type) {
+ case R_PPC64_GOT_TLSLD16_HA:
+ writeInstrFromHalf16(Loc, 0x60000000); // nop
+ break;
+ case R_PPC64_GOT_TLSLD16_LO:
+ writeInstrFromHalf16(Loc, 0x3c6d0000); // addis r3, r13, 0
+ break;
+ case R_PPC64_TLSLD:
+ write32(Loc, 0x60000000); // nop
+ write32(Loc + 4, 0x38631000); // addi r3, r3, 4096
+ break;
+ case R_PPC64_DTPREL16:
+ case R_PPC64_DTPREL16_HA:
+ case R_PPC64_DTPREL16_HI:
+ case R_PPC64_DTPREL16_DS:
+ case R_PPC64_DTPREL16_LO:
+ case R_PPC64_DTPREL16_LO_DS:
+ case R_PPC64_GOT_DTPREL16_HA:
+ case R_PPC64_GOT_DTPREL16_LO_DS:
+ case R_PPC64_GOT_DTPREL16_DS:
+ case R_PPC64_GOT_DTPREL16_HI:
+ relocateOne(Loc, Type, Val); // These types are applied unchanged; no instruction is rewritten.
+ break;
+ default:
+ llvm_unreachable("unsupported relocation for TLS LD to LE relaxation");
+ }
+static unsigned getDFormOp(unsigned SecondaryOp) { // Maps an X-form secondary opcode to the primary opcode of its D-form counterpart.
+ switch (SecondaryOp) {
+ case LBZX:
+ return LBZ;
+ case LHZX:
+ return LHZ;
+ case LWZX:
+ return LWZ;
+ case LDX:
+ return LD;
+ case STBX:
+ return STB;
+ case STHX:
+ return STH;
+ case STWX:
+ return STW;
+ case STDX:
+ return STD;
+ case ADD:
+ return ADDI;
+ default:
+ error("unrecognized instruction for IE to LE R_PPC64_TLS");
+ return 0; // Returned after reporting the error; the caller still uses this value.
+ }
+void PPC64::relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { // Rewrites one instruction of an initial-exec TLS sequence into its local-exec form.
+ // The initial exec code sequence for a global `x` will look like:
+ // Instruction Relocation Symbol
+ // addis r9, r2, x@got@tprel@ha R_PPC64_GOT_TPREL16_HA x
+ // ld r9, x@got@tprel@l(r9) R_PPC64_GOT_TPREL16_LO_DS x
+ // add r9, r9, x@tls R_PPC64_TLS x
+ // Relaxing to local exec entails converting:
+ // addis r9, r2, x@got@tprel@ha into nop
+ // ld r9, x@got@tprel@l(r9) into addis r9, r13, x@tprel@ha
+ // add r9, r9, x@tls into addi r9, r9, x@tprel@l
+ // x@tls R_PPC64_TLS is a relocation which does not compute anything,
+ // it is replaced with r13 (thread pointer).
+ // The add instruction in the initial exec sequence has multiple variations
+ // that need to be handled. If we are building an address it will use an add
+ // instruction, if we are accessing memory it will use any of the X-form
+ // indexed load or store instructions.
+ unsigned Offset = (Config->EKind == ELF64BEKind) ? 2 : 0; // Distance from the instruction start to its half16 field on big-endian.
+ switch (Type) {
+ case R_PPC64_GOT_TPREL16_HA:
+ write32(Loc - Offset, 0x60000000); // nop
+ break;
+ case R_PPC64_GOT_TPREL16_LO_DS:
+ case R_PPC64_GOT_TPREL16_DS: {
+ uint32_t RegNo = read32(Loc - Offset) & 0x03E00000; // bits 6-10
+ write32(Loc - Offset, 0x3C0D0000 | RegNo); // addis RegNo, r13
+ relocateOne(Loc, R_PPC64_TPREL16_HA, Val);
+ break;
+ }
+ case R_PPC64_TLS: {
+ uint32_t PrimaryOp = getPrimaryOpCode(read32(Loc)); // For this type Loc is used directly as the instruction start.
+ if (PrimaryOp != 31)
+ error("unrecognized instruction for IE to LE R_PPC64_TLS");
+ uint32_t SecondaryOp = (read32(Loc) & 0x000007FE) >> 1; // bits 21-30
+ uint32_t DFormOp = getDFormOp(SecondaryOp);
+ write32(Loc, ((DFormOp << 26) | (read32(Loc) & 0x03FFFFFF))); // Replace the primary opcode, keep the low 26 bits.
+ relocateOne(Loc + Offset, R_PPC64_TPREL16_LO, Val); // TPREL16_LO is a half16 relocation, hence the Offset adjustment.
+ break;
+ }
+ default:
+ llvm_unreachable("unknown relocation for IE to LE");
+ break;
+ }
+RelExpr PPC64::getRelExpr(RelType Type, const Symbol &S, // Classifies Type; S and Loc are not used by this implementation.
+ const uint8_t *Loc) const {
+ switch (Type) {
+ case R_PPC64_GOT16:
+ case R_PPC64_GOT16_DS:
+ case R_PPC64_GOT16_HA:
+ case R_PPC64_GOT16_HI:
+ case R_PPC64_GOT16_LO:
+ case R_PPC64_GOT16_LO_DS:
+ return R_GOT_OFF;
+ case R_PPC64_TOC16:
+ case R_PPC64_TOC16_DS:
+ case R_PPC64_TOC16_HA:
+ case R_PPC64_TOC16_HI:
+ case R_PPC64_TOC16_LO:
+ case R_PPC64_TOC16_LO_DS:
+ return R_GOTREL;
+ case R_PPC64_TOC:
+ return R_PPC_TOC;
+ case R_PPC64_REL14:
+ case R_PPC64_REL24:
+ return R_PPC_CALL_PLT;
+ case R_PPC64_REL16_LO:
+ case R_PPC64_REL16_HA:
+ case R_PPC64_REL32:
+ case R_PPC64_REL64:
+ return R_PC;
+ case R_PPC64_GOT_TLSGD16:
+ case R_PPC64_GOT_TLSGD16_HA:
+ case R_PPC64_GOT_TLSGD16_HI:
+ case R_PPC64_GOT_TLSGD16_LO:
+ return R_TLSGD_GOT;
+ case R_PPC64_GOT_TLSLD16:
+ case R_PPC64_GOT_TLSLD16_HA:
+ case R_PPC64_GOT_TLSLD16_HI:
+ case R_PPC64_GOT_TLSLD16_LO:
+ return R_TLSLD_GOT;
+ case R_PPC64_GOT_TPREL16_HA:
+ case R_PPC64_GOT_TPREL16_LO_DS:
+ case R_PPC64_GOT_TPREL16_DS:
+ case R_PPC64_GOT_TPREL16_HI:
+ return R_GOT_OFF; // Same expression as the plain GOT16 group above.
+ case R_PPC64_GOT_DTPREL16_HA:
+ case R_PPC64_GOT_DTPREL16_LO_DS:
+ case R_PPC64_GOT_DTPREL16_DS:
+ case R_PPC64_GOT_DTPREL16_HI:
+ return R_TLSLD_GOT_OFF;
+ case R_PPC64_TPREL16:
+ case R_PPC64_TPREL16_HA:
+ case R_PPC64_TPREL16_LO:
+ case R_PPC64_TPREL16_HI:
+ case R_PPC64_TPREL16_DS:
+ case R_PPC64_TPREL16_LO_DS:
+ case R_PPC64_TPREL16_HIGHER:
+ return R_TLS;
+ case R_PPC64_DTPREL16:
+ case R_PPC64_DTPREL16_DS:
+ case R_PPC64_DTPREL16_HA:
+ case R_PPC64_DTPREL16_HI:
+ case R_PPC64_DTPREL16_LO:
+ case R_PPC64_DTPREL16_LO_DS:
+ case R_PPC64_DTPREL64:
+ return R_ABS; // The DTP bias is applied later, in toAddr16Rel().
+ case R_PPC64_TLSGD:
+ return R_TLSDESC_CALL;
+ case R_PPC64_TLSLD:
+ return R_TLSLD_HINT;
+ case R_PPC64_TLS:
+ return R_TLSIE_HINT;
+ default:
+ return R_ABS; // Every other type, including unknown ones, is treated as absolute.
+ }
+void PPC64::writeGotHeader(uint8_t *Buf) const { // Stores the TOC base address as a 64-bit value at Buf.
+ write64(Buf, getPPC64TocBase());
+void PPC64::writePltHeader(uint8_t *Buf) const { // Emits 13 instructions (52 bytes) followed by an 8-byte offset to .got.plt.
+ // The generic resolver stub goes first.
+ write32(Buf + 0, 0x7c0802a6); // mflr r0
+ write32(Buf + 4, 0x429f0005); // bcl 20,4*cr7+so,8 <_glink+0x8>
+ write32(Buf + 8, 0x7d6802a6); // mflr r11
+ write32(Buf + 12, 0x7c0803a6); // mtlr r0
+ write32(Buf + 16, 0x7d8b6050); // subf r12, r11, r12
+ write32(Buf + 20, 0x380cffcc); // subi r0,r12,52
+ write32(Buf + 24, 0x7800f082); // srdi r0,r0,62,2
+ write32(Buf + 28, 0xe98b002c); // ld r12,44(r11)
+ write32(Buf + 32, 0x7d6c5a14); // add r11,r12,r11
+ write32(Buf + 36, 0xe98b0000); // ld r12,0(r11)
+ write32(Buf + 40, 0xe96b0008); // ld r11,8(r11)
+ write32(Buf + 44, 0x7d8903a6); // mtctr r12
+ write32(Buf + 48, 0x4e800420); // bctr
+ // The 'bcl' instruction will set the link register to the address of the
+ // following instruction ('mflr r11'). Here we store the offset from that
+ // instruction to the first entry in the GotPlt section.
+ int64_t GotPltOffset = In.GotPlt->getVA() - (In.Plt->getVA() + 8); // +8 is the offset of 'mflr r11' within the header.
+ write64(Buf + 52, GotPltOffset); // 52 = 8 + 44, matching the 'ld r12,44(r11)' above.
+void PPC64::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, // Emits one PLT entry: a single branch instruction.
+ uint64_t PltEntryAddr, int32_t Index,
+ unsigned RelOff) const { // Only Buf and Index are used here.
+ int32_t Offset = PltHeaderSize + Index * PltEntrySize; // Byte offset of this entry from the start of the PLT.
+ // bl __glink_PLTresolve
+ write32(Buf, 0x48000000 | ((-Offset) & 0x03FFFFFc)); // Negative displacement; presumably lands on the start of the PLT header -- confirm.
+static std::pair<RelType, uint64_t> toAddr16Rel(RelType Type, uint64_t Val) { // Maps biased relocation types to the matching ADDR* type and un-biased value.
+ // Relocations relative to the toc-base need to be adjusted by the Toc offset.
+ uint64_t TocBiasedVal = Val - PPC64TocOffset;
+ // Relocations relative to dtv[dtpmod] need to be adjusted by the DTP offset.
+ uint64_t DTPBiasedVal = Val - DynamicThreadPointerOffset;
+ switch (Type) {
+ // TOC biased relocation.
+ case R_PPC64_GOT16:
+ case R_PPC64_GOT_TLSGD16:
+ case R_PPC64_GOT_TLSLD16:
+ case R_PPC64_TOC16:
+ return {R_PPC64_ADDR16, TocBiasedVal};
+ case R_PPC64_GOT16_DS:
+ case R_PPC64_TOC16_DS:
+ case R_PPC64_GOT_TPREL16_DS:
+ case R_PPC64_GOT_DTPREL16_DS:
+ return {R_PPC64_ADDR16_DS, TocBiasedVal};
+ case R_PPC64_GOT16_HA:
+ case R_PPC64_GOT_TLSGD16_HA:
+ case R_PPC64_GOT_TLSLD16_HA:
+ case R_PPC64_GOT_TPREL16_HA:
+ case R_PPC64_GOT_DTPREL16_HA:
+ case R_PPC64_TOC16_HA:
+ return {R_PPC64_ADDR16_HA, TocBiasedVal};
+ case R_PPC64_GOT16_HI:
+ case R_PPC64_GOT_TLSGD16_HI:
+ case R_PPC64_GOT_TLSLD16_HI:
+ case R_PPC64_GOT_TPREL16_HI:
+ case R_PPC64_GOT_DTPREL16_HI:
+ case R_PPC64_TOC16_HI:
+ return {R_PPC64_ADDR16_HI, TocBiasedVal};
+ case R_PPC64_GOT16_LO:
+ case R_PPC64_GOT_TLSGD16_LO:
+ case R_PPC64_GOT_TLSLD16_LO:
+ case R_PPC64_TOC16_LO:
+ return {R_PPC64_ADDR16_LO, TocBiasedVal};
+ case R_PPC64_GOT16_LO_DS:
+ case R_PPC64_TOC16_LO_DS:
+ case R_PPC64_GOT_TPREL16_LO_DS:
+ case R_PPC64_GOT_DTPREL16_LO_DS:
+ return {R_PPC64_ADDR16_LO_DS, TocBiasedVal};
+ // Dynamic Thread pointer biased relocation types.
+ case R_PPC64_DTPREL16:
+ return {R_PPC64_ADDR16, DTPBiasedVal};
+ case R_PPC64_DTPREL16_DS:
+ return {R_PPC64_ADDR16_DS, DTPBiasedVal};
+ case R_PPC64_DTPREL16_HA:
+ return {R_PPC64_ADDR16_HA, DTPBiasedVal};
+ case R_PPC64_DTPREL16_HI:
+ return {R_PPC64_ADDR16_HI, DTPBiasedVal};
+ return {R_PPC64_ADDR16_HIGHER, DTPBiasedVal}; // NOTE(review): this and the next three returns have lost their case labels in this excerpt.
+ return {R_PPC64_ADDR16_HIGHERA, DTPBiasedVal};
+ return {R_PPC64_ADDR16_HIGHEST, DTPBiasedVal};
+ return {R_PPC64_ADDR16_HIGHESTA, DTPBiasedVal};
+ case R_PPC64_DTPREL16_LO:
+ return {R_PPC64_ADDR16_LO, DTPBiasedVal};
+ case R_PPC64_DTPREL16_LO_DS:
+ return {R_PPC64_ADDR16_LO_DS, DTPBiasedVal};
+ case R_PPC64_DTPREL64:
+ return {R_PPC64_ADDR64, DTPBiasedVal};
+ default:
+ return {Type, Val}; // All other types pass through unchanged.
+ }
+static bool isTocOptType(RelType Type) { // True for the relocation types on which relocateOne() may apply toc-optimization.
+ switch (Type) {
+ case R_PPC64_GOT16_HA:
+ case R_PPC64_GOT16_LO_DS:
+ case R_PPC64_TOC16_HA:
+ case R_PPC64_TOC16_LO_DS:
+ case R_PPC64_TOC16_LO:
+ return true;
+ default:
+ return false;
+ }
+// Writes the relocated value Val for relocation Type into the bytes at Loc.
+// Dynamic-thread-pointer-relative, toc-relative and got-indirect types are
+// first mapped to their ADDR16 equivalents by toAddr16Rel; the original type
+// is kept for diagnostics and for the toc-optimization decision. Types that
+// are not handled are reported through error().
+void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
+  // We need to save the original relocation type to use in diagnostics, and
+  // use the original type to determine if we should toc-optimize the
+  // instructions being relocated.
+  RelType OriginalType = Type;
+  bool ShouldTocOptimize = isTocOptType(Type);
+  // For dynamic thread pointer relative, toc-relative, and got-indirect
+  // relocations, proceed in terms of the corresponding ADDR16 relocation type.
+  std::tie(Type, Val) = toAddr16Rel(Type, Val);
+  switch (Type) {
+  case R_PPC64_ADDR14: {
+    checkAlignment(Loc, Val, 4, Type);
+    // Preserve the AA/LK bits in the branch instruction
+    uint8_t AALK = Loc[3];
+    write16(Loc + 2, (AALK & 3) | (Val & 0xfffc));
+    break;
+  }
+  case R_PPC64_ADDR16:
+  case R_PPC64_TPREL16:
+    checkInt(Loc, Val, 16, OriginalType);
+    write16(Loc, Val);
+    break;
+  case R_PPC64_ADDR16_DS:
+  case R_PPC64_TPREL16_DS: {
+    checkInt(Loc, Val, 16, OriginalType);
+    // DQ-form instructions use bits 28-31 as part of the instruction encoding
+    // DS-form instructions only use bits 30-31.
+    uint16_t Mask = isDQFormInstruction(readInstrFromHalf16(Loc)) ? 0xF : 0x3;
+    checkAlignment(Loc, lo(Val), Mask + 1, OriginalType);
+    write16(Loc, (read16(Loc) & Mask) | lo(Val));
+  } break;
+  case R_PPC64_ADDR16_HA:
+  case R_PPC64_REL16_HA:
+  case R_PPC64_TPREL16_HA:
+    // When toc-optimizing and the high-adjusted part is zero, the whole
+    // instruction is replaced by 0x60000000 (a nop) instead of patching the
+    // immediate.
+    if (Config->TocOptimize && ShouldTocOptimize && ha(Val) == 0)
+      writeInstrFromHalf16(Loc, 0x60000000);
+    else
+      write16(Loc, ha(Val));
+    break;
+  case R_PPC64_ADDR16_HI:
+  case R_PPC64_REL16_HI:
+  case R_PPC64_TPREL16_HI:
+    write16(Loc, hi(Val));
+    break;
+  case R_PPC64_ADDR16_HIGHER:
+  case R_PPC64_TPREL16_HIGHER:
+    write16(Loc, higher(Val));
+    break;
+  // NOTE(review): unlike the other ADDR16 variants, the three cases below list
+  // no TPREL16 sibling in this copy of the file - confirm against upstream.
+  case R_PPC64_ADDR16_HIGHERA:
+    write16(Loc, highera(Val));
+    break;
+  case R_PPC64_ADDR16_HIGHEST:
+    write16(Loc, highest(Val));
+    break;
+  // This label was missing, which left the highesta() write unreachable
+  // behind the preceding break.
+  case R_PPC64_ADDR16_HIGHESTA:
+    write16(Loc, highesta(Val));
+    break;
+  case R_PPC64_ADDR16_LO:
+  case R_PPC64_REL16_LO:
+  case R_PPC64_TPREL16_LO:
+    // When the high-adjusted part of a toc relocation evaluates to 0, it is
+    // changed into a nop. The lo part then needs to be updated to use the
+    // toc-pointer register r2, as the base register.
+    if (Config->TocOptimize && ShouldTocOptimize && ha(Val) == 0) {
+      uint32_t Instr = readInstrFromHalf16(Loc);
+      if (isInstructionUpdateForm(Instr))
+        error(getErrorLocation(Loc) +
+              "can't toc-optimize an update instruction: 0x" +
+              utohexstr(Instr));
+      Instr = (Instr & 0xFFE00000) | 0x00020000;
+      writeInstrFromHalf16(Loc, Instr);
+    }
+    write16(Loc, lo(Val));
+    break;
+  case R_PPC64_ADDR16_LO_DS:
+  case R_PPC64_TPREL16_LO_DS: {
+    // DQ-form instructions use bits 28-31 as part of the instruction encoding
+    // DS-form instructions only use bits 30-31.
+    uint32_t Inst = readInstrFromHalf16(Loc);
+    uint16_t Mask = isDQFormInstruction(Inst) ? 0xF : 0x3;
+    checkAlignment(Loc, lo(Val), Mask + 1, OriginalType);
+    if (Config->TocOptimize && ShouldTocOptimize && ha(Val) == 0) {
+      // When the high-adjusted part of a toc relocation evaluates to 0, it is
+      // changed into a nop. The lo part then needs to be updated to use the toc
+      // pointer register r2, as the base register.
+      if (isInstructionUpdateForm(Inst))
+        error(getErrorLocation(Loc) +
+              "Can't toc-optimize an update instruction: 0x" +
+              Twine::utohexstr(Inst));
+      Inst = (Inst & 0xFFE0000F) | 0x00020000;
+      writeInstrFromHalf16(Loc, Inst);
+    }
+    write16(Loc, (read16(Loc) & Mask) | lo(Val));
+  } break;
+  case R_PPC64_ADDR32:
+  case R_PPC64_REL32:
+    checkInt(Loc, Val, 32, Type);
+    write32(Loc, Val);
+    break;
+  case R_PPC64_ADDR64:
+  case R_PPC64_REL64:
+  case R_PPC64_TOC:
+    write64(Loc, Val);
+    break;
+  case R_PPC64_REL14: {
+    // Only the 14-bit branch displacement field is replaced.
+    uint32_t Mask = 0x0000FFFC;
+    checkInt(Loc, Val, 16, Type);
+    checkAlignment(Loc, Val, 4, Type);
+    write32(Loc, (read32(Loc) & ~Mask) | (Val & Mask));
+    break;
+  }
+  case R_PPC64_REL24: {
+    // Only the 24-bit branch displacement field is replaced.
+    uint32_t Mask = 0x03FFFFFC;
+    checkInt(Loc, Val, 26, Type);
+    checkAlignment(Loc, Val, 4, Type);
+    write32(Loc, (read32(Loc) & ~Mask) | (Val & Mask));
+    break;
+  }
+  // NOTE(review): toAddr16Rel already rewrites R_PPC64_DTPREL64 to
+  // R_PPC64_ADDR64 with the biased value, so this case looks unreachable -
+  // confirm before relying on or removing it.
+  case R_PPC64_DTPREL64:
+    write64(Loc, Val - DynamicThreadPointerOffset);
+    break;
+  default:
+    error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type));
+  }
+}
+// Decides whether a branch relocation against S needs a thunk: either a
+// call-stub because S is reached through the PLT, or a range-extending thunk
+// because the target is out of reach of the branch instruction.
+bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
+                       uint64_t BranchAddr, const Symbol &S) const {
+  // Only the two relative branch relocations can ever need a thunk.
+  const bool IsBranch = Type == R_PPC64_REL14 || Type == R_PPC64_REL24;
+  if (!IsBranch)
+    return false;
+  // A function in the PLT must be called through a call-stub.
+  if (S.isInPlt())
+    return true;
+  // A weak undefined symbol in an executable can't be called, so it never
+  // needs a range-extending thunk.
+  const bool UncallableWeak = S.isUndefWeak() && !Config->Shared;
+  if (UncallableWeak)
+    return false;
+  // Otherwise a thunk is needed exactly when the target is out of range.
+  const bool Reachable = inBranchRange(Type, BranchAddr, S.getVA());
+  return !Reachable;
+}
+// Returns true when the displacement from Src to Dst fits the signed range
+// checked for the given branch relocation (16 bits for REL14, 26 bits for
+// REL24). Any other relocation type is a programming error.
+bool PPC64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
+  const int64_t Displacement = Dst - Src;
+  switch (Type) {
+  case R_PPC64_REL14:
+    return isInt<16>(Displacement);
+  case R_PPC64_REL24:
+    return isInt<26>(Displacement);
+  default:
+    llvm_unreachable("unsupported relocation type used in branch");
+  }
+}
+// Maps the generic TLS relaxation expressions to the PPC64-specific variants
+// and passes every other expression through unchanged.
+//
+// The return statement of each `if` had been lost, which nested two mutually
+// exclusive conditions and let the function fall off its end.
+// NOTE(review): the two returned enumerators are restored from upstream lld -
+// confirm they match the RelExpr definition used by this tree.
+RelExpr PPC64::adjustRelaxExpr(RelType Type, const uint8_t *Data,
+                               RelExpr Expr) const {
+  if (Expr == R_RELAX_TLS_GD_TO_IE)
+    return R_RELAX_TLS_GD_TO_IE_GOT_OFF;
+  if (Expr == R_RELAX_TLS_LD_TO_LE)
+    return R_RELAX_TLS_LD_TO_LE_ABS;
+  return Expr;
+}
+// Reference: the TLS link-editor optimization section of the 64-bit ELF V2
+// ABI supplement.
+// The general dynamic code sequence for a global `x` uses 4 instructions.
+// Instruction Relocation Symbol
+// addis r3, r2, x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x
+// addi r3, r3, x@got@tlsgd@l R_PPC64_GOT_TLSGD16_LO x
+// bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x
+// R_PPC64_REL24 __tls_get_addr
+// nop None None
+// Relaxing to initial-exec entails:
+// 1) Convert the addis/addi pair that builds the address of the tls_index
+// struct for 'x' to an addis/ld pair that loads an offset from a got-entry.
+// 2) Convert the call to __tls_get_addr to a nop.
+// 3) Convert the nop following the call to an add of the loaded offset to the
+// thread pointer.
+// Since the nop must directly follow the call, the R_PPC64_TLSGD relocation is
+// used as the relaxation hint for both steps 2 and 3.
+// Rewrites one instruction of a general-dynamic TLS sequence into its
+// initial-exec form, selected by the relocation Type attached to it.
+void PPC64::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const {
+  if (Type == R_PPC64_GOT_TLSGD16_HA) {
+    // addis rT, r2, sym@got@tlsgd@ha  -->  addis rT, r2, sym@got@tprel@ha
+    relocateOne(Loc, R_PPC64_GOT_TPREL16_HA, Val);
+    return;
+  }
+  if (Type == R_PPC64_GOT_TLSGD16_LO) {
+    // addi r3, rA, sym@got@tlsgd@l  -->  ld r3, sym@got@tprel@l(rA)
+    // Bits 16-20 of the original instruction (rA) are carried over into the
+    // replacement instruction.
+    const uint32_t BaseRegField = readInstrFromHalf16(Loc) & (0x1f << 16);
+    writeInstrFromHalf16(Loc, 0xE8600000 | BaseRegField);
+    relocateOne(Loc, R_PPC64_GOT_TPREL16_LO_DS, Val);
+    return;
+  }
+  if (Type == R_PPC64_TLSGD) {
+    // bl __tls_get_addr(sym@tlsgd)  -->  nop
+    write32(Loc, 0x60000000);
+    // nop  -->  add r3, r3, r13
+    write32(Loc + 4, 0x7c636A14);
+    return;
+  }
+  llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
+}
+// The prologue for a split-stack function is expected to look roughly
+// like this:
+// .Lglobal_entry_point:
+// # TOC pointer initialization.
+// ...
+// .Llocal_entry_point:
+// # load the __private_ss member of the thread's tcbhead.
+// ld r0,-0x7000-64(r13)
+// # subtract the function's stack size from the stack pointer.
+// addis r12, r1, ha(-stack-frame size)
+// addi r12, r12, l(-stack-frame size)
+// # compare needed to actual and branch to allocate_more_stack if more
+// # space is needed, otherwise fallthrough to 'normal' function body.
+// cmpld cr7,r12,r0
+// blt- cr7, .Lallocate_more_stack
+// -) The allocate_more_stack block might be placed after the split-stack
+// prologue and the `blt-` replaced with a `bge+ .Lnormal_func_body`
+// instead.
+// -) If either the addis or addi is not needed due to the stack size being
+// smaller than 32K or a multiple of 64K they will be replaced with a nop,
+// but there will always be 2 instructions the linker can overwrite for the
+// adjusted stack size.
+// The linker's job here is to increase the stack size used in the addis/addi
+// pair by split-stack-size-adjust.
+// addis r12, r1, ha(-stack-frame size - split-stack-adjust-size)
+// addi r12, r12, l(-stack-frame size - split-stack-adjust-size)
+// Patches the split-stack prologue of the function whose code occupies
+// [Loc, End) so that the stack size it requests is adjusted by
+// Config->SplitStackAdjustSize. Returns false, leaving the code untouched,
+// when the bytes at the local entry point do not match the expected prologue
+// or when the adjusted size would overflow (an error is reported in that
+// case); returns true after rewriting the two size-computing instructions.
+bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
+ uint8_t StOther) const {
+ // If the caller has a global entry point adjust the buffer past it. The start
+ // of the split-stack prologue will be at the local entry point.
+ Loc += getPPC64GlobalEntryToLocalEntryOffset(StOther);
+ // At the very least we expect to see a load of some split-stack data from the
+ // tcb, and 2 instructions that calculate the ending stack address this
+ // function will require. If there is not enough room for at least 3
+ // instructions it can't be a split-stack prologue.
+ // NOTE(review): `>=` also rejects a buffer that holds exactly 12 bytes -
+ // confirm this is intended.
+ if (Loc + 12 >= End)
+ return false;
+ // First instruction must be `ld r0, -0x7000-64(r13)`
+ if (read32(Loc) != 0xe80d8fc0)
+ return false;
+ int16_t HiImm = 0;
+ int16_t LoImm = 0;
+ // The instruction after the load can be either an addis if the frame size is
+ // larger than 32K, or an addi if the size is less than 32K. Primary opcode 15
+ // is treated as the addis (high immediate), 14 as the addi (low immediate).
+ int32_t FirstInstr = read32(Loc + 4);
+ if (getPrimaryOpCode(FirstInstr) == 15) {
+ HiImm = FirstInstr & 0xFFFF;
+ } else if (getPrimaryOpCode(FirstInstr) == 14) {
+ LoImm = FirstInstr & 0xFFFF;
+ } else {
+ return false;
+ }
+ // Second instruction is either an addi or a nop. If the first instruction was
+ // an addi then LoImm is set and the second instruction must be a nop.
+ uint32_t SecondInstr = read32(Loc + 8);
+ if (!LoImm && getPrimaryOpCode(SecondInstr) == 14) {
+ LoImm = SecondInstr & 0xFFFF;
+ } else if (SecondInstr != 0x60000000) {
+ return false;
+ }
+ // The register operands of the first instruction should be the stack-pointer
+ // (r1) as the input (RA) and r12 as the output (RT). If the second
+ // instruction is not a nop, then it should use r12 as both input and output.
+ auto CheckRegOperands = [](uint32_t Instr, uint8_t ExpectedRT,
+ uint8_t ExpectedRA) {
+ return ((Instr & 0x3E00000) >> 21 == ExpectedRT) &&
+ ((Instr & 0x1F0000) >> 16 == ExpectedRA);
+ };
+ if (!CheckRegOperands(FirstInstr, 12, 1))
+ return false;
+ if (SecondInstr != 0x60000000 && !CheckRegOperands(SecondInstr, 12, 12))
+ return false;
+ // Recombine the two 16-bit immediates into the 32-bit value they encode.
+ int32_t StackFrameSize = (HiImm * 65536) + LoImm;
+ // Check that the adjusted size doesn't overflow what we can represent with 2
+ // instructions.
+ if (StackFrameSize < Config->SplitStackAdjustSize + INT32_MIN) {
+ error(getErrorLocation(Loc) + "split-stack prologue adjustment overflows");
+ return false;
+ }
+ int32_t AdjustedStackFrameSize =
+ StackFrameSize - Config->SplitStackAdjustSize;
+ // Split the adjusted value back into a low half and a high half; adding
+ // 0x8000 before the shift compensates for the low half being sign-extended.
+ LoImm = AdjustedStackFrameSize & 0xFFFF;
+ HiImm = (AdjustedStackFrameSize + 0x8000) >> 16;
+ if (HiImm) {
+ // addis r12, r1, HiImm
+ write32(Loc + 4, 0x3D810000 | (uint16_t)HiImm);
+ // If the low immediate is zero the second instruction will be a nop.
+ SecondInstr = LoImm ? 0x398C0000 | (uint16_t)LoImm : 0x60000000;
+ write32(Loc + 8, SecondInstr);
+ } else {
+ // addi r12, r1, imm
+ write32(Loc + 4, (0x39810000) | (uint16_t)LoImm);
+ write32(Loc + 8, 0x60000000);
+ }
+ return true;
+// Returns a pointer to the single, function-local static PPC64 target
+// description; the object is constructed on the first call.
+TargetInfo *elf::getPPC64TargetInfo() {
+ static PPC64 Target;
+ return &Target;
diff --git a/contrib/llvm/tools/lld/ELF/Arch/RISCV.cpp b/contrib/llvm/tools/lld/ELF/Arch/RISCV.cpp
new file mode 100644
index 000000000000..461e8d35c3e6
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Arch/RISCV.cpp
@@ -0,0 +1,279 @@
+//===- RISCV.cpp ----------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "InputFiles.h"
+#include "Target.h"
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::support::endian;
+using namespace llvm::ELF;
+using namespace lld;
+using namespace lld::elf;
+namespace {
+// RISC-V implementation of the TargetInfo interface. It overrides the e_flags
+// computation, the relocation-expression classification and the application
+// of a single relocation.
+class RISCV final : public TargetInfo {
+ RISCV();
+ // Merges and validates the e_flags of all input object files.
+ uint32_t calcEFlags() const override;
+ // Classifies a relocation type into the RelExpr used to compute its value.
+ RelExpr getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const override;
+ // Writes the computed value Val for relocation Type at Loc.
+ void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+} // end anonymous namespace
+// Reads e_flags from the ELF header of F, interpreting the file as 32-bit or
+// 64-bit little-endian according to Config->Is64.
+static uint32_t getEFlags(InputFile *F) {
+  if (!Config->Is64)
+    return cast<ObjFile<ELF32LE>>(F)->getObj().getHeader()->e_flags;
+  return cast<ObjFile<ELF64LE>>(F)->getObj().getHeader()->e_flags;
+}
+// Computes the output e_flags. The result starts as the flags of the first
+// object file; EF_RISCV_RVC is added if any input has it, and an error is
+// reported for every input whose floating-point ABI or EF_RISCV_RVE bit
+// differs from the result.
+uint32_t RISCV::calcEFlags() const {
+  assert(!ObjectFiles.empty());
+  uint32_t Merged = getEFlags(ObjectFiles.front());
+  for (InputFile *File : ObjectFiles) {
+    const uint32_t Flags = getEFlags(File);
+    // The compressed-instruction bit is sticky.
+    if (Flags & EF_RISCV_RVC)
+      Merged |= EF_RISCV_RVC;
+    const bool FloatAbiDiffers =
+        (Flags & EF_RISCV_FLOAT_ABI) != (Merged & EF_RISCV_FLOAT_ABI);
+    if (FloatAbiDiffers)
+      error(toString(File) +
+            ": cannot link object files with different floating-point ABI");
+    const bool RveDiffers = (Flags & EF_RISCV_RVE) != (Merged & EF_RISCV_RVE);
+    if (RveDiffers)
+      error(toString(File) +
+            ": cannot link object files with different EF_RISCV_RVE");
+  }
+  return Merged;
+}
+// Classifies a RISC-V relocation: the listed PC-relative types yield R_PC,
+// the PCREL_LO12 pair yields R_HINT, and everything else is absolute.
+RelExpr RISCV::getRelExpr(const RelType Type, const Symbol &S,
+                          const uint8_t *Loc) const {
+  const bool IsPcRelative = Type == R_RISCV_JAL || Type == R_RISCV_CALL ||
+                            Type == R_RISCV_PCREL_HI20 ||
+                            Type == R_RISCV_32_PCREL;
+  if (IsPcRelative)
+    return R_PC;
+  const bool IsPcrelLo =
+      Type == R_RISCV_PCREL_LO12_I || Type == R_RISCV_PCREL_LO12_S;
+  if (IsPcrelLo)
+    return R_HINT;
+  return R_ABS;
+}
+// Returns the bit-field V[Begin:End] (both bounds inclusive) shifted down so
+// that bit End becomes bit 0. Begin must be < 63, because the mask is built
+// by shifting 1 left by Begin + 1.
+static uint32_t extractBits(uint64_t V, uint32_t Begin, uint32_t End) {
+  const uint64_t KeepLow = (1ULL << (Begin + 1)) - 1;
+  const uint64_t Field = V & KeepLow;
+  return Field >> End;
+}
+// Applies relocation Type with value Val to the bytes at Loc. Instruction
+// relocations scatter the value into the immediate fields of the encoded
+// instruction; data relocations store, add to or subtract from the value
+// already present. Unsupported types are reported through error().
+//
+// Two groups of case labels had been lost from this copy and are restored:
+// R_RISCV_RVC_BRANCH (its body was unreachable and its closing brace
+// unbalanced) and the labels in front of the "Ignored (for now)" return.
+// NOTE(review): R_RISCV_ALIGN / R_RISCV_RELAX are restored from upstream lld -
+// confirm against the original file.
+void RISCV::relocateOne(uint8_t *Loc, const RelType Type,
+                        const uint64_t Val) const {
+  switch (Type) {
+  case R_RISCV_32:
+    write32le(Loc, Val);
+    return;
+  case R_RISCV_64:
+    write64le(Loc, Val);
+    return;
+  case R_RISCV_RVC_BRANCH: {
+    checkInt(Loc, static_cast<int64_t>(Val) >> 1, 8, Type);
+    checkAlignment(Loc, Val, 2, Type);
+    // Keep the non-immediate bits of the 16-bit instruction.
+    uint16_t Insn = read16le(Loc) & 0xE383;
+    uint16_t Imm8 = extractBits(Val, 8, 8) << 12;
+    uint16_t Imm4_3 = extractBits(Val, 4, 3) << 10;
+    uint16_t Imm7_6 = extractBits(Val, 7, 6) << 5;
+    uint16_t Imm2_1 = extractBits(Val, 2, 1) << 3;
+    uint16_t Imm5 = extractBits(Val, 5, 5) << 2;
+    Insn |= Imm8 | Imm4_3 | Imm7_6 | Imm2_1 | Imm5;
+    write16le(Loc, Insn);
+    return;
+  }
+  case R_RISCV_RVC_JUMP: {
+    checkInt(Loc, static_cast<int64_t>(Val) >> 1, 11, Type);
+    checkAlignment(Loc, Val, 2, Type);
+    // Keep the non-immediate bits of the 16-bit instruction.
+    uint16_t Insn = read16le(Loc) & 0xE003;
+    uint16_t Imm11 = extractBits(Val, 11, 11) << 12;
+    uint16_t Imm4 = extractBits(Val, 4, 4) << 11;
+    uint16_t Imm9_8 = extractBits(Val, 9, 8) << 9;
+    uint16_t Imm10 = extractBits(Val, 10, 10) << 8;
+    uint16_t Imm6 = extractBits(Val, 6, 6) << 7;
+    uint16_t Imm7 = extractBits(Val, 7, 7) << 6;
+    uint16_t Imm3_1 = extractBits(Val, 3, 1) << 3;
+    uint16_t Imm5 = extractBits(Val, 5, 5) << 2;
+    Insn |= Imm11 | Imm4 | Imm9_8 | Imm10 | Imm6 | Imm7 | Imm3_1 | Imm5;
+    write16le(Loc, Insn);
+    return;
+  }
+  case R_RISCV_RVC_LUI: {
+    // Adding 0x800 rounds to the nearest multiple of 4096.
+    int32_t Imm = ((Val + 0x800) >> 12);
+    checkUInt(Loc, Imm, 6, Type);
+    if (Imm == 0) { // `c.lui rd, 0` is illegal, convert to `c.li rd, 0`
+      write16le(Loc, (read16le(Loc) & 0x0F83) | 0x4000);
+    } else {
+      uint16_t Imm17 = extractBits(Val + 0x800, 17, 17) << 12;
+      uint16_t Imm16_12 = extractBits(Val + 0x800, 16, 12) << 2;
+      write16le(Loc, (read16le(Loc) & 0xEF83) | Imm17 | Imm16_12);
+    }
+    return;
+  }
+  case R_RISCV_JAL: {
+    checkInt(Loc, static_cast<int64_t>(Val) >> 1, 20, Type);
+    checkAlignment(Loc, Val, 2, Type);
+    // Keep the low 12 bits of the instruction; the immediate lives above.
+    uint32_t Insn = read32le(Loc) & 0xFFF;
+    uint32_t Imm20 = extractBits(Val, 20, 20) << 31;
+    uint32_t Imm10_1 = extractBits(Val, 10, 1) << 21;
+    uint32_t Imm11 = extractBits(Val, 11, 11) << 20;
+    uint32_t Imm19_12 = extractBits(Val, 19, 12) << 12;
+    Insn |= Imm20 | Imm10_1 | Imm11 | Imm19_12;
+    write32le(Loc, Insn);
+    return;
+  }
+  case R_RISCV_BRANCH: {
+    checkInt(Loc, static_cast<int64_t>(Val) >> 1, 12, Type);
+    checkAlignment(Loc, Val, 2, Type);
+    // Keep the non-immediate bits of the instruction.
+    uint32_t Insn = read32le(Loc) & 0x1FFF07F;
+    uint32_t Imm12 = extractBits(Val, 12, 12) << 31;
+    uint32_t Imm10_5 = extractBits(Val, 10, 5) << 25;
+    uint32_t Imm4_1 = extractBits(Val, 4, 1) << 8;
+    uint32_t Imm11 = extractBits(Val, 11, 11) << 7;
+    Insn |= Imm12 | Imm10_5 | Imm4_1 | Imm11;
+    write32le(Loc, Insn);
+    return;
+  }
+  // auipc + jalr pair
+  case R_RISCV_CALL: {
+    checkInt(Loc, Val, 32, Type);
+    // Only patch the pair when the value is in range; checkInt has already
+    // reported the error otherwise.
+    if (isInt<32>(Val)) {
+      relocateOne(Loc, R_RISCV_PCREL_HI20, Val);
+      relocateOne(Loc + 4, R_RISCV_PCREL_LO12_I, Val);
+    }
+    return;
+  }
+  case R_RISCV_PCREL_HI20:
+  case R_RISCV_HI20: {
+    checkInt(Loc, Val, 32, Type);
+    // Adding 0x800 compensates for the sign-extended low 12 bits.
+    uint32_t Hi = Val + 0x800;
+    write32le(Loc, (read32le(Loc) & 0xFFF) | (Hi & 0xFFFFF000));
+    return;
+  }
+  case R_RISCV_PCREL_LO12_I:
+  case R_RISCV_LO12_I: {
+    checkInt(Loc, Val, 32, Type);
+    uint32_t Hi = Val + 0x800;
+    uint32_t Lo = Val - (Hi & 0xFFFFF000);
+    write32le(Loc, (read32le(Loc) & 0xFFFFF) | ((Lo & 0xFFF) << 20));
+    return;
+  }
+  case R_RISCV_PCREL_LO12_S:
+  case R_RISCV_LO12_S: {
+    checkInt(Loc, Val, 32, Type);
+    uint32_t Hi = Val + 0x800;
+    uint32_t Lo = Val - (Hi & 0xFFFFF000);
+    uint32_t Imm11_5 = extractBits(Lo, 11, 5) << 25;
+    uint32_t Imm4_0 = extractBits(Lo, 4, 0) << 7;
+    write32le(Loc, (read32le(Loc) & 0x1FFF07F) | Imm11_5 | Imm4_0);
+    return;
+  }
+  case R_RISCV_ADD8:
+    *Loc += Val;
+    return;
+  case R_RISCV_ADD16:
+    write16le(Loc, read16le(Loc) + Val);
+    return;
+  case R_RISCV_ADD32:
+    write32le(Loc, read32le(Loc) + Val);
+    return;
+  case R_RISCV_ADD64:
+    write64le(Loc, read64le(Loc) + Val);
+    return;
+  case R_RISCV_SUB6:
+    // Only the low 6 bits take part; the top two bits of the byte are kept.
+    *Loc = (*Loc & 0xc0) | (((*Loc & 0x3f) - Val) & 0x3f);
+    return;
+  case R_RISCV_SUB8:
+    *Loc -= Val;
+    return;
+  case R_RISCV_SUB16:
+    write16le(Loc, read16le(Loc) - Val);
+    return;
+  case R_RISCV_SUB32:
+    write32le(Loc, read32le(Loc) - Val);
+    return;
+  case R_RISCV_SUB64:
+    write64le(Loc, read64le(Loc) - Val);
+    return;
+  case R_RISCV_SET6:
+    *Loc = (*Loc & 0xc0) | (Val & 0x3f);
+    return;
+  case R_RISCV_SET8:
+    *Loc = Val;
+    return;
+  case R_RISCV_SET16:
+    write16le(Loc, Val);
+    return;
+  case R_RISCV_SET32:
+  case R_RISCV_32_PCREL:
+    write32le(Loc, Val);
+    return;
+  case R_RISCV_ALIGN:
+  case R_RISCV_RELAX:
+    return; // Ignored (for now)
+  case R_RISCV_NONE:
+    return; // Do nothing
+  // These are handled by the dynamic linker
+  case R_RISCV_COPY:
+  // GP-relative relocations are only produced after relaxation, which
+  // we don't support for now
+  default:
+    error(getErrorLocation(Loc) +
+          "unimplemented relocation: " + toString(Type));
+    return;
+  }
+}
+// Returns a pointer to the single, function-local static RISCV target
+// description; the object is constructed on the first call.
+TargetInfo *elf::getRISCVTargetInfo() {
+ static RISCV Target;
+ return &Target;
diff --git a/contrib/llvm/tools/lld/ELF/Arch/SPARCV9.cpp b/contrib/llvm/tools/lld/ELF/Arch/SPARCV9.cpp
new file mode 100644
index 000000000000..831aa2028e7f
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Arch/SPARCV9.cpp
@@ -0,0 +1,149 @@
+//===- SPARCV9.cpp --------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "InputFiles.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/Support/Endian.h"
+using namespace llvm;
+using namespace llvm::support::endian;
+using namespace llvm::ELF;
+using namespace lld;
+using namespace lld::elf;
+namespace {
+// SPARC V9 implementation of the TargetInfo interface.
+class SPARCV9 final : public TargetInfo {
+ SPARCV9();
+ // Classifies a relocation type into the RelExpr used to compute its value.
+ RelExpr getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const override;
+ // Emits one PLT entry into Buf.
+ void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr,
+ int32_t Index, unsigned RelOff) const override;
+ // Writes the computed value Val for relocation Type at Loc.
+ void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+} // namespace
+// Sets the SPARC V9 specific parameters of the target description: dynamic
+// relocation types, GOT/PLT entry sizes, page sizes and default image base.
+// The constructor header had been lost, leaving these assignments at
+// namespace scope; it is restored here.
+SPARCV9::SPARCV9() {
+  CopyRel = R_SPARC_COPY;
+  NoneRel = R_SPARC_NONE;
+  RelativeRel = R_SPARC_RELATIVE;
+  GotEntrySize = 8;
+  PltEntrySize = 32;
+  // The PLT header occupies the space of four regular entries.
+  PltHeaderSize = 4 * PltEntrySize;
+  PageSize = 8192;
+  DefaultMaxPageSize = 0x100000;
+  DefaultImageBase = 0x100000;
+}
+// Classifies a SPARC relocation type: absolute words, PC-relative
+// displacements, GOT offsets, PLT calls, and "none". Anything else is
+// reported as invalid.
+RelExpr SPARCV9::getRelExpr(RelType Type, const Symbol &S,
+                            const uint8_t *Loc) const {
+  switch (Type) {
+  case R_SPARC_NONE:
+    return R_NONE;
+  case R_SPARC_WPLT30:
+    return R_PLT_PC;
+  case R_SPARC_GOT10:
+  case R_SPARC_GOT22:
+    return R_GOT_OFF;
+  case R_SPARC_PC10:
+  case R_SPARC_PC22:
+  case R_SPARC_DISP32:
+  case R_SPARC_WDISP30:
+    return R_PC;
+  case R_SPARC_32:
+  case R_SPARC_UA32:
+  case R_SPARC_64:
+  case R_SPARC_UA64:
+    return R_ABS;
+  default:
+    return R_INVALID;
+  }
+}
+// Applies relocation Type with value Val to the big-endian bytes at Loc.
+// Word relocations overwrite the whole location; field relocations replace
+// only the masked immediate/displacement bits of the existing instruction.
+void SPARCV9::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
+  // Replaces the bits selected by Mask in the 32-bit word at Loc with the
+  // corresponding bits of Field and keeps all other bits.
+  auto PatchField = [Loc](uint32_t Mask, uint64_t Field) {
+    write32be(Loc, (read32be(Loc) & ~Mask) | (Field & Mask));
+  };
+  switch (Type) {
+  case R_SPARC_32:
+  case R_SPARC_UA32:
+    // V-word32
+    checkUInt(Loc, Val, 32, Type);
+    write32be(Loc, Val);
+    return;
+  case R_SPARC_DISP32:
+    // V-disp32
+    checkInt(Loc, Val, 32, Type);
+    write32be(Loc, Val);
+    return;
+  case R_SPARC_WDISP30:
+  case R_SPARC_WPLT30:
+    // V-disp30
+    checkInt(Loc, Val, 32, Type);
+    PatchField(0x3fffffff, Val >> 2);
+    return;
+  case R_SPARC_22:
+    // V-imm22
+    checkUInt(Loc, Val, 22, Type);
+    PatchField(0x003fffff, Val);
+    return;
+  case R_SPARC_GOT22:
+  case R_SPARC_PC22:
+    // T-imm22
+    PatchField(0x003fffff, Val >> 10);
+    return;
+  case R_SPARC_WDISP19:
+    // V-disp19
+    checkInt(Loc, Val, 21, Type);
+    PatchField(0x0007ffff, Val >> 2);
+    return;
+  case R_SPARC_GOT10:
+  case R_SPARC_PC10:
+    // T-simm10
+    PatchField(0x000003ff, Val);
+    return;
+  case R_SPARC_64:
+  case R_SPARC_UA64:
+    // V-xword64
+    write64be(Loc, Val);
+    return;
+  default:
+    error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type));
+  }
+}
+// Emits one 32-byte PLT entry into Buf: a sethi/ba,a pair followed by nops.
+// The two leading instructions are then patched with the entry's offset and
+// with the branch displacement derived from that offset.
+void SPARCV9::writePlt(uint8_t *Buf, uint64_t GotEntryAddr,
+                       uint64_t PltEntryAddr, int32_t Index,
+                       unsigned RelOff) const {
+  const uint8_t EntryTemplate[] = {
+      0x03, 0x00, 0x00, 0x00, // sethi   (. - .PLT0), %g1
+      0x30, 0x68, 0x00, 0x00, // ba,a    %xcc, .PLT1
+      0x01, 0x00, 0x00, 0x00, // nop
+      0x01, 0x00, 0x00, 0x00, // nop
+      0x01, 0x00, 0x00, 0x00, // nop
+      0x01, 0x00, 0x00, 0x00, // nop
+      0x01, 0x00, 0x00, 0x00, // nop
+      0x01, 0x00, 0x00, 0x00  // nop
+  };
+  memcpy(Buf, EntryTemplate, sizeof(EntryTemplate));
+  const uint64_t EntryOff = getPltEntryOffset(Index);
+  // First instruction: the entry's offset goes into the sethi immediate.
+  relocateOne(Buf, R_SPARC_22, EntryOff);
+  // Second instruction: branch displacement computed from the entry offset.
+  relocateOne(Buf + 4, R_SPARC_WDISP19, -(EntryOff + 4 - PltEntrySize));
+}
+// Returns a pointer to the single, function-local static SPARCV9 target
+// description; the object is constructed on the first call.
+TargetInfo *elf::getSPARCV9TargetInfo() {
+ static SPARCV9 Target;
+ return &Target;
diff --git a/contrib/llvm/tools/lld/ELF/Arch/X86.cpp b/contrib/llvm/tools/lld/ELF/Arch/X86.cpp
new file mode 100644
index 000000000000..e910375d2fc7
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Arch/X86.cpp
@@ -0,0 +1,554 @@
+//===- X86.cpp ------------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "InputFiles.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/Support/Endian.h"
+using namespace llvm;
+using namespace llvm::support::endian;
+using namespace llvm::ELF;
+using namespace lld;
+using namespace lld::elf;
+namespace {
+// i386 implementation of the TargetInfo interface. It is not final: the
+// retpoline variants declared later in this file derive from it.
+class X86 : public TargetInfo {
+ X86();
+ // Classifies a relocation type into the RelExpr used to compute its value.
+ RelExpr getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const override;
+ // Reads the addend stored in the bytes being relocated.
+ int64_t getImplicitAddend(const uint8_t *Buf, RelType Type) const override;
+ void writeGotPltHeader(uint8_t *Buf) const override;
+ // Maps a static relocation type to the type emitted as a dynamic relocation.
+ RelType getDynRel(RelType Type) const override;
+ void writeGotPlt(uint8_t *Buf, const Symbol &S) const override;
+ void writeIgotPlt(uint8_t *Buf, const Symbol &S) const override;
+ void writePltHeader(uint8_t *Buf) const override;
+ void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
+ int32_t Index, unsigned RelOff) const override;
+ // Writes the computed value Val for relocation Type at Loc.
+ void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+ RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
+ RelExpr Expr) const override;
+ // TLS relaxations: each rewrites the instruction bytes around Loc.
+ void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+ void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+ void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+ void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+} // namespace
+// Sets the i386 specific parameters of the target description.
+X86::X86() {
+  // Table geometry.
+  GotEntrySize = 4;
+  GotPltEntrySize = 4;
+  PltEntrySize = 16;
+  PltHeaderSize = 16;
+  // Dynamic relocation types.
+  NoneRel = R_386_NONE;
+  CopyRel = R_386_COPY;
+  GotRel = R_386_GLOB_DAT;
+  PltRel = R_386_JUMP_SLOT;
+  RelativeRel = R_386_RELATIVE;
+  IRelativeRel = R_386_IRELATIVE;
+  // Thread-local storage.
+  TlsGotRel = R_386_TLS_TPOFF;
+  TlsModuleIndexRel = R_386_TLS_DTPMOD32;
+  TlsOffsetRel = R_386_TLS_DTPOFF32;
+  TlsGdRelaxSkip = 2;
+  // Padding bytes: four INT3 (0xcc) instructions.
+  TrapInstr = {0xcc, 0xcc, 0xcc, 0xcc};
+  // Align to the non-PAE large page size (known as a superpage or huge page).
+  // FreeBSD automatically promotes large, superpage-aligned allocations.
+  DefaultImageBase = 0x400000;
+}
+// Returns false only when the top two bits of ModRM are 0 and its low three
+// bits are 5 (i.e. (ModRM & 0xc7) == 0x5); true for every other byte.
+static bool hasBaseReg(uint8_t ModRM) {
+  const unsigned TopTwoBits = ModRM >> 6;
+  const unsigned LowThreeBits = ModRM & 0x7;
+  const bool NoBase = TopTwoBits == 0 && LowThreeBits == 5;
+  return !NoBase;
+}
+// Classifies an i386 relocation type into the RelExpr used to compute its
+// value. For R_386_GOT32/R_386_GOT32X the answer depends on the byte that
+// precedes Loc; unknown types yield R_INVALID.
+// NOTE(review): in this copy R_386_TLS_GD and R_386_TLS_LDM share R_PLT_PC
+// with R_386_PLT32, R_386_GOTPC shares R_GOT with R_386_TLS_IE, and
+// R_386_GOTOFF shares R_TLS with R_386_TLS_LE. That grouping looks unusual -
+// confirm against the upstream file in case lines were lost.
+RelExpr X86::getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const {
+ switch (Type) {
+ case R_386_8:
+ case R_386_16:
+ case R_386_32:
+ case R_386_TLS_LDO_32:
+ return R_ABS;
+ case R_386_TLS_GD:
+ case R_386_TLS_LDM:
+ case R_386_PLT32:
+ return R_PLT_PC;
+ case R_386_PC8:
+ case R_386_PC16:
+ case R_386_PC32:
+ return R_PC;
+ case R_386_GOTPC:
+ case R_386_TLS_IE:
+ return R_GOT;
+ case R_386_GOT32:
+ case R_386_GOT32X:
+ // These relocations are arguably mis-designed because their calculations
+ // depend on the instructions they are applied to. This is bad because we
+ // usually don't care about whether the target section contains valid
+ // machine instructions or not. But this is part of the documented ABI, so
+ // we had to implement as the standard requires.
+ //
+ // x86 does not support PC-relative data access. Therefore, in order to
+ // access GOT contents, a GOT address needs to be known at link-time
+ // (which means non-PIC) or compilers have to emit code to get a GOT
+ // address at runtime (which means code is position-independent but
+ // compilers need to emit extra code for each GOT access.) This decision
+ // is made at compile-time. In the latter case, compilers emit code to
+ // load an GOT address to a register, which is usually %ebx.
+ //
+ // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
+ // foo@GOT(%reg).
+ //
+ // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
+ // find such relocation, we should report an error. foo@GOT is resolved to
+ // an *absolute* address of foo's GOT entry, because both GOT address and
+ // foo's offset are known. In other words, it's G + A.
+ //
+ // foo@GOT(%reg) needs to be resolved to a *relative* offset from a GOT to
+ // foo's GOT entry in the table, because GOT address is not known but foo's
+ // offset in the table is known. It's G + A - GOT.
+ //
+ // It's unfortunate that compilers emit the same relocation for these
+ // different use cases. In order to distinguish them, we have to read a
+ // machine instruction.
+ //
+ // The following code implements it. We assume that Loc[0] is the first
+ // byte of a displacement or an immediate field of a valid machine
+ // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
+ // the byte, we can determine whether the instruction is register-relative
+ // (i.e. it was generated for foo@GOT(%reg)) or absolute (i.e. foo@GOT).
+ return hasBaseReg(Loc[-1]) ? R_GOT_FROM_END : R_GOT;
+ case R_386_TLS_GOTIE:
+ return R_GOT_FROM_END;
+ case R_386_GOTOFF:
+ case R_386_TLS_LE:
+ return R_TLS;
+ case R_386_TLS_LE_32:
+ return R_NEG_TLS;
+ case R_386_NONE:
+ return R_NONE;
+ default:
+ return R_INVALID;
+ }
+// Adjusts a TLS relaxation expression for i386. As written here every
+// expression is returned unchanged.
+// NOTE(review): a switch holding only a default label suggests that
+// target-specific cases were lost from this copy - confirm against upstream.
+RelExpr X86::adjustRelaxExpr(RelType Type, const uint8_t *Data,
+ RelExpr Expr) const {
+ switch (Expr) {
+ default:
+ return Expr;
+ }
+// Stores the virtual address of the dynamic section in the first 32-bit
+// little-endian word of the .got.plt header at Buf.
+void X86::writeGotPltHeader(uint8_t *Buf) const {
+  const auto DynamicVA = In.Dynamic->getVA();
+  write32le(Buf, DynamicVA);
+}
+// Initializes the .got.plt slot of S so that it points back into the symbol's
+// own PLT entry.
+void X86::writeGotPlt(uint8_t *Buf, const Symbol &S) const {
+  // Entries in .got.plt initially points back to the corresponding
+  // PLT entries with a fixed offset to skip the first instruction.
+  const auto AfterFirstInstr = S.getPltVA() + 6;
+  write32le(Buf, AfterFirstInstr);
+}
+// Fills the .igot.plt slot of S with the symbol's own address.
+void X86::writeIgotPlt(uint8_t *Buf, const Symbol &S) const {
+  // An x86 entry is the address of the ifunc resolver function.
+  const auto ResolverVA = S.getVA();
+  write32le(Buf, ResolverVA);
+}
+// Maps the two local-exec TLS relocation types to their TPOFF dynamic
+// counterparts; every other type is returned unchanged.
+RelType X86::getDynRel(RelType Type) const {
+  switch (Type) {
+  case R_386_TLS_LE:
+    return R_386_TLS_TPOFF;
+  case R_386_TLS_LE_32:
+    return R_386_TLS_TPOFF32;
+  default:
+    return Type;
+  }
+}
+// Emits the 16-byte PLT header into Buf. Non-PIC output addresses .got.plt
+// absolutely; PIC output addresses it relative to %ebx, which is taken to
+// hold the end address of .got.
+void X86::writePltHeader(uint8_t *Buf) const {
+  if (!Config->Pic) {
+    const uint8_t PltData[] = {
+        0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
+        0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
+        0x90, 0x90, 0x90, 0x90, // nop
+    };
+    memcpy(Buf, PltData, sizeof(PltData));
+    // Absolute addresses of the second and third .got.plt words.
+    uint32_t GotPlt = In.GotPlt->getVA();
+    write32le(Buf + 2, GotPlt + 4);
+    write32le(Buf + 8, GotPlt + 8);
+    return;
+  }
+  const uint8_t V[] = {
+      0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl GOTPLT+4(%ebx)
+      0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *GOTPLT+8(%ebx)
+      0x90, 0x90, 0x90, 0x90              // nop
+  };
+  memcpy(Buf, V, sizeof(V));
+  // Displacements are relative to the end of .got.
+  uint32_t Ebx = In.Got->getVA() + In.Got->getSize();
+  uint32_t GotPlt = In.GotPlt->getVA() - Ebx;
+  write32le(Buf + 2, GotPlt + 4);
+  write32le(Buf + 8, GotPlt + 8);
+}
+// Emits one 16-byte PLT entry into Buf: an indirect jump through the
+// symbol's .got.plt slot, a push of the relocation offset, and a jump back to
+// the PLT header.
+void X86::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
+                   uint64_t PltEntryAddr, int32_t Index,
+                   unsigned RelOff) const {
+  const uint8_t Inst[] = {
+      0xff, 0x00, 0, 0, 0, 0, // jmp *foo_in_GOT or jmp *foo@GOT(%ebx)
+      0x68, 0,    0, 0, 0,    // pushl $reloc_offset
+      0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
+  };
+  memcpy(Buf, Inst, sizeof(Inst));
+  if (!Config->Pic) {
+    // jmp *foo_in_GOT
+    Buf[1] = 0x25;
+    write32le(Buf + 2, GotPltEntryAddr);
+  } else {
+    // jmp *foo@GOT(%ebx)
+    uint32_t Ebx = In.Got->getVA() + In.Got->getSize();
+    Buf[1] = 0xa3;
+    write32le(Buf + 2, GotPltEntryAddr - Ebx);
+  }
+  // Operand of the pushl.
+  write32le(Buf + 7, RelOff);
+  // Displacement of the final jmp.
+  write32le(Buf + 12, -getPltEntryOffset(Index) - 16);
+}
+// Returns the addend stored in the bytes at Buf for relocation Type: the
+// little-endian field of the relocation's width (8, 16 or 32 bits),
+// sign-extended to 64 bits. Types not listed yield 0.
+// NOTE(review): the case list may be incomplete in this copy - confirm
+// against upstream before relying on the default.
+int64_t X86::getImplicitAddend(const uint8_t *Buf, RelType Type) const {
+ switch (Type) {
+ case R_386_8:
+ case R_386_PC8:
+ return SignExtend64<8>(*Buf);
+ case R_386_16:
+ case R_386_PC16:
+ return SignExtend64<16>(read16le(Buf));
+ case R_386_32:
+ case R_386_GOT32:
+ case R_386_GOT32X:
+ case R_386_GOTOFF:
+ case R_386_GOTPC:
+ case R_386_PC32:
+ case R_386_PLT32:
+ case R_386_TLS_LDO_32:
+ case R_386_TLS_LE:
+ return SignExtend64<32>(read32le(Buf));
+ default:
+ return 0;
+ }
+// Applies relocation Type with value Val to the little-endian bytes at Loc.
+// Each case range-checks the value and stores it at the relocation's width
+// (8, 16 or 32 bits). Unknown types are reported through error().
+void X86::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
+ switch (Type) {
+ case R_386_8:
+ // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
+ // being used for some 16-bit programs such as boot loaders, so
+ // we want to support them.
+ checkIntUInt(Loc, Val, 8, Type);
+ *Loc = Val;
+ break;
+ case R_386_PC8:
+ checkInt(Loc, Val, 8, Type);
+ *Loc = Val;
+ break;
+ case R_386_16:
+ checkIntUInt(Loc, Val, 16, Type);
+ write16le(Loc, Val);
+ break;
+ case R_386_PC16:
+ // R_386_PC16 is normally used with 16 bit code. In that situation
+ // the PC is 16 bits, just like the addend. This means that it can
+ // point from any 16 bit address to any other if the possibility
+ // of wrapping is included.
+ // The only restriction we have to check then is that the destination
+ // address fits in 16 bits. That is impossible to do here. The problem is
+ // that we are passed the final value, which already had the
+ // current location subtracted from it.
+ // We just check that Val fits in 17 bits. This misses some cases, but
+ // should have no false positives.
+ checkInt(Loc, Val, 17, Type);
+ write16le(Loc, Val);
+ break;
+ // All remaining supported types store a range-checked 32-bit value.
+ case R_386_32:
+ case R_386_GLOB_DAT:
+ case R_386_GOT32:
+ case R_386_GOT32X:
+ case R_386_GOTOFF:
+ case R_386_GOTPC:
+ case R_386_PC32:
+ case R_386_PLT32:
+ case R_386_RELATIVE:
+ case R_386_TLS_DTPMOD32:
+ case R_386_TLS_DTPOFF32:
+ case R_386_TLS_GD:
+ case R_386_TLS_GOTIE:
+ case R_386_TLS_IE:
+ case R_386_TLS_LDM:
+ case R_386_TLS_LDO_32:
+ case R_386_TLS_LE:
+ case R_386_TLS_LE_32:
+ case R_386_TLS_TPOFF:
+ case R_386_TLS_TPOFF32:
+ checkInt(Loc, Val, 32, Type);
+ write32le(Loc, Val);
+ break;
+ default:
+ error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type));
+ }
+// Rewrites a general-dynamic TLS access into the local-exec form by
+// overwriting 12 bytes starting 3 bytes before Loc and then storing Val as
+// the immediate of the second instruction.
+void X86::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
+ // Convert
+ // leal x@tlsgd(, %ebx, 1),
+ // call __tls_get_addr@plt
+ // to
+ // movl %gs:0,%eax
+ // subl $x@ntpoff,%eax
+ const uint8_t Inst[] = {
+ 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
+ 0x81, 0xe8, 0, 0, 0, 0, // subl $Val, %eax (immediate filled below)
+ };
+ // The template starts at Loc - 3, so its last four bytes (offset 8) land at
+ // Loc + 5.
+ memcpy(Loc - 3, Inst, sizeof(Inst));
+ write32le(Loc + 5, Val);
+// Rewrites a general-dynamic TLS access into the initial-exec form by
+// overwriting 12 bytes starting 3 bytes before Loc and then storing Val as
+// the displacement of the second instruction.
+void X86::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const {
+ // Convert
+ // leal x@tlsgd(, %ebx, 1),
+ // call __tls_get_addr@plt
+ // to
+ // movl %gs:0, %eax
+ // addl x@gotntpoff(%ebx), %eax
+ const uint8_t Inst[] = {
+ 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
+ 0x03, 0x83, 0, 0, 0, 0, // addl Val(%ebx), %eax
+ };
+ // The template starts at Loc - 3, so its last four bytes (offset 8) land at
+ // Loc + 5.
+ memcpy(Loc - 3, Inst, sizeof(Inst));
+ write32le(Loc + 5, Val);
+// In some conditions, relocations can be optimized to avoid using GOT.
+// This function does that for Initial Exec to Local Exec case.
+// It patches the one or two opcode bytes that precede Loc so that the
+// instruction takes an immediate instead of loading from memory, then stores
+// Val at Loc.
+void X86::relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
+ // Ulrich's document section 6.2 says that @gotntpoff can
+ // be used with MOVL or ADDL instructions.
+ // @indntpoff is similar to @gotntpoff, but for use in
+ // position dependent code.
+ // Bits 3-5 of the byte before Loc, used below as the register number.
+ uint8_t Reg = (Loc[-1] >> 3) & 7;
+ if (Type == R_386_TLS_IE) {
+ if (Loc[-1] == 0xa1) {
+ // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
+ // This case is different from the generic case below because
+ // this is a 5 byte instruction while below is 6 bytes.
+ Loc[-1] = 0xb8;
+ } else if (Loc[-2] == 0x8b) {
+ // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
+ Loc[-2] = 0xc7;
+ Loc[-1] = 0xc0 | Reg;
+ } else {
+ // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
+ Loc[-2] = 0x81;
+ Loc[-1] = 0xc0 | Reg;
+ }
+ } else {
+ assert(Type == R_386_TLS_GOTIE);
+ if (Loc[-2] == 0x8b) {
+ // "movl foo@gotntpoff(%ebx),%reg" -> "movl $foo,%reg"
+ Loc[-2] = 0xc7;
+ Loc[-1] = 0xc0 | Reg;
+ } else {
+ // "addl foo@gotntpoff(%ebx),%reg" -> "leal foo(%reg),%reg"
+ Loc[-2] = 0x8d;
+ Loc[-1] = 0x80 | (Reg << 3) | Reg;
+ }
+ }
+ write32le(Loc, Val);
+// Rewrites a Local Dynamic TLS access into the Local Exec form.
+// R_386_TLS_LDO_32 fields simply receive Val. For any other type the 11-byte
+// replacement sequence below is copied starting 2 bytes before Loc; Val is
+// not used in that case.
+void X86::relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
+ if (Type == R_386_TLS_LDO_32) {
+ write32le(Loc, Val);
+ return;
+ }
+ // Convert
+ // leal foo(%reg),%eax
+ // call ___tls_get_addr
+ // to
+ // movl %gs:0,%eax
+ // nop
+ // leal 0(%esi,1),%esi
+ const uint8_t Inst[] = {
+ 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
+ 0x90, // nop
+ 0x8d, 0x74, 0x26, 0x00, // leal 0(%esi,1),%esi
+ };
+ memcpy(Loc - 2, Inst, sizeof(Inst));
+namespace {
+// X86 variant that overrides the GOTPLT/PLT writers. Selected by
+// getX86TargetInfo() when Config->ZRetpolineplt and Config->Pic are both set.
+class RetpolinePic : public X86 {
+ RetpolinePic();
+ void writeGotPlt(uint8_t *Buf, const Symbol &S) const override;
+ void writePltHeader(uint8_t *Buf) const override;
+ void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
+ int32_t Index, unsigned RelOff) const override;
+// X86 variant that overrides the GOTPLT/PLT writers. Selected by
+// getX86TargetInfo() when Config->ZRetpolineplt is set and Config->Pic is not.
+class RetpolineNoPic : public X86 {
+ RetpolineNoPic();
+ void writeGotPlt(uint8_t *Buf, const Symbol &S) const override;
+ void writePltHeader(uint8_t *Buf) const override;
+ void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
+ int32_t Index, unsigned RelOff) const override;
+} // namespace
+// The sizes match the byte arrays emitted by RetpolinePic::writePltHeader
+// (48 bytes) and RetpolinePic::writePlt (32 bytes).
+RetpolinePic::RetpolinePic() {
+ PltHeaderSize = 48;
+ PltEntrySize = 32;
+// Stores the symbol's PLT address plus 17 in its .got.plt slot. Offset 17 is
+// where RetpolinePic::writePlt places the "pushl $reloc_offset" instruction
+// (1 + 6 + 5 + 5 bytes into the entry).
+void RetpolinePic::writeGotPlt(uint8_t *Buf, const Symbol &S) const {
+ write32le(Buf, S.getPltVA() + 17);
+// Emits the 48-byte PLT header, then patches the two 32-bit displacements at
+// Buf + 2 (pushl) and Buf + 9 (mov).
+void RetpolinePic::writePltHeader(uint8_t *Buf) const {
+ const uint8_t Insn[] = {
+ 0xff, 0xb3, 0, 0, 0, 0, // 0: pushl GOTPLT+4(%ebx)
+ 0x50, // 6: pushl %eax
+ 0x8b, 0x83, 0, 0, 0, 0, // 7: mov GOTPLT+8(%ebx), %eax
+ 0xe8, 0x0e, 0x00, 0x00, 0x00, // d: call next
+ 0xf3, 0x90, // 12: loop: pause
+ 0x0f, 0xae, 0xe8, // 14: lfence
+ 0xeb, 0xf9, // 17: jmp loop
+ 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16
+ 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
+ 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
+ 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
+ 0x89, 0xc8, // 2b: mov %ecx, %eax
+ 0x59, // 2d: pop %ecx
+ 0xc3, // 2e: ret
+ 0xcc, // 2f: int3; padding
+ };
+ memcpy(Buf, Insn, sizeof(Insn));
+ // Ebx is the address just past the end of .got; presumably this is the
+ // value %ebx holds at run time -- confirm. GotPlt is the offset of .got.plt
+ // from that address.
+ uint32_t Ebx = In.Got->getVA() + In.Got->getSize();
+ uint32_t GotPlt = In.GotPlt->getVA() - Ebx;
+ write32le(Buf + 2, GotPlt + 4);
+ write32le(Buf + 9, GotPlt + 8);
+// Emits one 32-byte PLT entry and patches its five 32-bit operands.
+// Branch displacements are relative to the end of the branch instruction, so
+// each is computed as -(entry offset) - (end of instruction) + (target offset
+// from the start of the PLT). PltEntryAddr is unused here.
+void RetpolinePic::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
+ uint64_t PltEntryAddr, int32_t Index,
+ unsigned RelOff) const {
+ const uint8_t Insn[] = {
+ 0x50, // pushl %eax
+ 0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax
+ 0xe8, 0, 0, 0, 0, // call plt+0x20
+ 0xe9, 0, 0, 0, 0, // jmp plt+0x12
+ 0x68, 0, 0, 0, 0, // pushl $reloc_offset
+ 0xe9, 0, 0, 0, 0, // jmp plt+0
+ 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // int3; padding
+ };
+ memcpy(Buf, Insn, sizeof(Insn));
+ // Same base as in writePltHeader: the address just past the end of .got.
+ uint32_t Ebx = In.Got->getVA() + In.Got->getSize();
+ unsigned Off = getPltEntryOffset(Index);
+ write32le(Buf + 3, GotPltEntryAddr - Ebx);
+ write32le(Buf + 8, -Off - 12 + 32); // call ends at 12, target plt+0x20
+ write32le(Buf + 13, -Off - 17 + 18); // jmp ends at 17, target plt+0x12
+ write32le(Buf + 18, RelOff);
+ write32le(Buf + 23, -Off - 27); // jmp ends at 27, target plt+0
+// The sizes match the byte arrays emitted by RetpolineNoPic::writePltHeader
+// (48 bytes) and RetpolineNoPic::writePlt (32 bytes).
+RetpolineNoPic::RetpolineNoPic() {
+ PltHeaderSize = 48;
+ PltEntrySize = 32;
+// Stores the symbol's PLT address plus 16 in its .got.plt slot. Offset 0x10
+// is where RetpolineNoPic::writePlt places "pushl $reloc_offset".
+void RetpolineNoPic::writeGotPlt(uint8_t *Buf, const Symbol &S) const {
+ write32le(Buf, S.getPltVA() + 16);
+// Emits the 48-byte PLT header, then patches the two absolute 32-bit
+// addresses at Buf + 2 (pushl) and Buf + 8 (mov).
+void RetpolineNoPic::writePltHeader(uint8_t *Buf) const {
+ const uint8_t Insn[] = {
+ 0xff, 0x35, 0, 0, 0, 0, // 0: pushl GOTPLT+4
+ 0x50, // 6: pushl %eax
+ 0xa1, 0, 0, 0, 0, // 7: mov GOTPLT+8, %eax
+ 0xe8, 0x0f, 0x00, 0x00, 0x00, // c: call next
+ 0xf3, 0x90, // 11: loop: pause
+ 0x0f, 0xae, 0xe8, // 13: lfence
+ 0xeb, 0xf9, // 16: jmp loop
+ 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 18: int3
+ 0xcc, 0xcc, 0xcc, // 1d: int3; .align 16
+ 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
+ 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
+ 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
+ 0x89, 0xc8, // 2b: mov %ecx, %eax
+ 0x59, // 2d: pop %ecx
+ 0xc3, // 2e: ret
+ 0xcc, // 2f: int3; padding
+ };
+ memcpy(Buf, Insn, sizeof(Insn));
+ uint32_t GotPlt = In.GotPlt->getVA();
+ write32le(Buf + 2, GotPlt + 4);
+ write32le(Buf + 8, GotPlt + 8);
+// Emits one 32-byte PLT entry and patches its five 32-bit operands.
+// The mov takes the absolute .got.plt slot address; branch displacements are
+// relative to the end of the branch instruction. PltEntryAddr is unused here.
+void RetpolineNoPic::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
+ uint64_t PltEntryAddr, int32_t Index,
+ unsigned RelOff) const {
+ const uint8_t Insn[] = {
+ 0x50, // 0: pushl %eax
+ 0xa1, 0, 0, 0, 0, // 1: mov foo_in_GOT, %eax
+ 0xe8, 0, 0, 0, 0, // 6: call plt+0x20
+ 0xe9, 0, 0, 0, 0, // b: jmp plt+0x11
+ 0x68, 0, 0, 0, 0, // 10: pushl $reloc_offset
+ 0xe9, 0, 0, 0, 0, // 15: jmp plt+0
+ 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
+ 0xcc, // 1f: int3; padding
+ };
+ memcpy(Buf, Insn, sizeof(Insn));
+ unsigned Off = getPltEntryOffset(Index);
+ write32le(Buf + 2, GotPltEntryAddr);
+ write32le(Buf + 7, -Off - 11 + 32); // call ends at 11, target plt+0x20
+ write32le(Buf + 12, -Off - 16 + 17); // jmp ends at 16, target plt+0x11
+ write32le(Buf + 17, RelOff);
+ write32le(Buf + 22, -Off - 26); // jmp ends at 26, target plt+0
+// Returns the TargetInfo to use for x86. Each variant is a function-local
+// static, so the same object is returned on every call:
+// ZRetpolineplt && Pic -> RetpolinePic
+// ZRetpolineplt && !Pic -> RetpolineNoPic
+// otherwise -> X86
+TargetInfo *elf::getX86TargetInfo() {
+ if (Config->ZRetpolineplt) {
+ if (Config->Pic) {
+ static RetpolinePic T;
+ return &T;
+ }
+ static RetpolineNoPic T;
+ return &T;
+ }
+ static X86 T;
+ return &T;
diff --git a/contrib/llvm/tools/lld/ELF/Arch/X86_64.cpp b/contrib/llvm/tools/lld/ELF/Arch/X86_64.cpp
new file mode 100644
index 000000000000..a000eeb079d9
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Arch/X86_64.cpp
@@ -0,0 +1,677 @@
+//===- X86_64.cpp ---------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "InputFiles.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Support/Endian.h"
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::support::endian;
+using namespace llvm::ELF;
+using namespace lld;
+using namespace lld::elf;
+namespace {
+// TargetInfo implementation for x86-64, templated on the ELF type. It is
+// instantiated with ELF64LE by getX86_64TargetInfo() and with ELF32LE by
+// getX32TargetInfo().
+template <class ELFT> class X86_64 : public TargetInfo {
+ X86_64();
+ RelExpr getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const override;
+ RelType getDynRel(RelType Type) const override;
+ void writeGotPltHeader(uint8_t *Buf) const override;
+ void writeGotPlt(uint8_t *Buf, const Symbol &S) const override;
+ void writePltHeader(uint8_t *Buf) const override;
+ void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
+ int32_t Index, unsigned RelOff) const override;
+ void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+ RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
+ RelExpr Expr) const override;
+ void relaxGot(uint8_t *Loc, uint64_t Val) const override;
+ void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+ void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+ void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+ void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+ bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
+ uint8_t StOther) const override;
+ // Helper called by relaxGot() for the test/binop forms that are only
+ // relaxed when not building position-independent code.
+ void relaxGotNoPic(uint8_t *Loc, uint64_t Val, uint8_t Op,
+ uint8_t ModRm) const;
+} // namespace
+// Fills in the target-specific relocation types, entry sizes and defaults.
+// PltEntrySize and PltHeaderSize (16 each) match the 16-byte arrays emitted by
+// X86_64::writePlt and X86_64::writePltHeader.
+template <class ELFT> X86_64<ELFT>::X86_64() {
+ CopyRel = R_X86_64_COPY;
+ GotRel = R_X86_64_GLOB_DAT;
+ NoneRel = R_X86_64_NONE;
+ PltRel = R_X86_64_JUMP_SLOT;
+ RelativeRel = R_X86_64_RELATIVE;
+ IRelativeRel = R_X86_64_IRELATIVE;
+ TlsGotRel = R_X86_64_TPOFF64;
+ TlsModuleIndexRel = R_X86_64_DTPMOD64;
+ TlsOffsetRel = R_X86_64_DTPOFF64;
+ GotEntrySize = 8;
+ GotPltEntrySize = 8;
+ PltEntrySize = 16;
+ PltHeaderSize = 16;
+ TlsGdRelaxSkip = 2;
+ TrapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
+ // Align to the large page size (known as a superpage or huge page).
+ // FreeBSD automatically promotes large, superpage-aligned allocations.
+ DefaultImageBase = 0x200000;
+// Maps an x86-64 relocation type to the RelExpr that describes how its value
+// is computed. Unknown types yield R_INVALID. S and Loc are not consulted.
+template <class ELFT>
+RelExpr X86_64<ELFT>::getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const {
+ switch (Type) {
+ case R_X86_64_8:
+ case R_X86_64_16:
+ case R_X86_64_32:
+ case R_X86_64_32S:
+ case R_X86_64_64:
+ case R_X86_64_DTPOFF32:
+ case R_X86_64_DTPOFF64:
+ return R_ABS;
+ case R_X86_64_TPOFF32:
+ return R_TLS;
+ case R_X86_64_TLSLD:
+ return R_TLSLD_PC;
+ case R_X86_64_TLSGD:
+ return R_TLSGD_PC;
+ case R_X86_64_SIZE32:
+ case R_X86_64_SIZE64:
+ return R_SIZE;
+ case R_X86_64_PLT32:
+ return R_PLT_PC;
+ case R_X86_64_PC32:
+ case R_X86_64_PC64:
+ return R_PC;
+ case R_X86_64_GOT32:
+ case R_X86_64_GOT64:
+ return R_GOT_FROM_END;
+ case R_X86_64_GOTPCREL:
+ case R_X86_64_GOTPCRELX:
+ case R_X86_64_REX_GOTPCRELX:
+ case R_X86_64_GOTTPOFF:
+ return R_GOT_PC;
+ // NOTE(review): as written, GOTOFF64, GOTPC32 and GOTPC64 fall through to
+ // R_NONE together with R_X86_64_NONE, yet relocateOne() writes values for
+ // all three. This looks like return statements for GOT-relative
+ // expressions were lost from this listing -- verify against the original
+ // file before relying on this mapping.
+ case R_X86_64_GOTOFF64:
+ case R_X86_64_GOTPC32:
+ case R_X86_64_GOTPC64:
+ case R_X86_64_NONE:
+ return R_NONE;
+ default:
+ return R_INVALID;
+ }
+// Writes the 64-bit address of the dynamic section into the first .got.plt
+// slot at Buf.
+template <class ELFT> void X86_64<ELFT>::writeGotPltHeader(uint8_t *Buf) const {
+ // The first entry holds the value of _DYNAMIC. It is not clear why that is
+ // required, but it is documented in the psabi and the glibc dynamic linker
+ // seems to use it (note that this is relevant for linking ld.so, not any
+ // other program).
+ write64le(Buf, In.Dynamic->getVA());
+// Stores the symbol's PLT address plus 6 in its .got.plt slot. Offset 6 is
+// the pushq that follows the 6-byte indirect jmpq in X86_64::writePlt.
+template <class ELFT>
+void X86_64<ELFT>::writeGotPlt(uint8_t *Buf, const Symbol &S) const {
+ // See comments in X86::writeGotPlt.
+ write64le(Buf, S.getPltVA() + 6);
+// Emits the 16-byte PLT header and patches its two RIP-relative
+// displacements. Each displacement is relative to the end of its 6-byte
+// instruction, hence +8 - 6 = +2 and +16 - 12 = +4.
+template <class ELFT> void X86_64<ELFT>::writePltHeader(uint8_t *Buf) const {
+ const uint8_t PltData[] = {
+ 0xff, 0x35, 0, 0, 0, 0, // pushq GOTPLT+8(%rip)
+ 0xff, 0x25, 0, 0, 0, 0, // jmp *GOTPLT+16(%rip)
+ 0x0f, 0x1f, 0x40, 0x00, // nop
+ };
+ memcpy(Buf, PltData, sizeof(PltData));
+ uint64_t GotPlt = In.GotPlt->getVA();
+ uint64_t Plt = In.Plt->getVA();
+ write32le(Buf + 2, GotPlt - Plt + 2); // GOTPLT+8
+ write32le(Buf + 8, GotPlt - Plt + 4); // GOTPLT+16
+// Emits one 16-byte PLT entry: an indirect jump through the symbol's
+// .got.plt slot, a push of Index, and a jump back to the start of the PLT.
+// RelOff is unused here.
+template <class ELFT>
+void X86_64<ELFT>::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
+ uint64_t PltEntryAddr, int32_t Index,
+ unsigned RelOff) const {
+ const uint8_t Inst[] = {
+ 0xff, 0x25, 0, 0, 0, 0, // jmpq *got(%rip)
+ 0x68, 0, 0, 0, 0, // pushq <relocation index>
+ 0xe9, 0, 0, 0, 0, // jmpq plt[0]
+ };
+ memcpy(Buf, Inst, sizeof(Inst));
+ // RIP-relative: measured from the end of the 6-byte jmpq.
+ write32le(Buf + 2, GotPltEntryAddr - PltEntryAddr - 6);
+ write32le(Buf + 7, Index);
+ // The final jmpq ends at byte 16 of this entry; the target is plt+0.
+ write32le(Buf + 12, -getPltEntryOffset(Index) - 16);
+// Returns Type unchanged for R_X86_64_64, PC64, SIZE32 and SIZE64; every
+// other type maps to R_X86_64_NONE.
+template <class ELFT> RelType X86_64<ELFT>::getDynRel(RelType Type) const {
+ if (Type == R_X86_64_64 || Type == R_X86_64_PC64 || Type == R_X86_64_SIZE32 ||
+ Type == R_X86_64_SIZE64)
+ return Type;
+ return R_X86_64_NONE;
+// Rewrites a General Dynamic TLS access into the Local Exec form.
+// The 16-byte replacement is copied starting 4 bytes before Loc. The value is
+// stored at Loc + 8, which is byte 12 of the sequence, i.e. the 32-bit
+// displacement of the lea. Type is unused.
+template <class ELFT>
+void X86_64<ELFT>::relaxTlsGdToLe(uint8_t *Loc, RelType Type,
+ uint64_t Val) const {
+ // Convert
+ // .byte 0x66
+ // leaq x@tlsgd(%rip), %rdi
+ // .word 0x6666
+ // rex64
+ // call __tls_get_addr@plt
+ // to
+ // mov %fs:0x0,%rax
+ // lea x@tpoff(%rax),%rax
+ const uint8_t Inst[] = {
+ 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax
+ 0x48, 0x8d, 0x80, 0, 0, 0, 0, // lea x@tpoff(%rax),%rax
+ };
+ memcpy(Loc - 4, Inst, sizeof(Inst));
+ // The original code used a pc relative relocation and so we have to
+ // compensate for the -4 it had in the addend.
+ write32le(Loc + 8, Val + 4);
+// Rewrites a General Dynamic TLS access into the Initial Exec form.
+// The 16-byte replacement is copied starting 4 bytes before Loc. The value is
+// stored at Loc + 8, which is byte 12 of the sequence, i.e. the RIP-relative
+// displacement of the addq. Type is unused.
+template <class ELFT>
+void X86_64<ELFT>::relaxTlsGdToIe(uint8_t *Loc, RelType Type,
+ uint64_t Val) const {
+ // Convert
+ // .byte 0x66
+ // leaq x@tlsgd(%rip), %rdi
+ // .word 0x6666
+ // rex64
+ // call __tls_get_addr@plt
+ // to
+ // mov %fs:0x0,%rax
+ // addq x@gottpoff(%rip),%rax
+ const uint8_t Inst[] = {
+ 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax
+ 0x48, 0x03, 0x05, 0, 0, 0, 0, // addq x@gottpoff(%rip),%rax
+ };
+ memcpy(Loc - 4, Inst, sizeof(Inst));
+ // Both code sequences are PC relatives, but since we are moving the constant
+ // forward by 8 bytes we have to subtract the value by 8.
+ write32le(Loc + 8, Val - 8);
+// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
+// R_X86_64_TPOFF32 so that it does not use GOT.
+// The three bytes before Loc (REX prefix, opcode, ModRM) are matched against
+// the known ADDQ/MOVQ encodings and rewritten in place; the value is then
+// written over the 32-bit field at Loc.
+template <class ELFT>
+void X86_64<ELFT>::relaxTlsIeToLe(uint8_t *Loc, RelType Type,
+ uint64_t Val) const {
+ uint8_t *Inst = Loc - 3;
+ // ModRM shifted right by 3 with no mask: this equals the reg field only when
+ // the two mod bits are zero (presumably always the case for the
+ // RIP-relative operand being relaxed -- confirm).
+ uint8_t Reg = Loc[-1] >> 3;
+ uint8_t *RegSlot = Loc - 1;
+ // Note that ADD with RSP or R12 is converted to ADD instead of LEA
+ // because LEA with these registers needs 4 bytes to encode and thus
+ // wouldn't fit the space.
+ if (memcmp(Inst, "\x48\x03\x25", 3) == 0) {
+ // "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
+ memcpy(Inst, "\x48\x81\xc4", 3);
+ } else if (memcmp(Inst, "\x4c\x03\x25", 3) == 0) {
+ // "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12"
+ memcpy(Inst, "\x49\x81\xc4", 3);
+ } else if (memcmp(Inst, "\x4c\x03", 2) == 0) {
+ // "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
+ memcpy(Inst, "\x4d\x8d", 2);
+ *RegSlot = 0x80 | (Reg << 3) | Reg;
+ } else if (memcmp(Inst, "\x48\x03", 2) == 0) {
+ // "addq foo@gottpoff(%rip),%reg -> "leaq foo(%reg),%reg"
+ memcpy(Inst, "\x48\x8d", 2);
+ *RegSlot = 0x80 | (Reg << 3) | Reg;
+ } else if (memcmp(Inst, "\x4c\x8b", 2) == 0) {
+ // "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
+ memcpy(Inst, "\x49\xc7", 2);
+ *RegSlot = 0xc0 | Reg;
+ } else if (memcmp(Inst, "\x48\x8b", 2) == 0) {
+ // "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg"
+ memcpy(Inst, "\x48\xc7", 2);
+ *RegSlot = 0xc0 | Reg;
+ } else {
+ // There is no return after the error: the write below still happens for
+ // an unrecognized instruction.
+ error(getErrorLocation(Loc - 3) +
+ "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only");
+ }
+ // The original code used a PC relative relocation.
+ // Need to compensate for the -4 it had in the addend.
+ write32le(Loc, Val + 4);
+// Rewrites a Local Dynamic TLS access into the Local Exec form.
+// DTPOFF64/DTPOFF32 fields simply receive Val. Otherwise the relocated leaq
+// and the call that follows it are overwritten with a padded
+// "mov %fs:0,%rax"; which padding is used depends on whether the call is
+// direct (e8) or indirect through the GOT (ff 15). Any other following
+// instruction is reported as an error.
+template <class ELFT>
+void X86_64<ELFT>::relaxTlsLdToLe(uint8_t *Loc, RelType Type,
+ uint64_t Val) const {
+ if (Type == R_X86_64_DTPOFF64) {
+ write64le(Loc, Val);
+ return;
+ }
+ if (Type == R_X86_64_DTPOFF32) {
+ write32le(Loc, Val);
+ return;
+ }
+ // 12 bytes: three 0x66 prefixes followed by the 9-byte mov.
+ const uint8_t Inst[] = {
+ 0x66, 0x66, // .word 0x6666
+ 0x66, // .byte 0x66
+ 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0,%rax
+ };
+ // Loc[4] is the first byte after the 4-byte relocated field, i.e. the
+ // opcode of the instruction following the leaq.
+ if (Loc[4] == 0xe8) {
+ // Convert
+ // leaq bar@tlsld(%rip), %rdi # 48 8d 3d <Loc>
+ // callq __tls_get_addr@PLT # e8 <disp32>
+ // leaq bar@dtpoff(%rax), %rcx
+ // to
+ // .word 0x6666
+ // .byte 0x66
+ // mov %fs:0,%rax
+ // leaq bar@tpoff(%rax), %rcx
+ memcpy(Loc - 3, Inst, sizeof(Inst));
+ return;
+ }
+ if (Loc[4] == 0xff && Loc[5] == 0x15) {
+ // Convert
+ // leaq x@tlsld(%rip),%rdi # 48 8d 3d <Loc>
+ // call *__tls_get_addr@GOTPCREL(%rip) # ff 15 <disp32>
+ // to
+ // .long 0x66666666
+ // movq %fs:0,%rax
+ // See "Table 11.9: LD -> LE Code Transition (LP64)" in
+ // https://raw.githubusercontent.com/wiki/hjl-tools/x86-psABI/x86-64-psABI-1.0.pdf
+ // The indirect call is one byte longer than the direct one, so one extra
+ // 0x66 prefix is written first and the 12-byte sequence starts at Loc - 2.
+ Loc[-3] = 0x66;
+ memcpy(Loc - 2, Inst, sizeof(Inst));
+ return;
+ }
+ error(getErrorLocation(Loc - 3) +
+ "expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD");
+// Writes the already-computed relocation value Val at Loc with the width
+// implied by Type. The 8/16/32-bit absolute types are range-checked as
+// unsigned, the remaining 32-bit types as signed, and 64-bit types are stored
+// without a check. Unknown types are reported as an error.
+template <class ELFT>
+void X86_64<ELFT>::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
+ switch (Type) {
+ case R_X86_64_8:
+ checkUInt(Loc, Val, 8, Type);
+ *Loc = Val;
+ break;
+ case R_X86_64_16:
+ checkUInt(Loc, Val, 16, Type);
+ write16le(Loc, Val);
+ break;
+ case R_X86_64_32:
+ checkUInt(Loc, Val, 32, Type);
+ write32le(Loc, Val);
+ break;
+ case R_X86_64_32S:
+ case R_X86_64_TPOFF32:
+ case R_X86_64_GOT32:
+ case R_X86_64_GOTPC32:
+ case R_X86_64_GOTPCREL:
+ case R_X86_64_GOTPCRELX:
+ case R_X86_64_REX_GOTPCRELX:
+ case R_X86_64_PC32:
+ case R_X86_64_GOTTPOFF:
+ case R_X86_64_PLT32:
+ case R_X86_64_TLSGD:
+ case R_X86_64_TLSLD:
+ case R_X86_64_DTPOFF32:
+ case R_X86_64_SIZE32:
+ checkInt(Loc, Val, 32, Type);
+ write32le(Loc, Val);
+ break;
+ case R_X86_64_64:
+ case R_X86_64_DTPOFF64:
+ case R_X86_64_GLOB_DAT:
+ case R_X86_64_PC64:
+ case R_X86_64_SIZE64:
+ case R_X86_64_GOT64:
+ case R_X86_64_GOTOFF64:
+ case R_X86_64_GOTPC64:
+ write64le(Loc, Val);
+ break;
+ default:
+ error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type));
+ }
+// Decides whether a GOTPCRELX-style relocation can be relaxed, based on the
+// opcode and ModRM bytes that precede the relocated field. Returns the
+// incoming expression unchanged when no relaxation applies.
+// Note that the third parameter is named RelExpr here, shadowing the type of
+// the same name inside the body.
+template <class ELFT>
+RelExpr X86_64<ELFT>::adjustRelaxExpr(RelType Type, const uint8_t *Data,
+ RelExpr RelExpr) const {
+ if (Type != R_X86_64_GOTPCRELX && Type != R_X86_64_REX_GOTPCRELX)
+ return RelExpr;
+ const uint8_t Op = Data[-2];
+ const uint8_t ModRm = Data[-1];
+ // FIXME: When PIC is disabled and foo is defined locally in the
+ // lower 32 bit address space, memory operand in mov can be converted into
+ // immediate operand. Otherwise, mov must be changed to lea. We support only
+ // latter relaxation at this moment.
+ if (Op == 0x8b)
+ return R_RELAX_GOT_PC;
+ // Relax call and jmp.
+ if (Op == 0xff && (ModRm == 0x15 || ModRm == 0x25))
+ return R_RELAX_GOT_PC;
+ // Relaxation of test, adc, add, and, cmp, or, sbb, sub, xor.
+ // If PIC then no relaxation is available.
+ // We also don't relax test/binop instructions without REX byte,
+ // they are 32bit operations and not common to have.
+ // NOTE(review): the "without REX byte" exclusion is enforced only by the
+ // assert below. With asserts compiled out, an R_X86_64_GOTPCRELX relocation
+ // on such an instruction would still reach the NOPIC relaxation -- confirm
+ // whether an explicit early return is intended.
+ assert(Type == R_X86_64_REX_GOTPCRELX);
+ return Config->Pic ? RelExpr : R_RELAX_GOT_PC_NOPIC;
+// A subset of relaxations can only be applied for no-PIC. This method
+// handles such relaxations. Instructions encoding information was taken from:
+// "Intel 64 and IA-32 Architectures Software Developer's Manual V2"
+// (http://www.intel.com/content/dam/www/public/us/en/documents/manuals/
+// 64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf)
+// Loc points at the 32-bit field; Loc[-3], Loc[-2] and Loc[-1] are the REX
+// prefix, opcode and ModRM bytes, which are rewritten in place. Op and ModRm
+// are the original opcode and ModRM values passed in by the caller.
+template <class ELFT>
+void X86_64<ELFT>::relaxGotNoPic(uint8_t *Loc, uint64_t Val, uint8_t Op,
+ uint8_t ModRm) const {
+ const uint8_t Rex = Loc[-3];
+ // Convert "test %reg, foo@GOTPCREL(%rip)" to "test $foo, %reg".
+ if (Op == 0x85) {
+ // See "TEST-Logical Compare" (4-428 Vol. 2B),
+ // TEST r/m64, r64 uses "full" ModR / M byte (no opcode extension).
+ // ModR/M byte has form XX YYY ZZZ, where
+ // YYY is MODRM.reg(register 2), ZZZ is MODRM.rm(register 1).
+ // XX has different meanings:
+ // 00: The operand's memory address is in reg1.
+ // 01: The operand's memory address is reg1 + a byte-sized displacement.
+ // 10: The operand's memory address is reg1 + a word-sized displacement.
+ // 11: The operand is reg1 itself.
+ // If an instruction requires only one operand, the unused reg2 field
+ // holds extra opcode bits rather than a register code
+ // 0xC0 == 11 000 000 binary.
+ // 0x38 == 00 111 000 binary.
+ // We transfer reg2 to reg1 here as operand.
+ // See "2.1.3 ModR/M and SIB Bytes" (Vol. 2A 2-3).
+ Loc[-1] = 0xc0 | (ModRm & 0x38) >> 3; // ModR/M byte.
+ // Change opcode from TEST r/m64, r64 to TEST r/m64, imm32
+ // See "TEST-Logical Compare" (4-428 Vol. 2B).
+ Loc[-2] = 0xf7;
+ // Move R bit to the B bit in REX byte.
+ // REX byte is encoded as 0100WRXB, where
+ // 0100 is 4bit fixed pattern.
+ // REX.W When 1, a 64-bit operand size is used. Otherwise, when 0, the
+ // default operand size is used (which is 32-bit for most but not all
+ // instructions).
+ // REX.R This 1-bit value is an extension to the MODRM.reg field.
+ // REX.X This 1-bit value is an extension to the SIB.index field.
+ // REX.B This 1-bit value is an extension to the MODRM.rm field or the
+ // SIB.base field.
+ // See " More on REX Prefix Fields " (2-8 Vol. 2A).
+ Loc[-3] = (Rex & ~0x4) | (Rex & 0x4) >> 2;
+ write32le(Loc, Val);
+ return;
+ }
+ // If we are here then we need to relax the adc, add, and, cmp, or, sbb, sub
+ // or xor operations.
+ // Convert "binop foo@GOTPCREL(%rip), %reg" to "binop $foo, %reg".
+ // Logic is close to one for test instruction above, but we also
+ // write opcode extension here, see below for details.
+ Loc[-1] = 0xc0 | (ModRm & 0x38) >> 3 | (Op & 0x3c); // ModR/M byte.
+ // Primary opcode is 0x81, opcode extension is one of:
+ // 000b = ADD, 001b is OR, 010b is ADC, 011b is SBB,
+ // 100b is AND, 101b is SUB, 110b is XOR, 111b is CMP.
+ // This value was written to MODRM.reg in the line above.
+ // See "3.2 INSTRUCTIONS (A-M)" (Vol. 2A 3-15),
+ // "INSTRUCTION SET REFERENCE, N-Z" (Vol. 2B 4-1) for
+ // descriptions about each operation.
+ Loc[-2] = 0x81;
+ // Same REX.R -> REX.B move as in the TEST case above.
+ Loc[-3] = (Rex & ~0x4) | (Rex & 0x4) >> 2;
+ write32le(Loc, Val);
+// Applies a GOT relaxation chosen by adjustRelaxExpr(). The opcode and ModRM
+// bytes before Loc select the rewrite: mov becomes lea, an indirect call/jmp
+// becomes a direct one, and every other opcode is handed to relaxGotNoPic().
+template <class ELFT>
+void X86_64<ELFT>::relaxGot(uint8_t *Loc, uint64_t Val) const {
+ const uint8_t Op = Loc[-2];
+ const uint8_t ModRm = Loc[-1];
+ // Convert "mov foo@GOTPCREL(%rip),%reg" to "lea foo(%rip),%reg".
+ if (Op == 0x8b) {
+ Loc[-2] = 0x8d;
+ write32le(Loc, Val);
+ return;
+ }
+ if (Op != 0xff) {
+ // We are relaxing a rip relative to an absolute, so compensate
+ // for the old -4 addend.
+ assert(!Config->Pic);
+ relaxGotNoPic(Loc, Val + 4, Op, ModRm);
+ return;
+ }
+ // Convert call/jmp instructions.
+ if (ModRm == 0x15) {
+ // ABI says we can convert "call *foo@GOTPCREL(%rip)" to "nop; call foo".
+ // Instead we convert to "addr32 call foo" where addr32 is an instruction
+ // prefix. That makes result expression to be a single instruction.
+ Loc[-2] = 0x67; // addr32 prefix
+ Loc[-1] = 0xe8; // call
+ write32le(Loc, Val);
+ return;
+ }
+ // Convert "jmp *foo@GOTPCREL(%rip)" to "jmp foo; nop".
+ // jmp doesn't return, so it is fine to use nop here, it is just a stub.
+ assert(ModRm == 0x25);
+ Loc[-2] = 0xe9; // jmp
+ Loc[3] = 0x90; // nop
+ // The 5-byte jmp starts one byte earlier than the old 6-byte form, so its
+ // displacement field sits at Loc - 1 and the value is adjusted by +1.
+ write32le(Loc - 1, Val + 1);
+// This anonymous namespace works around a warning bug in
+// old versions of gcc. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56480
+namespace {
+// A split-stack prologue starts by checking the amount of stack remaining
+// in one of two ways:
+// A) Comparing of the stack pointer to a field in the tcb.
+// B) Or a load of a stack pointer offset with an lea to r10 or r11.
+// Returns true when one of the two forms was recognized at Loc and patched,
+// false otherwise (including when 8 or fewer bytes remain before End).
+// StOther is unused.
+template <>
+bool X86_64<ELF64LE>::adjustPrologueForCrossSplitStack(uint8_t *Loc,
+ uint8_t *End,
+ uint8_t StOther) const {
+ if (Loc + 8 >= End)
+ return false;
+ // Replace "cmp %fs:0x70,%rsp" and subsequent branch
+ // with "stc, nopl 0x0(%rax,%rax,1)"
+ // Only the first 5 bytes are matched, and exactly 8 bytes are overwritten.
+ if (memcmp(Loc, "\x64\x48\x3b\x24\x25", 5) == 0) {
+ memcpy(Loc, "\xf9\x0f\x1f\x84\x00\x00\x00\x00", 8);
+ return true;
+ }
+ // Adjust "lea X(%rsp),%rYY" to lea "(X - 0x4000)(%rsp),%rYY" where rYY could
+ // be r10 or r11. The lea instruction feeds a subsequent compare which checks
+ // if there is X available stack space. Making X larger effectively reserves
+ // that much additional space. The stack grows downward so subtract the value.
+ if (memcmp(Loc, "\x4c\x8d\x94\x24", 4) == 0 ||
+ memcmp(Loc, "\x4c\x8d\x9c\x24", 4) == 0) {
+ // The offset bytes are encoded four bytes after the start of the
+ // instruction.
+ write32le(Loc + 4, read32le(Loc + 4) - 0x4000);
+ return true;
+ }
+ return false;
+// The ELF32LE instantiation must never be asked to adjust a prologue.
+template <>
+bool X86_64<ELF32LE>::adjustPrologueForCrossSplitStack(uint8_t *Loc,
+ uint8_t *End,
+ uint8_t StOther) const {
+ llvm_unreachable("Target doesn't support split stacks.");
+} // namespace
+// These nonstandard PLT entries are to mitigate Spectre v2 security
+// vulnerability. In order to mitigate Spectre v2, we want to avoid indirect
+// branch instructions such as `jmp *GOTPLT(%rip)`. So, in the following PLT
+// entries, we use a CALL followed by MOV and RET to do the same thing as an
+// indirect jump. That instruction sequence is so-called "retpoline".
+// We have two types of retpoline PLTs as a size optimization. If `-z now`
+// is specified, all dynamic symbols are resolved at load-time. Thus, when
+// that option is given, we can omit code for symbol lazy resolution.
+namespace {
+// Retpoline PLT writer with lazy-resolution code. Selected by getTargetInfo()
+// when Config->ZRetpolineplt is set and Config->ZNow is not.
+template <class ELFT> class Retpoline : public X86_64<ELFT> {
+ Retpoline();
+ void writeGotPlt(uint8_t *Buf, const Symbol &S) const override;
+ void writePltHeader(uint8_t *Buf) const override;
+ void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
+ int32_t Index, unsigned RelOff) const override;
+// Smaller retpoline PLT writer. Selected by getTargetInfo() when both
+// Config->ZRetpolineplt and Config->ZNow are set. writeGotPlt is a no-op.
+template <class ELFT> class RetpolineZNow : public X86_64<ELFT> {
+ RetpolineZNow();
+ void writeGotPlt(uint8_t *Buf, const Symbol &S) const override {}
+ void writePltHeader(uint8_t *Buf) const override;
+ void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
+ int32_t Index, unsigned RelOff) const override;
+} // namespace
+// The sizes match the byte arrays emitted by Retpoline::writePltHeader
+// (48 bytes) and Retpoline::writePlt (32 bytes).
+template <class ELFT> Retpoline<ELFT>::Retpoline() {
+ TargetInfo::PltHeaderSize = 48;
+ TargetInfo::PltEntrySize = 32;
+// Stores the symbol's PLT address plus 17 in its .got.plt slot. Offset 0x11
+// is where Retpoline::writePlt places "pushq <relocation index>".
+template <class ELFT>
+void Retpoline<ELFT>::writeGotPlt(uint8_t *Buf, const Symbol &S) const {
+ write64le(Buf, S.getPltVA() + 17);
+// Emits the 48-byte retpoline PLT header and patches the two RIP-relative
+// displacements at Buf + 2 and Buf + 9. Each is relative to the end of its
+// instruction (byte 6 and byte 13 respectively).
+template <class ELFT> void Retpoline<ELFT>::writePltHeader(uint8_t *Buf) const {
+ const uint8_t Insn[] = {
+ 0xff, 0x35, 0, 0, 0, 0, // 0: pushq GOTPLT+8(%rip)
+ 0x4c, 0x8b, 0x1d, 0, 0, 0, 0, // 6: mov GOTPLT+16(%rip), %r11
+ 0xe8, 0x0e, 0x00, 0x00, 0x00, // d: callq next
+ 0xf3, 0x90, // 12: loop: pause
+ 0x0f, 0xae, 0xe8, // 14: lfence
+ 0xeb, 0xf9, // 17: jmp loop
+ 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16
+ 0x4c, 0x89, 0x1c, 0x24, // 20: next: mov %r11, (%rsp)
+ 0xc3, // 24: ret
+ 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 25: int3; padding
+ 0xcc, 0xcc, 0xcc, 0xcc, // 2c: int3; padding
+ };
+ memcpy(Buf, Insn, sizeof(Insn));
+ uint64_t GotPlt = In.GotPlt->getVA();
+ uint64_t Plt = In.Plt->getVA();
+ write32le(Buf + 2, GotPlt - Plt - 6 + 8);
+ write32le(Buf + 9, GotPlt - Plt - 13 + 16);
+// Emits one 32-byte retpoline PLT entry and patches its five 32-bit operands.
+// Branch displacements are relative to the end of the branch instruction, so
+// each is computed as -(entry offset) - (end of instruction) + (target offset
+// from the start of the PLT). RelOff is unused here; Index is pushed instead.
+template <class ELFT>
+void Retpoline<ELFT>::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
+ uint64_t PltEntryAddr, int32_t Index,
+ unsigned RelOff) const {
+ const uint8_t Insn[] = {
+ 0x4c, 0x8b, 0x1d, 0, 0, 0, 0, // 0: mov foo@GOTPLT(%rip), %r11
+ 0xe8, 0, 0, 0, 0, // 7: callq plt+0x20
+ 0xe9, 0, 0, 0, 0, // c: jmp plt+0x12
+ 0x68, 0, 0, 0, 0, // 11: pushq <relocation index>
+ 0xe9, 0, 0, 0, 0, // 16: jmp plt+0
+ 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1b: int3; padding
+ };
+ memcpy(Buf, Insn, sizeof(Insn));
+ uint64_t Off = getPltEntryOffset(Index);
+ write32le(Buf + 3, GotPltEntryAddr - PltEntryAddr - 7);
+ write32le(Buf + 8, -Off - 12 + 32); // callq ends at 12, target plt+0x20
+ write32le(Buf + 13, -Off - 17 + 18); // jmp ends at 17, target plt+0x12
+ write32le(Buf + 18, Index);
+ write32le(Buf + 23, -Off - 27); // jmp ends at 27, target plt+0
+// The sizes match the byte arrays emitted by RetpolineZNow::writePltHeader
+// (32 bytes) and RetpolineZNow::writePlt (16 bytes).
+template <class ELFT> RetpolineZNow<ELFT>::RetpolineZNow() {
+ TargetInfo::PltHeaderSize = 32;
+ TargetInfo::PltEntrySize = 16;
+// Emits the 32-byte retpoline thunk used as the PLT header. The bytes are
+// fixed; nothing is patched after the copy.
+template <class ELFT>
+void RetpolineZNow<ELFT>::writePltHeader(uint8_t *Buf) const {
+ const uint8_t Insn[] = {
+ 0xe8, 0x0b, 0x00, 0x00, 0x00, // 0: call next
+ 0xf3, 0x90, // 5: loop: pause
+ 0x0f, 0xae, 0xe8, // 7: lfence
+ 0xeb, 0xf9, // a: jmp loop
+ 0xcc, 0xcc, 0xcc, 0xcc, // c: int3; .align 16
+ 0x4c, 0x89, 0x1c, 0x24, // 10: next: mov %r11, (%rsp)
+ 0xc3, // 14: ret
+ 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 15: int3; padding
+ 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
+ 0xcc, // 1f: int3; padding
+ };
+ memcpy(Buf, Insn, sizeof(Insn));
+// Emits one 16-byte PLT entry: load the symbol's .got.plt slot into %r11 and
+// jump to the start of the PLT. RelOff is unused here.
+template <class ELFT>
+void RetpolineZNow<ELFT>::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
+ uint64_t PltEntryAddr, int32_t Index,
+ unsigned RelOff) const {
+ const uint8_t Insn[] = {
+ 0x4c, 0x8b, 0x1d, 0, 0, 0, 0, // mov foo@GOTPLT(%rip), %r11
+ 0xe9, 0, 0, 0, 0, // jmp plt+0
+ 0xcc, 0xcc, 0xcc, 0xcc, // int3; padding
+ };
+ memcpy(Buf, Insn, sizeof(Insn));
+ // RIP-relative: measured from the end of the 7-byte mov.
+ write32le(Buf + 3, GotPltEntryAddr - PltEntryAddr - 7);
+ // The jmp ends at byte 12 of this entry; the target is plt+0.
+ write32le(Buf + 8, -getPltEntryOffset(Index) - 12);
+// Returns the TargetInfo to use for the given ELF type. Each variant is a
+// function-local static, so the same object is returned on every call:
+// ZRetpolineplt && ZNow -> RetpolineZNow<ELFT>
+// ZRetpolineplt && !ZNow -> Retpoline<ELFT>
+// otherwise -> X86_64<ELFT>
+template <class ELFT> static TargetInfo *getTargetInfo() {
+ if (Config->ZRetpolineplt) {
+ if (Config->ZNow) {
+ static RetpolineZNow<ELFT> T;
+ return &T;
+ }
+ static Retpoline<ELFT> T;
+ return &T;
+ }
+ static X86_64<ELFT> T;
+ return &T;
+// Public entry points: the x32 target uses the ELF32LE instantiation, the
+// regular x86-64 target uses ELF64LE.
+TargetInfo *elf::getX32TargetInfo() { return getTargetInfo<ELF32LE>(); }
+TargetInfo *elf::getX86_64TargetInfo() { return getTargetInfo<ELF64LE>(); }
diff --git a/contrib/llvm/tools/lld/ELF/Bits.h b/contrib/llvm/tools/lld/ELF/Bits.h
new file mode 100644
index 000000000000..13d40322265e
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Bits.h
@@ -0,0 +1,35 @@
+//===- Bits.h ---------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#ifndef LLD_ELF_BITS_H
+#define LLD_ELF_BITS_H
+#include "Config.h"
+#include "llvm/Support/Endian.h"
+namespace lld {
+namespace elf {
+// Reads an unsigned integer from Buf using Config->Endianness: 64 bits wide
+// when Config->Is64 is set, otherwise 32 bits wide (widened to uint64_t).
+inline uint64_t readUint(uint8_t *Buf) {
+ if (Config->Is64)
+ return llvm::support::endian::read64(Buf, Config->Endianness);
+ return llvm::support::endian::read32(Buf, Config->Endianness);
+// Writes Val to Buf using Config->Endianness: as a 64-bit value when
+// Config->Is64 is set, otherwise as a 32-bit value.
+inline void writeUint(uint8_t *Buf, uint64_t Val) {
+ if (Config->Is64)
+ llvm::support::endian::write64(Buf, Val, Config->Endianness);
+ else
+ llvm::support::endian::write32(Buf, Val, Config->Endianness);
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/CMakeLists.txt b/contrib/llvm/tools/lld/ELF/CMakeLists.txt
new file mode 100644
index 000000000000..a1c23b0d49ac
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/CMakeLists.txt
@@ -0,0 +1,68 @@
+tablegen(LLVM Options.inc -gen-opt-parser-defs)
+ set(tablegen_deps intrinsics_gen)
+ AArch64ErrataFix.cpp
+ Arch/AArch64.cpp
+ Arch/AMDGPU.cpp
+ Arch/ARM.cpp
+ Arch/AVR.cpp
+ Arch/Hexagon.cpp
+ Arch/Mips.cpp
+ Arch/MipsArchTree.cpp
+ Arch/MSP430.cpp
+ Arch/PPC.cpp
+ Arch/PPC64.cpp
+ Arch/RISCV.cpp
+ Arch/SPARCV9.cpp
+ Arch/X86.cpp
+ Arch/X86_64.cpp
+ CallGraphSort.cpp
+ DWARF.cpp
+ Driver.cpp
+ DriverUtils.cpp
+ EhFrame.cpp
+ Filesystem.cpp
+ ICF.cpp
+ InputFiles.cpp
+ InputSection.cpp
+ LTO.cpp
+ LinkerScript.cpp
+ MapFile.cpp
+ MarkLive.cpp
+ OutputSections.cpp
+ Relocations.cpp
+ ScriptLexer.cpp
+ ScriptParser.cpp
+ SymbolTable.cpp
+ Symbols.cpp
+ SyntheticSections.cpp
+ Target.cpp
+ Thunks.cpp
+ Writer.cpp
+ BinaryFormat
+ BitWriter
+ Core
+ DebugInfoDWARF
+ MC
+ Object
+ Option
+ Support
+ lldCommon
+ ELFOptionsTableGen
+ ${tablegen_deps}
+ )
diff --git a/contrib/llvm/tools/lld/ELF/CallGraphSort.cpp b/contrib/llvm/tools/lld/ELF/CallGraphSort.cpp
new file mode 100644
index 000000000000..2a7d78664b8e
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/CallGraphSort.cpp
@@ -0,0 +1,240 @@
+//===- CallGraphSort.cpp --------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+/// Implementation of Call-Chain Clustering from: Optimizing Function Placement
+/// for Large-Scale Data-Center Applications
+/// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
+/// The goal of this algorithm is to improve runtime performance of the final
+/// executable by arranging code sections such that page table and i-cache
+/// misses are minimized.
+/// Definitions:
+/// * Cluster
+/// * An ordered list of input sections which are laid out as a unit. At the
+/// beginning of the algorithm each input section has its own cluster and
+/// the weight of the cluster is the sum of the weight of all incoming
+/// edges.
+/// * Call-Chain Clustering (C³) Heuristic
+/// * Defines when and how clusters are combined. Pick the highest weighted
+/// input section then add it to its most likely predecessor if it wouldn't
+/// penalize it too much.
+/// * Density
+/// * The weight of the cluster divided by the size of the cluster. This is a
+/// proxy for the amount of execution time spent per byte of the cluster.
+/// It does so given a call graph profile by the following:
+/// * Build a weighted call graph from the call graph profile
+/// * Sort input sections by weight
+/// * For each input section starting with the highest weight
+/// * Find its most likely predecessor cluster
+/// * Check if the combined cluster would be too large, or would have too low
+/// a density.
+/// * If not, then combine the clusters.
+/// * Sort non-empty clusters by density
+#include "CallGraphSort.h"
+#include "OutputSections.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+using namespace llvm;
+using namespace lld;
+using namespace lld::elf;
+namespace {
+struct Edge {
+ int From;
+ uint64_t Weight;
+struct Cluster {
+ Cluster(int Sec, size_t S) : Sections{Sec}, Size(S) {}
+ double getDensity() const {
+ if (Size == 0)
+ return 0;
+ return double(Weight) / double(Size);
+ }
+ std::vector<int> Sections;
+ size_t Size = 0;
+ uint64_t Weight = 0;
+ uint64_t InitialWeight = 0;
+ Edge BestPred = {-1, 0};
+class CallGraphSort {
+ CallGraphSort();
+ DenseMap<const InputSectionBase *, int> run();
+ std::vector<Cluster> Clusters;
+ std::vector<const InputSectionBase *> Sections;
+ void groupClusters();
+// Maximum factor by which the combined cluster density can be worse than the
+// original cluster's density for a merge to still be considered.
+constexpr int MAX_DENSITY_DEGRADATION = 8;
+// Maximum cluster size in bytes.
+constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024;
+} // end anonymous namespace
+typedef std::pair<const InputSectionBase *, const InputSectionBase *>
+ SectionPair;
+// Take the edge list in Config->CallGraphProfile, resolve symbol names to
+// Symbols, and generate a graph between InputSections with the provided
+// weights.
+CallGraphSort::CallGraphSort() {
+ MapVector<SectionPair, uint64_t> &Profile = Config->CallGraphProfile;
+ DenseMap<const InputSectionBase *, int> SecToCluster;
+ auto GetOrCreateNode = [&](const InputSectionBase *IS) -> int {
+ auto Res = SecToCluster.insert(std::make_pair(IS, Clusters.size()));
+ if (Res.second) {
+ Sections.push_back(IS);
+ Clusters.emplace_back(Clusters.size(), IS->getSize());
+ }
+ return Res.first->second;
+ };
+ // Create the graph.
+ for (std::pair<SectionPair, uint64_t> &C : Profile) {
+ const auto *FromSB = cast<InputSectionBase>(C.first.first->Repl);
+ const auto *ToSB = cast<InputSectionBase>(C.first.second->Repl);
+ uint64_t Weight = C.second;
+ // Ignore edges between input sections belonging to different output
+ // sections. This is done because otherwise we would end up with clusters
+ // containing input sections that can't actually be placed adjacently in the
+ // output. This messes with the cluster size and density calculations. We
+ // would also end up moving input sections in other output sections without
+ // moving them closer to what calls them.
+ if (FromSB->getOutputSection() != ToSB->getOutputSection())
+ continue;
+ int From = GetOrCreateNode(FromSB);
+ int To = GetOrCreateNode(ToSB);
+ Clusters[To].Weight += Weight;
+ if (From == To)
+ continue;
+ // Remember the best edge.
+ Cluster &ToC = Clusters[To];
+ if (ToC.BestPred.From == -1 || ToC.BestPred.Weight < Weight) {
+ ToC.BestPred.From = From;
+ ToC.BestPred.Weight = Weight;
+ }
+ }
+ for (Cluster &C : Clusters)
+ C.InitialWeight = C.Weight;
+// It's bad to merge clusters which would degrade the density too much.
+static bool isNewDensityBad(Cluster &A, Cluster &B) {
+ double NewDensity = double(A.Weight + B.Weight) / double(A.Size + B.Size);
+ return NewDensity < A.getDensity() / MAX_DENSITY_DEGRADATION;
+static void mergeClusters(Cluster &Into, Cluster &From) {
+ Into.Sections.insert(Into.Sections.end(), From.Sections.begin(),
+ From.Sections.end());
+ Into.Size += From.Size;
+ Into.Weight += From.Weight;
+ From.Sections.clear();
+ From.Size = 0;
+ From.Weight = 0;
+// Group InputSections into clusters using the Call-Chain Clustering heuristic
+// then sort the clusters by density.
+void CallGraphSort::groupClusters() {
+ std::vector<int> SortedSecs(Clusters.size());
+ std::vector<Cluster *> SecToCluster(Clusters.size());
+ for (size_t I = 0; I < Clusters.size(); ++I) {
+ SortedSecs[I] = I;
+ SecToCluster[I] = &Clusters[I];
+ }
+ std::stable_sort(SortedSecs.begin(), SortedSecs.end(), [&](int A, int B) {
+ return Clusters[B].getDensity() < Clusters[A].getDensity();
+ });
+ for (int SI : SortedSecs) {
+ // Clusters[SI] is the same as SecToCluster[SI] here because it has not
+ // been merged into another cluster yet.
+ Cluster &C = Clusters[SI];
+ // Don't consider merging if the edge is unlikely.
+ if (C.BestPred.From == -1 || C.BestPred.Weight * 10 <= C.InitialWeight)
+ continue;
+ Cluster *PredC = SecToCluster[C.BestPred.From];
+ if (PredC == &C)
+ continue;
+ if (C.Size + PredC->Size > MAX_CLUSTER_SIZE)
+ continue;
+ if (isNewDensityBad(*PredC, C))
+ continue;
+ // NOTE: Consider using a disjoint-set to track section -> cluster mapping
+ // if this is ever slow.
+ for (int SI : C.Sections)
+ SecToCluster[SI] = PredC;
+ mergeClusters(*PredC, C);
+ }
+ // Remove empty or dead nodes. Invalidates all cluster indices.
+ llvm::erase_if(Clusters, [](const Cluster &C) {
+ return C.Size == 0 || C.Sections.empty();
+ });
+ // Sort by density.
+ std::stable_sort(Clusters.begin(), Clusters.end(),
+ [](const Cluster &A, const Cluster &B) {
+ return A.getDensity() > B.getDensity();
+ });
+DenseMap<const InputSectionBase *, int> CallGraphSort::run() {
+ groupClusters();
+ // Generate order.
+ DenseMap<const InputSectionBase *, int> OrderMap;
+ ssize_t CurOrder = 1;
+ for (const Cluster &C : Clusters)
+ for (int SecIndex : C.Sections)
+ OrderMap[Sections[SecIndex]] = CurOrder++;
+ return OrderMap;
+// Sort sections by the profile data provided by -callgraph-profile-file.
+// This first builds a call graph based on the profile data then merges sections
+// according to the C³ heuristic. All clusters are then sorted by a density
+// metric to further improve locality.
+DenseMap<const InputSectionBase *, int> elf::computeCallGraphProfileOrder() {
+ return CallGraphSort().run();
diff --git a/contrib/llvm/tools/lld/ELF/CallGraphSort.h b/contrib/llvm/tools/lld/ELF/CallGraphSort.h
new file mode 100644
index 000000000000..3f96dc88f435
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/CallGraphSort.h
@@ -0,0 +1,23 @@
+//===- CallGraphSort.h ------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "llvm/ADT/DenseMap.h"
+namespace lld {
+namespace elf {
+class InputSectionBase;
+llvm::DenseMap<const InputSectionBase *, int> computeCallGraphProfileOrder();
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/Config.h b/contrib/llvm/tools/lld/ELF/Config.h
new file mode 100644
index 000000000000..588d5a6c3b4e
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Config.h
@@ -0,0 +1,295 @@
+//===- Config.h -------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Support/CachePruning.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Endian.h"
+#include <vector>
+namespace lld {
+namespace elf {
+class InputFile;
+class InputSectionBase;
+enum ELFKind {
+ ELFNoneKind,
+ ELF32LEKind,
+ ELF32BEKind,
+ ELF64LEKind,
+ ELF64BEKind
+// For --build-id.
+enum class BuildIdKind { None, Fast, Md5, Sha1, Hexstring, Uuid };
+// For --discard-{all,locals,none}.
+enum class DiscardPolicy { Default, All, Locals, None };
+// For --icf={none,safe,all}.
+enum class ICFLevel { None, Safe, All };
+// For --strip-{all,debug}.
+enum class StripPolicy { None, All, Debug };
+// For --unresolved-symbols.
+enum class UnresolvedPolicy { ReportError, Warn, Ignore };
+// For --orphan-handling.
+enum class OrphanHandlingPolicy { Place, Warn, Error };
+// For --sort-section and linkerscript sorting rules.
+enum class SortSectionPolicy { Default, None, Alignment, Name, Priority };
+// For --target2
+enum class Target2Policy { Abs, Rel, GotRel };
+// For tracking ARM Float Argument PCS
+enum class ARMVFPArgKind { Default, Base, VFP, ToolChain };
+struct SymbolVersion {
+ llvm::StringRef Name;
+ bool IsExternCpp;
+ bool HasWildcard;
+// This struct contains a symbol version definition that
+// can be found in a version script if one is used for the link.
+struct VersionDefinition {
+ llvm::StringRef Name;
+ uint16_t Id = 0;
+ std::vector<SymbolVersion> Globals;
+ size_t NameOff = 0; // Offset in the string table
+// This struct contains the global configuration for the linker.
+// Most fields are direct mapping from the command line options
+// and such fields have the same name as the corresponding options.
+// Most fields are initialized by the driver.
+struct Configuration {
+ uint8_t OSABI = 0;
+ llvm::CachePruningPolicy ThinLTOCachePolicy;
+ llvm::StringMap<uint64_t> SectionStartMap;
+ llvm::StringRef Chroot;
+ llvm::StringRef DynamicLinker;
+ llvm::StringRef DwoDir;
+ llvm::StringRef Entry;
+ llvm::StringRef Emulation;
+ llvm::StringRef Fini;
+ llvm::StringRef Init;
+ llvm::StringRef LTOAAPipeline;
+ llvm::StringRef LTONewPmPasses;
+ llvm::StringRef LTOObjPath;
+ llvm::StringRef LTOSampleProfile;
+ llvm::StringRef MapFile;
+ llvm::StringRef OutputFile;
+ llvm::StringRef OptRemarksFilename;
+ llvm::StringRef ProgName;
+ llvm::StringRef SoName;
+ llvm::StringRef Sysroot;
+ llvm::StringRef ThinLTOCacheDir;
+ llvm::StringRef ThinLTOIndexOnlyArg;
+ std::pair<llvm::StringRef, llvm::StringRef> ThinLTOObjectSuffixReplace;
+ std::pair<llvm::StringRef, llvm::StringRef> ThinLTOPrefixReplace;
+ std::string Rpath;
+ std::vector<VersionDefinition> VersionDefinitions;
+ std::vector<llvm::StringRef> AuxiliaryList;
+ std::vector<llvm::StringRef> FilterList;
+ std::vector<llvm::StringRef> SearchPaths;
+ std::vector<llvm::StringRef> SymbolOrderingFile;
+ std::vector<llvm::StringRef> Undefined;
+ std::vector<SymbolVersion> DynamicList;
+ std::vector<SymbolVersion> VersionScriptGlobals;
+ std::vector<SymbolVersion> VersionScriptLocals;
+ std::vector<uint8_t> BuildIdVector;
+ llvm::MapVector<std::pair<const InputSectionBase *, const InputSectionBase *>,
+ uint64_t>
+ CallGraphProfile;
+ bool AllowMultipleDefinition;
+ bool AndroidPackDynRelocs;
+ bool ARMHasBlx = false;
+ bool ARMHasMovtMovw = false;
+ bool ARMJ1J2BranchEncoding = false;
+ bool AsNeeded = false;
+ bool Bsymbolic;
+ bool BsymbolicFunctions;
+ bool CallGraphProfileSort;
+ bool CheckSections;
+ bool CompressDebugSections;
+ bool Cref;
+ bool DefineCommon;
+ bool Demangle = true;
+ bool DisableVerify;
+ bool EhFrameHdr;
+ bool EmitLLVM;
+ bool EmitRelocs;
+ bool EnableNewDtags;
+ bool ExecuteOnly;
+ bool ExportDynamic;
+ bool FixCortexA53Errata843419;
+ bool FormatBinary = false;
+ bool GcSections;
+ bool GdbIndex;
+ bool GnuHash = false;
+ bool GnuUnique;
+ bool HasDynamicList = false;
+ bool HasDynSymTab;
+ bool IgnoreDataAddressEquality;
+ bool IgnoreFunctionAddressEquality;
+ bool LTODebugPassManager;
+ bool LTONewPassManager;
+ bool MergeArmExidx;
+ bool MipsN32Abi = false;
+ bool NoinhibitExec;
+ bool Nostdlib;
+ bool OFormatBinary;
+ bool Omagic;
+ bool OptRemarksWithHotness;
+ bool PicThunk;
+ bool Pie;
+ bool PrintGcSections;
+ bool PrintIcfSections;
+ bool Relocatable;
+ bool RelrPackDynRelocs;
+ bool SaveTemps;
+ bool SingleRoRx;
+ bool Shared;
+ bool Static = false;
+ bool SysvHash = false;
+ bool Target1Rel;
+ bool Trace;
+ bool ThinLTOEmitImportsFiles;
+ bool ThinLTOIndexOnly;
+ bool TocOptimize;
+ bool UndefinedVersion;
+ bool UseAndroidRelrTags = false;
+ bool WarnBackrefs;
+ bool WarnCommon;
+ bool WarnIfuncTextrel;
+ bool WarnMissingEntry;
+ bool WarnSymbolOrdering;
+ bool WriteAddends;
+ bool ZCombreloc;
+ bool ZCopyreloc;
+ bool ZExecstack;
+ bool ZGlobal;
+ bool ZHazardplt;
+ bool ZIfuncnoplt;
+ bool ZInitfirst;
+ bool ZInterpose;
+ bool ZKeepTextSectionPrefix;
+ bool ZNodefaultlib;
+ bool ZNodelete;
+ bool ZNodlopen;
+ bool ZNow;
+ bool ZOrigin;
+ bool ZRelro;
+ bool ZRodynamic;
+ bool ZText;
+ bool ZRetpolineplt;
+ bool ZWxneeded;
+ DiscardPolicy Discard;
+ ICFLevel ICF;
+ OrphanHandlingPolicy OrphanHandling;
+ SortSectionPolicy SortSection;
+ StripPolicy Strip;
+ UnresolvedPolicy UnresolvedSymbols;
+ Target2Policy Target2;
+ ARMVFPArgKind ARMVFPArgs = ARMVFPArgKind::Default;
+ BuildIdKind BuildId = BuildIdKind::None;
+ ELFKind EKind = ELFNoneKind;
+ uint16_t DefaultSymbolVersion = llvm::ELF::VER_NDX_GLOBAL;
+ uint16_t EMachine = llvm::ELF::EM_NONE;
+ llvm::Optional<uint64_t> ImageBase;
+ uint64_t MaxPageSize;
+ uint64_t MipsGotSize;
+ uint64_t ZStackSize;
+ unsigned LTOPartitions;
+ unsigned LTOO;
+ unsigned Optimize;
+ unsigned ThinLTOJobs;
+ int32_t SplitStackAdjustSize;
+ // The following config options do not directly correspond to any
+ // particular command line options.
+ // True if we need to pass through relocations in input files to the
+ // output file. Usually false because we consume relocations.
+ bool CopyRelocs;
+ // True if the target is ELF64. False if ELF32.
+ bool Is64;
+ // True if the target is little-endian. False if big-endian.
+ bool IsLE;
+ // endianness::little if IsLE is true. endianness::big otherwise.
+ llvm::support::endianness Endianness;
+ // True if the target is the little-endian MIPS64.
+ //
+ // The reason we have this variable only for MIPS is that we use it
+ // often. Some ELF headers for MIPS64EL are in a mixed-endian format
+ // (which is horrible and I'd say that's a serious spec bug), and we
+ // need to know whether we are reading MIPS ELF files or not in
+ // various places.
+ //
+ // (Note that MIPS64EL is not a typo for MIPS64LE. This is the official
+ // name whatever that means. A fun hypothesis is that "EL" is short for
+ // little-endian written in the little-endian order, but I don't know
+ // if that's true.)
+ bool IsMips64EL;
+ // Holds set of ELF header flags for the target.
+ uint32_t EFlags = 0;
+ // The ELF spec defines two types of relocation table entries, RELA and
+ // REL. RELA is a triplet of (offset, info, addend) while REL is a
+ // tuple of (offset, info). Addends for REL are implicit and read from
+ // the location where the relocations are applied. So, REL is more
+ // compact than RELA but requires a bit more work to process.
+ //
+ // (From the linker writer's view, this distinction is not necessary.
+ // If ELF had chosen one of the two and stuck with it, it would have
+ // been easier to write code to process relocations, but it's too late
+ // to change the spec.)
+ //
+ // Each ABI defines its relocation type. IsRela is true if target
+ // uses RELA. As far as we know, all 64-bit ABIs are using RELA. A
+ // few 32-bit ABIs are using RELA too.
+ bool IsRela;
+ // True if we are creating position-independent code.
+ bool Pic;
+ // 4 for ELF32, 8 for ELF64.
+ int Wordsize;
+// The only instance of Configuration struct.
+extern Configuration *Config;
+static inline void errorOrWarn(const Twine &Msg) {
+ if (!Config->NoinhibitExec)
+ error(Msg);
+ else
+ warn(Msg);
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/DWARF.cpp b/contrib/llvm/tools/lld/ELF/DWARF.cpp
new file mode 100644
index 000000000000..17e1a4d600eb
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/DWARF.cpp
@@ -0,0 +1,109 @@
+//===- DWARF.cpp ----------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// The -gdb-index option instructs the linker to emit a .gdb_index section.
+// The section contains information to make gdb startup faster.
+// The format of the section is described at
+// https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html.
+#include "DWARF.h"
+#include "Symbols.h"
+#include "Target.h"
+#include "lld/Common/Memory.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
+#include "llvm/Object/ELFObjectFile.h"
+using namespace llvm;
+using namespace llvm::object;
+using namespace lld;
+using namespace lld::elf;
+template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *Obj) {
+ for (InputSectionBase *Sec : Obj->getSections()) {
+ if (!Sec)
+ continue;
+ if (LLDDWARFSection *M =
+ StringSwitch<LLDDWARFSection *>(Sec->Name)
+ .Case(".debug_addr", &AddrSection)
+ .Case(".debug_gnu_pubnames", &GnuPubNamesSection)
+ .Case(".debug_gnu_pubtypes", &GnuPubTypesSection)
+ .Case(".debug_info", &InfoSection)
+ .Case(".debug_ranges", &RangeSection)
+ .Case(".debug_rnglists", &RngListsSection)
+ .Case(".debug_line", &LineSection)
+ .Default(nullptr)) {
+ M->Data = toStringRef(Sec->data());
+ M->Sec = Sec;
+ continue;
+ }
+ if (Sec->Name == ".debug_abbrev")
+ AbbrevSection = toStringRef(Sec->data());
+ else if (Sec->Name == ".debug_str")
+ StrSection = toStringRef(Sec->data());
+ else if (Sec->Name == ".debug_line_str")
+ LineStringSection = toStringRef(Sec->data());
+ }
+// Find if there is a relocation at Pos in Sec. The code is a bit
+// more complicated than usual because we need to pass a section index
+// to llvm since it has no idea about InputSection.
+template <class ELFT>
+template <class RelTy>
+LLDDwarfObj<ELFT>::findAux(const InputSectionBase &Sec, uint64_t Pos,
+ ArrayRef<RelTy> Rels) const {
+ auto It = std::lower_bound(
+ Rels.begin(), Rels.end(), Pos,
+ [](const RelTy &A, uint64_t B) { return A.r_offset < B; });
+ if (It == Rels.end() || It->r_offset != Pos)
+ return None;
+ const RelTy &Rel = *It;
+ const ObjFile<ELFT> *File = Sec.getFile<ELFT>();
+ uint32_t SymIndex = Rel.getSymbol(Config->IsMips64EL);
+ const typename ELFT::Sym &Sym = File->getELFSyms()[SymIndex];
+ uint32_t SecIndex = File->getSectionIndex(Sym);
+ // Broken debug info can point to a non-Defined symbol.
+ auto *DR = dyn_cast<Defined>(&File->getRelocTargetSym(Rel));
+ if (!DR) {
+ RelType Type = Rel.getType(Config->IsMips64EL);
+ if (Type != Target->NoneRel)
+ error(toString(File) + ": relocation " + lld::toString(Type) + " at 0x" +
+ llvm::utohexstr(Rel.r_offset) + " has unsupported target");
+ return None;
+ }
+ uint64_t Val = DR->Value + getAddend<ELFT>(Rel);
+ // FIXME: We should be consistent about always adding the file
+ // offset or not.
+ if (DR->Section->Flags & ELF::SHF_ALLOC)
+ Val += cast<InputSection>(DR->Section)->getOffsetInFile();
+ return RelocAddrEntry{SecIndex, Val};
+template <class ELFT>
+Optional<RelocAddrEntry> LLDDwarfObj<ELFT>::find(const llvm::DWARFSection &S,
+ uint64_t Pos) const {
+ auto &Sec = static_cast<const LLDDWARFSection &>(S);
+ if (Sec.Sec->AreRelocsRela)
+ return findAux(*Sec.Sec, Pos, Sec.Sec->template relas<ELFT>());
+ return findAux(*Sec.Sec, Pos, Sec.Sec->template rels<ELFT>());
+template class elf::LLDDwarfObj<ELF32LE>;
+template class elf::LLDDwarfObj<ELF32BE>;
+template class elf::LLDDwarfObj<ELF64LE>;
+template class elf::LLDDwarfObj<ELF64BE>;
diff --git a/contrib/llvm/tools/lld/ELF/DWARF.h b/contrib/llvm/tools/lld/ELF/DWARF.h
new file mode 100644
index 000000000000..8ecf02c77fb4
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/DWARF.h
@@ -0,0 +1,93 @@
+//===- DWARF.h -----------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#ifndef LLD_ELF_DWARF_H
+#define LLD_ELF_DWARF_H
+#include "InputFiles.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/Object/ELF.h"
+namespace lld {
+namespace elf {
+class InputSection;
+struct LLDDWARFSection final : public llvm::DWARFSection {
+ InputSectionBase *Sec = nullptr;
+template <class ELFT> class LLDDwarfObj final : public llvm::DWARFObject {
+ explicit LLDDwarfObj(ObjFile<ELFT> *Obj);
+ void forEachInfoSections(
+ llvm::function_ref<void(const llvm::DWARFSection &)> F) const override {
+ F(InfoSection);
+ }
+ const llvm::DWARFSection &getRangeSection() const override {
+ return RangeSection;
+ }
+ const llvm::DWARFSection &getRnglistsSection() const override {
+ return RngListsSection;
+ }
+ const llvm::DWARFSection &getLineSection() const override {
+ return LineSection;
+ }
+ const llvm::DWARFSection &getAddrSection() const override {
+ return AddrSection;
+ }
+ const llvm::DWARFSection &getGnuPubNamesSection() const override {
+ return GnuPubNamesSection;
+ }
+ const llvm::DWARFSection &getGnuPubTypesSection() const override {
+ return GnuPubTypesSection;
+ }
+ StringRef getFileName() const override { return ""; }
+ StringRef getAbbrevSection() const override { return AbbrevSection; }
+ StringRef getStringSection() const override { return StrSection; }
+ StringRef getLineStringSection() const override { return LineStringSection; }
+ bool isLittleEndian() const override {
+ return ELFT::TargetEndianness == llvm::support::little;
+ }
+ llvm::Optional<llvm::RelocAddrEntry> find(const llvm::DWARFSection &Sec,
+ uint64_t Pos) const override;
+ template <class RelTy>
+ llvm::Optional<llvm::RelocAddrEntry> findAux(const InputSectionBase &Sec,
+ uint64_t Pos,
+ ArrayRef<RelTy> Rels) const;
+ LLDDWARFSection GnuPubNamesSection;
+ LLDDWARFSection GnuPubTypesSection;
+ LLDDWARFSection InfoSection;
+ LLDDWARFSection RangeSection;
+ LLDDWARFSection RngListsSection;
+ LLDDWARFSection LineSection;
+ LLDDWARFSection AddrSection;
+ StringRef AbbrevSection;
+ StringRef StrSection;
+ StringRef LineStringSection;
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/Driver.cpp b/contrib/llvm/tools/lld/ELF/Driver.cpp
new file mode 100644
index 000000000000..407f1734f143
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Driver.cpp
@@ -0,0 +1,1651 @@
+//===- Driver.cpp ---------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// The driver drives the entire linking process. It is responsible for
+// parsing command line options and doing whatever it is instructed to do.
+// One notable thing about LLD's driver when compared to other linkers is
+// that LLD's driver is agnostic of the host operating system.
+// Other linkers usually have implicit default values (such as a dynamic
+// linker path or library paths) for each host OS.
+// I don't think implicit default values are useful because they are
+// usually explicitly specified by the compiler driver. They can even
+// be harmful when you are doing cross-linking. Therefore, in LLD, we
+// simply trust the compiler driver to pass all required options and
+// make no attempt to supply defaults on our side.
+#include "Driver.h"
+#include "Config.h"
+#include "Filesystem.h"
+#include "ICF.h"
+#include "InputFiles.h"
+#include "InputSection.h"
+#include "LinkerScript.h"
+#include "MarkLive.h"
+#include "OutputSections.h"
+#include "ScriptParser.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+#include "Writer.h"
+#include "lld/Common/Args.h"
+#include "lld/Common/Driver.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+#include "lld/Common/Strings.h"
+#include "lld/Common/TargetOptionsCommandFlags.h"
+#include "lld/Common/Threads.h"
+#include "lld/Common/Version.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compression.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/TarWriter.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdlib>
+#include <utility>
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::object;
+using namespace llvm::sys;
+using namespace llvm::support;
+using namespace lld;
+using namespace lld::elf;
+Configuration *elf::Config;
+LinkerDriver *elf::Driver;
+static void setConfigs(opt::InputArgList &Args);
+bool elf::link(ArrayRef<const char *> Args, bool CanExitEarly,
+ raw_ostream &Error) {
+ errorHandler().LogName = args::getFilenameWithoutExe(Args[0]);
+ errorHandler().ErrorLimitExceededMsg =
+ "too many errors emitted, stopping now (use "
+ "-error-limit=0 to see all errors)";
+ errorHandler().ErrorOS = &Error;
+ errorHandler().ExitEarly = CanExitEarly;
+ errorHandler().ColorDiagnostics = Error.has_colors();
+ InputSections.clear();
+ OutputSections.clear();
+ BinaryFiles.clear();
+ BitcodeFiles.clear();
+ ObjectFiles.clear();
+ SharedFiles.clear();
+ Config = make<Configuration>();
+ Driver = make<LinkerDriver>();
+ Script = make<LinkerScript>();
+ Symtab = make<SymbolTable>();
+ Tar = nullptr;
+ memset(&In, 0, sizeof(In));
+ Config->ProgName = Args[0];
+ Driver->main(Args);
+ // Exit immediately if we don't need to return to the caller.
+ // This saves time because the overhead of calling destructors
+ // for all globally-allocated objects is not negligible.
+ if (CanExitEarly)
+ exitLld(errorCount() ? 1 : 0);
+ freeArena();
+ return !errorCount();
+// Parses a linker -m option.
+static std::tuple<ELFKind, uint16_t, uint8_t> parseEmulation(StringRef Emul) {
+ uint8_t OSABI = 0;
+ StringRef S = Emul;
+ if (S.endswith("_fbsd")) {
+ S = S.drop_back(5);
+ }
+ std::pair<ELFKind, uint16_t> Ret =
+ StringSwitch<std::pair<ELFKind, uint16_t>>(S)
+ .Cases("aarch64elf", "aarch64linux", "aarch64_elf64_le_vec",
+ {ELF64LEKind, EM_AARCH64})
+ .Cases("armelf", "armelf_linux_eabi", {ELF32LEKind, EM_ARM})
+ .Case("elf32_x86_64", {ELF32LEKind, EM_X86_64})
+ .Cases("elf32btsmip", "elf32btsmipn32", {ELF32BEKind, EM_MIPS})
+ .Cases("elf32ltsmip", "elf32ltsmipn32", {ELF32LEKind, EM_MIPS})
+ .Case("elf32lriscv", {ELF32LEKind, EM_RISCV})
+ .Cases("elf32ppc", "elf32ppclinux", {ELF32BEKind, EM_PPC})
+ .Case("elf64btsmip", {ELF64BEKind, EM_MIPS})
+ .Case("elf64ltsmip", {ELF64LEKind, EM_MIPS})
+ .Case("elf64lriscv", {ELF64LEKind, EM_RISCV})
+ .Case("elf64ppc", {ELF64BEKind, EM_PPC64})
+ .Case("elf64lppc", {ELF64LEKind, EM_PPC64})
+ .Cases("elf_amd64", "elf_x86_64", {ELF64LEKind, EM_X86_64})
+ .Case("elf_i386", {ELF32LEKind, EM_386})
+ .Case("elf_iamcu", {ELF32LEKind, EM_IAMCU})
+ .Default({ELFNoneKind, EM_NONE});
+ if (Ret.first == ELFNoneKind)
+ error("unknown emulation: " + Emul);
+ return std::make_tuple(Ret.first, Ret.second, OSABI);
+// Returns slices of MB by parsing MB as an archive file.
+// Each slice consists of a member file in the archive.
+std::vector<std::pair<MemoryBufferRef, uint64_t>> static getArchiveMembers(
+ MemoryBufferRef MB) {
+ std::unique_ptr<Archive> File =
+ CHECK(Archive::create(MB),
+ MB.getBufferIdentifier() + ": failed to parse archive");
+ std::vector<std::pair<MemoryBufferRef, uint64_t>> V;
+ Error Err = Error::success();
+ bool AddToTar = File->isThin() && Tar;
+ for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) {
+ Archive::Child C =
+ CHECK(COrErr, MB.getBufferIdentifier() +
+ ": could not get the child of the archive");
+ MemoryBufferRef MBRef =
+ CHECK(C.getMemoryBufferRef(),
+ MB.getBufferIdentifier() +
+ ": could not get the buffer for a child of the archive");
+ if (AddToTar)
+ Tar->append(relativeToRoot(check(C.getFullName())), MBRef.getBuffer());
+ V.push_back(std::make_pair(MBRef, C.getChildOffset()));
+ }
+ if (Err)
+ fatal(MB.getBufferIdentifier() + ": Archive::children failed: " +
+ toString(std::move(Err)));
+ // Take ownership of memory buffers created for members of thin archives.
+ for (std::unique_ptr<MemoryBuffer> &MB : File->takeThinBuffers())
+ make<std::unique_ptr<MemoryBuffer>>(std::move(MB));
+ return V;
+// Opens a file and creates a file object. Path has to be resolved already.
+void LinkerDriver::addFile(StringRef Path, bool WithLOption) {
+ using namespace sys::fs;
+ Optional<MemoryBufferRef> Buffer = readFile(Path);
+ if (!Buffer.hasValue())
+ return;
+ MemoryBufferRef MBRef = *Buffer;
+ if (Config->FormatBinary) {
+ Files.push_back(make<BinaryFile>(MBRef));
+ return;
+ }
+ switch (identify_magic(MBRef.getBuffer())) {
+ case file_magic::unknown:
+ readLinkerScript(MBRef);
+ return;
+ case file_magic::archive: {
+ // Handle -whole-archive.
+ if (InWholeArchive) {
+ for (const auto &P : getArchiveMembers(MBRef))
+ Files.push_back(createObjectFile(P.first, Path, P.second));
+ return;
+ }
+ std::unique_ptr<Archive> File =
+ CHECK(Archive::create(MBRef), Path + ": failed to parse archive");
+ // If an archive file has no symbol table, it is likely that a user
+ // is attempting LTO and using a default ar command that doesn't
+ // understand the LLVM bitcode file. It is a pretty common error, so
+ // we'll handle it as if it had a symbol table.
+ if (!File->isEmpty() && !File->hasSymbolTable()) {
+ for (const auto &P : getArchiveMembers(MBRef))
+ Files.push_back(make<LazyObjFile>(P.first, Path, P.second));
+ return;
+ }
+ // Handle the regular case.
+ Files.push_back(make<ArchiveFile>(std::move(File)));
+ return;
+ }
+ case file_magic::elf_shared_object:
+ if (Config->Static || Config->Relocatable) {
+ error("attempted static link of dynamic object " + Path);
+ return;
+ }
+ // DSOs usually have DT_SONAME tags in their ELF headers, and the
+ // sonames are used to identify DSOs. But if they are missing,
+ // they are identified by filenames. We don't know whether the new
+ // file has a DT_SONAME or not because we haven't parsed it yet.
+ // Here, we set the default soname for the file because we might
+ // need it later.
+ //
+ // If a file was specified by -lfoo, the directory part is not
+ // significant, as a user did not specify it. This behavior is
+ // compatible with GNU.
+ Files.push_back(
+ createSharedFile(MBRef, WithLOption ? path::filename(Path) : Path));
+ return;
+ case file_magic::bitcode:
+ case file_magic::elf_relocatable:
+ if (InLib)
+ Files.push_back(make<LazyObjFile>(MBRef, "", 0));
+ else
+ Files.push_back(createObjectFile(MBRef));
+ break;
+ default:
+ error(Path + ": unknown file type");
+ }
+// Adds the given library by searching for it in the input search paths.
+void LinkerDriver::addLibrary(StringRef Name) {
+ if (Optional<std::string> Path = searchLibrary(Name))
+ addFile(*Path, /*WithLOption=*/true);
+ else
+ error("unable to find library -l" + Name);
+// This function is called on startup. We need this for LTO since
+// LTO calls LLVM functions to compile bitcode files to native code.
+// Technically this can be delayed until we read bitcode files, but
+// we don't bother doing it lazily because the initialization is fast.
+static void initLLVM() {
+ InitializeAllTargets();
+ InitializeAllTargetMCs();
+ InitializeAllAsmPrinters();
+ InitializeAllAsmParsers();
+// Some command line options or some combinations of them are not allowed.
+// This function checks for such errors.
+static void checkOptions() {
+ // The MIPS ABI as of 2016 does not support the GNU-style symbol lookup
+ // table which is a relatively new feature.
+ if (Config->EMachine == EM_MIPS && Config->GnuHash)
+ error("the .gnu.hash section is not compatible with the MIPS target");
+ if (Config->FixCortexA53Errata843419 && Config->EMachine != EM_AARCH64)
+ error("--fix-cortex-a53-843419 is only supported on AArch64 targets");
+ if (Config->TocOptimize && Config->EMachine != EM_PPC64)
+ error("--toc-optimize is only supported on the PowerPC64 target");
+ if (Config->Pie && Config->Shared)
+ error("-shared and -pie may not be used together");
+ if (!Config->Shared && !Config->FilterList.empty())
+ error("-F may not be used without -shared");
+ if (!Config->Shared && !Config->AuxiliaryList.empty())
+ error("-f may not be used without -shared");
+ if (!Config->Relocatable && !Config->DefineCommon)
+ error("-no-define-common not supported in non relocatable output");
+ if (Config->Relocatable) {
+ if (Config->Shared)
+ error("-r and -shared may not be used together");
+ if (Config->GcSections)
+ error("-r and --gc-sections may not be used together");
+ if (Config->GdbIndex)
+ error("-r and --gdb-index may not be used together");
+ if (Config->ICF != ICFLevel::None)
+ error("-r and --icf may not be used together");
+ if (Config->Pie)
+ error("-r and -pie may not be used together");
+ }
+ if (Config->ExecuteOnly) {
+ if (Config->EMachine != EM_AARCH64)
+ error("-execute-only is only supported on AArch64 targets");
+ if (Config->SingleRoRx && !Script->HasSectionsCommand)
+ error("-execute-only and -no-rosegment cannot be used together");
+ }
+// Returns the value of the last --reproduce argument if one was given;
+// otherwise returns whatever getenv("LLD_REPRODUCE") yields.
+static const char *getReproduceOption(opt::InputArgList &Args) {
+  auto *Arg = Args.getLastArg(OPT_reproduce);
+  return Arg ? Arg->getValue() : getenv("LLD_REPRODUCE");
+// Returns true if at least one -z argument has a value equal to Key.
+static bool hasZOption(opt::InputArgList &Args, StringRef Key) {
+  return llvm::any_of(Args.filtered(OPT_z), [&](const opt::Arg *Arg) {
+    return Key == Arg->getValue();
+  });
+// Resolves a pair of opposing -z values. The -z arguments are walked in
+// reverse, so the last occurrence on the command line wins: K1 yields true,
+// K2 yields false, and Default is returned when neither appears.
+static bool getZFlag(opt::InputArgList &Args, StringRef K1, StringRef K2,
+                     bool Default) {
+  for (auto *Arg : Args.filtered_reverse(OPT_z)) {
+    StringRef Val = Arg->getValue();
+    if (Val == K1 || Val == K2)
+      return Val == K1;
+  }
+  return Default;
+// Returns true if S is a -z value this driver recognizes: either one of the
+// exact keywords listed below or a "max-page-size=" / "stack-size=" setting.
+static bool isKnownZFlag(StringRef S) {
+  static const char *const Keywords[] = {
+      "combreloc",
+      "copyreloc",
+      "defs",
+      "execstack",
+      "global",
+      "hazardplt",
+      "ifunc-noplt",
+      "initfirst",
+      "interpose",
+      "keep-text-section-prefix",
+      "lazy",
+      "muldefs",
+      "nocombreloc",
+      "nocopyreloc",
+      "nodefaultlib",
+      "nodelete",
+      "nodlopen",
+      "noexecstack",
+      "nokeep-text-section-prefix",
+      "norelro",
+      "notext",
+      "now",
+      "origin",
+      "relro",
+      "retpolineplt",
+      "rodynamic",
+      "text",
+      "wxneeded",
+  };
+  for (const char *Keyword : Keywords)
+    if (S == Keyword)
+      return true;
+  return S.startswith("max-page-size=") || S.startswith("stack-size=");
+// Report an error for an unknown -z option.
+static void checkZOptions(opt::InputArgList &Args) {
+ for (auto *Arg : Args.filtered(OPT_z))
+ if (!isKnownZFlag(Arg->getValue()))
+ error("unknown -z value: " + StringRef(Arg->getValue()));
+// Top-level driver routine. Parses the command line in ArgsArr (its first
+// element is skipped), handles -help/-v/-version and --reproduce, fills in
+// Config, creates the input files, and finally dispatches to link<ELFT>()
+// for the ELF kind stored in Config->EKind. Returns early, without linking,
+// whenever errors have been reported.
+void LinkerDriver::main(ArrayRef<const char *> ArgsArr) {
+ ELFOptTable Parser;
+ opt::InputArgList Args = Parser.parse(ArgsArr.slice(1));
+ // Interpret this flag early because error() depends on it.
+ // The limit defaults to 20 when the option is absent.
+ errorHandler().ErrorLimit = args::getInteger(Args, OPT_error_limit, 20);
+ // Handle -help
+ if (Args.hasArg(OPT_help)) {
+ printHelp();
+ return;
+ }
+ // Handle -v or -version.
+ //
+ // A note about "compatible with GNU linkers" message: this is a hack for
+ // scripts generated by GNU Libtool 2.4.6 (released in February 2014 and
+ // still the newest version in March 2017) or earlier to recognize LLD as
+ // a GNU compatible linker. As long as an output for the -v option
+ // contains "GNU" or "with BFD", they recognize us as GNU-compatible.
+ //
+ // This is a somewhat ugly hack, but in reality, we had no choice other
+ // than doing this. Considering the very long release cycle of Libtool,
+ // it is not easy to improve it to recognize LLD as a GNU compatible
+ // linker in a timely manner. Even if we can make it, there are still a
+ // lot of "configure" scripts out there that are generated by old versions
+ // of Libtool. We cannot convince every software developer to migrate to
+ // the latest version and re-generate scripts. So we have this hack.
+ if (Args.hasArg(OPT_v) || Args.hasArg(OPT_version))
+ message(getLLDVersion() + " (compatible with GNU linkers)");
+ if (const char *Path = getReproduceOption(Args)) {
+ // Note that --reproduce is a debug option so you can ignore it
+ // if you are trying to understand the whole picture of the code.
+ Expected<std::unique_ptr<TarWriter>> ErrOrWriter =
+ TarWriter::create(Path, path::stem(Path));
+ if (ErrOrWriter) {
+ // Seed the archive with the command line and the linker version.
+ Tar = std::move(*ErrOrWriter);
+ Tar->append("response.txt", createResponseFile(Args));
+ Tar->append("version.txt", getLLDVersion() + "\n");
+ } else {
+ error("--reproduce: " + toString(ErrOrWriter.takeError()));
+ }
+ }
+ readConfigs(Args);
+ checkZOptions(Args);
+ // The behavior of -v or --version is a bit strange, but this is
+ // needed for compatibility with GNU linkers: -v alone exits here, -v with
+ // input files keeps going, and -version always exits.
+ if (Args.hasArg(OPT_v) && !Args.hasArg(OPT_INPUT))
+ return;
+ if (Args.hasArg(OPT_version))
+ return;
+ initLLVM();
+ createFiles(Args);
+ if (errorCount())
+ return;
+ // These steps run after createFiles() because inferMachineType() inspects
+ // the loaded files and setConfigs()/checkOptions() read Config->EKind and
+ // Config->EMachine.
+ inferMachineType();
+ setConfigs(Args);
+ checkOptions();
+ if (errorCount())
+ return;
+ // Instantiate the linker for the selected ELF class and endianness.
+ switch (Config->EKind) {
+ case ELF32LEKind:
+ link<ELF32LE>(Args);
+ return;
+ case ELF32BEKind:
+ link<ELF32BE>(Args);
+ return;
+ case ELF64LEKind:
+ link<ELF64LE>(Args);
+ return;
+ case ELF64BEKind:
+ link<ELF64BE>(Args);
+ return;
+ default:
+ llvm_unreachable("unknown Config->EKind");
+ }
+// Concatenates every -rpath value, as returned by args::getStrings(), into a
+// single ':'-separated string.
+static std::string getRpath(opt::InputArgList &Args) {
+  std::vector<StringRef> Paths = args::getStrings(Args, OPT_rpath);
+  return llvm::join(Paths, ":");
+// Determines what we should do if there are remaining unresolved
+// symbols after the name resolution.
+static UnresolvedPolicy getUnresolvedSymbolPolicy(opt::InputArgList &Args) {
+ UnresolvedPolicy ErrorOrWarn = Args.hasFlag(OPT_error_unresolved_symbols,
+ OPT_warn_unresolved_symbols, true)
+ ? UnresolvedPolicy::ReportError
+ : UnresolvedPolicy::Warn;
+ // Process the last of -unresolved-symbols, -no-undefined or -z defs.
+ for (auto *Arg : llvm::reverse(Args)) {
+ switch (Arg->getOption().getID()) {
+ case OPT_unresolved_symbols: {
+ StringRef S = Arg->getValue();
+ if (S == "ignore-all" || S == "ignore-in-object-files")
+ return UnresolvedPolicy::Ignore;
+ if (S == "ignore-in-shared-libs" || S == "report-all")
+ return ErrorOrWarn;
+ error("unknown --unresolved-symbols value: " + S);
+ continue;
+ }
+ case OPT_no_undefined:
+ return ErrorOrWarn;
+ case OPT_z:
+ if (StringRef(Arg->getValue()) == "defs")
+ return ErrorOrWarn;
+ continue;
+ }
+ }
+ // -shared implies -unresolved-symbols=ignore-all because missing
+ // symbols are likely to be resolved at runtime using other DSOs.
+ if (Config->Shared)
+ return UnresolvedPolicy::Ignore;
+ return ErrorOrWarn;
+// Maps the --target2 value ("rel", "abs" or "got-rel") to a Target2Policy.
+// The default is "got-rel"; any other value is reported as an error and
+// also treated as GotRel.
+static Target2Policy getTarget2(opt::InputArgList &Args) {
+  StringRef Kind = Args.getLastArgValue(OPT_target2, "got-rel");
+  if (Kind == "abs")
+    return Target2Policy::Abs;
+  if (Kind == "rel")
+    return Target2Policy::Rel;
+  if (Kind != "got-rel")
+    error("unknown --target2 option: " + Kind);
+  return Target2Policy::GotRel;
+// Returns true if --oformat requests "binary" output. Values starting with
+// "elf" (the default is "elf") yield false; anything else is reported as an
+// error and also yields false.
+static bool isOutputFormatBinary(opt::InputArgList &Args) {
+  StringRef Format = Args.getLastArgValue(OPT_oformat, "elf");
+  const bool IsBinary = Format == "binary";
+  if (!IsBinary && !Format.startswith("elf"))
+    error("unknown --oformat value: " + Format);
+  return IsBinary;
+// Computes the discard policy from the last of -discard-all,
+// -discard-locals and -discard-none. When -r is given the policy is always
+// None; when none of the three options is present it is Default.
+static DiscardPolicy getDiscard(opt::InputArgList &Args) {
+  if (Args.hasArg(OPT_relocatable))
+    return DiscardPolicy::None;
+
+  auto *Arg =
+      Args.getLastArg(OPT_discard_all, OPT_discard_locals, OPT_discard_none);
+  if (!Arg)
+    return DiscardPolicy::Default;
+
+  switch (Arg->getOption().getID()) {
+  case OPT_discard_all:
+    return DiscardPolicy::All;
+  case OPT_discard_locals:
+    return DiscardPolicy::Locals;
+  default:
+    return DiscardPolicy::None;
+  }
+// Returns the value of the last -dynamic-linker argument, or an empty
+// string if the option is absent or a later -no-dynamic-linker overrides it.
+static StringRef getDynamicLinker(opt::InputArgList &Args) {
+  if (auto *Arg = Args.getLastArg(OPT_dynamic_linker, OPT_no_dynamic_linker))
+    if (Arg->getOption().getID() != OPT_no_dynamic_linker)
+      return Arg->getValue();
+  return "";
+// Returns the ICF level selected by the last of the icf_none, icf_safe and
+// icf_all options; None when none of them is present.
+static ICFLevel getICF(opt::InputArgList &Args) {
+  auto *Arg = Args.getLastArg(OPT_icf_none, OPT_icf_safe, OPT_icf_all);
+  if (!Arg)
+    return ICFLevel::None;
+
+  switch (Arg->getOption().getID()) {
+  case OPT_icf_none:
+    return ICFLevel::None;
+  case OPT_icf_safe:
+    return ICFLevel::Safe;
+  default:
+    return ICFLevel::All;
+  }
+// Returns the strip policy chosen by the last of the strip_all and
+// strip_debug options. With -r, or with neither option, the policy is None.
+static StripPolicy getStrip(opt::InputArgList &Args) {
+  if (Args.hasArg(OPT_relocatable))
+    return StripPolicy::None;
+
+  if (auto *Arg = Args.getLastArg(OPT_strip_all, OPT_strip_debug))
+    return Arg->getOption().getID() == OPT_strip_all ? StripPolicy::All
+                                                     : StripPolicy::Debug;
+  return StripPolicy::None;
+// Parses S as a base-16 address; a leading "0x" is optional. If parsing
+// fails, an error naming Arg is reported. VA starts at 0 and is returned
+// as-is in that case (presumably still 0; confirm to_integer leaves the
+// output untouched on failure).
+static uint64_t parseSectionAddress(StringRef S, const opt::Arg &Arg) {
+  // S is a by-value copy, so stripping the prefix does not affect the caller.
+  S.consume_front("0x");
+
+  uint64_t VA = 0;
+  if (!to_integer(S, VA, 16))
+    error("invalid argument: " + toString(Arg));
+  return VA;
+// Builds a map from section name to start address out of every
+// --section-start=<name>=<addr> argument, then applies the last -Ttext,
+// -Tdata and -Tbss values to ".text", ".data" and ".bss" respectively,
+// overwriting any earlier entry for those names.
+static StringMap<uint64_t> getSectionStartMap(opt::InputArgList &Args) {
+  StringMap<uint64_t> Map;
+
+  for (auto *Arg : Args.filtered(OPT_section_start)) {
+    // Split at the first '='; a missing '=' leaves the address part empty.
+    std::pair<StringRef, StringRef> NameAddr =
+        StringRef(Arg->getValue()).split('=');
+    Map[NameAddr.first] = parseSectionAddress(NameAddr.second, *Arg);
+  }
+
+  const struct {
+    unsigned ID;
+    const char *Section;
+  } Shorthands[] = {
+      {OPT_Ttext, ".text"},
+      {OPT_Tdata, ".data"},
+      {OPT_Tbss, ".bss"},
+  };
+  for (const auto &Entry : Shorthands)
+    if (auto *Arg = Args.getLastArg(Entry.ID))
+      Map[Entry.Section] = parseSectionAddress(Arg->getValue(), *Arg);
+
+  return Map;
+// Translates the --sort-section value into a SortSectionPolicy. An absent
+// or empty value yields Default; an unrecognized value is reported as an
+// error and also yields Default.
+static SortSectionPolicy getSortSection(opt::InputArgList &Args) {
+  StringRef Rule = Args.getLastArgValue(OPT_sort_section);
+  if (Rule.empty())
+    return SortSectionPolicy::Default;
+  if (Rule == "name")
+    return SortSectionPolicy::Name;
+  if (Rule == "alignment")
+    return SortSectionPolicy::Alignment;
+  error("unknown --sort-section rule: " + Rule);
+  return SortSectionPolicy::Default;
+static OrphanHandlingPolicy getOrphanHandling(opt::InputArgList &Args) {
+ StringRef S = Args.getLastArgValue(OPT_orphan_handling, "place");
+ if (S == "warn")
+ return OrphanHandlingPolicy::Warn;
+ if (S == "error")
+ return OrphanHandlingPolicy::Error;
+ if (S != "place")
+ error("unknown --orphan-handling mode: " + S);
+ return OrphanHandlingPolicy::Place;
+// Parses --build-id or --build-id=<style> and returns the selected kind
+// together with the user-supplied bytes (non-empty only for the "0x..."
+// form). "tree" is accepted as a synonym for "sha1" because all our hash
+// functions, including -build-id=sha1, are actually tree hashes for
+// performance reasons.
+static std::pair<BuildIdKind, std::vector<uint8_t>>
+getBuildId(opt::InputArgList &Args) {
+  auto *Arg = Args.getLastArg(OPT_build_id, OPT_build_id_eq);
+  if (!Arg)
+    return {BuildIdKind::None, {}};
+
+  // A bare --build-id (no "=<style>") selects the fast hash.
+  if (Arg->getOption().getID() == OPT_build_id)
+    return {BuildIdKind::Fast, {}};
+
+  StringRef Style = Arg->getValue();
+  // A "0x" prefix means the build ID bytes are given literally in hex.
+  if (Style.startswith("0x"))
+    return {BuildIdKind::Hexstring, parseHex(Style.substr(2))};
+
+  BuildIdKind Kind = BuildIdKind::None;
+  if (Style == "fast")
+    Kind = BuildIdKind::Fast;
+  else if (Style == "md5")
+    Kind = BuildIdKind::Md5;
+  else if (Style == "sha1" || Style == "tree")
+    Kind = BuildIdKind::Sha1;
+  else if (Style == "uuid")
+    Kind = BuildIdKind::Uuid;
+  else if (Style != "none")
+    error("unknown --build-id style: " + Style);
+  return {Kind, {}};
+// Decodes -pack-dyn-relocs into a pair of flags: the first is true for the
+// "android" format, the second for "relr"; "android+relr" sets both and
+// "none" (the default) sets neither. Any other value is reported as an
+// error and yields {false, false}.
+static std::pair<bool, bool> getPackDynRelocs(opt::InputArgList &Args) {
+  StringRef Format = Args.getLastArgValue(OPT_pack_dyn_relocs, "none");
+  const bool Both = Format == "android+relr";
+  const bool Android = Both || Format == "android";
+  const bool Relr = Both || Format == "relr";
+  if (!Android && !Relr && Format != "none")
+    error("unknown -pack-dyn-relocs format: " + Format);
+  return {Android, Relr};
+// Reads a call graph profile from MB. Each line must consist of exactly
+// three space-separated fields, "<from-symbol> <to-symbol> <count>"; the
+// count is added to Config->CallGraphProfile under the pair of input
+// sections that define the two symbols. A malformed line is reported as an
+// error and stops processing.
+static void readCallGraph(MemoryBufferRef MB) {
+  // Index the symbols of all object files by name. If a name occurs more
+  // than once, the last symbol visited is the one kept.
+  DenseMap<StringRef, Symbol *> SymbolsByName;
+  for (InputFile *File : ObjectFiles)
+    for (Symbol *Sym : File->getSymbols())
+      SymbolsByName[Sym->getName()] = Sym;
+
+  // Maps a symbol name to the input section that defines it, or nullptr if
+  // the symbol is unknown, is not a Defined symbol, or its section is not
+  // an InputSectionBase.
+  auto SectionOf = [&](StringRef Name) -> InputSectionBase * {
+    Symbol *Sym = SymbolsByName.lookup(Name);
+    if (Sym) {
+      maybeWarnUnorderableSymbol(Sym);
+      Defined *Def = dyn_cast_or_null<Defined>(Sym);
+      return Def ? dyn_cast_or_null<InputSectionBase>(Def->Section) : nullptr;
+    }
+    if (Config->WarnSymbolOrdering)
+      warn(MB.getBufferIdentifier() + ": no such symbol: " + Name);
+    return nullptr;
+  };
+
+  for (StringRef Line : args::getLines(MB)) {
+    SmallVector<StringRef, 3> Columns;
+    Line.split(Columns, ' ');
+
+    uint64_t Weight;
+    const bool WellFormed =
+        Columns.size() == 3 && to_integer(Columns[2], Weight);
+    if (!WellFormed) {
+      error(MB.getBufferIdentifier() + ": parse error");
+      return;
+    }
+
+    // The "to" symbol is only looked up when the "from" symbol resolved.
+    InputSectionBase *From = SectionOf(Columns[0]);
+    if (!From)
+      continue;
+    InputSectionBase *To = SectionOf(Columns[1]);
+    if (!To)
+      continue;
+    Config->CallGraphProfile[std::make_pair(From, To)] += Weight;
+  }
+// Accumulates into Config->CallGraphProfile the call graph entries stored
+// in each object file's CGProfile. An entry is used only when both of its
+// symbols are Defined and both of their sections are InputSectionBase.
+template <class ELFT> static void readCallGraphsFromObjectFiles() {
+  for (auto File : ObjectFiles) {
+    auto *Obj = cast<ObjFile<ELFT>>(File);
+    for (const Elf_CGProfile_Impl<ELFT> &Entry : Obj->CGProfile) {
+      auto *FromDef = dyn_cast<Defined>(&Obj->getSymbol(Entry.cgp_from));
+      auto *ToDef = dyn_cast<Defined>(&Obj->getSymbol(Entry.cgp_to));
+      if (FromDef && ToDef) {
+        auto *FromSec = dyn_cast_or_null<InputSectionBase>(FromDef->Section);
+        auto *ToSec = dyn_cast_or_null<InputSectionBase>(ToDef->Section);
+        if (FromSec && ToSec)
+          Config->CallGraphProfile[{FromSec, ToSec}] += Entry.cgp_weight;
+      }
+    }
+  }
+// Returns whether --compress-debug-sections asks for compression. "none"
+// (the default) yields false. Every other value yields true, with an error
+// reported if the value is not "zlib" and another if zlib is unavailable.
+static bool getCompressDebugSections(opt::InputArgList &Args) {
+  StringRef Mode = Args.getLastArgValue(OPT_compress_debug_sections, "none");
+  if (Mode != "none") {
+    if (Mode != "zlib")
+      error("unknown --compress-debug-sections value: " + Mode);
+    if (!zlib::isAvailable())
+      error("--compress-debug-sections: zlib is not available");
+    return true;
+  }
+  return false;
+// Splits the value of the last occurrence of option Id at its first ';'
+// into an {old, new} pair. Returns two empty strings when the option is
+// absent. An empty "new" part is reported as an error, but the split
+// result is still returned.
+static std::pair<StringRef, StringRef> getOldNewOptions(opt::InputArgList &Args,
+                                                        unsigned Id) {
+  if (auto *Arg = Args.getLastArg(Id)) {
+    StringRef Value = Arg->getValue();
+    std::pair<StringRef, StringRef> Parts = Value.split(';');
+    if (Parts.second.empty())
+      error(Arg->getSpelling() + " expects 'old;new' format, but got " + Value);
+    return Parts;
+  }
+  return {"", ""};
+// Reads the symbol ordering file in MB and returns its entries in order of
+// first appearance. A repeated entry is dropped, with a warning when
+// Config->WarnSymbolOrdering is set.
+static std::vector<StringRef> getSymbolOrderingFile(MemoryBufferRef MB) {
+  SetVector<StringRef> Ordered;
+  for (StringRef Name : args::getLines(MB)) {
+    // insert() returns false when the name is already present.
+    const bool Inserted = Ordered.insert(Name);
+    if (Inserted || !Config->WarnSymbolOrdering)
+      continue;
+    warn(MB.getBufferIdentifier() + ": duplicate ordered symbol: " + Name);
+  }
+  return Ordered.takeVector();
+// Passes the single option Opt to cl::ParseCommandLineOptions, using
+// Config->ProgName as argv[0]. If parsing fails, the captured diagnostic is
+// reported as an error prefixed with Msg.
+// NOTE(review): Opt.data() is handed over as a C string, so this presumably
+// relies on Opt being null-terminated; confirm against callers.
+static void parseClangOption(StringRef Opt, const Twine &Msg) {
+  const char *Argv[] = {Config->ProgName.data(), Opt.data()};
+
+  std::string Diag;
+  raw_string_ostream DiagOS(Diag);
+  if (!cl::ParseCommandLineOptions(2, Argv, "", &DiagOS)) {
+    // Flush so that Diag holds the complete message before it is read.
+    DiagOS.flush();
+    error(Msg + ": " + StringRef(Diag).trim());
+  }
+// Initializes Config members (and a few error-handler and threading
+// settings) from the command line options in Args. Most members map
+// one-to-one onto an option; invalid values are reported with error()
+// either here or in the helper that parses them. This function also reads
+// the files named by --symbol-ordering-file, --retain-symbols-file,
+// --dynamic-list and --version-script.
+void LinkerDriver::readConfigs(opt::InputArgList &Args) {
+ // Settings that live outside Config.
+ errorHandler().Verbose = Args.hasArg(OPT_verbose);
+ errorHandler().FatalWarnings =
+ Args.hasFlag(OPT_fatal_warnings, OPT_no_fatal_warnings, false);
+ ThreadsEnabled = Args.hasFlag(OPT_threads, OPT_no_threads, true);
+ // Multiple definitions are allowed either by the dedicated flag or by
+ // "-z muldefs".
+ Config->AllowMultipleDefinition =
+ Args.hasFlag(OPT_allow_multiple_definition,
+ OPT_no_allow_multiple_definition, false) ||
+ hasZOption(Args, "muldefs");
+ Config->AuxiliaryList = args::getStrings(Args, OPT_auxiliary);
+ Config->Bsymbolic = Args.hasArg(OPT_Bsymbolic);
+ Config->BsymbolicFunctions = Args.hasArg(OPT_Bsymbolic_functions);
+ Config->CheckSections =
+ Args.hasFlag(OPT_check_sections, OPT_no_check_sections, true);
+ Config->Chroot = Args.getLastArgValue(OPT_chroot);
+ Config->CompressDebugSections = getCompressDebugSections(Args);
+ Config->Cref = Args.hasFlag(OPT_cref, OPT_no_cref, false);
+ // DefineCommon defaults to true unless -r is given.
+ Config->DefineCommon = Args.hasFlag(OPT_define_common, OPT_no_define_common,
+ !Args.hasArg(OPT_relocatable));
+ Config->Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, true);
+ Config->DisableVerify = Args.hasArg(OPT_disable_verify);
+ Config->Discard = getDiscard(Args);
+ Config->DwoDir = Args.getLastArgValue(OPT_plugin_opt_dwo_dir_eq);
+ Config->DynamicLinker = getDynamicLinker(Args);
+ Config->EhFrameHdr =
+ Args.hasFlag(OPT_eh_frame_hdr, OPT_no_eh_frame_hdr, false);
+ // NOTE(review): the trailing `false` is passed to hasArg() as an extra
+ // option specifier rather than as a default value; confirm it is intended.
+ Config->EmitLLVM = Args.hasArg(OPT_plugin_opt_emit_llvm, false);
+ Config->EmitRelocs = Args.hasArg(OPT_emit_relocs);
+ Config->CallGraphProfileSort = Args.hasFlag(
+ OPT_call_graph_profile_sort, OPT_no_call_graph_profile_sort, true);
+ Config->EnableNewDtags =
+ Args.hasFlag(OPT_enable_new_dtags, OPT_disable_new_dtags, true);
+ Config->Entry = Args.getLastArgValue(OPT_entry);
+ Config->ExecuteOnly =
+ Args.hasFlag(OPT_execute_only, OPT_no_execute_only, false);
+ Config->ExportDynamic =
+ Args.hasFlag(OPT_export_dynamic, OPT_no_export_dynamic, false);
+ Config->FilterList = args::getStrings(Args, OPT_filter);
+ Config->Fini = Args.getLastArgValue(OPT_fini, "_fini");
+ Config->FixCortexA53Errata843419 = Args.hasArg(OPT_fix_cortex_a53_843419);
+ Config->GcSections = Args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, false);
+ Config->GnuUnique = Args.hasFlag(OPT_gnu_unique, OPT_no_gnu_unique, true);
+ Config->GdbIndex = Args.hasFlag(OPT_gdb_index, OPT_no_gdb_index, false);
+ Config->ICF = getICF(Args);
+ Config->IgnoreDataAddressEquality =
+ Args.hasArg(OPT_ignore_data_address_equality);
+ Config->IgnoreFunctionAddressEquality =
+ Args.hasArg(OPT_ignore_function_address_equality);
+ Config->Init = Args.getLastArgValue(OPT_init, "_init");
+ Config->LTOAAPipeline = Args.getLastArgValue(OPT_lto_aa_pipeline);
+ Config->LTODebugPassManager = Args.hasArg(OPT_lto_debug_pass_manager);
+ Config->LTONewPassManager = Args.hasArg(OPT_lto_new_pass_manager);
+ Config->LTONewPmPasses = Args.getLastArgValue(OPT_lto_newpm_passes);
+ Config->LTOO = args::getInteger(Args, OPT_lto_O, 2);
+ Config->LTOObjPath = Args.getLastArgValue(OPT_plugin_opt_obj_path_eq);
+ Config->LTOPartitions = args::getInteger(Args, OPT_lto_partitions, 1);
+ Config->LTOSampleProfile = Args.getLastArgValue(OPT_lto_sample_profile);
+ Config->MapFile = Args.getLastArgValue(OPT_Map);
+ Config->MipsGotSize = args::getInteger(Args, OPT_mips_got_size, 0xfff0);
+ Config->MergeArmExidx =
+ Args.hasFlag(OPT_merge_exidx_entries, OPT_no_merge_exidx_entries, true);
+ Config->NoinhibitExec = Args.hasArg(OPT_noinhibit_exec);
+ Config->Nostdlib = Args.hasArg(OPT_nostdlib);
+ Config->OFormatBinary = isOutputFormatBinary(Args);
+ Config->Omagic = Args.hasFlag(OPT_omagic, OPT_no_omagic, false);
+ Config->OptRemarksFilename = Args.getLastArgValue(OPT_opt_remarks_filename);
+ Config->OptRemarksWithHotness = Args.hasArg(OPT_opt_remarks_with_hotness);
+ Config->Optimize = args::getInteger(Args, OPT_O, 1);
+ Config->OrphanHandling = getOrphanHandling(Args);
+ Config->OutputFile = Args.getLastArgValue(OPT_o);
+ Config->Pie = Args.hasFlag(OPT_pie, OPT_no_pie, false);
+ Config->PrintIcfSections =
+ Args.hasFlag(OPT_print_icf_sections, OPT_no_print_icf_sections, false);
+ Config->PrintGcSections =
+ Args.hasFlag(OPT_print_gc_sections, OPT_no_print_gc_sections, false);
+ Config->Rpath = getRpath(Args);
+ Config->Relocatable = Args.hasArg(OPT_relocatable);
+ Config->SaveTemps = Args.hasArg(OPT_save_temps);
+ Config->SearchPaths = args::getStrings(Args, OPT_library_path);
+ Config->SectionStartMap = getSectionStartMap(Args);
+ Config->Shared = Args.hasArg(OPT_shared);
+ Config->SingleRoRx = Args.hasArg(OPT_no_rosegment);
+ Config->SoName = Args.getLastArgValue(OPT_soname);
+ Config->SortSection = getSortSection(Args);
+ Config->SplitStackAdjustSize = args::getInteger(Args, OPT_split_stack_adjust_size, 16384);
+ Config->Strip = getStrip(Args);
+ Config->Sysroot = Args.getLastArgValue(OPT_sysroot);
+ Config->Target1Rel = Args.hasFlag(OPT_target1_rel, OPT_target1_abs, false);
+ Config->Target2 = getTarget2(Args);
+ Config->ThinLTOCacheDir = Args.getLastArgValue(OPT_thinlto_cache_dir);
+ Config->ThinLTOCachePolicy = CHECK(
+ parseCachePruningPolicy(Args.getLastArgValue(OPT_thinlto_cache_policy)),
+ "--thinlto-cache-policy: invalid cache policy");
+ Config->ThinLTOEmitImportsFiles =
+ Args.hasArg(OPT_plugin_opt_thinlto_emit_imports_files);
+ // Index-only mode is enabled by either spelling of the option; only the
+ // "=<arg>" spelling carries a value.
+ Config->ThinLTOIndexOnly = Args.hasArg(OPT_plugin_opt_thinlto_index_only) ||
+ Args.hasArg(OPT_plugin_opt_thinlto_index_only_eq);
+ Config->ThinLTOIndexOnlyArg =
+ Args.getLastArgValue(OPT_plugin_opt_thinlto_index_only_eq);
+ Config->ThinLTOJobs = args::getInteger(Args, OPT_thinlto_jobs, -1u);
+ Config->ThinLTOObjectSuffixReplace =
+ getOldNewOptions(Args, OPT_plugin_opt_thinlto_object_suffix_replace_eq);
+ Config->ThinLTOPrefixReplace =
+ getOldNewOptions(Args, OPT_plugin_opt_thinlto_prefix_replace_eq);
+ Config->Trace = Args.hasArg(OPT_trace);
+ Config->Undefined = args::getStrings(Args, OPT_undefined);
+ Config->UndefinedVersion =
+ Args.hasFlag(OPT_undefined_version, OPT_no_undefined_version, true);
+ Config->UseAndroidRelrTags = Args.hasFlag(
+ OPT_use_android_relr_tags, OPT_no_use_android_relr_tags, false);
+ // getUnresolvedSymbolPolicy() reads Config->Shared, which is set above.
+ Config->UnresolvedSymbols = getUnresolvedSymbolPolicy(Args);
+ Config->WarnBackrefs =
+ Args.hasFlag(OPT_warn_backrefs, OPT_no_warn_backrefs, false);
+ Config->WarnCommon = Args.hasFlag(OPT_warn_common, OPT_no_warn_common, false);
+ Config->WarnIfuncTextrel =
+ Args.hasFlag(OPT_warn_ifunc_textrel, OPT_no_warn_ifunc_textrel, false);
+ Config->WarnSymbolOrdering =
+ Args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true);
+ // -z options. getZFlag() resolves an on/off pair (last one wins);
+ // hasZOption() only tests for presence.
+ Config->ZCombreloc = getZFlag(Args, "combreloc", "nocombreloc", true);
+ Config->ZCopyreloc = getZFlag(Args, "copyreloc", "nocopyreloc", true);
+ Config->ZExecstack = getZFlag(Args, "execstack", "noexecstack", false);
+ Config->ZGlobal = hasZOption(Args, "global");
+ Config->ZHazardplt = hasZOption(Args, "hazardplt");
+ Config->ZIfuncnoplt = hasZOption(Args, "ifunc-noplt");
+ Config->ZInitfirst = hasZOption(Args, "initfirst");
+ Config->ZInterpose = hasZOption(Args, "interpose");
+ Config->ZKeepTextSectionPrefix = getZFlag(
+ Args, "keep-text-section-prefix", "nokeep-text-section-prefix", false);
+ Config->ZNodefaultlib = hasZOption(Args, "nodefaultlib");
+ Config->ZNodelete = hasZOption(Args, "nodelete");
+ Config->ZNodlopen = hasZOption(Args, "nodlopen");
+ Config->ZNow = getZFlag(Args, "now", "lazy", false);
+ Config->ZOrigin = hasZOption(Args, "origin");
+ Config->ZRelro = getZFlag(Args, "relro", "norelro", true);
+ Config->ZRetpolineplt = hasZOption(Args, "retpolineplt");
+ Config->ZRodynamic = hasZOption(Args, "rodynamic");
+ Config->ZStackSize = args::getZOptionValue(Args, OPT_z, "stack-size", 0);
+ Config->ZText = getZFlag(Args, "text", "notext", true);
+ Config->ZWxneeded = hasZOption(Args, "wxneeded");
+ // Parse LTO options.
+ if (auto *Arg = Args.getLastArg(OPT_plugin_opt_mcpu_eq))
+ parseClangOption(Saver.save("-mcpu=" + StringRef(Arg->getValue())),
+ Arg->getSpelling());
+ for (auto *Arg : Args.filtered(OPT_plugin_opt))
+ parseClangOption(Arg->getValue(), Arg->getSpelling());
+ // Parse -mllvm options.
+ for (auto *Arg : Args.filtered(OPT_mllvm))
+ parseClangOption(Arg->getValue(), Arg->getSpelling());
+ // Range-check the numeric settings read above.
+ if (Config->LTOO > 3)
+ error("invalid optimization level for LTO: " + Twine(Config->LTOO));
+ if (Config->LTOPartitions == 0)
+ error("--lto-partitions: number of threads must be > 0");
+ if (Config->ThinLTOJobs == 0)
+ error("--thinlto-jobs: number of threads must be > 0");
+ if (Config->SplitStackAdjustSize < 0)
+ error("--split-stack-adjust-size: size must be >= 0");
+ // Parse ELF{32,64}{LE,BE} and CPU type.
+ if (auto *Arg = Args.getLastArg(OPT_m)) {
+ StringRef S = Arg->getValue();
+ std::tie(Config->EKind, Config->EMachine, Config->OSABI) =
+ parseEmulation(S);
+ Config->MipsN32Abi = (S == "elf32btsmipn32" || S == "elf32ltsmipn32");
+ Config->Emulation = S;
+ }
+ // Parse -hash-style={sysv,gnu,both}. The flags are only ever set to true
+ // here; when the option is absent they are left untouched.
+ if (auto *Arg = Args.getLastArg(OPT_hash_style)) {
+ StringRef S = Arg->getValue();
+ if (S == "sysv")
+ Config->SysvHash = true;
+ else if (S == "gnu")
+ Config->GnuHash = true;
+ else if (S == "both")
+ Config->SysvHash = Config->GnuHash = true;
+ else
+ error("unknown -hash-style: " + S);
+ }
+ // The print_map option overrides any map file name given above with "-".
+ if (Args.hasArg(OPT_print_map))
+ Config->MapFile = "-";
+ // --omagic is an option to create old-fashioned executables in which
+ // .text segments are writable. Today, the option is still in use to
+ // create special-purpose programs such as boot loaders. It doesn't
+ // make sense to create PT_GNU_RELRO for such executables.
+ if (Config->Omagic)
+ Config->ZRelro = false;
+ std::tie(Config->BuildId, Config->BuildIdVector) = getBuildId(Args);
+ std::tie(Config->AndroidPackDynRelocs, Config->RelrPackDynRelocs) =
+ getPackDynRelocs(Args);
+ // getSymbolOrderingFile() consults Config->WarnSymbolOrdering, set above.
+ if (auto *Arg = Args.getLastArg(OPT_symbol_ordering_file))
+ if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
+ Config->SymbolOrderingFile = getSymbolOrderingFile(*Buffer);
+ // If --retain-symbols-file is used, we'll keep only the symbols listed in
+ // the file and discard all others.
+ if (auto *Arg = Args.getLastArg(OPT_retain_symbols_file)) {
+ Config->DefaultSymbolVersion = VER_NDX_LOCAL;
+ if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
+ for (StringRef S : args::getLines(*Buffer))
+ Config->VersionScriptGlobals.push_back(
+ {S, /*IsExternCpp*/ false, /*HasWildcard*/ false});
+ }
+ bool HasExportDynamic =
+ Args.hasFlag(OPT_export_dynamic, OPT_no_export_dynamic, false);
+ // Parses -dynamic-list and -export-dynamic-symbol. They make some
+ // symbols private. Note that -export-dynamic takes precedence over them
+ // as it says all symbols should be exported.
+ if (!HasExportDynamic) {
+ for (auto *Arg : Args.filtered(OPT_dynamic_list))
+ if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
+ readDynamicList(*Buffer);
+ for (auto *Arg : Args.filtered(OPT_export_dynamic_symbol))
+ Config->DynamicList.push_back(
+ {Arg->getValue(), /*IsExternCpp*/ false, /*HasWildcard*/ false});
+ }
+ // If --export-dynamic-symbol=foo is given and symbol foo is defined in
+ // an object file in an archive file, that object file should be pulled
+ // out and linked. (It doesn't have to behave like that from technical
+ // point of view, but this is needed for compatibility with GNU.)
+ for (auto *Arg : Args.filtered(OPT_export_dynamic_symbol))
+ Config->Undefined.push_back(Arg->getValue());
+ // Read every version script; a script that searchScript() cannot locate
+ // is an error.
+ for (auto *Arg : Args.filtered(OPT_version_script))
+ if (Optional<std::string> Path = searchScript(Arg->getValue())) {
+ if (Optional<MemoryBufferRef> Buffer = readFile(*Path))
+ readVersionScript(*Buffer);
+ } else {
+ error(Twine("cannot find version script ") + Arg->getValue());
+ }
+// Some Config members do not directly correspond to any particular
+// command line option, but are computed from other Config values.
+// This function initializes such members. See Config.h for the details
+// of these values.
+static void setConfigs(opt::InputArgList &Args) {
+  const ELFKind Kind = Config->EKind;
+  const uint16_t Machine = Config->EMachine;
+
+  Config->CopyRelocs = Config->Relocatable || Config->EmitRelocs;
+
+  // ELF class and byte order follow directly from the ELF kind.
+  Config->Is64 = Kind == ELF64LEKind || Kind == ELF64BEKind;
+  Config->IsLE = Kind == ELF32LEKind || Kind == ELF64LEKind;
+  if (Config->IsLE)
+    Config->Endianness = endianness::little;
+  else
+    Config->Endianness = endianness::big;
+  Config->IsMips64EL = Kind == ELF64LEKind && Machine == EM_MIPS;
+
+  Config->Pic = Config->Pie || Config->Shared;
+  // NOTE(review): Config->Pic is passed to hasArg() as a second option
+  // specifier, not as a default value; confirm this is the intended meaning.
+  Config->PicThunk = Args.hasArg(OPT_pic_veneer, Config->Pic);
+  Config->Wordsize = Config->Is64 ? 8 : 4;
+
+  // ELF defines two different ways to store relocation addends:
+  //
+  //  Rel:  Addends are stored at the location where relocations are applied.
+  //  Rela: Addends are stored as part of the relocation entry.
+  //
+  // In other words, Rela makes it easy to read addends at the price of an
+  // extra 4 or 8 bytes for each relocation entry. We don't know why ELF
+  // defined two different mechanisms in the first place, but this is how
+  // the spec is defined.
+  //
+  // You cannot choose which one, Rel or Rela, you want to use. Instead each
+  // ABI defines which one you need to use. The switch below encodes that.
+  switch (Machine) {
+  case EM_AARCH64:
+  case EM_AMDGPU:
+  case EM_HEXAGON:
+  case EM_PPC:
+  case EM_PPC64:
+  case EM_RISCV:
+  case EM_X86_64:
+    Config->IsRela = true;
+    break;
+  default:
+    Config->IsRela = false;
+    break;
+  }
+
+  // If the output uses REL relocations we must store the dynamic relocation
+  // addends to the output sections. We also store addends for RELA
+  // relocations if --apply-dynamic-relocs is used. We default to not
+  // writing the addends when using RELA relocations since any standard
+  // conforming tool can find them in r_addend.
+  const bool ApplyDynamicRelocs = Args.hasFlag(
+      OPT_apply_dynamic_relocs, OPT_no_apply_dynamic_relocs, false);
+  Config->WriteAddends = ApplyDynamicRelocs || !Config->IsRela;
+
+  // TOC optimization defaults to on only for PPC64.
+  const bool TocDefault = Machine == EM_PPC64;
+  Config->TocOptimize =
+      Args.hasFlag(OPT_toc_optimize, OPT_no_toc_optimize, TocDefault);
+// Interprets the value of a "-format" option: returns true for "binary"
+// and false for "elf" or "default". Any other value is reported as an
+// error and also yields false.
+static bool isFormatBinary(StringRef S) {
+  const bool IsBinary = S == "binary";
+  if (IsBinary || S == "elf" || S == "default")
+    return IsBinary;
+  error("unknown -format value: " + S +
+        " (supported formats: elf, default, binary)");
+  return false;
+// Walks the command line in order and creates the input files. Positional
+// options such as --as-needed, -Bstatic, --whole-archive, --start-group and
+// --start-lib only update state here; that state is in effect for the
+// files named after them. Reports "no input files" if nothing was added
+// and no other error was reported.
+void LinkerDriver::createFiles(opt::InputArgList &Args) {
+ // For --{push,pop}-state. Each entry holds (AsNeeded, Static,
+ // InWholeArchive) as they were when --push-state was seen.
+ std::vector<std::tuple<bool, bool, bool>> Stack;
+ // Iterate over argv to process input files and positional arguments.
+ for (auto *Arg : Args) {
+ // Dispatch on the unaliased option so that aliases share a case.
+ switch (Arg->getOption().getUnaliasedOption().getID()) {
+ case OPT_library:
+ addLibrary(Arg->getValue());
+ break;
+ case OPT_INPUT:
+ addFile(Arg->getValue(), /*WithLOption=*/false);
+ break;
+ case OPT_defsym: {
+ // The value must be "<from>=<to>" with both sides non-empty; it is split
+ // at the first '='.
+ StringRef From;
+ StringRef To;
+ std::tie(From, To) = StringRef(Arg->getValue()).split('=');
+ if (From.empty() || To.empty())
+ error("-defsym: syntax error: " + StringRef(Arg->getValue()));
+ else
+ readDefsym(From, MemoryBufferRef(To, "-defsym"));
+ break;
+ }
+ case OPT_script:
+ // The break inside the if means the error below is reached only when
+ // searchScript() finds nothing.
+ if (Optional<std::string> Path = searchScript(Arg->getValue())) {
+ if (Optional<MemoryBufferRef> MB = readFile(*Path))
+ readLinkerScript(*MB);
+ break;
+ }
+ error(Twine("cannot find linker script ") + Arg->getValue());
+ break;
+ case OPT_as_needed:
+ Config->AsNeeded = true;
+ break;
+ case OPT_format:
+ Config->FormatBinary = isFormatBinary(Arg->getValue());
+ break;
+ case OPT_no_as_needed:
+ Config->AsNeeded = false;
+ break;
+ case OPT_Bstatic:
+ Config->Static = true;
+ break;
+ case OPT_Bdynamic:
+ Config->Static = false;
+ break;
+ case OPT_whole_archive:
+ InWholeArchive = true;
+ break;
+ case OPT_no_whole_archive:
+ InWholeArchive = false;
+ break;
+ case OPT_just_symbols:
+ // Create an object file for the named file and flag it as JustSymbols.
+ if (Optional<MemoryBufferRef> MB = readFile(Arg->getValue())) {
+ Files.push_back(createObjectFile(*MB));
+ Files.back()->JustSymbols = true;
+ }
+ break;
+ case OPT_start_group:
+ if (InputFile::IsInGroup)
+ error("nested --start-group");
+ InputFile::IsInGroup = true;
+ break;
+ case OPT_end_group:
+ if (!InputFile::IsInGroup)
+ error("stray --end-group");
+ InputFile::IsInGroup = false;
+ // Advance the group ID for whatever comes after this group.
+ ++InputFile::NextGroupId;
+ break;
+ case OPT_start_lib:
+ if (InLib)
+ error("nested --start-lib");
+ if (InputFile::IsInGroup)
+ error("may not nest --start-lib in --start-group");
+ // --start-lib also sets IsInGroup, like --start-group.
+ InLib = true;
+ InputFile::IsInGroup = true;
+ break;
+ case OPT_end_lib:
+ if (!InLib)
+ error("stray --end-lib");
+ InLib = false;
+ InputFile::IsInGroup = false;
+ ++InputFile::NextGroupId;
+ break;
+ case OPT_push_state:
+ Stack.emplace_back(Config->AsNeeded, Config->Static, InWholeArchive);
+ break;
+ case OPT_pop_state:
+ if (Stack.empty()) {
+ error("unbalanced --push-state/--pop-state");
+ break;
+ }
+ // Restore the most recently pushed state.
+ std::tie(Config->AsNeeded, Config->Static, InWholeArchive) = Stack.back();
+ Stack.pop_back();
+ break;
+ }
+ }
+ // Only complain about missing inputs when nothing else has gone wrong.
+ if (Files.empty() && errorCount() == 0)
+ error("no input files");
+// If -m <machine_type> was not given, infer it from the first input file that has a known ELF kind.
+void LinkerDriver::inferMachineType() {
+ if (Config->EKind != ELFNoneKind)
+ return; // The ELF kind is already set; nothing to infer.
+ for (InputFile *F : Files) {
+ if (F->EKind == ELFNoneKind)
+ continue; // This file carries no ELF kind; try the next one.
+ Config->EKind = F->EKind;
+ Config->EMachine = F->EMachine;
+ Config->OSABI = F->OSABI;
+ Config->MipsN32Abi = Config->EMachine == EM_MIPS && isMipsN32Abi(F);
+ return; // The first file with a known kind decides; later files are not examined.
+ }
+ error("target emulation unknown: -m or at least one .o file required"); // Reached only if no file had a known kind.
+// Parse -z max-page-size=<value>. The default value is defined by
+// each target (Target->DefaultMaxPageSize). A value that is not a power of 2 is reported with error().
+static uint64_t getMaxPageSize(opt::InputArgList &Args) {
+ uint64_t Val = args::getZOptionValue(Args, OPT_z, "max-page-size",
+ Target->DefaultMaxPageSize);
+ if (!isPowerOf2_64(Val))
+ error("max-page-size: value isn't a power of 2");
+ return Val; // Returned as parsed even when the check above reported an error.
+// Parses -image-base option. Returns None when the option was not given.
+static Optional<uint64_t> getImageBase(opt::InputArgList &Args) {
+ // Because we are using "Config->MaxPageSize" here, this function has to be
+ // called after the variable is initialized.
+ auto *Arg = Args.getLastArg(OPT_image_base); // The last occurrence on the command line is used.
+ if (!Arg)
+ return None;
+ StringRef S = Arg->getValue();
+ uint64_t V;
+ if (!to_integer(S, V)) {
+ error("-image-base: number expected, but got " + S);
+ return 0; // Note: yields an engaged Optional holding 0, not None; the error is already reported.
+ }
+ if ((V % Config->MaxPageSize) != 0)
+ warn("-image-base: address isn't multiple of page size: " + S); // Warning only; V is still returned.
+ return V;
+// Parses `--exclude-libs=lib,lib,...` and returns the set of library names.
+// The library names may be delimited by commas or colons.
+static DenseSet<StringRef> getExcludeLibs(opt::InputArgList &Args) {
+ DenseSet<StringRef> Ret;
+ for (auto *Arg : Args.filtered(OPT_exclude_libs)) { // Every occurrence of the option contributes names.
+ StringRef S = Arg->getValue();
+ for (;;) {
+ size_t Pos = S.find_first_of(",:");
+ if (Pos == StringRef::npos)
+ break;
+ Ret.insert(S.substr(0, Pos)); // Text before the delimiter.
+ S = S.substr(Pos + 1); // Continue scanning after the delimiter.
+ }
+ Ret.insert(S); // Whatever follows the last delimiter (the whole value if there was none).
+ }
+ return Ret;
+// Handles the -exclude-libs option. If a static library file is specified
+// by the -exclude-libs option, all public symbols from the archive become
+// private unless otherwise specified by version scripts or something.
+// A special library name "ALL" means all archive files.
+// This is not a popular option, but some programs such as bionic libc use it.
+template <class ELFT>
+static void excludeLibs(opt::InputArgList &Args) {
+ DenseSet<StringRef> Libs = getExcludeLibs(Args);
+ bool All = Libs.count("ALL");
+ auto Visit = [&](InputFile *File) {
+ if (!File->ArchiveName.empty()) // Files with an empty ArchiveName are left alone.
+ if (All || Libs.count(path::filename(File->ArchiveName))) // Matched on the archive's file name, not its full path.
+ for (Symbol *Sym : File->getSymbols())
+ if (!Sym->isLocal() && Sym->File == File) // Only non-local symbols whose File is this file.
+ Sym->VersionId = VER_NDX_LOCAL;
+ };
+ for (InputFile *File : ObjectFiles)
+ Visit(File);
+ for (BitcodeFile *File : BitcodeFiles)
+ Visit(File);
+// Force Sym to be entered in the output. Used for -u or equivalent. A name that is not in the symbol table is ignored.
+template <class ELFT> static void handleUndefined(StringRef Name) {
+ Symbol *Sym = Symtab->find(Name);
+ if (!Sym)
+ return;
+ // Since the symbol may not be used inside the program, LTO may
+ // eliminate it. Mark the symbol as "used" to prevent it.
+ Sym->IsUsedInRegularObj = true;
+ if (Sym->isLazy())
+ Symtab->fetchLazy<ELFT>(Sym); // Lazy symbol: ask the symbol table to fetch it.
+template <class ELFT> static void handleLibcall(StringRef Name) { // Fetches lazy symbol Name only when the buffer that provides it is bitcode.
+ Symbol *Sym = Symtab->find(Name);
+ if (!Sym || !Sym->isLazy())
+ return; // Unknown or non-lazy symbols need no action here.
+ MemoryBufferRef MB;
+ if (auto *LO = dyn_cast<LazyObject>(Sym))
+ MB = LO->File->MB; // Lazy object: the buffer of its file.
+ else
+ MB = cast<LazyArchive>(Sym)->getMemberBuffer(); // Otherwise the symbol must be a LazyArchive (cast<> asserts this).
+ if (isBitcode(MB))
+ Symtab->fetchLazy<ELFT>(Sym);
+// If all references to a DSO happen to be weak, the DSO is not added
+// to DT_NEEDED. If that happens, we need to eliminate shared symbols
+// created from the DSO. Otherwise, they become dangling references
+// that point to a non-existent DSO.
+template <class ELFT> static void demoteSharedSymbols() {
+ for (Symbol *Sym : Symtab->getSymbols()) {
+ if (auto *S = dyn_cast<SharedSymbol>(Sym)) {
+ if (!S->getFile<ELFT>().IsNeeded) {
+ bool Used = S->Used; // Saved and restored around replaceSymbol (presumably it resets the field - TODO confirm).
+ replaceSymbol<Undefined>(S, nullptr, S->getName(), STB_WEAK, S->StOther,
+ S->Type); // Becomes a weak Undefined with the same name, StOther and Type.
+ S->Used = Used;
+ }
+ }
+ }
+// The section referred to by S is considered address-significant. Set the
+// KeepUnique flag on the section if appropriate. Null S, non-Defined S and symbols without a section are ignored.
+static void markAddrsig(Symbol *S) {
+ if (auto *D = dyn_cast_or_null<Defined>(S))
+ if (D->Section)
+ // We don't need to keep text sections unique under --icf=all even if they
+ // are address-significant.
+ if (Config->ICF == ICFLevel::Safe || !(D->Section->Flags & SHF_EXECINSTR))
+ D->Section->KeepUnique = true;
+// Record sections that define symbols mentioned in --keep-unique <symbol>
+// and symbols referred to by address-significance tables. These sections are
+// ineligible for ICF.
+// Symbol indices read from an address-significance table come straight from
+// the input file, so each one is range-checked against the object's symbol
+// list; a malformed table is reported with fatal() instead of being used as
+// an out-of-bounds index.
+template <class ELFT>
+static void findKeepUniqueSections(opt::InputArgList &Args) {
+  for (auto *Arg : Args.filtered(OPT_keep_unique)) {
+    StringRef Name = Arg->getValue();
+    auto *D = dyn_cast_or_null<Defined>(Symtab->find(Name));
+    if (!D || !D->Section) {
+      warn("could not find symbol " + Name + " to keep unique");
+      continue;
+    }
+    D->Section->KeepUnique = true;
+  }
+  // --icf=all --ignore-data-address-equality means that we can ignore
+  // the dynsym and address-significance tables entirely.
+  if (Config->ICF == ICFLevel::All && Config->IgnoreDataAddressEquality)
+    return;
+  // Symbols in the dynsym could be address-significant in other executables
+  // or DSOs, so we conservatively mark them as address-significant.
+  for (Symbol *S : Symtab->getSymbols())
+    if (S->includeInDynsym())
+      markAddrsig(S);
+  // Visit the address-significance table in each object file and mark each
+  // referenced symbol as address-significant.
+  for (InputFile *F : ObjectFiles) {
+    auto *Obj = cast<ObjFile<ELFT>>(F);
+    ArrayRef<Symbol *> Syms = Obj->getSymbols();
+    if (Obj->AddrsigSec) {
+      ArrayRef<uint8_t> Contents =
+          check(Obj->getObj().getSectionContents(Obj->AddrsigSec));
+      const uint8_t *Cur = Contents.begin();
+      while (Cur != Contents.end()) {
+        unsigned Size;
+        const char *Err;
+        uint64_t SymIndex = decodeULEB128(Cur, &Size, Contents.end(), &Err);
+        if (Err)
+          fatal(toString(F) + ": could not decode addrsig section: " + Err);
+        // The index is file-controlled data; reject it before indexing Syms.
+        if (SymIndex >= Syms.size())
+          fatal(toString(F) + ": invalid symbol index in addrsig section: " +
+                Twine(SymIndex));
+        markAddrsig(Syms[SymIndex]);
+        Cur += Size;
+      }
+    } else {
+      // If an object file does not have an address-significance table,
+      // conservatively mark all of its symbols as address-significant.
+      for (Symbol *S : Syms)
+        markAddrsig(S);
+    }
+  }
+}
+template <class ELFT> static Symbol *addUndefined(StringRef Name) { // Adds Name as an undefined STB_GLOBAL / STV_DEFAULT symbol with no file and returns it.
+ return Symtab->addUndefined<ELFT>(Name, STB_GLOBAL, STV_DEFAULT, 0, false,
+ nullptr);
+// The --wrap option is a feature to rename symbols so that you can write
+// wrappers for existing functions. If you pass `-wrap=foo`, all
+// occurrences of symbol `foo` are resolved to `__wrap_foo` (so, you are
+// expected to write `__wrap_foo` function as a wrapper). The original
+// symbol becomes accessible as `__real_foo`, so you can call that from your
+// wrapper.
+// This data structure is instantiated for each -wrap option.
+struct WrappedSymbol {
+ Symbol *Sym; // The symbol named by the -wrap option.
+ Symbol *Real; // The corresponding "__real_" symbol.
+ Symbol *Wrap; // The corresponding "__wrap_" symbol.
+// Handles -wrap option. Returns one WrappedSymbol per distinct, known -wrap name.
+// This function instantiates wrapper symbols. At this point, they seem
+// like they are not being used at all, so we explicitly set some flags so
+// that LTO won't eliminate them.
+template <class ELFT>
+static std::vector<WrappedSymbol> addWrappedSymbols(opt::InputArgList &Args) {
+ std::vector<WrappedSymbol> V;
+ DenseSet<StringRef> Seen;
+ for (auto *Arg : Args.filtered(OPT_wrap)) {
+ StringRef Name = Arg->getValue();
+ if (!Seen.insert(Name).second)
+ continue; // The same name was already given by an earlier -wrap.
+ Symbol *Sym = Symtab->find(Name);
+ if (!Sym)
+ continue; // Names that are not in the symbol table are skipped.
+ Symbol *Real = addUndefined<ELFT>(Saver.save("__real_" + Name));
+ Symbol *Wrap = addUndefined<ELFT>(Saver.save("__wrap_" + Name));
+ V.push_back({Sym, Real, Wrap});
+ // We want to tell LTO not to inline symbols to be overwritten
+ // because LTO doesn't know the final symbol contents after renaming.
+ Real->CanInline = false;
+ Sym->CanInline = false;
+ // Tell LTO not to eliminate these symbols.
+ Sym->IsUsedInRegularObj = true;
+ Wrap->IsUsedInRegularObj = true;
+ }
+ return V;
+// Do renaming for -wrap by updating pointers to symbols.
+// When this function is executed, only InputFiles and symbol table
+// contain pointers to symbol objects. We visit them to replace pointers,
+// so that wrapped symbols are swapped as instructed by the command line.
+template <class ELFT> static void wrapSymbols(ArrayRef<WrappedSymbol> Wrapped) {
+ DenseMap<Symbol *, Symbol *> Map;
+ for (const WrappedSymbol &W : Wrapped) {
+ Map[W.Sym] = W.Wrap; // Pointers to the wrapped symbol are redirected to its wrapper.
+ Map[W.Real] = W.Sym; // Pointers to the "real" alias are redirected to the original symbol.
+ }
+ // Update pointers in input files.
+ parallelForEach(ObjectFiles, [&](InputFile *File) {
+ std::vector<Symbol *> &Syms = File->getMutableSymbols();
+ for (size_t I = 0, E = Syms.size(); I != E; ++I)
+ if (Symbol *S = Map.lookup(Syms[I])) // lookup() yields null for pointers that are not remapped.
+ Syms[I] = S;
+ });
+ // Update pointers in the symbol table.
+ for (const WrappedSymbol &W : Wrapped)
+ Symtab->wrap(W.Sym, W.Real, W.Wrap);
+static const char *LibcallRoutineNames[] = {
+#define HANDLE_LIBCALL(code, name) name,
+#include "llvm/IR/RuntimeLibcalls.def"
+// Do actual linking. Note that when this function is called,
+// all linker scripts have already been parsed.
+template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
+ Target = getTarget();
+ InX<ELFT>::VerSym = nullptr;
+ InX<ELFT>::VerNeed = nullptr;
+ Config->MaxPageSize = getMaxPageSize(Args);
+ Config->ImageBase = getImageBase(Args);
+ // If a -hash-style option was not given, set to a default value,
+ // which varies depending on the target.
+ if (!Args.hasArg(OPT_hash_style)) {
+ if (Config->EMachine == EM_MIPS)
+ Config->SysvHash = true;
+ else
+ Config->SysvHash = Config->GnuHash = true;
+ }
+ // Default output filename is "a.out" by the Unix tradition.
+ if (Config->OutputFile.empty())
+ Config->OutputFile = "a.out";
+ // Fail early if the output file or map file is not writable. If a user has a
+ // long link, e.g. due to a large LTO link, they do not wish to run it and
+ // find that it failed because there was a mistake in their command-line.
+ if (auto E = tryCreateFile(Config->OutputFile))
+ error("cannot open output file " + Config->OutputFile + ": " + E.message());
+ if (auto E = tryCreateFile(Config->MapFile))
+ error("cannot open map file " + Config->MapFile + ": " + E.message());
+ if (errorCount())
+ return;
+ // Use default entry point name if no name was given via the command
+ // line nor linker scripts. For some reason, MIPS entry point name is
+ // different from others.
+ Config->WarnMissingEntry =
+ (!Config->Entry.empty() || (!Config->Shared && !Config->Relocatable));
+ if (Config->Entry.empty() && !Config->Relocatable)
+ Config->Entry = (Config->EMachine == EM_MIPS) ? "__start" : "_start";
+ // Handle --trace-symbol.
+ for (auto *Arg : Args.filtered(OPT_trace_symbol))
+ Symtab->trace(Arg->getValue());
+ // Add all files to the symbol table. This will add almost all
+ // symbols that we need to the symbol table.
+ for (InputFile *F : Files)
+ Symtab->addFile<ELFT>(F);
+ // Now that we have every file, we can decide if we will need a
+ // dynamic symbol table.
+ // We need one if we were asked to export dynamic symbols or if we are
+ // producing a shared library.
+ // We also need one if any shared libraries are used and for pie executables
+ // (probably because the dynamic linker needs it).
+ Config->HasDynSymTab =
+ !SharedFiles.empty() || Config->Pic || Config->ExportDynamic;
+ // Some symbols (such as __ehdr_start) are defined lazily only when there
+ // are undefined symbols for them, so we add these to trigger that logic.
+ for (StringRef Name : Script->ReferencedSymbols)
+ addUndefined<ELFT>(Name);
+ // Handle the `--undefined <sym>` options.
+ for (StringRef S : Config->Undefined)
+ handleUndefined<ELFT>(S);
+ // If an entry symbol is in a static archive, pull out that file now.
+ handleUndefined<ELFT>(Config->Entry);
+ // If any of our inputs are bitcode files, the LTO code generator may create
+ // references to certain library functions that might not be explicit in the
+ // bitcode file's symbol table. If any of those library functions are defined
+ // in a bitcode file in an archive member, we need to arrange to use LTO to
+ // compile those archive members by adding them to the link beforehand.
+ //
+ // However, adding all libcall symbols to the link can have undesired
+ // consequences. For example, the libgcc implementation of
+ // __sync_val_compare_and_swap_8 on 32-bit ARM pulls in an .init_array entry
+ // that aborts the program if the Linux kernel does not support 64-bit
+ // atomics, which would prevent the program from running even if it does not
+ // use 64-bit atomics.
+ //
+ // Therefore, we only add libcall symbols to the link before LTO if we have
+ // to, i.e. if the symbol's definition is in bitcode. Any other required
+ // libcall symbols will be added to the link after LTO when we add the LTO
+ // object file to the link.
+ if (!BitcodeFiles.empty())
+ for (const char *S : LibcallRoutineNames)
+ handleLibcall<ELFT>(S);
+ // Return if there were name resolution errors.
+ if (errorCount())
+ return;
+ // Now that we have read all script files, we want to finalize the order of
+ // linker script commands, which may not be final yet because of INSERT commands.
+ Script->processInsertCommands();
+ // We want to declare linker script's symbols early,
+ // so that we can version them.
+ // They also might be exported if referenced by DSOs.
+ Script->declareSymbols();
+ // Handle the -exclude-libs option.
+ if (Args.hasArg(OPT_exclude_libs))
+ excludeLibs<ELFT>(Args);
+ // Create ElfHeader early. We need a dummy section in
+ // addReservedSymbols to mark the created symbols as not absolute.
+ Out::ElfHeader = make<OutputSection>("", 0, SHF_ALLOC);
+ Out::ElfHeader->Size = sizeof(typename ELFT::Ehdr);
+ // Create wrapped symbols for -wrap option.
+ std::vector<WrappedSymbol> Wrapped = addWrappedSymbols<ELFT>(Args);
+ // We need to create some reserved symbols such as _end. Create them.
+ if (!Config->Relocatable)
+ addReservedSymbols();
+ // Apply version scripts.
+ //
+ // For a relocatable output, version scripts don't make sense, and
+ // parsing a symbol version string (e.g. dropping "@ver1" from a symbol
+ // name "foo@ver1") would do more harm than good, so we don't call this if -r is given.
+ if (!Config->Relocatable)
+ Symtab->scanVersionScript();
+ // Do link-time optimization if given files are LLVM bitcode files.
+ // This compiles bitcode files into real object files.
+ //
+ // With this the symbol table should be complete. After this, no new names
+ // except a few linker-synthesized ones will be added to the symbol table.
+ Symtab->addCombinedLTOObject<ELFT>();
+ if (errorCount())
+ return;
+ // If -thinlto-index-only is given, we should create only "index
+ // files" and not object files. Index file creation is already done
+ // in addCombinedLTOObject, so we are done if that's the case.
+ if (Config->ThinLTOIndexOnly)
+ return;
+ // Likewise, --plugin-opt=emit-llvm is an option to make LTO create
+ // an output file in bitcode and exit, so that you can just get a
+ // combined bitcode file.
+ if (Config->EmitLLVM)
+ return;
+ // Apply symbol renames for -wrap.
+ if (!Wrapped.empty())
+ wrapSymbols<ELFT>(Wrapped);
+ // Now we have a complete list of input files.
+ // Beyond this point, no new files are added.
+ // Aggregate all input sections into one place.
+ for (InputFile *F : ObjectFiles)
+ for (InputSectionBase *S : F->getSections())
+ if (S && S != &InputSection::Discarded)
+ InputSections.push_back(S);
+ for (BinaryFile *F : BinaryFiles)
+ for (InputSectionBase *S : F->getSections())
+ InputSections.push_back(cast<InputSection>(S));
+ // We do not want to emit debug sections if --strip-all
+ // or -strip-debug are given.
+ if (Config->Strip != StripPolicy::None)
+ llvm::erase_if(InputSections, [](InputSectionBase *S) {
+ return S->Name.startswith(".debug") || S->Name.startswith(".zdebug");
+ });
+ Config->EFlags = Target->calcEFlags();
+ if (Config->EMachine == EM_ARM) {
+ // FIXME: These warnings can be removed when lld only uses these features
+ // when the input objects have been compiled with an architecture that
+ // supports them.
+ if (Config->ARMHasBlx == false)
+ warn("lld uses blx instruction, no object with architecture supporting "
+ "feature detected");
+ }
+ // This adds a .comment section containing a version string. We have to add it
+ // before mergeSections because the .comment section is a mergeable section.
+ if (!Config->Relocatable)
+ InputSections.push_back(createCommentSection());
+ // Do size optimizations: garbage collection, merging of SHF_MERGE sections
+ // and identical code folding.
+ splitSections<ELFT>();
+ markLive<ELFT>();
+ demoteSharedSymbols<ELFT>();
+ mergeSections();
+ if (Config->ICF != ICFLevel::None) {
+ findKeepUniqueSections<ELFT>(Args);
+ doIcf<ELFT>();
+ }
+ // Read the callgraph now that we know what was gced or icfed
+ if (Config->CallGraphProfileSort) {
+ if (auto *Arg = Args.getLastArg(OPT_call_graph_ordering_file))
+ if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
+ readCallGraph(*Buffer);
+ readCallGraphsFromObjectFiles<ELFT>();
+ }
+ // Write the result to the file.
+ writeResult<ELFT>();
diff --git a/contrib/llvm/tools/lld/ELF/Driver.h b/contrib/llvm/tools/lld/ELF/Driver.h
new file mode 100644
index 000000000000..81d7f608e588
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Driver.h
@@ -0,0 +1,73 @@
+//===- Driver.h -------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "SymbolTable.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Common/Reproduce.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Support/raw_ostream.h"
+namespace lld {
+namespace elf {
+extern class LinkerDriver *Driver;
+class LinkerDriver {
+ void main(ArrayRef<const char *> Args);
+ void addFile(StringRef Path, bool WithLOption);
+ void addLibrary(StringRef Name);
+ void readConfigs(llvm::opt::InputArgList &Args);
+ void createFiles(llvm::opt::InputArgList &Args);
+ void inferMachineType();
+ template <class ELFT> void link(llvm::opt::InputArgList &Args);
+ // True if we are between --whole-archive and --no-whole-archive.
+ bool InWholeArchive = false;
+ // True if we are between --start-lib and --end-lib.
+ bool InLib = false;
+ std::vector<InputFile *> Files; // Input files collected so far; read by inferMachineType() and link().
+// Parses command line options.
+class ELFOptTable : public llvm::opt::OptTable {
+ ELFOptTable();
+ llvm::opt::InputArgList parse(ArrayRef<const char *> Argv);
+// Create enum with OPT_xxx values for each option in Options.td
+enum {
+#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID,
+#include "Options.inc"
+#undef OPTION
+void printHelp();
+std::string createResponseFile(const llvm::opt::InputArgList &Args);
+llvm::Optional<std::string> findFromSearchPaths(StringRef Path);
+llvm::Optional<std::string> searchScript(StringRef Path);
+llvm::Optional<std::string> searchLibrary(StringRef Path);
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/DriverUtils.cpp b/contrib/llvm/tools/lld/ELF/DriverUtils.cpp
new file mode 100644
index 000000000000..e51d02e38da1
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/DriverUtils.cpp
@@ -0,0 +1,238 @@
+//===- DriverUtils.cpp ----------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file contains utility functions for the driver. Because there
+// are so many small functions, we created this separate file to make
+// Driver.cpp less cluttered.
+#include "Driver.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+#include "lld/Common/Reproduce.h"
+#include "lld/Common/Version.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+using namespace llvm;
+using namespace llvm::sys;
+using namespace llvm::opt;
+using namespace lld;
+using namespace lld::elf;
+// Create OptTable
+// Create prefix string literals used in Options.td
+#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
+#include "Options.inc"
+#undef PREFIX
+// Create table mapping all options defined in Options.td
+static const opt::OptTable::Info OptInfo[] = {
+#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X7, X8, X9, X10, X11, X12) \
+ {X1, X2, X10, X11, OPT_##ID, opt::Option::KIND##Class, \
+ X9, X8, OPT_##GROUP, OPT_##ALIAS, X7, X12},
+#include "Options.inc"
+#undef OPTION
+ELFOptTable::ELFOptTable() : OptTable(OptInfo) {}
+// Set color diagnostics according to -color-diagnostics={auto,always,never}
+// or -no-color-diagnostics flags. Only the last of these options on the command line is considered.
+static void handleColorDiagnostics(opt::InputArgList &Args) {
+ auto *Arg = Args.getLastArg(OPT_color_diagnostics, OPT_color_diagnostics_eq,
+ OPT_no_color_diagnostics);
+ if (!Arg)
+ return; // None of the options given: leave the current setting alone.
+ if (Arg->getOption().getID() == OPT_color_diagnostics) {
+ errorHandler().ColorDiagnostics = true;
+ } else if (Arg->getOption().getID() == OPT_no_color_diagnostics) {
+ errorHandler().ColorDiagnostics = false;
+ } else {
+ StringRef S = Arg->getValue();
+ if (S == "always")
+ errorHandler().ColorDiagnostics = true;
+ else if (S == "never")
+ errorHandler().ColorDiagnostics = false;
+ else if (S != "auto") // "auto" leaves the setting unchanged; anything else is an error.
+ error("unknown option: --color-diagnostics=" + S);
+ }
+static cl::TokenizerCallback getQuotingStyle(opt::InputArgList &Args) { // Selects the tokenizer used for response files.
+ if (auto *Arg = Args.getLastArg(OPT_rsp_quoting)) {
+ StringRef S = Arg->getValue();
+ if (S != "windows" && S != "posix")
+ error("invalid response file quoting: " + S);
+ if (S == "windows")
+ return cl::TokenizeWindowsCommandLine;
+ return cl::TokenizeGNUCommandLine; // "posix", and also what is returned after an invalid value was reported.
+ }
+ if (Triple(sys::getProcessTriple()).getOS() == Triple::Win32) // No explicit option: choose by the process triple's OS.
+ return cl::TokenizeWindowsCommandLine;
+ return cl::TokenizeGNUCommandLine;
+// Gold LTO plugin takes a `--plugin-opt foo=bar` option as an alias for
+// `--plugin-opt=foo=bar`. We want to handle `--plugin-opt=foo=` as an
+// option name and `bar` as a value. Unfortunately, OptParser cannot
+// handle an option with a space in it.
+// In this function, we concatenate command line arguments so that
+// `--plugin-opt <foo>` is converted to `--plugin-opt=<foo>`. This is a
+// bit hacky, but looks like it is still better than handling --plugin-opt
+// options by hand.
+static void concatLTOPluginOptions(SmallVectorImpl<const char *> &Args) {
+ SmallVector<const char *, 256> V;
+ for (size_t I = 0, E = Args.size(); I != E; ++I) {
+ StringRef S = Args[I];
+ if ((S == "-plugin-opt" || S == "--plugin-opt") && I + 1 != E) { // A trailing option with no following argument is passed through as is.
+ V.push_back(Saver.save(S + "=" + Args[I + 1]).data()); // The joined string is stored via Saver.
+ ++I; // Skip the argument that was just merged.
+ } else {
+ V.push_back(Args[I]);
+ }
+ }
+ Args = std::move(V); // Replace the caller's vector with the rewritten list.
+// Parses a given list of options. Parsing happens twice: once to find --rsp-quoting, then again after response files are expanded.
+opt::InputArgList ELFOptTable::parse(ArrayRef<const char *> Argv) {
+ // Make InputArgList from string vectors.
+ unsigned MissingIndex;
+ unsigned MissingCount;
+ SmallVector<const char *, 256> Vec(Argv.data(), Argv.data() + Argv.size());
+ // We need to get the quoting style for response files before parsing all
+ // options, so we do a preliminary parse here and ignore all the options but
+ // --rsp-quoting.
+ opt::InputArgList Args = this->ParseArgs(Vec, MissingIndex, MissingCount);
+ // Expand response files (arguments in the form of @<filename>)
+ // and then parse the arguments again.
+ cl::ExpandResponseFiles(Saver, getQuotingStyle(Args), Vec);
+ concatLTOPluginOptions(Vec);
+ Args = this->ParseArgs(Vec, MissingIndex, MissingCount);
+ handleColorDiagnostics(Args); // Applied before the errors below are reported.
+ if (MissingCount)
+ error(Twine(Args.getArgString(MissingIndex)) + ": missing argument");
+ for (auto *Arg : Args.filtered(OPT_UNKNOWN))
+ error("unknown argument: " + Arg->getSpelling());
+ return Args; // Returned even if errors were reported above.
+void elf::printHelp() { // Prints the option table help followed by a "supported targets" line to outs().
+ ELFOptTable().PrintHelp(
+ outs(), (Config->ProgName + " [options] file...").str().c_str(), "lld",
+ false /*ShowHidden*/, true /*ShowAllAliases*/);
+ outs() << "\n";
+ // Scripts generated by Libtool versions up to at least 2.4.6 (the most
+ // recent version as of March 2017) expect /: supported targets:.* elf/
+ // in a message for the -help option. If it doesn't match, the scripts
+ // assume that the linker doesn't support very basic features such as
+ // shared libraries. Therefore, we need to print out at least "elf".
+ outs() << Config->ProgName << ": supported targets: elf\n";
+// Reconstructs command line arguments so that you can re-run
+// the same command with the same inputs. This is for --reproduce.
+std::string elf::createResponseFile(const opt::InputArgList &Args) {
+ SmallString<0> Data;
+ raw_svector_ostream OS(Data);
+ OS << "--chroot .\n"; // Always the first line of the generated response file.
+ // Copy the command line to the output while rewriting paths.
+ for (auto *Arg : Args) {
+ switch (Arg->getOption().getUnaliasedOption().getID()) { // Aliases are resolved to the option they stand for.
+ case OPT_reproduce:
+ break; // --reproduce itself is not copied.
+ case OPT_INPUT:
+ OS << quote(rewritePath(Arg->getValue())) << "\n";
+ break;
+ case OPT_o:
+ // If -o path contains directories, "lld @response.txt" will likely
+ // fail because the archive we are creating doesn't contain empty
+ // directories for the output path (-o doesn't create directories).
+ // Strip directories to prevent the issue.
+ OS << "-o " << quote(sys::path::filename(Arg->getValue())) << "\n";
+ break;
+ case OPT_dynamic_list:
+ case OPT_library_path:
+ case OPT_rpath:
+ case OPT_script:
+ case OPT_symbol_ordering_file:
+ case OPT_sysroot:
+ case OPT_version_script:
+ OS << Arg->getSpelling() << " " << quote(rewritePath(Arg->getValue())) // Options whose value is passed through rewritePath().
+ << "\n";
+ break;
+ default:
+ OS << toString(*Arg) << "\n"; // Everything else is copied verbatim.
+ }
+ }
+ return Data.str();
+// Find a file by concatenating given paths. If Path1 starts with "=", that
+// character is replaced with the --sysroot value. Returns None if the resulting path does not exist.
+static Optional<std::string> findFile(StringRef Path1, const Twine &Path2) {
+ SmallString<128> S;
+ if (Path1.startswith("="))
+ path::append(S, Config->Sysroot, Path1.substr(1), Path2);
+ else
+ path::append(S, Path1, Path2);
+ if (fs::exists(S))
+ return S.str().str();
+ return None;
+Optional<std::string> elf::findFromSearchPaths(StringRef Path) { // Returns the first existing <dir>/<Path> over Config->SearchPaths, in order.
+ for (StringRef Dir : Config->SearchPaths)
+ if (Optional<std::string> S = findFile(Dir, Path))
+ return S;
+ return None; // Not found in any search path.
+// This is for -lfoo. We'll look for libfoo.so or libfoo.a from
+// search paths. A name starting with ":" is looked up as that exact file name instead.
+Optional<std::string> elf::searchLibrary(StringRef Name) {
+ if (Name.startswith(":"))
+ return findFromSearchPaths(Name.substr(1));
+ for (StringRef Dir : Config->SearchPaths) {
+ if (!Config->Static) // libfoo.so is not considered when Config->Static is set.
+ if (Optional<std::string> S = findFile(Dir, "lib" + Name + ".so"))
+ return S;
+ if (Optional<std::string> S = findFile(Dir, "lib" + Name + ".a")) // Within one directory .so is tried before .a.
+ return S;
+ }
+ return None;
+// If a linker/version script doesn't exist at the given path, we also
+// look for the script in the '-L' search paths. This matches the behaviour of
+// '-T', --version-script=, and linker script INPUT() command in ld.bfd.
+Optional<std::string> elf::searchScript(StringRef Name) {
+ if (fs::exists(Name))
+ return Name.str(); // Name exists as given; it takes precedence over the search paths.
+ return findFromSearchPaths(Name);
diff --git a/contrib/llvm/tools/lld/ELF/EhFrame.cpp b/contrib/llvm/tools/lld/ELF/EhFrame.cpp
new file mode 100644
index 000000000000..95d444bdc2a1
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/EhFrame.cpp
@@ -0,0 +1,198 @@
+//===- EhFrame.cpp -------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// .eh_frame section contains information on how to unwind the stack when
+// an exception is thrown. The section consists of sequence of CIE and FDE
+// records. The linker needs to merge CIEs and associate FDEs to CIEs.
+// That means the linker has to understand the format of the section.
+// This file contains a few utility functions to read .eh_frame contents.
+#include "EhFrame.h"
+#include "Config.h"
+#include "InputSection.h"
+#include "Relocations.h"
+#include "Target.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Strings.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/Object/ELF.h"
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::dwarf;
+using namespace llvm::object;
+using namespace lld;
+using namespace lld::elf;
+namespace {
+class EhReader {
+ EhReader(InputSectionBase *S, ArrayRef<uint8_t> D) : IS(S), D(D) {}
+ size_t readEhRecordSize();
+ uint8_t getFdeEncoding();
+ template <class P> void failOn(const P *Loc, const Twine &Msg) { // Reports corruption via fatal().
+ fatal("corrupted .eh_frame: " + Msg + "\n>>> defined in " +
+ IS->getObjMsg((const uint8_t *)Loc - IS->data().data())); // Loc is turned into an offset from the start of IS's data.
+ }
+ uint8_t readByte();
+ void skipBytes(size_t Count);
+ StringRef readString();
+ void skipLeb128();
+ void skipAugP();
+ InputSectionBase *IS; // Section the data belongs to; used by failOn to describe the location.
+ ArrayRef<uint8_t> D; // Bytes not yet consumed; the read/skip helpers shrink it from the front.
+size_t elf::readEhRecordSize(InputSectionBase *S, size_t Off) { // Size of the record that starts Off bytes into S's data.
+ return EhReader(S, S->data().slice(Off)).readEhRecordSize();
+// .eh_frame section is a sequence of records. Each record starts with
+// a 4 byte length field. This function returns that length plus the 4 bytes of the field itself.
+size_t EhReader::readEhRecordSize() {
+ if (D.size() < 4)
+ failOn(D.data(), "CIE/FDE too small");
+ // First 4 bytes of CIE/FDE is the size of the record.
+ // If it is 0xFFFFFFFF, the next 8 bytes contain the size instead,
+ // but we do not support that format yet.
+ uint64_t V = read32(D.data());
+ if (V == UINT32_MAX)
+ failOn(D.data(), "CIE/FDE too large");
+ uint64_t Size = V + 4; // 64-bit arithmetic, so adding 4 to a 32-bit value cannot wrap.
+ if (Size > D.size())
+ failOn(D.data(), "CIE/FDE ends past the end of the section");
+ return Size;
+// Read a byte and advance D by one byte. Running out of data is reported through failOn.
+uint8_t EhReader::readByte() {
+ if (D.empty())
+ failOn(D.data(), "unexpected end of CIE");
+ uint8_t B = D.front();
+ D = D.slice(1);
+ return B;
+void EhReader::skipBytes(size_t Count) { // Advances D by Count bytes.
+ if (D.size() < Count)
+ failOn(D.data(), "CIE is too small"); // Fewer than Count bytes remain.
+ D = D.slice(Count);
+// Read a null-terminated string and advance D past its terminator. The result excludes the terminator.
+StringRef EhReader::readString() {
+ const uint8_t *End = std::find(D.begin(), D.end(), '\0');
+ if (End == D.end())
+ failOn(D.data(), "corrupted CIE (failed to read string)"); // No terminator in the remaining data.
+ StringRef S = toStringRef(D.slice(0, End - D.begin()));
+ D = D.slice(S.size() + 1); // +1 skips the terminating null byte.
+ return S;
+// Skip an integer encoded in the LEB128 format.
+// Actual number is not of interest because only the runtime needs it.
+// But we need to be at least able to skip it so that we can read
+// the field that follows a LEB128 number.
+void EhReader::skipLeb128() {
+ const uint8_t *ErrPos = D.data(); // Start of the number, kept for the error location.
+ while (!D.empty()) {
+ uint8_t Val = D.front();
+ D = D.slice(1);
+ if ((Val & 0x80) == 0)
+ return; // A byte with the high bit clear ends the number.
+ }
+ failOn(ErrPos, "corrupted CIE (failed to read LEB128)"); // Data ran out before a terminating byte was seen.
+static size_t getAugPSize(unsigned Enc) { // Byte size implied by the low 4 bits of encoding Enc, or 0 if they are not recognised.
+ switch (Enc & 0x0f) {
+ case DW_EH_PE_absptr:
+ case DW_EH_PE_signed:
+ return Config->Wordsize;
+ case DW_EH_PE_udata2:
+ case DW_EH_PE_sdata2:
+ return 2;
+ case DW_EH_PE_udata4:
+ case DW_EH_PE_sdata4:
+ return 4;
+ case DW_EH_PE_udata8:
+ case DW_EH_PE_sdata8:
+ return 8;
+ }
+ return 0; // Callers treat 0 as "unknown encoding" (see skipAugP).
+void EhReader::skipAugP() { // Reads one encoding byte, then skips the value whose size that encoding implies.
+ uint8_t Enc = readByte();
+ if ((Enc & 0xf0) == DW_EH_PE_aligned)
+ failOn(D.data() - 1, "DW_EH_PE_aligned encoding is not supported"); // D.data() - 1 is the encoding byte just read.
+ size_t Size = getAugPSize(Enc);
+ if (Size == 0)
+ failOn(D.data() - 1, "unknown FDE encoding");
+ if (Size >= D.size()) // NOTE(review): ">=" also rejects the case where exactly Size bytes remain - confirm this is intended.
+ failOn(D.data() - 1, "corrupted CIE");
+ D = D.slice(Size);
+uint8_t elf::getFdeEncoding(EhSectionPiece *P) { // Runs EhReader::getFdeEncoding over the piece's data, with P->Sec as the section for error messages.
+ return EhReader(P->Sec, P->data()).getFdeEncoding();
+uint8_t EhReader::getFdeEncoding() { // Returns the byte attached to the 'R' augmentation, or DW_EH_PE_absptr if there is no 'R'.
+ skipBytes(8); // Skips the 4-byte length field plus 4 more bytes (presumably the CIE id - TODO confirm).
+ int Version = readByte();
+ if (Version != 1 && Version != 3)
+ failOn(D.data() - 1,
+ "FDE version 1 or 3 expected, but got " + Twine(Version)); // NOTE(review): the comments below call this the CIE version.
+ StringRef Aug = readString();
+ // Skip code and data alignment factors.
+ skipLeb128();
+ skipLeb128();
+ // Skip the return address register. In CIE version 1 this is a single
+ // byte. In CIE version 3 this is an unsigned LEB128.
+ if (Version == 1)
+ readByte();
+ else
+ skipLeb128();
+ // We only care about an 'R' value, but other records may precede an 'R'
+ // record. Unfortunately records are not in TLV (type-length-value) format,
+ // so we need to teach the linker how to skip records for each type.
+ for (char C : Aug) {
+ if (C == 'R')
+ return readByte();
+ if (C == 'z') {
+ skipLeb128();
+ continue;
+ }
+ if (C == 'P') {
+ skipAugP();
+ continue;
+ }
+ if (C == 'L') {
+ readByte();
+ continue;
+ }
+ failOn(Aug.data(), "unknown .eh_frame augmentation string: " + Aug); // Any other augmentation character is rejected.
+ }
+ return DW_EH_PE_absptr; // Default when the augmentation string has no 'R'.
diff --git a/contrib/llvm/tools/lld/ELF/EhFrame.h b/contrib/llvm/tools/lld/ELF/EhFrame.h
new file mode 100644
index 000000000000..5112891a911e
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/EhFrame.h
@@ -0,0 +1,25 @@
+//===- EhFrame.h ------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/LLVM.h"
+namespace lld {
+namespace elf {
+class InputSectionBase;
+struct EhSectionPiece;
+// NOTE(review): the definition is outside this view; presumably returns the
+// size of the .eh_frame record starting at offset Off of section S -- confirm.
+size_t readEhRecordSize(InputSectionBase *S, size_t Off);
+// Returns the encoding byte stored for the 'R' augmentation of the record in
+// piece P, or DW_EH_PE_absptr if its augmentation string has no 'R'.
+uint8_t getFdeEncoding(EhSectionPiece *P);
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/Filesystem.cpp b/contrib/llvm/tools/lld/ELF/Filesystem.cpp
new file mode 100644
index 000000000000..5cf240eeca56
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Filesystem.cpp
@@ -0,0 +1,86 @@
+//===- Filesystem.cpp -----------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file contains a few utility functions to handle files.
+#include "Filesystem.h"
+#include "Config.h"
+#include "lld/Common/Threads.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/FileSystem.h"
+#include <unistd.h>
+#include <thread>
+using namespace llvm;
+using namespace lld;
+using namespace lld::elf;
+// Removes a given file asynchronously. This is a performance hack,
+// so remove this when operating systems are improved.
+// On Linux (and probably on other Unix-like systems), unlink(2) is a
+// noticeably slow system call. As of 2016, unlink takes 250
+// milliseconds to remove a 1 GB file on ext4 filesystem on my machine.
+// To create a new result file, we first remove existing file. So, if
+// you repeatedly link a 1 GB program in a regular compile-link-debug
+// cycle, every cycle wastes 250 milliseconds only to remove a file.
+// Since LLD can link a 1 GB binary in about 5 seconds, that waste
+// actually counts.
+// This function spawns a background thread to remove the file.
+// The calling thread returns almost immediately.
+// Removes the file at Path. On Windows the removal happens directly on the
+// calling thread. Elsewhere, when threads are enabled and Path names an
+// existing regular file, the directory entry is removed on the calling thread
+// while the last open descriptor is closed on a detached background thread.
+// Note that in that branch nothing is removed when threads are disabled or
+// Path is not an existing regular file.
+void elf::unlinkAsync(StringRef Path) {
+// Removing a file is async on windows.
+#if defined(_WIN32)
+  sys::fs::remove(Path);
+#else
+  if (!ThreadsEnabled || !sys::fs::exists(Path) ||
+      !sys::fs::is_regular_file(Path))
+    return;
+  // We cannot just remove path from a different thread because we are now going
+  // to create path as a new file.
+  // Instead we open the file and unlink it on this thread. The unlink is fast
+  // since the open fd guarantees that it is not removing the last reference.
+  int FD;
+  std::error_code EC = sys::fs::openFileForRead(Path, FD);
+  sys::fs::remove(Path);
+  // Close the descriptor, and therefore drop the last reference to the old
+  // file, in the background. FD is only valid if the open succeeded.
+  if (!EC)
+    std::thread([=] { ::close(FD); }).detach();
+#endif
+// Simulate file creation to see if Path is writable.
+// Determining whether a file is writable or not is amazingly hard,
+// and after all the only reliable way of doing that is to actually
+// create a file. But we don't want to do that in this function
+// because LLD shouldn't update any file if it will end in a failure.
+// We also don't want to reimplement heuristics to determine if a
+// file is writable. So we'll let FileOutputBuffer do the work.
+// FileOutputBuffer doesn't touch a destination file until commit()
+// is called. We use that class without calling commit() to predict
+// if the given file is writable.
+// Probes whether Path can be created by building (but never committing) a
+// one-byte FileOutputBuffer for it. An empty path and "-" are reported as
+// success without probing.
+std::error_code elf::tryCreateFile(StringRef Path) {
+  if (Path.empty() || Path == "-")
+    return std::error_code();
+  auto BufferOrErr = FileOutputBuffer::create(Path, 1);
+  return errorToErrorCode(BufferOrErr.takeError());
diff --git a/contrib/llvm/tools/lld/ELF/Filesystem.h b/contrib/llvm/tools/lld/ELF/Filesystem.h
new file mode 100644
index 000000000000..987a74a6bcb6
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Filesystem.h
@@ -0,0 +1,23 @@
+//===- Filesystem.h ---------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/LLVM.h"
+#include <system_error>
+namespace lld {
+namespace elf {
+// Removes the file at Path; outside Windows the final close of the old file
+// is handed to a detached background thread.
+void unlinkAsync(StringRef Path);
+// Checks whether Path can be created without leaving a file behind. Returns a
+// default-constructed error code for an empty path, for "-", or on success.
+std::error_code tryCreateFile(StringRef Path);
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/ICF.cpp b/contrib/llvm/tools/lld/ELF/ICF.cpp
new file mode 100644
index 000000000000..d08ac73ded80
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/ICF.cpp
@@ -0,0 +1,512 @@
+//===- ICF.cpp ------------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// ICF is short for Identical Code Folding. This is a size optimization to
+// identify and merge two or more read-only sections (typically functions)
+// that happened to have the same contents. It usually reduces output size
+// by a few percent.
+// In ICF, two sections are considered identical if they have the same
+// section flags, section data, and relocations. Relocations are tricky,
+// because two relocations are considered the same if they have the same
+// relocation types, values, and if they point to the same sections *in
+// terms of ICF*.
+// Here is an example. If foo and bar defined below are compiled to the
+// same machine instructions, ICF can and should merge the two, although
+// their relocations point to each other.
+// void foo() { bar(); }
+// void bar() { foo(); }
+// If you merge the two, their relocations point to the same section and
+// thus you know they are mergeable, but how do you know they are
+// mergeable in the first place? This is not an easy problem to solve.
+// What we are doing in LLD is to partition sections into equivalence
+// classes. Sections in the same equivalence class when the algorithm
+// terminates are considered identical. Here are details:
+// 1. First, we partition sections using their hash values as keys. Hash
+// values contain section types, section contents and numbers of
+// relocations. During this step, relocation targets are not taken into
+// account. We just put sections that apparently differ into different
+// equivalence classes.
+// 2. Next, for each equivalence class, we visit sections to compare
+// relocation targets. Relocation targets are considered equivalent if
+// their targets are in the same equivalence class. Sections with
+// different relocation targets are put into different equivalence
+// classes.
+// 3. If we split an equivalence class in step 2, two relocations
+// that previously targeted the same equivalence class may now target
+// different equivalence classes. Therefore, we repeat step 2 until a
+// convergence is obtained.
+// 4. For each equivalence class C, pick an arbitrary section in C, and
+// merge all the other sections in C with it.
+// For small programs, this algorithm needs 3-5 iterations. For large
+// programs such as Chromium, it takes more than 20 iterations.
+// This algorithm was mentioned as an "optimistic algorithm" in [1],
+// though gold implements a different algorithm than this.
+// We parallelize each step so that multiple threads can work on different
+// equivalence classes concurrently. That gave us a large performance
+// boost when applying ICF on large programs. For example, MSVC link.exe
+// or GNU gold takes 10-20 seconds to apply ICF on Chromium, whose output
+// size is about 1.5 GB, but LLD can finish it in less than 2 seconds on a
+// 2.8 GHz 40 core machine. Even without threading, LLD's ICF is still
+// faster than MSVC or gold though.
+// [1] Safe ICF: Pointer Safe and Unwinding aware Identical Code Folding
+// in the Gold Linker
+// http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/36912.pdf
+#include "ICF.h"
+#include "Config.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "Writer.h"
+#include "lld/Common/Threads.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Support/xxhash.h"
+#include <algorithm>
+#include <atomic>
+using namespace lld;
+using namespace lld::elf;
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::object;
+namespace {
+// Implements the identical-code-folding pass for one ELF flavour (ELFT).
+template <class ELFT> class ICF {
+ // Runs the whole pass; called from elf::doIcf().
+ void run();
+ // Splits Sections[Begin, End) into groups of mutually equal sections.
+ // Constant selects equalsConstant (true) or equalsVariable (false).
+ void segregate(size_t Begin, size_t End, bool Constant);
+ // Compares two relocation lists on everything but target equivalence class.
+ template <class RelTy>
+ bool constantEq(const InputSection *A, ArrayRef<RelTy> RelsA,
+ const InputSection *B, ArrayRef<RelTy> RelsB);
+ // Compares two relocation lists by the equivalence class of their targets.
+ template <class RelTy>
+ bool variableEq(const InputSection *A, ArrayRef<RelTy> RelsA,
+ const InputSection *B, ArrayRef<RelTy> RelsB);
+ bool equalsConstant(const InputSection *A, const InputSection *B);
+ bool equalsVariable(const InputSection *A, const InputSection *B);
+ // Returns the index of the first section in (Begin, End) whose current class
+ // differs from that of Sections[Begin], or End if there is none.
+ size_t findBoundary(size_t Begin, size_t End);
+ // Calls Fn(GroupBegin, GroupEnd) for every same-class run in [Begin, End).
+ void forEachClassRange(size_t Begin, size_t End,
+ llvm::function_ref<void(size_t, size_t)> Fn);
+ // Calls Fn for every class in Sections and increments Cnt.
+ void forEachClass(llvm::function_ref<void(size_t, size_t)> Fn);
+ // Sections taking part in ICF, kept grouped by equivalence class.
+ std::vector<InputSection *> Sections;
+ // We repeat the main loop while `Repeat` is true.
+ std::atomic<bool> Repeat;
+ // The main loop counter.
+ int Cnt = 0;
+ // We have two locations for equivalence classes. On the first iteration
+ // of the main loop, Class[0] has a valid value, and Class[1] contains
+ // garbage. We read equivalence classes from slot 0 and write to slot 1.
+ // So, Class[0] represents the current class, and Class[1] represents
+ // the next class. On each iteration, we switch their roles and use them
+ // alternately.
+ //
+ // Why are we doing this? Recall that other threads may be working on
+ // other equivalence classes in parallel. They may read sections that we
+ // are updating. We cannot update equivalence classes in place because
+ // it breaks the invariant that all possibly-identical sections must be
+ // in the same equivalence class at any moment. In other words, the for
+ // loop to update equivalence classes is not atomic, and that is
+ // observable from other threads. By writing new classes to other
+ // places, we can keep the invariant.
+ //
+ // Below, `Current` has the index of the current class, and `Next` has
+ // the index of the next class. If threading is enabled, they are either
+ // (0, 1) or (1, 0).
+ //
+ // Note on single-thread: if that's the case, they are always (0, 0)
+ // because we can safely read the next class without worrying about race
+ // conditions. Using the same location makes this algorithm converge
+ // faster because it uses results of the same iteration earlier.
+ int Current = 0;
+ int Next = 0;
+// Returns true if section S is subject to ICF.
+static bool isEligible(InputSection *S) {
+  // Dead, keep-unique and non-allocated sections never take part.
+  const bool Allocated = (S->Flags & SHF_ALLOC) != 0;
+  if (!S->Live || S->KeepUnique || !Allocated)
+    return false;
+  // Don't merge writable sections. .data.rel.ro sections are marked as writable
+  // but are semantically read-only.
+  const bool Writable = (S->Flags & SHF_WRITE) != 0;
+  if (Writable && S->Name != ".data.rel.ro" &&
+      !S->Name.startswith(".data.rel.ro."))
+    return false;
+  // SHF_LINK_ORDER sections are ICF'd as a unit with their dependent sections,
+  // so we don't consider them for ICF individually.
+  if (S->Flags & SHF_LINK_ORDER)
+    return false;
+  // Don't merge synthetic sections as their Data member is not valid and empty.
+  // The Data member needs to be valid for ICF as it is used by ICF to determine
+  // the equality of section contents.
+  if (isa<SyntheticSection>(S))
+    return false;
+  // .init and .fini contain instructions that must be executed to initialize
+  // and finalize the process. They cannot and should not be merged.
+  if (S->Name == ".init" || S->Name == ".fini")
+    return false;
+  // A user program may enumerate sections named with a C identifier using
+  // __start_* and __stop_* symbols. We cannot ICF any such sections because
+  // that could change program semantics.
+  return !isValidCIdentifier(S->Name);
+// Split an equivalence class into smaller classes.
+// [Begin, End) is a range of indices into Sections. Constant selects the
+// comparison: equalsConstant when true, equalsVariable when false. New class
+// IDs are written to the Class[Next] slot, and Repeat is set whenever the
+// range is actually split.
+template <class ELFT>
+void ICF<ELFT>::segregate(size_t Begin, size_t End, bool Constant) {
+ // This loop rearranges sections in [Begin, End) so that all sections
+ // that are equal in terms of equals{Constant,Variable} are contiguous
+ // in [Begin, End).
+ //
+ // The algorithm is quadratic in the worst case, but that is not an
+ // issue in practice because the number of the distinct sections in
+ // each range is usually very small.
+ while (Begin < End) {
+ // Divide [Begin, End) into two. Let Mid be the start index of the
+ // second group.
+ // Sections equal to Sections[Begin] are moved to the front of the range;
+ // stable_partition keeps the relative order within both halves.
+ auto Bound =
+ std::stable_partition(Sections.begin() + Begin + 1,
+ Sections.begin() + End, [&](InputSection *S) {
+ if (Constant)
+ return equalsConstant(Sections[Begin], S);
+ return equalsVariable(Sections[Begin], S);
+ });
+ size_t Mid = Bound - Sections.begin();
+ // Now we split [Begin, End) into [Begin, Mid) and [Mid, End) by
+ // updating the sections in [Begin, Mid). We use Mid as an equivalence
+ // class ID because every group ends with a unique index.
+ for (size_t I = Begin; I < Mid; ++I)
+ Sections[I]->Class[Next] = Mid;
+ // If we created a group, we need to iterate the main loop again.
+ if (Mid != End)
+ Repeat = true;
+ // Continue with the sections that did not match Sections[Begin].
+ Begin = Mid;
+ }
+// Compare two lists of relocations.
+// Returns true if every pair RA[I]/RB[I] agrees on offset, type and on the
+// part of its target that cannot change during ICF. Which equivalence class
+// an InputSection target belongs to is deliberately left to variableEq.
+// SecA and SecB are the sections owning RA and RB respectively.
+template <class ELFT>
+template <class RelTy>
+bool ICF<ELFT>::constantEq(const InputSection *SecA, ArrayRef<RelTy> RA,
+                           const InputSection *SecB, ArrayRef<RelTy> RB) {
+  // RB is indexed with RA's indices below, so both lists must have the same
+  // length. equalsConstant compares NumRelocations before calling us.
+  assert(RA.size() == RB.size());
+  for (size_t I = 0; I < RA.size(); ++I) {
+    if (RA[I].r_offset != RB[I].r_offset ||
+        RA[I].getType(Config->IsMips64EL) != RB[I].getType(Config->IsMips64EL))
+      return false;
+    uint64_t AddA = getAddend<ELFT>(RA[I]);
+    uint64_t AddB = getAddend<ELFT>(RB[I]);
+    Symbol &SA = SecA->template getFile<ELFT>()->getRelocTargetSym(RA[I]);
+    Symbol &SB = SecB->template getFile<ELFT>()->getRelocTargetSym(RB[I]);
+    // Same symbol: the relocations are equal exactly when the addends are.
+    if (&SA == &SB) {
+      if (AddA == AddB)
+        continue;
+      return false;
+    }
+    auto *DA = dyn_cast<Defined>(&SA);
+    auto *DB = dyn_cast<Defined>(&SB);
+    // Placeholder symbols generated by linker scripts look the same now but
+    // may have different values later.
+    if (!DA || !DB || DA->ScriptDefined || DB->ScriptDefined)
+      return false;
+    // Relocations referring to absolute symbols are constant-equal if their
+    // values are equal.
+    if (!DA->Section && !DB->Section && DA->Value + AddA == DB->Value + AddB)
+      continue;
+    if (!DA->Section || !DB->Section)
+      return false;
+    if (DA->Section->kind() != DB->Section->kind())
+      return false;
+    // Relocations referring to InputSections are constant-equal if their
+    // section offsets are equal.
+    if (isa<InputSection>(DA->Section)) {
+      if (DA->Value + AddA == DB->Value + AddB)
+        continue;
+      return false;
+    }
+    // Relocations referring to MergeInputSections are constant-equal if their
+    // offsets in the output section are equal.
+    auto *X = dyn_cast<MergeInputSection>(DA->Section);
+    if (!X)
+      return false;
+    // Both sections have the same kind (checked above), so this cast is safe.
+    auto *Y = cast<MergeInputSection>(DB->Section);
+    if (X->getParent() != Y->getParent())
+      return false;
+    // For section symbols the addend is looked up through getOffset; for
+    // other symbols the symbol value is looked up and the addend added on top.
+    uint64_t OffsetA =
+        SA.isSection() ? X->getOffset(AddA) : X->getOffset(DA->Value) + AddA;
+    uint64_t OffsetB =
+        SB.isSection() ? Y->getOffset(AddB) : Y->getOffset(DB->Value) + AddB;
+    if (OffsetA != OffsetB)
+      return false;
+  }
+  return true;
+// Compare "non-moving" part of two InputSections, namely everything
+// except relocation targets.
+template <class ELFT>
+bool ICF<ELFT>::equalsConstant(const InputSection *A, const InputSection *B) {
+  // Cheap header-level checks first, then the raw contents.
+  if (A->NumRelocations != B->NumRelocations)
+    return false;
+  if (A->Flags != B->Flags)
+    return false;
+  if (A->getSize() != B->getSize())
+    return false;
+  if (A->data() != B->data())
+    return false;
+  // If two sections have different output sections, we cannot merge them.
+  // FIXME: This doesn't do the right thing in the case where there is a linker
+  // script. We probably need to move output section assignment before ICF to
+  // get the correct behaviour here.
+  if (getOutputSectionName(A) != getOutputSectionName(B))
+    return false;
+  // The relocation flavour of A decides which relocation arrays are compared.
+  if (!A->AreRelocsRela)
+    return constantEq(A, A->template rels<ELFT>(), B,
+                      B->template rels<ELFT>());
+  return constantEq(A, A->template relas<ELFT>(), B,
+                    B->template relas<ELFT>());
+// Compare two lists of relocations. Returns true if all pairs of
+// relocations point to the same section in terms of ICF.
+// Classes are read from the Class[Current] slot. The code relies on
+// constantEq having already accepted the same two lists; see the comment
+// before the casts below.
+template <class ELFT>
+template <class RelTy>
+bool ICF<ELFT>::variableEq(const InputSection *SecA, ArrayRef<RelTy> RA,
+                           const InputSection *SecB, ArrayRef<RelTy> RB) {
+  assert(RA.size() == RB.size());
+  for (size_t I = 0; I < RA.size(); ++I) {
+    // The two sections must be identical.
+    Symbol &SA = SecA->template getFile<ELFT>()->getRelocTargetSym(RA[I]);
+    Symbol &SB = SecB->template getFile<ELFT>()->getRelocTargetSym(RB[I]);
+    if (&SA == &SB)
+      continue;
+    auto *DA = cast<Defined>(&SA);
+    auto *DB = cast<Defined>(&SB);
+    // We already dealt with absolute and non-InputSection symbols in
+    // constantEq, and for InputSections we have already checked everything
+    // except the equivalence class.
+    if (!DA->Section)
+      continue;
+    auto *X = dyn_cast<InputSection>(DA->Section);
+    if (!X)
+      continue;
+    auto *Y = cast<InputSection>(DB->Section);
+    // Ineligible sections are in the special equivalence class 0.
+    // They can never be the same in terms of the equivalence class.
+    if (X->Class[Current] == 0)
+      return false;
+    if (X->Class[Current] != Y->Class[Current])
+      return false;
+  }
+  return true;
+// Compare "moving" part of two InputSections, namely relocation targets.
+template <class ELFT>
+bool ICF<ELFT>::equalsVariable(const InputSection *A, const InputSection *B) {
+  // The relocation flavour of A decides which relocation arrays are compared.
+  return A->AreRelocsRela
+             ? variableEq(A, A->template relas<ELFT>(), B,
+                          B->template relas<ELFT>())
+             : variableEq(A, A->template rels<ELFT>(), B,
+                          B->template rels<ELFT>());
+// Returns the index of the first section after Begin (and before End) whose
+// current class differs from that of Sections[Begin]; End if none does.
+template <class ELFT> size_t ICF<ELFT>::findBoundary(size_t Begin, size_t End) {
+  const uint32_t Id = Sections[Begin]->Class[Current];
+  size_t Pos = Begin + 1;
+  while (Pos < End && Sections[Pos]->Class[Current] == Id)
+    ++Pos;
+  return Pos < End ? Pos : End;
+// Sections in the same equivalence class are contiguous in Sections
+// vector. Therefore, Sections vector can be considered as contiguous
+// groups of sections, grouped by the class.
+// This function calls Fn on every group within [Begin, End).
+template <class ELFT>
+void ICF<ELFT>::forEachClassRange(size_t Begin, size_t End,
+                                  llvm::function_ref<void(size_t, size_t)> Fn) {
+  for (size_t GroupBegin = Begin; GroupBegin < End;) {
+    const size_t GroupEnd = findBoundary(GroupBegin, End);
+    Fn(GroupBegin, GroupEnd);
+    GroupBegin = GroupEnd;
+  }
+// Call Fn on each equivalence class.
+// Fn receives the half-open index range [Begin, End) of one class. Cnt is
+// incremented once per call, on both the sequential and the parallel path.
+template <class ELFT>
+void ICF<ELFT>::forEachClass(llvm::function_ref<void(size_t, size_t)> Fn) {
+ // If threading is disabled or the number of sections is
+ // too small to use threading, call Fn sequentially.
+ // Current and Next are left untouched on this path.
+ if (!ThreadsEnabled || Sections.size() < 1024) {
+ forEachClassRange(0, Sections.size(), Fn);
+ ++Cnt;
+ return;
+ }
+ // Alternate the read and write slots on every call.
+ Current = Cnt % 2;
+ Next = (Cnt + 1) % 2;
+ // Shard into non-overlapping intervals, and call Fn in parallel.
+ // The sharding must be completed before any calls to Fn are made
+ // so that Fn can modify the sections in its shard without causing data
+ // races.
+ const size_t NumShards = 256;
+ size_t Step = Sections.size() / NumShards;
+ size_t Boundaries[NumShards + 1];
+ Boundaries[0] = 0;
+ Boundaries[NumShards] = Sections.size();
+ // Each interior boundary is moved forward to the next class boundary so
+ // that no class is split across two shards.
+ parallelForEachN(1, NumShards, [&](size_t I) {
+ Boundaries[I] = findBoundary((I - 1) * Step, Sections.size());
+ });
+ // Adjacent boundaries may coincide; such empty shards are skipped.
+ parallelForEachN(1, NumShards + 1, [&](size_t I) {
+ if (Boundaries[I - 1] < Boundaries[I])
+ forEachClassRange(Boundaries[I - 1], Boundaries[I], Fn);
+ });
+ ++Cnt;
+// Combine the hashes of the sections referenced by the given section into its
+// hash.
+// The input hashes are read from slot Cnt % 2 and the result is stored in the
+// other slot, (Cnt + 1) % 2.
+template <class ELFT, class RelTy>
+static void combineRelocHashes(unsigned Cnt, InputSection *IS,
+                               ArrayRef<RelTy> Rels) {
+  const unsigned Src = Cnt % 2;
+  const unsigned Dst = (Cnt + 1) % 2;
+  uint32_t Hash = IS->Class[Src];
+  for (RelTy Rel : Rels) {
+    Symbol &S = IS->template getFile<ELFT>()->getRelocTargetSym(Rel);
+    // Only targets that are Defined and live in an InputSection contribute.
+    auto *D = dyn_cast<Defined>(&S);
+    if (!D)
+      continue;
+    if (auto *RelSec = dyn_cast_or_null<InputSection>(D->Section))
+      Hash += RelSec->Class[Src];
+  }
+  // Set MSB to 1 to avoid collisions with non-hash IDs.
+  IS->Class[Dst] = Hash | (1U << 31);
+// Emits S through message(), but only when Config->PrintIcfSections is set.
+static void print(const Twine &S) {
+  if (!Config->PrintIcfSections)
+    return;
+  message(S);
+// The main function of ICF.
+// Steps: collect eligible sections, hash them, sort by hash, split classes
+// first on constant and then on variable equality until nothing changes, and
+// finally fold every class with more than one member into its first section.
+template <class ELFT> void ICF<ELFT>::run() {
+ // Collect sections to merge.
+ for (InputSectionBase *Sec : InputSections)
+ if (auto *S = dyn_cast<InputSection>(Sec))
+ if (isEligible(S))
+ Sections.push_back(S);
+ // Initially, we use hash values to partition sections.
+ parallelForEach(Sections, [&](InputSection *S) {
+ S->Class[0] = xxHash64(S->data());
+ });
+ // Two rounds of mixing in the hashes of referenced sections. Round 0 reads
+ // Class[0] and writes Class[1]; round 1 reads Class[1] and writes Class[0],
+ // so the final hash is in Class[0], which the sort below uses.
+ // Note that this loop-local Cnt shadows the member Cnt.
+ for (unsigned Cnt = 0; Cnt != 2; ++Cnt) {
+ parallelForEach(Sections, [&](InputSection *S) {
+ if (S->AreRelocsRela)
+ combineRelocHashes<ELFT>(Cnt, S, S->template relas<ELFT>());
+ else
+ combineRelocHashes<ELFT>(Cnt, S, S->template rels<ELFT>());
+ });
+ }
+ // From now on, sections in Sections vector are ordered so that sections
+ // in the same equivalence class are consecutive in the vector.
+ std::stable_sort(Sections.begin(), Sections.end(),
+ [](InputSection *A, InputSection *B) {
+ return A->Class[0] < B->Class[0];
+ });
+ // Compare static contents and assign unique IDs for each static content.
+ forEachClass([&](size_t Begin, size_t End) { segregate(Begin, End, true); });
+ // Split groups by comparing relocations until convergence is obtained.
+ do {
+ Repeat = false;
+ forEachClass(
+ [&](size_t Begin, size_t End) { segregate(Begin, End, false); });
+ } while (Repeat);
+ // Here Cnt is the member, incremented once per forEachClass call.
+ log("ICF needed " + Twine(Cnt) + " iterations");
+ // Merge sections by the equivalence class.
+ forEachClassRange(0, Sections.size(), [&](size_t Begin, size_t End) {
+ // A class with a single member has nothing to fold.
+ if (End - Begin == 1)
+ return;
+ print("selected section " + toString(Sections[Begin]));
+ for (size_t I = Begin + 1; I < End; ++I) {
+ print(" removing identical section " + toString(Sections[I]));
+ Sections[Begin]->replace(Sections[I]);
+ // At this point we know sections merged are fully identical and hence
+ // we want to remove duplicate implicit dependencies such as link order
+ // and relocation sections.
+ for (InputSection *IS : Sections[I]->DependentSections)
+ IS->Live = false;
+ }
+ });
+// ICF entry point function.
+// Creates a temporary ICF<ELFT> object and runs the pass on it.
+template <class ELFT> void elf::doIcf() { ICF<ELFT>().run(); }
+// Explicit instantiations for the four ELFT flavours named below.
+template void elf::doIcf<ELF32LE>();
+template void elf::doIcf<ELF32BE>();
+template void elf::doIcf<ELF64LE>();
+template void elf::doIcf<ELF64BE>();
diff --git a/contrib/llvm/tools/lld/ELF/ICF.h b/contrib/llvm/tools/lld/ELF/ICF.h
new file mode 100644
index 000000000000..a6c8636ead6d
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/ICF.h
@@ -0,0 +1,21 @@
+//===- ICF.h --------------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#ifndef LLD_ELF_ICF_H
+#define LLD_ELF_ICF_H
+namespace lld {
+namespace elf {
+// Runs identical code folding; ELFT selects the ELF flavour.
+template <class ELFT> void doIcf();
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/InputFiles.cpp b/contrib/llvm/tools/lld/ELF/InputFiles.cpp
new file mode 100644
index 000000000000..bc7e61072e64
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/InputFiles.cpp
@@ -0,0 +1,1345 @@
+//===- InputFiles.cpp -----------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "InputFiles.h"
+#include "InputSection.h"
+#include "LinkerScript.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/LTO/LTO.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/ARMAttributeParser.h"
+#include "llvm/Support/ARMBuildAttributes.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/TarWriter.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::object;
+using namespace llvm::sys;
+using namespace llvm::sys::fs;
+using namespace lld;
+using namespace lld::elf;
+// Static state read by the InputFile constructor when it assigns group IDs.
+bool InputFile::IsInGroup;
+uint32_t InputFile::NextGroupId;
+// Definitions of the file lists declared in the elf namespace.
+std::vector<BinaryFile *> elf::BinaryFiles;
+std::vector<BitcodeFile *> elf::BitcodeFiles;
+std::vector<LazyObjFile *> elf::LazyObjFiles;
+std::vector<InputFile *> elf::ObjectFiles;
+std::vector<InputFile *> elf::SharedFiles;
+// When non-null, readFile() appends every file it opens to this writer.
+std::unique_ptr<TarWriter> elf::Tar;
+// Records the buffer and file kind and stamps the file with the current
+// group ID.
+InputFile::InputFile(Kind K, MemoryBufferRef M)
+    : MB(M), GroupId(NextGroupId), FileKind(K) {
+  // All files within the same --{start,end}-group get the same group ID.
+  // Otherwise, a new file will get a new group ID.
+  if (IsInGroup)
+    return;
+  ++NextGroupId;
+// Opens Path and returns a reference to its contents, or None after reporting
+// an error if the file cannot be opened. Ownership of the buffer is handed to
+// make<>(), and the contents are also appended to Tar when Tar is set.
+Optional<MemoryBufferRef> elf::readFile(StringRef Path) {
+  // The --chroot option changes our virtual root directory.
+  // This is useful when you are dealing with files created by --reproduce.
+  const bool UnderChroot = !Config->Chroot.empty() && Path.startswith("/");
+  if (UnderChroot)
+    Path = Saver.save(Config->Chroot + Path);
+  log(Path);
+  auto MBOrErr = MemoryBuffer::getFile(Path, -1, false);
+  if (auto EC = MBOrErr.getError()) {
+    error("cannot open " + Path + ": " + EC.message());
+    return None;
+  }
+  // Take the reference before the owning pointer is moved away.
+  MemoryBufferRef Contents = (*MBOrErr)->getMemBufferRef();
+  make<std::unique_ptr<MemoryBuffer>>(std::move(*MBOrErr)); // take MB ownership
+  if (Tar)
+    Tar->append(relativeToRoot(Path), Contents.getBuffer());
+  return Contents;
+// Concatenates arguments to construct a string representing an error location.
+// The result is "file:line"; when Path has more than a bare file name, the
+// full "path:line" is appended in parentheses.
+static std::string createFileLineMsg(StringRef Path, unsigned Line) {
+  const std::string Base = path::filename(Path);
+  const std::string Suffix = ":" + std::to_string(Line);
+  std::string Msg = Base + Suffix;
+  if (!(Base == Path))
+    Msg = Msg + " (" + Path.str() + Suffix + ")";
+  return Msg;
+// Builds a source-location string for Sym at Offset in Sec. It tries a line
+// table lookup first, then a variable lookup by name, then File.SourceFile.
+template <class ELFT>
+static std::string getSrcMsgAux(ObjFile<ELFT> &File, const Symbol &Sym,
+                                InputSectionBase &Sec, uint64_t Offset) {
+  // In DWARF, functions and variables are stored to different places.
+  // First, lookup a function for a given offset.
+  Optional<DILineInfo> LineInfo = File.getDILineInfo(&Sec, Offset);
+  if (LineInfo)
+    return createFileLineMsg(LineInfo->FileName, LineInfo->Line);
+  // If it failed, lookup again as a variable.
+  Optional<std::pair<std::string, unsigned>> VarLoc =
+      File.getVariableLoc(Sym.getName());
+  if (VarLoc)
+    return createFileLineMsg(VarLoc->first, VarLoc->second);
+  // File.SourceFile contains STT_FILE symbol, and that is a last resort.
+  return File.SourceFile;
+// Returns a source-location string for Sym at Offset in Sec. The result is an
+// empty string unless this file is an object file; otherwise the call is
+// dispatched on the configured ELF kind.
+std::string InputFile::getSrcMsg(const Symbol &Sym, InputSectionBase &Sec,
+                                 uint64_t Offset) {
+  if (kind() != ObjKind)
+    return "";
+  switch (Config->EKind) {
+  case ELF32LEKind:
+    return getSrcMsgAux(cast<ObjFile<ELF32LE>>(*this), Sym, Sec, Offset);
+  case ELF32BEKind:
+    return getSrcMsgAux(cast<ObjFile<ELF32BE>>(*this), Sym, Sec, Offset);
+  case ELF64LEKind:
+    return getSrcMsgAux(cast<ObjFile<ELF64LE>>(*this), Sym, Sec, Offset);
+  case ELF64BEKind:
+    return getSrcMsgAux(cast<ObjFile<ELF64BE>>(*this), Sym, Sec, Offset);
+  default:
+    llvm_unreachable("Invalid kind");
+  }
+// Builds the DWARF context for this object file, collects the line table of
+// every compile unit into LineTables, and records the declaration file and
+// line of every named external variable in VariableLoc.
+template <class ELFT> void ObjFile<ELFT>::initializeDwarf() {
+ Dwarf = llvm::make_unique<DWARFContext>(make_unique<LLDDwarfObj<ELFT>>(this));
+ for (std::unique_ptr<DWARFUnit> &CU : Dwarf->compile_units()) {
+ // Line table problems are reported as warnings.
+ auto Report = [](Error Err) {
+ handleAllErrors(std::move(Err),
+ [](ErrorInfoBase &Info) { warn(Info.message()); });
+ };
+ Expected<const DWARFDebugLine::LineTable *> ExpectedLT =
+ Dwarf->getLineTableForUnit(CU.get(), Report);
+ const DWARFDebugLine::LineTable *LT = nullptr;
+ if (ExpectedLT)
+ LT = *ExpectedLT;
+ else
+ Report(ExpectedLT.takeError());
+ // Without a line table nothing can be recorded for this unit.
+ if (!LT)
+ continue;
+ LineTables.push_back(LT);
+ // Loop over variable records and insert them to VariableLoc.
+ for (const auto &Entry : CU->dies()) {
+ DWARFDie Die(CU.get(), &Entry);
+ // Skip all tags that are not variables.
+ if (Die.getTag() != dwarf::DW_TAG_variable)
+ continue;
+ // Skip if a local variable because we don't need them for generating
+ // error messages. In general, only non-local symbols can fail to be
+ // linked.
+ if (!dwarf::toUnsigned(Die.find(dwarf::DW_AT_external), 0))
+ continue;
+ // Get the source filename index for the variable.
+ unsigned File = dwarf::toUnsigned(Die.find(dwarf::DW_AT_decl_file), 0);
+ if (!LT->hasFileAtIndex(File))
+ continue;
+ // Get the line number on which the variable is declared.
+ unsigned Line = dwarf::toUnsigned(Die.find(dwarf::DW_AT_decl_line), 0);
+ // Here we want to take the variable name to add it into VariableLoc.
+ // A variable can have both a regular and a linkage name. We try the
+ // linkage name first because it can differ, for example when the same
+ // object has two variables in different namespaces. Otherwise we use the
+ // regular name, and we also handle the case where that is absent too
+ // because the input object file lacks some debug info.
+ StringRef Name =
+ dwarf::toString(Die.find(dwarf::DW_AT_linkage_name),
+ dwarf::toString(Die.find(dwarf::DW_AT_name), ""));
+ if (!Name.empty())
+ VariableLoc.insert({Name, {LT, File, Line}});
+ }
+ }
+// Returns the pair of file name and line number describing location of data
+// object (variable, array, etc) definition.
+// Returns None when Name was not recorded or its file name cannot be resolved.
+template <class ELFT>
+Optional<std::pair<std::string, unsigned>>
+ObjFile<ELFT>::getVariableLoc(StringRef Name) {
+  llvm::call_once(InitDwarfLine, [this]() { initializeDwarf(); });
+  // Return if we have no debug information about data object.
+  auto Found = VariableLoc.find(Name);
+  if (Found == VariableLoc.end())
+    return None;
+  // Take file name string from line table.
+  const auto &Loc = Found->second;
+  std::string Resolved;
+  const bool HasName = Loc.LT->getFileNameByIndex(
+      Loc.File, nullptr,
+      DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, Resolved);
+  if (!HasName)
+    return None;
+  return std::make_pair(Resolved, Loc.Line);
+// Returns source line information for a given offset
+// using DWARF debug info.
+// Returns None when none of the collected line tables has a matching entry.
+template <class ELFT>
+Optional<DILineInfo> ObjFile<ELFT>::getDILineInfo(InputSectionBase *S,
+ uint64_t Offset) {
+ llvm::call_once(InitDwarfLine, [this]() { initializeDwarf(); });
+ // Use fake address calculated by adding section file offset and offset in
+ // section. See comments for ObjectInfo class.
+ DILineInfo Info;
+ // The first line table that resolves the address wins.
+ for (const llvm::DWARFDebugLine::LineTable *LT : LineTables)
+ if (LT->getFileLineInfoForAddress(
+ S->getOffsetInFile() + Offset, nullptr,
+ DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, Info))
+ return Info;
+ return None;
+// Returns "<internal>", "foo.a(bar.o)" or "baz.o".
+// The string is computed on first use and cached in F->ToStringCache.
+std::string lld::toString(const InputFile *F) {
+  if (!F)
+    return "<internal>";
+  if (!F->ToStringCache.empty())
+    return F->ToStringCache;
+  // Archive members are printed as "archive(member)".
+  if (!F->ArchiveName.empty())
+    F->ToStringCache = (F->ArchiveName + "(" + F->getName() + ")").str();
+  else
+    F->ToStringCache = F->getName();
+  return F->ToStringCache;
+// Derives EKind from ELFT's endianness and word size, then copies the machine
+// type and OS ABI out of the ELF header.
+template <class ELFT>
+ELFFileBase<ELFT>::ELFFileBase(Kind K, MemoryBufferRef MB) : InputFile(K, MB) {
+  const bool LittleEndian = ELFT::TargetEndianness == support::little;
+  if (ELFT::Is64Bits)
+    EKind = LittleEndian ? ELF64LEKind : ELF64BEKind;
+  else
+    EKind = LittleEndian ? ELF32LEKind : ELF32BEKind;
+  EMachine = getObj().getHeader()->e_machine;
+  OSABI = getObj().getHeader()->e_ident[llvm::ELF::EI_OSABI];
+// Returns the symbols from index FirstGlobal to the end of ELFSyms.
+template <class ELFT>
+typename ELFT::SymRange ELFFileBase<ELFT>::getGlobalELFSyms() {
+  auto First = ELFSyms.begin() + FirstGlobal;
+  auto Last = ELFSyms.end();
+  return makeArrayRef(First, Last);
+// Returns the section index of Sym as computed by the underlying ELF object
+// from ELFSyms and SymtabSHNDX. Errors go through CHECK with this file as
+// context.
+template <class ELFT>
+uint32_t ELFFileBase<ELFT>::getSectionIndex(const Elf_Sym &Sym) const {
+ return CHECK(getObj().getSectionIndex(&Sym, ELFSyms, SymtabSHNDX), this);
+// Loads the symbols and the string table described by the symbol table
+// section header Symtab. Sections is the file's section header table.
+template <class ELFT>
+void ELFFileBase<ELFT>::initSymtab(ArrayRef<Elf_Shdr> Sections,
+ const Elf_Shdr *Symtab) {
+ // sh_info is used as the index of the first global symbol.
+ FirstGlobal = Symtab->sh_info;
+ ELFSyms = CHECK(getObj().symbols(Symtab), this);
+ // The index must be non-zero and must not exceed the number of symbols.
+ if (FirstGlobal == 0 || FirstGlobal > ELFSyms.size())
+ fatal(toString(this) + ": invalid sh_info in symbol table");
+ StringTable =
+ CHECK(getObj().getStringTableForSymtab(*Symtab, Sections), this);
+template <class ELFT> // Constructor: creates an ObjKind file over buffer M and records the containing archive's name.
+ObjFile<ELFT>::ObjFile(MemoryBufferRef M, StringRef ArchiveName)
+ : ELFFileBase<ELFT>(Base::ObjKind, M) {
+ this->ArchiveName = ArchiveName;
+template <class ELFT> ArrayRef<Symbol *> ObjFile<ELFT>::getLocalSymbols() { // symbols at indices 1 .. FirstGlobal-1
+ if (this->Symbols.empty())
+ return {};
+ return makeArrayRef(this->Symbols).slice(1, this->FirstGlobal - 1); // drops entry 0, keeps FirstGlobal-1 entries
+template <class ELFT> ArrayRef<Symbol *> ObjFile<ELFT>::getGlobalSymbols() { // symbols from index FirstGlobal onwards
+ return makeArrayRef(this->Symbols).slice(this->FirstGlobal);
+template <class ELFT> // Parses the object file: the section table first, then the symbol table.
+void ObjFile<ELFT>::parse(DenseSet<CachedHashStringRef> &ComdatGroups) {
+ // Read a section table. JustSymbols is usually false.
+ if (this->JustSymbols)
+ initializeJustSymbols();
+ else
+ initializeSections(ComdatGroups);
+ // Read a symbol table. This runs after the sections have been set up.
+ initializeSymbols();
+// Sections with SHT_GROUP and comdat bits define comdat section groups.
+// They are identified and deduplicated by group name. This function
+// returns the group name (signature) of the SHT_GROUP section Sec.
+template <class ELFT>
+StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> Sections,
+ const Elf_Shdr &Sec) {
+ // Group signatures are stored as symbol names in object files.
+ // sh_info contains a symbol index, so we fetch a symbol and read its name.
+ if (this->ELFSyms.empty()) // symbol table not loaded yet: load the one named by sh_link
+ this->initSymtab(
+ Sections, CHECK(object::getSection<ELFT>(Sections, Sec.sh_link), this));
+ const Elf_Sym *Sym =
+ CHECK(object::getSymbol<ELFT>(this->ELFSyms, Sec.sh_info), this);
+ StringRef Signature = CHECK(Sym->getName(this->StringTable), this);
+ // As a special case, if a symbol is a section symbol and has no name,
+ // we use a section name as a signature.
+ //
+ // Such SHT_GROUP sections are invalid from the perspective of the ELF
+ // standard, but GNU gold 1.14 (the newest version as of July 2017) or
+ // older produces such sections as outputs for the -r option, so we need
+ // to be bug-compatible with it.
+ if (Signature.empty() && Sym->getType() == STT_SECTION)
+ return getSectionName(Sec);
+ return Signature;
+template <class ELFT> bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &Sec) { // decides whether Sec is handled as a mergeable section
+ // On a regular link we don't merge sections if -O0 (default is -O1). This
+ // sometimes makes the linker significantly faster, although the output will
+ // be bigger.
+ //
+ // Doing the same for -r would create a problem as it would combine sections
+ // with different sh_entsize. One option would be to just copy every SHF_MERGE
+ // section as is to the output. While this would produce a valid ELF file with
+ // usable SHF_MERGE sections, tools like (llvm-)?dwarfdump get confused when
+ // they see two .debug_str. We could have separate logic for combining
+ // SHF_MERGE sections based both on their name and sh_entsize, but that seems
+ // to be more trouble than it is worth. Instead, we just use the regular (-O1)
+ // logic for -r.
+ if (Config->Optimize == 0 && !Config->Relocatable)
+ return false;
+ // A mergeable section with size 0 is useless because it doesn't have
+ // any data to merge. A mergeable string section with size 0 can be
+ // argued as invalid because it doesn't end with a null character.
+ // We'll avoid a mess by handling them as if they were non-mergeable.
+ if (Sec.sh_size == 0)
+ return false;
+ // Check for sh_entsize. The ELF spec is not clear about the zero
+ // sh_entsize. It says that "the member [sh_entsize] contains 0 if
+ // the section does not hold a table of fixed-size entries". We know
+ // that Rust 1.13 produces a string mergeable section with a zero
+ // sh_entsize. Here we just accept it as a non-mergeable section rather
+ // than being picky about it.
+ uint64_t EntSize = Sec.sh_entsize;
+ if (EntSize == 0)
+ return false;
+ if (Sec.sh_size % EntSize) // note: this check runs before the SHF_MERGE flag test below
+ fatal(toString(this) +
+ ": SHF_MERGE section size must be a multiple of sh_entsize");
+ uint64_t Flags = Sec.sh_flags;
+ if (!(Flags & SHF_MERGE))
+ return false;
+ if (Flags & SHF_WRITE)
+ fatal(toString(this) + ": writable SHF_MERGE section is not supported");
+ return true;
+// This is for --just-symbols.
+// --just-symbols is a very minor feature that allows you to link your
+// output against another existing program, so that if you load both your
+// program and the other program into memory, your output can refer to the
+// other program's symbols.
+// When the option is given, we link "just symbols". The section table is
+// initialized with null pointers.
+template <class ELFT> void ObjFile<ELFT>::initializeJustSymbols() {
+ ArrayRef<Elf_Shdr> ObjSections = CHECK(this->getObj().sections(), this);
+ this->Sections.resize(ObjSections.size());
+ for (const Elf_Shdr &Sec : ObjSections) {
+ if (Sec.sh_type != SHT_SYMTAB)
+ continue;
+ this->initSymtab(ObjSections, &Sec); // only the first SHT_SYMTAB section is used
+ return;
+ }
+// Builds this->Sections from the section header table. Discards SHF_EXCLUDE
+// sections and the members of duplicate comdat groups, loads the symbol table
+// and its extended section index table, creates input sections for everything
+// else and registers SHF_LINK_ORDER dependencies.
+template <class ELFT>
+void ObjFile<ELFT>::initializeSections(
+    DenseSet<CachedHashStringRef> &ComdatGroups) {
+  const ELFFile<ELFT> &Obj = this->getObj();
+
+  ArrayRef<Elf_Shdr> ObjSections = CHECK(Obj.sections(), this);
+  uint64_t Size = ObjSections.size();
+  this->Sections.resize(Size);
+  this->SectionStringTable =
+      CHECK(Obj.getSectionStringTable(ObjSections), this);
+
+  for (size_t I = 0, E = ObjSections.size(); I < E; I++) {
+    // A section may already have been discarded as a member of a duplicate
+    // comdat group seen earlier in this loop.
+    if (this->Sections[I] == &InputSection::Discarded)
+      continue;
+    const Elf_Shdr &Sec = ObjSections[I];
+
+    if (Sec.sh_type == ELF::SHT_LLVM_CALL_GRAPH_PROFILE)
+      CGProfile = check(
+          this->getObj().template getSectionContentsAsArray<Elf_CGProfile>(
+              &Sec));
+
+    // SHF_EXCLUDE'ed sections are discarded by the linker. However,
+    // if -r is given, we'll let the final link discard such sections.
+    // This is compatible with GNU.
+    if ((Sec.sh_flags & SHF_EXCLUDE) && !Config->Relocatable) {
+      if (Sec.sh_type == SHT_LLVM_ADDRSIG) {
+        // We ignore the address-significance table if we know that the object
+        // file was created by objcopy or ld -r. This is because these tools
+        // will reorder the symbols in the symbol table, invalidating the data
+        // in the address-significance table, which refers to symbols by index.
+        if (Sec.sh_link != 0)
+          this->AddrsigSec = &Sec;
+        else if (Config->ICF == ICFLevel::Safe)
+          warn(toString(this) + ": --icf=safe is incompatible with object "
+                                "files created using objcopy or ld -r");
+      }
+      this->Sections[I] = &InputSection::Discarded;
+      continue;
+    }
+
+    switch (Sec.sh_type) {
+    case SHT_GROUP: {
+      // De-duplicate section groups by their signatures.
+      StringRef Signature = getShtGroupSignature(ObjSections, Sec);
+      this->Sections[I] = &InputSection::Discarded;
+
+      ArrayRef<Elf_Word> Entries =
+          CHECK(Obj.template getSectionContentsAsArray<Elf_Word>(&Sec), this);
+      if (Entries.empty())
+        fatal(toString(this) + ": empty SHT_GROUP");
+
+      // The first word of a SHT_GROUP section contains flags. Currently,
+      // the standard defines only "GRP_COMDAT" flag for the COMDAT group.
+      // A group with the empty flag doesn't define anything; such sections
+      // are just skipped.
+      if (Entries[0] == 0)
+        continue;
+
+      if (Entries[0] != GRP_COMDAT)
+        fatal(toString(this) + ": unsupported SHT_GROUP format");
+
+      bool IsNew = ComdatGroups.insert(CachedHashStringRef(Signature)).second;
+      if (IsNew) {
+        if (Config->Relocatable)
+          this->Sections[I] = createInputSection(Sec);
+        continue;
+      }
+
+      // Otherwise, discard group members.
+      for (uint32_t SecIndex : Entries.slice(1)) {
+        if (SecIndex >= Size)
+          fatal(toString(this) +
+                ": invalid section index in group: " + Twine(SecIndex));
+        this->Sections[SecIndex] = &InputSection::Discarded;
+      }
+      break;
+    }
+    case SHT_SYMTAB:
+      this->initSymtab(ObjSections, &Sec);
+      break;
+    // The extended section index table needs its own case label; without it
+    // the assignment below is unreachable after the preceding break.
+    case SHT_SYMTAB_SHNDX:
+      this->SymtabSHNDX = CHECK(Obj.getSHNDXTable(Sec, ObjSections), this);
+      break;
+    case SHT_STRTAB:
+    case SHT_NULL:
+      break;
+    default:
+      this->Sections[I] = createInputSection(Sec);
+    }
+
+    // .ARM.exidx sections have a reverse dependency on the InputSection they
+    // have a SHF_LINK_ORDER dependency, this is identified by the sh_link.
+    if (Sec.sh_flags & SHF_LINK_ORDER) {
+      InputSectionBase *LinkSec = nullptr;
+      if (Sec.sh_link < this->Sections.size())
+        LinkSec = this->Sections[Sec.sh_link];
+      if (!LinkSec)
+        fatal(toString(this) +
+              ": invalid sh_link index: " + Twine(Sec.sh_link));
+
+      InputSection *IS = cast<InputSection>(this->Sections[I]);
+      LinkSec->DependentSections.push_back(IS);
+      if (!isa<InputSection>(LinkSec))
+        error("a section " + IS->Name +
+              " with SHF_LINK_ORDER should not refer a non-regular "
+              "section: " +
+              toString(LinkSec));
+    }
+  }
+}
+// Reads Tag_ABI_VFP_args from the ARM build attributes to find out which calling
+// convention the input object was compiled for, and records it in Config->ARMVFPArgs.
+static void updateARMVFPArgs(const ARMAttributeParser &Attributes,
+ const InputFile *F) {
+ if (!Attributes.hasAttribute(ARMBuildAttrs::ABI_VFP_args))
+ // If an ABI tag isn't present then it is implicitly given the value of 0
+ // which maps to ARMBuildAttrs::BaseAAPCS. However many assembler files,
+ // including some in glibc that don't use FP args (and should have value 3)
+ // don't have the attribute so we do not consider an implicit value of 0
+ // as a clash.
+ return;
+ unsigned VFPArgs = Attributes.getAttributeValue(ARMBuildAttrs::ABI_VFP_args);
+ ARMVFPArgKind Arg;
+ switch (VFPArgs) {
+ case ARMBuildAttrs::BaseAAPCS:
+ Arg = ARMVFPArgKind::Base;
+ break;
+ case ARMBuildAttrs::HardFPAAPCS:
+ Arg = ARMVFPArgKind::VFP;
+ break;
+ case ARMBuildAttrs::ToolChainFPPCS:
+ // Tool chain specific convention that conforms to neither AAPCS variant.
+ Arg = ARMVFPArgKind::ToolChain;
+ break;
+ case ARMBuildAttrs::CompatibleFPAAPCS:
+ // Object compatible with all conventions, so it constrains nothing.
+ return;
+ default:
+ error(toString(F) + ": unknown Tag_ABI_VFP_args value: " + Twine(VFPArgs));
+ return;
+ }
+ // Follow ld.bfd and error if there is a mix of calling conventions.
+ if (Config->ARMVFPArgs != Arg && Config->ARMVFPArgs != ARMVFPArgKind::Default)
+ error(toString(F) + ": incompatible Tag_ABI_VFP_args");
+ else // first object with an explicit convention, or the same convention again
+ Config->ARMVFPArgs = Arg;
+// The ARM support in lld makes some use of instructions that are not available
+// on all ARM architectures. Namely:
+// - Use of BLX instruction for interworking between ARM and Thumb state.
+// - Use of the extended Thumb branch encoding in relocation.
+// - Use of the MOVT/MOVW instructions in Thumb Thunks.
+// The ARM Attributes section contains information about the architecture chosen
+// at compile time. We follow the convention that if at least one input object
+// is compiled with an architecture that supports these features then lld is
+// permitted to use them. The flags below are only ever set, never cleared.
+static void updateSupportedARMFeatures(const ARMAttributeParser &Attributes) {
+ if (!Attributes.hasAttribute(ARMBuildAttrs::CPU_arch))
+ return;
+ auto Arch = Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch);
+ switch (Arch) {
+ case ARMBuildAttrs::Pre_v4:
+ case ARMBuildAttrs::v4:
+ case ARMBuildAttrs::v4T:
+ // Architectures prior to v5 do not support the BLX instruction.
+ break;
+ case ARMBuildAttrs::v5T:
+ case ARMBuildAttrs::v5TE:
+ case ARMBuildAttrs::v5TEJ:
+ case ARMBuildAttrs::v6:
+ case ARMBuildAttrs::v6KZ:
+ case ARMBuildAttrs::v6K:
+ Config->ARMHasBlx = true;
+ // Architectures used in pre-Cortex processors do not support
+ // the J1 = 1 J2 = 1 Thumb branch range extension, with the exception
+ // of Architecture v6T2 (arm1156t2-s and arm1156t2f-s) that do.
+ break;
+ default:
+ // All other Architectures have BLX and extended branch encoding
+ Config->ARMHasBlx = true;
+ Config->ARMJ1J2BranchEncoding = true;
+ if (Arch != ARMBuildAttrs::v6_M && Arch != ARMBuildAttrs::v6S_M)
+ // All Architectures used in Cortex processors with the exception
+ // of v6-M and v6S-M have the MOVT and MOVW instructions.
+ Config->ARMHasMovtMovw = true;
+ break;
+ }
+template <class ELFT> // Returns the section that relocation section Sec applies to (index sh_info), or nullptr if it was discarded.
+InputSectionBase *ObjFile<ELFT>::getRelocTarget(const Elf_Shdr &Sec) {
+ uint32_t Idx = Sec.sh_info;
+ if (Idx >= this->Sections.size())
+ fatal(toString(this) + ": invalid relocated section index: " + Twine(Idx));
+ InputSectionBase *Target = this->Sections[Idx];
+ // Strictly speaking, a relocation section must be included in the
+ // group of the section it relocates. However, LLVM 3.3 and earlier
+ // would fail to do so, so we gracefully handle that case.
+ if (Target == &InputSection::Discarded)
+ return nullptr;
+ if (!Target) // no input section was created for the target index
+ fatal(toString(this) + ": unsupported relocation reference");
+ return Target;
+// Create a regular InputSection that has the same file, flags, type,
+// alignment, contents and name as the given mergeable section.
+static InputSection *toRegularSection(MergeInputSection *Sec) {
+ return make<InputSection>(Sec->File, Sec->Flags, Sec->Type, Sec->Alignment,
+ Sec->data(), Sec->Name);
+// Creates the input section object for section header Sec. Returns
+// &InputSection::Discarded for sections the linker drops, nullptr for
+// relocation sections that are consumed rather than emitted, and otherwise
+// an EhInputSection, MergeInputSection or plain InputSection.
+template <class ELFT>
+InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &Sec) {
+  StringRef Name = getSectionName(Sec);
+
+  switch (Sec.sh_type) {
+  // The ARM attributes handling needs its own case label and opening brace;
+  // without them the statements sit directly inside the switch and the
+  // closing brace below is unbalanced.
+  case SHT_ARM_ATTRIBUTES: {
+    if (Config->EMachine != EM_ARM)
+      break;
+    ARMAttributeParser Attributes;
+    ArrayRef<uint8_t> Contents = check(this->getObj().getSectionContents(&Sec));
+    Attributes.Parse(Contents, /*isLittle*/ Config->EKind == ELF32LEKind);
+    updateSupportedARMFeatures(Attributes);
+    updateARMVFPArgs(Attributes, this);
+
+    // FIXME: Retain the first attribute section we see. The eglibc ARM
+    // dynamic loaders require the presence of an attribute section for dlopen
+    // to work. In a full implementation we would merge all attribute sections.
+    if (In.ARMAttributes == nullptr) {
+      In.ARMAttributes = make<InputSection>(*this, Sec, Name);
+      return In.ARMAttributes;
+    }
+    return &InputSection::Discarded;
+  }
+  case SHT_RELA:
+  case SHT_REL: {
+    // Find a relocation target section and associate this section with that.
+    // Target may have been discarded if it is in a different section group
+    // and the group is discarded, even though it's a violation of the
+    // spec. We handle that situation gracefully by discarding dangling
+    // relocation sections.
+    InputSectionBase *Target = getRelocTarget(Sec);
+    if (!Target)
+      return nullptr;
+
+    // This section contains relocation information.
+    // If -r is given, we do not interpret or apply relocation
+    // but just copy relocation sections to output.
+    if (Config->Relocatable) {
+      InputSection *RelocSec = make<InputSection>(*this, Sec, Name);
+      // We want to add a dependency to target, similar like we do for
+      // -emit-relocs below. This is useful for the case when linker script
+      // contains the "/DISCARD/". It is perhaps uncommon to use a script with
+      // -r, but we faced it in the Linux kernel and have to handle such case
+      // and not to crash.
+      Target->DependentSections.push_back(RelocSec);
+      return RelocSec;
+    }
+
+    if (Target->FirstRelocation)
+      fatal(toString(this) +
+            ": multiple relocation sections to one section are not supported");
+
+    // ELF spec allows mergeable sections with relocations, but they are
+    // rare, and it is in practice hard to merge such sections by contents,
+    // because applying relocations at end of linking changes section
+    // contents. So, we simply handle such sections as non-mergeable ones.
+    // Degrading like this is acceptable because section merging is optional.
+    if (auto *MS = dyn_cast<MergeInputSection>(Target)) {
+      Target = toRegularSection(MS);
+      this->Sections[Sec.sh_info] = Target;
+    }
+
+    if (Sec.sh_type == SHT_RELA) {
+      ArrayRef<Elf_Rela> Rels = CHECK(this->getObj().relas(&Sec), this);
+      Target->FirstRelocation = Rels.begin();
+      Target->NumRelocations = Rels.size();
+      Target->AreRelocsRela = true;
+    } else {
+      ArrayRef<Elf_Rel> Rels = CHECK(this->getObj().rels(&Sec), this);
+      Target->FirstRelocation = Rels.begin();
+      Target->NumRelocations = Rels.size();
+      Target->AreRelocsRela = false;
+    }
+    assert(isUInt<31>(Target->NumRelocations));
+
+    // Relocation sections processed by the linker are usually removed
+    // from the output, so returning `nullptr` for the normal case.
+    // However, if -emit-relocs is given, we need to leave them in the output.
+    // (Some post link analysis tools need this information.)
+    if (Config->EmitRelocs) {
+      InputSection *RelocSec = make<InputSection>(*this, Sec, Name);
+      // We will not emit relocation section if target was discarded.
+      Target->DependentSections.push_back(RelocSec);
+      return RelocSec;
+    }
+    return nullptr;
+  }
+  }
+
+  // The GNU linker uses .note.GNU-stack section as a marker indicating
+  // that the code in the object file does not expect that the stack is
+  // executable (in terms of NX bit). If all input files have the marker,
+  // the GNU linker adds a PT_GNU_STACK segment to tells the loader to
+  // make the stack non-executable. Most object files have this section as
+  // of 2017.
+  //
+  // But making the stack non-executable is a norm today for security
+  // reasons. Failure to do so may result in a serious security issue.
+  // Therefore, we make LLD always add PT_GNU_STACK unless it is
+  // explicitly told to do otherwise (by -z execstack). Because the stack
+  // executable-ness is controlled solely by command line options,
+  // .note.GNU-stack sections are simply ignored.
+  if (Name == ".note.GNU-stack")
+    return &InputSection::Discarded;
+
+  // Split stacks is a feature to support a discontiguous stack,
+  // commonly used in the programming language Go. For the details,
+  // see https://gcc.gnu.org/wiki/SplitStacks. An object file compiled
+  // for split stack will include a .note.GNU-split-stack section.
+  if (Name == ".note.GNU-split-stack") {
+    if (Config->Relocatable) {
+      error("cannot mix split-stack and non-split-stack in a relocatable link");
+      return &InputSection::Discarded;
+    }
+    this->SplitStack = true;
+    return &InputSection::Discarded;
+  }
+
+  // An object file compiled for split stack, but where some of the
+  // functions were compiled with the no_split_stack_attribute will
+  // include a .note.GNU-no-split-stack section.
+  if (Name == ".note.GNU-no-split-stack") {
+    this->SomeNoSplitStack = true;
+    return &InputSection::Discarded;
+  }
+
+  // The linkonce feature is a sort of proto-comdat. Some glibc i386 object
+  // files contain definitions of symbol "__x86.get_pc_thunk.bx" in linkonce
+  // sections. Drop those sections to avoid duplicate symbol errors.
+  // FIXME: This is glibc PR20543, we should remove this hack once that has been
+  // fixed for a while.
+  if (Name == ".gnu.linkonce.t.__x86.get_pc_thunk.bx" ||
+      Name == ".gnu.linkonce.t.__i686.get_pc_thunk.bx")
+    return &InputSection::Discarded;
+
+  // If we are creating a new .build-id section, strip existing .build-id
+  // sections so that the output won't have more than one .build-id.
+  // This is not usually a problem because input object files normally don't
+  // have .build-id sections, but you can create such files by
+  // "ld.{bfd,gold,lld} -r --build-id", and we want to guard against it.
+  if (Name == ".note.gnu.build-id" && Config->BuildId != BuildIdKind::None)
+    return &InputSection::Discarded;
+
+  // The linker merges EH (exception handling) frames and creates a
+  // .eh_frame_hdr section for runtime. So we handle them with a special
+  // class. For relocatable outputs, they are just passed through.
+  if (Name == ".eh_frame" && !Config->Relocatable)
+    return make<EhInputSection>(*this, Sec, Name);
+
+  if (shouldMerge(Sec))
+    return make<MergeInputSection>(*this, Sec, Name);
+  return make<InputSection>(*this, Sec, Name);
+}
+template <class ELFT> // Returns the name of Sec, looked up in SectionStringTable.
+StringRef ObjFile<ELFT>::getSectionName(const Elf_Shdr &Sec) {
+ return CHECK(this->getObj().getSectionName(&Sec, SectionStringTable), this);
+template <class ELFT> void ObjFile<ELFT>::initializeSymbols() { // creates one Symbol per ELF symbol, in table order
+ this->Symbols.reserve(this->ELFSyms.size());
+ for (const Elf_Sym &Sym : this->ELFSyms)
+ this->Symbols.push_back(createSymbol(&Sym));
+template <class ELFT> Symbol *ObjFile<ELFT>::createSymbol(const Elf_Sym *Sym) { // turns one ELF symbol into a linker Symbol
+ int Binding = Sym->getBinding();
+ uint32_t SecIdx = this->getSectionIndex(*Sym);
+ if (SecIdx >= this->Sections.size())
+ fatal(toString(this) + ": invalid section index: " + Twine(SecIdx));
+ InputSectionBase *Sec = this->Sections[SecIdx];
+ uint8_t StOther = Sym->st_other;
+ uint8_t Type = Sym->getType();
+ uint64_t Value = Sym->st_value;
+ uint64_t Size = Sym->st_size;
+ if (Binding == STB_LOCAL) { // local symbols are created directly, without going through Symtab
+ if (Sym->getType() == STT_FILE) // remember the STT_FILE symbol's name as SourceFile
+ SourceFile = CHECK(Sym->getName(this->StringTable), this);
+ if (this->StringTable.size() <= Sym->st_name)
+ fatal(toString(this) + ": invalid symbol name offset");
+ StringRefZ Name = this->StringTable.data() + Sym->st_name;
+ if (Sym->st_shndx == SHN_UNDEF)
+ return make<Undefined>(this, Name, Binding, StOther, Type);
+ return make<Defined>(this, Name, Binding, StOther, Type, Value, Size, Sec);
+ }
+ StringRef Name = CHECK(Sym->getName(this->StringTable), this);
+ switch (Sym->st_shndx) {
+ case SHN_UNDEF:
+ return Symtab->addUndefined<ELFT>(Name, Binding, StOther, Type,
+ /*CanOmitFromDynSym=*/false, this);
+ case SHN_COMMON:
+ if (Value == 0 || Value >= UINT32_MAX) // st_value is passed on as the common symbol's alignment
+ fatal(toString(this) + ": common symbol '" + Name +
+ "' has invalid alignment: " + Twine(Value));
+ return Symtab->addCommon(Name, Size, Value, Binding, StOther, Type, *this);
+ }
+ switch (Binding) {
+ default:
+ fatal(toString(this) + ": unexpected binding: " + Twine(Binding));
+ case STB_GLOBAL:
+ case STB_WEAK:
+ if (Sec == &InputSection::Discarded) // a definition in a discarded section is added as undefined
+ return Symtab->addUndefined<ELFT>(Name, Binding, StOther, Type,
+ /*CanOmitFromDynSym=*/false, this);
+ return Symtab->addDefined(Name, StOther, Type, Value, Size, Binding, Sec,
+ this);
+ }
+ArchiveFile::ArchiveFile(std::unique_ptr<Archive> &&File) // takes ownership of the given Archive
+ : InputFile(ArchiveKind, File->getMemoryBufferRef()),
+ File(std::move(File)) {}
+template <class ELFT> void ArchiveFile::parse() { // registers every symbol of the archive via addLazyArchive
+ for (const Archive::Symbol &Sym : File->symbols())
+ Symtab->addLazyArchive<ELFT>(Sym.getName(), *this, Sym);
+// Returns the member file defining a given symbol, or nullptr if it was already fetched.
+InputFile *ArchiveFile::fetch(const Archive::Symbol &Sym) {
+ Archive::Child C =
+ CHECK(Sym.getMember(), toString(this) +
+ ": could not get the member for symbol " +
+ Sym.getName());
+ if (!Seen.insert(C.getChildOffset()).second) // a member at this offset was returned before
+ return nullptr;
+ MemoryBufferRef MB =
+ CHECK(C.getMemoryBufferRef(),
+ toString(this) +
+ ": could not get the buffer for the member defining symbol " +
+ Sym.getName());
+ if (Tar && C.getParent()->isThin()) // thin-archive members are appended to Tar when it is set
+ Tar->append(relativeToRoot(CHECK(C.getFullName(), this)), MB.getBuffer());
+ InputFile *File = createObjectFile(
+ MB, getName(), C.getParent()->isThin() ? 0 : C.getChildOffset()); // thin-archive members are given offset 0
+ File->GroupId = GroupId;
+ return File;
+template <class ELFT> // Constructor: SoName starts as DefaultSoName; IsNeeded starts as the negation of Config->AsNeeded.
+SharedFile<ELFT>::SharedFile(MemoryBufferRef M, StringRef DefaultSoName)
+ : ELFFileBase<ELFT>(Base::SharedKind, M), SoName(DefaultSoName),
+ IsNeeded(!Config->AsNeeded) {}
+// Partially parse the shared object file so that we can call
+// getSoName on this object. This locates the dynamic symbol table, the
+// dynamic section, the extended section index table and the symbol version
+// sections, then sets SoName from DT_SONAME if the dynamic section has one.
+template <class ELFT> void SharedFile<ELFT>::parseSoName() {
+  const Elf_Shdr *DynamicSec = nullptr;
+  const ELFFile<ELFT> Obj = this->getObj();
+  ArrayRef<Elf_Shdr> Sections = CHECK(Obj.sections(), this);
+
+  // Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d.
+  for (const Elf_Shdr &Sec : Sections) {
+    switch (Sec.sh_type) {
+    default:
+      continue;
+    case SHT_DYNSYM:
+      this->initSymtab(Sections, &Sec);
+      break;
+    // The two labels below are required: without them both assignments are
+    // unreachable, DynamicSec stays null and DT_SONAME is never read.
+    case SHT_DYNAMIC:
+      DynamicSec = &Sec;
+      break;
+    case SHT_SYMTAB_SHNDX:
+      this->SymtabSHNDX = CHECK(Obj.getSHNDXTable(Sec, Sections), this);
+      break;
+    case SHT_GNU_versym:
+      this->VersymSec = &Sec;
+      break;
+    case SHT_GNU_verdef:
+      this->VerdefSec = &Sec;
+      break;
+    }
+  }
+
+  if (this->VersymSec && this->ELFSyms.empty())
+    error("SHT_GNU_versym should be associated with symbol table");
+
+  // Search for a DT_SONAME tag to initialize this->SoName.
+  if (!DynamicSec)
+    return;
+  ArrayRef<Elf_Dyn> Arr =
+      CHECK(Obj.template getSectionContentsAsArray<Elf_Dyn>(DynamicSec), this);
+  for (const Elf_Dyn &Dyn : Arr) {
+    if (Dyn.d_tag == DT_SONAME) {
+      // The value is an offset into the string table; reject offsets past it.
+      uint64_t Val = Dyn.getVal();
+      if (Val >= this->StringTable.size())
+        fatal(toString(this) + ": invalid DT_SONAME entry");
+      SoName = this->StringTable.data() + Val;
+      return;
+    }
+  }
+}
+// Parses ".gnu.version" section which is a parallel array for the symbol table.
+// Only global symbols are covered. Without ".gnu.version", every entry is VER_NDX_GLOBAL.
+template <class ELFT> std::vector<uint32_t> SharedFile<ELFT>::parseVersyms() {
+ size_t Size = this->ELFSyms.size() - this->FirstGlobal; // number of global symbols
+ if (!VersymSec)
+ return std::vector<uint32_t>(Size, VER_NDX_GLOBAL);
+ const char *Base = this->MB.getBuffer().data();
+ const Elf_Versym *Versym =
+ reinterpret_cast<const Elf_Versym *>(Base + VersymSec->sh_offset) +
+ this->FirstGlobal; // skip the entries that belong to local symbols
+ std::vector<uint32_t> Ret(Size);
+ for (size_t I = 0; I < Size; ++I)
+ Ret[I] = Versym[I].vs_index;
+ return Ret;
+// Parse the version definitions in the object file if present. Returns a vector
+// whose nth element contains a pointer to the Elf_Verdef for version identifier
+// n. Version identifiers that are not definitions map to nullptr.
+template <class ELFT>
+std::vector<const typename ELFT::Verdef *> SharedFile<ELFT>::parseVerdefs() {
+  if (!VerdefSec)
+    return {};
+
+  // We cannot determine the largest verdef identifier without inspecting
+  // every Elf_Verdef, but both bfd and gold assign verdef identifiers
+  // sequentially starting from 1, so we predict that the largest identifier
+  // will be VerdefCount.
+  unsigned VerdefCount = VerdefSec->sh_info;
+  std::vector<const Elf_Verdef *> Verdefs(VerdefCount + 1);
+
+  // Build the Verdefs array by following the chain of Elf_Verdef objects
+  // from the start of the .gnu.version_d section.
+  const char *Base = this->MB.getBuffer().data();
+  const char *Verdef = Base + VerdefSec->sh_offset;
+  for (unsigned I = 0; I != VerdefCount; ++I) {
+    auto *CurVerdef = reinterpret_cast<const Elf_Verdef *>(Verdef);
+    Verdef += CurVerdef->vd_next;
+    unsigned VerdefIndex = CurVerdef->vd_ndx;
+    // Only ever grow the table. An unconditional resize would shrink it when
+    // a smaller identifier follows a larger one, dropping entries that were
+    // already recorded.
+    if (VerdefIndex >= Verdefs.size())
+      Verdefs.resize(VerdefIndex + 1);
+    Verdefs[VerdefIndex] = CurVerdef;
+  }
+  return Verdefs;
+}
+// We do not usually care about alignments of data in shared object
+// files because the loader takes care of it. However, if we promote a
+// DSO symbol to point to .bss due to copy relocation, we need to keep
+// the original alignment requirements. We infer it in this function.
+template <class ELFT>
+uint32_t SharedFile<ELFT>::getAlignment(ArrayRef<Elf_Shdr> Sections,
+ const Elf_Sym &Sym) {
+ uint64_t Ret = UINT64_MAX;
+ if (Sym.st_value)
+ Ret = 1ULL << countTrailingZeros((uint64_t)Sym.st_value); // largest power of two dividing st_value
+ if (0 < Sym.st_shndx && Sym.st_shndx < Sections.size())
+ Ret = std::min<uint64_t>(Ret, Sections[Sym.st_shndx].sh_addralign); // capped by the section's alignment
+ return (Ret > UINT32_MAX) ? 0 : Ret; // 0 when nothing was inferred or the value exceeds 32 bits
+// Fully parse the shared object file. This must be called after parseSoName().
+// This function parses symbol versions. If a DSO has version information,
+// the file has a ".gnu.version_d" section which contains symbol version
+// definitions. Each symbol is associated to one version through a table in
+// ".gnu.version" section. That table is a parallel array for the symbol
+// table, and each table entry contains an index in ".gnu.version_d".
+// The special index 0 is reserved for VER_NDX_LOCAL and 1 is for
+// VER_NDX_GLOBAL. There's no table entry for these special versions in
+// ".gnu.version_d".
+// The file format for symbol versioning is perhaps a bit more complicated
+// than necessary, but you can easily understand the code if you wrap your
+// head around the data structure described above.
+template <class ELFT> void SharedFile<ELFT>::parseRest() {
+ Verdefs = parseVerdefs(); // parse .gnu.version_d
+ std::vector<uint32_t> Versyms = parseVersyms(); // parse .gnu.version
+ ArrayRef<Elf_Shdr> Sections = CHECK(this->getObj().sections(), this);
+ // System libraries can have a lot of symbols with versions. Using a
+ // fixed buffer for computing the versions name (foo@ver) can save a
+ // lot of allocations.
+ SmallString<0> VersionedNameBuffer;
+ // Add symbols to the symbol table.
+ ArrayRef<Elf_Sym> Syms = this->getGlobalELFSyms();
+ for (size_t I = 0; I < Syms.size(); ++I) {
+ const Elf_Sym &Sym = Syms[I];
+ // ELF spec requires that all local symbols precede weak or global
+ // symbols in each symbol table, and the index of first non-local symbol
+ // is stored to sh_info. If a local symbol appears after some non-local
+ // symbol, that's a violation of the spec. We warn and skip such symbols.
+ StringRef Name = CHECK(Sym.getName(this->StringTable), this);
+ if (Sym.getBinding() == STB_LOCAL) {
+ warn("found local symbol '" + Name +
+ "' in global part of symbol table in file " + toString(this));
+ continue;
+ }
+ if (Sym.isUndefined()) {
+ Symbol *S = Symtab->addUndefined<ELFT>(Name, Sym.getBinding(),
+ Sym.st_other, Sym.getType(),
+ /*CanOmitFromDynSym=*/false, this);
+ S->ExportDynamic = true;
+ continue;
+ }
+ // MIPS BFD linker puts _gp_disp symbol into DSO files and incorrectly
+ // assigns VER_NDX_LOCAL to this section global symbol. Here is a
+ // workaround for this bug.
+ uint32_t Idx = Versyms[I] & ~VERSYM_HIDDEN;
+ if (Config->EMachine == EM_MIPS && Idx == VER_NDX_LOCAL &&
+ Name == "_gp_disp")
+ continue;
+ uint64_t Alignment = getAlignment(Sections, Sym);
+ if (!(Versyms[I] & VERSYM_HIDDEN)) // hidden versions are not added under the plain name
+ Symtab->addShared(Name, *this, Sym, Alignment, Idx);
+ // Also add the symbol with the versioned name to handle undefined symbols
+ // with explicit versions.
+ if (Idx == VER_NDX_GLOBAL)
+ continue;
+ if (Idx >= Verdefs.size() || Idx == VER_NDX_LOCAL) {
+ error("corrupt input file: version definition index " + Twine(Idx) +
+ " for symbol " + Name + " is out of bounds\n>>> defined in " +
+ toString(this));
+ continue;
+ }
+ StringRef VerName =
+ this->StringTable.data() + Verdefs[Idx]->getAux()->vda_name;
+ VersionedNameBuffer.clear();
+ Name = (Name + "@" + VerName).toStringRef(VersionedNameBuffer);
+ Symtab->addShared(Saver.save(Name), *this, Sym, Alignment, Idx); // Saver.save copies the name out of the reused buffer
+ }
+static ELFKind getBitcodeELFKind(const Triple &T) { // maps the triple's endianness and word size to an ELFKind
+ if (T.isLittleEndian())
+ return T.isArch64Bit() ? ELF64LEKind : ELF32LEKind;
+ return T.isArch64Bit() ? ELF64BEKind : ELF32BEKind;
+static uint8_t getBitcodeMachineKind(StringRef Path, const Triple &T) { // maps the triple's architecture to an ELF e_machine value
+ switch (T.getArch()) {
+ case Triple::aarch64:
+ return EM_AARCH64;
+ case Triple::amdgcn:
+ case Triple::r600:
+ return EM_AMDGPU;
+ case Triple::arm:
+ case Triple::thumb:
+ return EM_ARM;
+ case Triple::avr:
+ return EM_AVR;
+ case Triple::mips:
+ case Triple::mipsel:
+ case Triple::mips64:
+ case Triple::mips64el:
+ return EM_MIPS;
+ case Triple::msp430:
+ return EM_MSP430;
+ case Triple::ppc:
+ return EM_PPC;
+ case Triple::ppc64:
+ case Triple::ppc64le:
+ return EM_PPC64;
+ case Triple::x86:
+ return T.isOSIAMCU() ? EM_IAMCU : EM_386;
+ case Triple::x86_64:
+ return EM_X86_64;
+ default: // unsupported architecture: report an error naming Path and return EM_NONE
+ error(Path + ": could not infer e_machine from bitcode target triple " +
+ T.str());
+ return EM_NONE;
+ }
+BitcodeFile::BitcodeFile(MemoryBufferRef MB, StringRef ArchiveName, // Constructor: loads MB as an LTO input file
+ uint64_t OffsetInArchive)
+ : InputFile(BitcodeKind, MB) {
+ this->ArchiveName = ArchiveName;
+ std::string Path = MB.getBufferIdentifier().str();
+ if (Config->ThinLTOIndexOnly)
+ Path = replaceThinLTOSuffix(MB.getBufferIdentifier());
+ // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
+ // name. If two archives define two members with the same name, this
+ // causes a collision which results in only one of the objects being taken
+ // into consideration at LTO time (which very likely causes undefined
+ // symbols later in the link stage). So we append the file offset to make
+ // the filename unique.
+ MemoryBufferRef MBRef(
+ MB.getBuffer(),
+ Saver.save(ArchiveName + Path +
+ (ArchiveName.empty() ? "" : utostr(OffsetInArchive)))); // the offset is only appended for archive members
+ Obj = CHECK(lto::InputFile::create(MBRef), this);
+ Triple T(Obj->getTargetTriple());
+ EKind = getBitcodeELFKind(T);
+ EMachine = getBitcodeMachineKind(MB.getBufferIdentifier(), T);
+// Maps an LLVM IR global value visibility to the corresponding ELF STV_*
+// symbol visibility constant.
+static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) {
+  switch (GvVisibility) {
+  case GlobalValue::DefaultVisibility:
+    return STV_DEFAULT;
+  case GlobalValue::HiddenVisibility:
+    return STV_HIDDEN;
+  case GlobalValue::ProtectedVisibility:
+    // Without this return, protected visibility would fall out of the switch
+    // into llvm_unreachable below.
+    return STV_PROTECTED;
+  }
+  llvm_unreachable("unknown visibility");
+}
+// Adds one symbol of bitcode file F to the global symbol table and returns
+// the resulting Symbol.
+//
+// KeptComdats[I] tells whether comdat group I of this file was kept. A symbol
+// that belongs to a discarded group is added as undefined, exactly like a
+// symbol that the file itself leaves undefined. Common symbols are added with
+// their size and alignment; everything else is added as a bitcode-defined
+// symbol.
+template <class ELFT>
+static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats,
+                                   const lto::InputFile::Symbol &ObjSym,
+                                   BitcodeFile &F) {
+  StringRef Name = Saver.save(ObjSym.getName());
+  uint32_t Binding = ObjSym.isWeak() ? STB_WEAK : STB_GLOBAL;
+  uint8_t Type = ObjSym.isTLS() ? STT_TLS : STT_NOTYPE;
+  uint8_t Visibility = mapVisibility(ObjSym.getVisibility());
+  bool CanOmitFromDynSym = ObjSym.canBeOmittedFromSymbolTable();
+  // A comdat index of -1 means the symbol is not in any comdat group.
+  int C = ObjSym.getComdatIndex();
+  if (C != -1 && !KeptComdats[C])
+    return Symtab->addUndefined<ELFT>(Name, Binding, Visibility, Type,
+                                      CanOmitFromDynSym, &F);
+  if (ObjSym.isUndefined())
+    return Symtab->addUndefined<ELFT>(Name, Binding, Visibility, Type,
+                                      CanOmitFromDynSym, &F);
+  // Common symbols are data objects, so they are registered as STT_OBJECT.
+  // The call was previously left unterminated, which swallowed the final
+  // return statement into its argument list.
+  if (ObjSym.isCommon())
+    return Symtab->addCommon(Name, ObjSym.getCommonSize(),
+                             ObjSym.getCommonAlignment(), Binding, Visibility,
+                             STT_OBJECT, F);
+  return Symtab->addBitcode(Name, Binding, Visibility, Type, CanOmitFromDynSym,
+                            F);
+}
+// Claims this file's comdat groups in ComdatGroups and creates a Symbol for
+// every symbol the bitcode file exposes.
+template <class ELFT>
+void BitcodeFile::parse(DenseSet<CachedHashStringRef> &ComdatGroups) {
+  // Keep[I] is true when the insertion of comdat I succeeded, i.e. no
+  // earlier file had already registered a group with that name.
+  std::vector<bool> Keep;
+  for (StringRef Comdat : Obj->getComdatTable()) {
+    bool Inserted = ComdatGroups.insert(CachedHashStringRef(Comdat)).second;
+    Keep.push_back(Inserted);
+  }
+  for (const lto::InputFile::Symbol &Sym : Obj->symbols()) {
+    Symbol *Created = createBitcodeSymbol<ELFT>(Keep, Sym, *this);
+    Symbols.push_back(Created);
+  }
+}
+// Classifies an ELF buffer as one of ELF{32,64}{LE,BE}Kind from its
+// identification bytes. Calls fatal() when the data encoding or file class
+// is invalid, or when the buffer is smaller than the ELF header of its class.
+static ELFKind getELFKind(MemoryBufferRef MB) {
+  unsigned char Class;
+  unsigned char Encoding;
+  std::tie(Class, Encoding) = getElfArchType(MB.getBuffer());
+  const bool IsLE = Encoding == ELFDATA2LSB;
+  if (!IsLE && Encoding != ELFDATA2MSB)
+    fatal(MB.getBufferIdentifier() + ": invalid data encoding");
+  const bool Is32 = Class == ELFCLASS32;
+  if (!Is32 && Class != ELFCLASS64)
+    fatal(MB.getBufferIdentifier() + ": invalid file class");
+  // From here on the class is known to be either 32-bit or 64-bit.
+  const size_t HeaderSize = Is32 ? sizeof(Elf32_Ehdr) : sizeof(Elf64_Ehdr);
+  if (MB.getBuffer().size() < HeaderSize)
+    fatal(MB.getBufferIdentifier() + ": file is too short");
+  if (Is32)
+    return IsLE ? ELF32LEKind : ELF32BEKind;
+  return IsLE ? ELF64LEKind : ELF64BEKind;
+}
+// Turns the whole input buffer into one writable, allocated ".data" section
+// and defines the symbols that let programs locate the blob.
+void BinaryFile::parse() {
+  ArrayRef<uint8_t> Contents = arrayRefFromStringRef(MB.getBuffer());
+  auto *Blob = make<InputSection>(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS,
+                                  8, Contents, ".data");
+  Sections.push_back(Blob);
+  // For an embedded input file foo we define _binary_foo_{start,end,size},
+  // so that user programs can access blobs by name. Every character of the
+  // name that is not alphanumeric becomes an underscore.
+  std::string Stem = "_binary_" + MB.getBufferIdentifier().str();
+  for (char &Ch : Stem)
+    if (!isAlnum(Ch))
+      Ch = '_';
+  // _start and _end are offsets into the section; _size has no section and
+  // carries the byte count as its value.
+  Symtab->addDefined(Saver.save(Stem + "_start"), STV_DEFAULT, STT_OBJECT, 0,
+                     0, STB_GLOBAL, Blob, nullptr);
+  Symtab->addDefined(Saver.save(Stem + "_end"), STV_DEFAULT, STT_OBJECT,
+                     Contents.size(), 0, STB_GLOBAL, Blob, nullptr);
+  Symtab->addDefined(Saver.save(Stem + "_size"), STV_DEFAULT, STT_OBJECT,
+                     Contents.size(), 0, STB_GLOBAL, nullptr, nullptr);
+}
+InputFile *elf::createObjectFile(MemoryBufferRef MB, StringRef ArchiveName,
+ uint64_t OffsetInArchive) { // Returns a BitcodeFile for LLVM bitcode, otherwise an ObjFile matching the buffer's ELF class and endianness.
+ if (isBitcode(MB))
+ return make<BitcodeFile>(MB, ArchiveName, OffsetInArchive);
+ switch (getELFKind(MB)) { // OffsetInArchive is only forwarded on the bitcode path above.
+ case ELF32LEKind:
+ return make<ObjFile<ELF32LE>>(MB, ArchiveName);
+ case ELF32BEKind:
+ return make<ObjFile<ELF32BE>>(MB, ArchiveName);
+ case ELF64LEKind:
+ return make<ObjFile<ELF64LE>>(MB, ArchiveName);
+ case ELF64BEKind:
+ return make<ObjFile<ELF64BE>>(MB, ArchiveName);
+ default:
+ llvm_unreachable("getELFKind"); // getELFKind() returns only the four kinds handled above.
+ }
+InputFile *elf::createSharedFile(MemoryBufferRef MB, StringRef DefaultSoName) { // Returns a SharedFile instantiated for the buffer's ELF class and endianness.
+ switch (getELFKind(MB)) {
+ case ELF32LEKind:
+ return make<SharedFile<ELF32LE>>(MB, DefaultSoName);
+ case ELF32BEKind:
+ return make<SharedFile<ELF32BE>>(MB, DefaultSoName);
+ case ELF64LEKind:
+ return make<SharedFile<ELF64LE>>(MB, DefaultSoName);
+ case ELF64BEKind:
+ return make<SharedFile<ELF64BE>>(MB, DefaultSoName);
+ default:
+ llvm_unreachable("getELFKind"); // getELFKind() returns only the four kinds handled above.
+ }
+// Hands out the underlying buffer exactly once. The first call marks the
+// file as added to the link and returns MB; every later call returns an
+// empty MemoryBufferRef.
+MemoryBufferRef LazyObjFile::getBuffer() {
+  const bool AlreadyAdded = AddedToLink;
+  AddedToLink = true;
+  if (AlreadyAdded)
+    return MemoryBufferRef();
+  return MB;
+}
+// Materializes the wrapped file as a real input file. Returns nullptr when
+// getBuffer() yields an empty buffer; otherwise the new file inherits this
+// file's GroupId.
+InputFile *LazyObjFile::fetch() {
+  MemoryBufferRef Buf = getBuffer();
+  if (!Buf.getBuffer().empty()) {
+    InputFile *Loaded = createObjectFile(Buf, ArchiveName, OffsetInArchive);
+    Loaded->GroupId = GroupId;
+    return Loaded;
+  }
+  return nullptr;
+}
+template <class ELFT> void LazyObjFile::parse() { // Registers each symbol this file defines as a lazy symbol in the symbol table.
+ // A lazy object file wraps either a bitcode file or an ELF file.
+ if (isBitcode(this->MB)) {
+ std::unique_ptr<lto::InputFile> Obj =
+ CHECK(lto::InputFile::create(this->MB), this);
+ for (const lto::InputFile::Symbol &Sym : Obj->symbols())
+ if (!Sym.isUndefined())
+ Symtab->addLazyObject<ELFT>(Saver.save(Sym.getName()), *this);
+ return;
+ }
+ if (getELFKind(this->MB) != Config->EKind) { // An ELF file whose class/endianness differs from Config->EKind is an error.
+ error("incompatible file: " + this->MB.getBufferIdentifier());
+ return;
+ }
+ ELFFile<ELFT> Obj = check(ELFFile<ELFT>::create(MB.getBuffer()));
+ ArrayRef<typename ELFT::Shdr> Sections = CHECK(Obj.sections(), this);
+ for (const typename ELFT::Shdr &Sec : Sections) {
+ if (Sec.sh_type != SHT_SYMTAB)
+ continue;
+ typename ELFT::SymRange Syms = CHECK(Obj.symbols(&Sec), this);
+ uint32_t FirstGlobal = Sec.sh_info; // Entries before index sh_info are dropped by the slice below.
+ StringRef StringTable =
+ CHECK(Obj.getStringTableForSymtab(Sec, Sections), this);
+ for (const typename ELFT::Sym &Sym : Syms.slice(FirstGlobal))
+ if (Sym.st_shndx != SHN_UNDEF)
+ Symtab->addLazyObject<ELFT>(CHECK(Sym.getName(StringTable), this),
+ *this);
+ return; // Only the first SHT_SYMTAB section is processed.
+ }
+// Applies the configured ThinLTO object-suffix replacement to Path. When
+// Path ends with the configured suffix, that suffix is swapped for the
+// replacement; otherwise Path comes back unchanged.
+std::string elf::replaceThinLTOSuffix(StringRef Path) {
+  StringRef OldSuffix = Config->ThinLTOObjectSuffixReplace.first;
+  StringRef NewSuffix = Config->ThinLTOObjectSuffixReplace.second;
+  // consume_back() strips the suffix from Path only when it matches.
+  if (!Path.consume_back(OldSuffix))
+    return Path;
+  return (Path + NewSuffix).str();
+}
+template void ArchiveFile::parse<ELF32LE>(); // Explicit instantiations of the parse() member templates for each ELF flavor (32/64-bit, little/big endian).
+template void ArchiveFile::parse<ELF32BE>();
+template void ArchiveFile::parse<ELF64LE>();
+template void ArchiveFile::parse<ELF64BE>();
+template void BitcodeFile::parse<ELF32LE>(DenseSet<CachedHashStringRef> &);
+template void BitcodeFile::parse<ELF32BE>(DenseSet<CachedHashStringRef> &);
+template void BitcodeFile::parse<ELF64LE>(DenseSet<CachedHashStringRef> &);
+template void BitcodeFile::parse<ELF64BE>(DenseSet<CachedHashStringRef> &);
+template void LazyObjFile::parse<ELF32LE>();
+template void LazyObjFile::parse<ELF32BE>();
+template void LazyObjFile::parse<ELF64LE>();
+template void LazyObjFile::parse<ELF64BE>();
+template class elf::ELFFileBase<ELF32LE>; // Explicit instantiations of the ELFT-parameterized file classes.
+template class elf::ELFFileBase<ELF32BE>;
+template class elf::ELFFileBase<ELF64LE>;
+template class elf::ELFFileBase<ELF64BE>;
+template class elf::ObjFile<ELF32LE>;
+template class elf::ObjFile<ELF32BE>;
+template class elf::ObjFile<ELF64LE>;
+template class elf::ObjFile<ELF64BE>;
+template class elf::SharedFile<ELF32LE>;
+template class elf::SharedFile<ELF32BE>;
+template class elf::SharedFile<ELF64LE>;
+template class elf::SharedFile<ELF64BE>;
diff --git a/contrib/llvm/tools/lld/ELF/InputFiles.h b/contrib/llvm/tools/lld/ELF/InputFiles.h
new file mode 100644
index 000000000000..d7cbbc67a365
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/InputFiles.h
@@ -0,0 +1,382 @@
+//===- InputFiles.h ---------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Config.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Common/Reproduce.h"
+#include "llvm/ADT/CachedHashString.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
+#include "llvm/IR/Comdat.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Object/IRObjectFile.h"
+#include "llvm/Support/Threading.h"
+#include <map>
+namespace llvm {
+class TarWriter;
+struct DILineInfo;
+namespace lto {
+class InputFile;
+} // namespace llvm
+namespace lld {
+namespace elf {
+class InputFile;
+class InputSectionBase;
+// Returns "<internal>", "foo.a(bar.o)" or "baz.o".
+std::string toString(const elf::InputFile *F);
+namespace elf {
+using llvm::object::Archive;
+class Symbol;
+// If -reproduce option is given, all input files are written
+// to this tar archive.
+extern std::unique_ptr<llvm::TarWriter> Tar;
+// Opens a given file.
+llvm::Optional<MemoryBufferRef> readFile(StringRef Path);
+// The root class of input files. Concrete file types are told apart by Kind.
+class InputFile {
+ enum Kind { // Discriminator for the concrete file type; compared in the subclasses' classof().
+ ObjKind,
+ SharedKind,
+ LazyObjKind,
+ ArchiveKind,
+ BitcodeKind,
+ BinaryKind,
+ };
+ Kind kind() const { return FileKind; }
+ bool isElf() const { // True only for ObjKind and SharedKind files.
+ Kind K = kind();
+ return K == ObjKind || K == SharedKind;
+ }
+ StringRef getName() const { return MB.getBufferIdentifier(); }
+ MemoryBufferRef MB; // The buffer this file was created from.
+ // Returns sections. It is a runtime error to call this function
+ // on files that don't have the notion of sections.
+ ArrayRef<InputSectionBase *> getSections() const {
+ assert(FileKind == ObjKind || FileKind == BinaryKind);
+ return Sections;
+ }
+ // Returns object file symbols. It is a runtime error to call this
+ // function on files of other types.
+ ArrayRef<Symbol *> getSymbols() { return getMutableSymbols(); }
+ std::vector<Symbol *> &getMutableSymbols() { // Asserts the file is a binary, object or bitcode file.
+ assert(FileKind == BinaryKind || FileKind == ObjKind ||
+ FileKind == BitcodeKind);
+ return Symbols;
+ }
+ // Filename of .a which contained this file. If this file was
+ // not in an archive file, it is the empty string. We use this
+ // string for creating error messages.
+ std::string ArchiveName;
+ // If this is an architecture-specific file, the following members
+ // have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type.
+ ELFKind EKind = ELFNoneKind;
+ uint16_t EMachine = llvm::ELF::EM_NONE;
+ uint8_t OSABI = 0;
+ // Cache for toString(). Only toString() should use this member.
+ mutable std::string ToStringCache;
+ std::string getSrcMsg(const Symbol &Sym, InputSectionBase &Sec,
+ uint64_t Offset);
+ // True if this is an argument for --just-symbols. Usually false.
+ bool JustSymbols = false;
+ // GroupId is used for --warn-backrefs which is an optional error
+ // checking feature. All files within the same --{start,end}-group or
+ // --{start,end}-lib get the same group ID. Otherwise, each file gets a new
+ // group ID. For more info, see checkDependency() in SymbolTable.cpp.
+ uint32_t GroupId; // No in-class initializer — presumably assigned by the constructor; verify.
+ static bool IsInGroup;
+ static uint32_t NextGroupId;
+ // Index of MIPS GOT built for this file.
+ llvm::Optional<size_t> MipsGotIndex;
+ InputFile(Kind K, MemoryBufferRef M);
+ std::vector<InputSectionBase *> Sections; // Backing storage for getSections().
+ std::vector<Symbol *> Symbols; // Backing storage for getSymbols()/getMutableSymbols().
+ const Kind FileKind; // Immutable after construction; read through kind().
+template <typename ELFT> class ELFFileBase : public InputFile { // Common base of the ELF-backed files (ObjFile and SharedFile derive from it).
+ typedef typename ELFT::Shdr Elf_Shdr;
+ typedef typename ELFT::Sym Elf_Sym;
+ typedef typename ELFT::Word Elf_Word;
+ typedef typename ELFT::SymRange Elf_Sym_Range;
+ ELFFileBase(Kind K, MemoryBufferRef M);
+ static bool classof(const InputFile *F) { return F->isElf(); }
+ llvm::object::ELFFile<ELFT> getObj() const { // Builds a fresh ELFFile view over MB on every call.
+ return check(llvm::object::ELFFile<ELFT>::create(MB.getBuffer()));
+ }
+ StringRef getStringTable() const { return StringTable; }
+ uint32_t getSectionIndex(const Elf_Sym &Sym) const;
+ Elf_Sym_Range getGlobalELFSyms();
+ Elf_Sym_Range getELFSyms() const { return ELFSyms; }
+ ArrayRef<Elf_Sym> ELFSyms;
+ uint32_t FirstGlobal = 0;
+ ArrayRef<Elf_Word> SymtabSHNDX;
+ StringRef StringTable;
+ void initSymtab(ArrayRef<Elf_Shdr> Sections, const Elf_Shdr *Symtab);
+// .o file.
+template <class ELFT> class ObjFile : public ELFFileBase<ELFT> {
+ typedef ELFFileBase<ELFT> Base;
+ typedef typename ELFT::Rel Elf_Rel;
+ typedef typename ELFT::Rela Elf_Rela;
+ typedef typename ELFT::Sym Elf_Sym;
+ typedef typename ELFT::Shdr Elf_Shdr;
+ typedef typename ELFT::Word Elf_Word;
+ typedef typename ELFT::CGProfile Elf_CGProfile;
+ StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> Sections,
+ const Elf_Shdr &Sec);
+ static bool classof(const InputFile *F) { return F->kind() == Base::ObjKind; }
+ ArrayRef<Symbol *> getLocalSymbols();
+ ArrayRef<Symbol *> getGlobalSymbols();
+ ObjFile(MemoryBufferRef M, StringRef ArchiveName);
+ void parse(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups);
+ Symbol &getSymbol(uint32_t SymbolIndex) const { // Bounds-checked lookup; an out-of-range index is a fatal error.
+ if (SymbolIndex >= this->Symbols.size())
+ fatal(toString(this) + ": invalid symbol index");
+ return *this->Symbols[SymbolIndex];
+ }
+ template <typename RelT> Symbol &getRelocTargetSym(const RelT &Rel) const { // Returns the symbol that relocation Rel refers to.
+ uint32_t SymIndex = Rel.getSymbol(Config->IsMips64EL);
+ return getSymbol(SymIndex);
+ }
+ llvm::Optional<llvm::DILineInfo> getDILineInfo(InputSectionBase *, uint64_t);
+ llvm::Optional<std::pair<std::string, unsigned>> getVariableLoc(StringRef Name);
+ // MIPS GP0 value defined by this file. This value represents the gp value
+ // used to create the relocatable object and required to support
+ // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations.
+ uint32_t MipsGp0 = 0;
+ // Name of source file obtained from STT_FILE symbol value,
+ // or empty string if there is no such symbol in object file
+ // symbol table.
+ StringRef SourceFile;
+ // True if the file defines functions compiled with
+ // -fsplit-stack. Usually false.
+ bool SplitStack = false;
+ // True if the file defines functions compiled with -fsplit-stack,
+ // but had one or more functions with the no_split_stack attribute.
+ bool SomeNoSplitStack = false;
+ // Pointer to this input file's .llvm_addrsig section, if it has one.
+ const Elf_Shdr *AddrsigSec = nullptr;
+ ArrayRef<Elf_CGProfile> CGProfile;
+ void
+ initializeSections(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups);
+ void initializeSymbols();
+ void initializeJustSymbols();
+ void initializeDwarf();
+ InputSectionBase *getRelocTarget(const Elf_Shdr &Sec);
+ InputSectionBase *createInputSection(const Elf_Shdr &Sec);
+ StringRef getSectionName(const Elf_Shdr &Sec);
+ bool shouldMerge(const Elf_Shdr &Sec);
+ Symbol *createSymbol(const Elf_Sym *Sym);
+ // .shstrtab contents.
+ StringRef SectionStringTable;
+ // Debugging information to retrieve source file and line for error
+ // reporting. Linker may find reasonable number of errors in a
+ // single object file, so we cache debugging information in order to
+ // parse it only once for each object file we link.
+ std::unique_ptr<llvm::DWARFContext> Dwarf;
+ std::vector<const llvm::DWARFDebugLine::LineTable *> LineTables;
+ struct VarLoc { // A line table plus file and line numbers; stored per name in VariableLoc below.
+ const llvm::DWARFDebugLine::LineTable *LT;
+ unsigned File;
+ unsigned Line;
+ };
+ llvm::DenseMap<StringRef, VarLoc> VariableLoc;
+ llvm::once_flag InitDwarfLine;
+// LazyObjFile is analogous to ArchiveFile in the sense that
+// the file contains lazy symbols. The difference is that
+// LazyObjFile wraps a single file instead of multiple files.
+// This class is used for --start-lib and --end-lib options which
+// instruct the linker to link object files between them with the
+// archive file semantics.
+class LazyObjFile : public InputFile {
+ LazyObjFile(MemoryBufferRef M, StringRef ArchiveName,
+ uint64_t OffsetInArchive)
+ : InputFile(LazyObjKind, M), OffsetInArchive(OffsetInArchive) {
+ this->ArchiveName = ArchiveName;
+ }
+ static bool classof(const InputFile *F) { return F->kind() == LazyObjKind; }
+ template <class ELFT> void parse();
+ MemoryBufferRef getBuffer();
+ InputFile *fetch();
+ bool AddedToLink = false; // Starts false; the name suggests it records that the file joined the link — confirm in getBuffer().
+ uint64_t OffsetInArchive; // Stored by the constructor from its OffsetInArchive argument.
+// An ArchiveFile object represents a .a file.
+class ArchiveFile : public InputFile {
+ explicit ArchiveFile(std::unique_ptr<Archive> &&File);
+ static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; }
+ template <class ELFT> void parse();
+ // Pulls out an object file that contains a definition for Sym and
+ // returns it. If the same file was instantiated before, this
+ // function returns a nullptr (so we don't instantiate the same file
+ // more than once.)
+ InputFile *fetch(const Archive::Symbol &Sym);
+ std::unique_ptr<Archive> File; // The archive object this file owns.
+ llvm::DenseSet<uint64_t> Seen; // NOTE(review): presumably the member offsets already fetched — confirm in fetch().
+class BitcodeFile : public InputFile { // Input file whose kind is BitcodeKind; holds an llvm::lto::InputFile.
+ BitcodeFile(MemoryBufferRef M, StringRef ArchiveName,
+ uint64_t OffsetInArchive);
+ static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; }
+ template <class ELFT>
+ void parse(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups);
+ std::unique_ptr<llvm::lto::InputFile> Obj; // Owned LTO view of the bitcode.
+// .so file.
+template <class ELFT> class SharedFile : public ELFFileBase<ELFT> {
+ typedef ELFFileBase<ELFT> Base;
+ typedef typename ELFT::Dyn Elf_Dyn;
+ typedef typename ELFT::Shdr Elf_Shdr;
+ typedef typename ELFT::Sym Elf_Sym;
+ typedef typename ELFT::SymRange Elf_Sym_Range;
+ typedef typename ELFT::Verdef Elf_Verdef;
+ typedef typename ELFT::Versym Elf_Versym;
+ const Elf_Shdr *VersymSec = nullptr;
+ const Elf_Shdr *VerdefSec = nullptr;
+ std::vector<const Elf_Verdef *> Verdefs;
+ std::string SoName;
+ static bool classof(const InputFile *F) {
+ return F->kind() == Base::SharedKind;
+ }
+ SharedFile(MemoryBufferRef M, StringRef DefaultSoName);
+ void parseSoName();
+ void parseRest();
+ uint32_t getAlignment(ArrayRef<Elf_Shdr> Sections, const Elf_Sym &Sym);
+ std::vector<const Elf_Verdef *> parseVerdefs();
+ std::vector<uint32_t> parseVersyms();
+ struct NeededVer {
+ // The string table offset of the version name in the output file.
+ size_t StrTab;
+ // The version identifier for this version name.
+ uint16_t Index;
+ };
+ // Mapping from Elf_Verdef data structures to information about Elf_Vernaux
+ // data structures in the output file.
+ std::map<const Elf_Verdef *, NeededVer> VerdefMap;
+ // Used for --as-needed
+ bool IsNeeded; // No in-class initializer — presumably set by the constructor; verify.
+class BinaryFile : public InputFile { // Input file whose kind is BinaryKind.
+ explicit BinaryFile(MemoryBufferRef M) : InputFile(BinaryKind, M) {}
+ static bool classof(const InputFile *F) { return F->kind() == BinaryKind; }
+ void parse();
+InputFile *createObjectFile(MemoryBufferRef MB, StringRef ArchiveName = "",
+ uint64_t OffsetInArchive = 0);
+InputFile *createSharedFile(MemoryBufferRef MB, StringRef DefaultSoName);
+inline bool isBitcode(MemoryBufferRef MB) { // True when the buffer's magic identifies it as LLVM bitcode.
+ return identify_magic(MB.getBuffer()) == llvm::file_magic::bitcode;
+std::string replaceThinLTOSuffix(StringRef Path);
+extern std::vector<BinaryFile *> BinaryFiles;
+extern std::vector<BitcodeFile *> BitcodeFiles;
+extern std::vector<LazyObjFile *> LazyObjFiles;
+extern std::vector<InputFile *> ObjectFiles;
+extern std::vector<InputFile *> SharedFiles;
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/InputSection.cpp b/contrib/llvm/tools/lld/ELF/InputSection.cpp
new file mode 100644
index 000000000000..839bff7011eb
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/InputSection.cpp
@@ -0,0 +1,1283 @@
+//===- InputSection.cpp ---------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "InputSection.h"
+#include "Config.h"
+#include "EhFrame.h"
+#include "InputFiles.h"
+#include "LinkerScript.h"
+#include "OutputSections.h"
+#include "Relocations.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+#include "Thunks.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Compression.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/Support/xxhash.h"
+#include <algorithm>
+#include <mutex>
+#include <set>
+#include <vector>
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::object;
+using namespace llvm::support;
+using namespace llvm::support::endian;
+using namespace llvm::sys;
+using namespace lld;
+using namespace lld::elf;
+std::vector<InputSectionBase *> elf::InputSections;
+// Returns a string to construct an error message, of the form "<file>:(<section name>)".
+std::string lld::toString(const InputSectionBase *Sec) {
+ return (toString(Sec->File) + ":(" + Sec->Name + ")").str();
+template <class ELFT>
+static ArrayRef<uint8_t> getSectionContents(ObjFile<ELFT> &File,
+ const typename ELFT::Shdr &Hdr) { // Returns the bytes of the section described by Hdr.
+ if (Hdr.sh_type == SHT_NOBITS) // SHT_NOBITS: nothing is read from the file; the result has a null data pointer and length sh_size.
+ return makeArrayRef<uint8_t>(nullptr, Hdr.sh_size);
+ return check(File.getObj().getSectionContents(&Hdr));
+InputSectionBase::InputSectionBase(InputFile *File, uint64_t Flags,
+ uint32_t Type, uint64_t Entsize,
+ uint32_t Link, uint32_t Info,
+ uint32_t Alignment, ArrayRef<uint8_t> Data,
+ StringRef Name, Kind SectionKind)
+ : SectionBase(SectionKind, Name, Flags, Entsize, Alignment, Type, Info,
+ Link),
+ File(File), RawData(Data) { // Validates size and alignment, then parses the compression header if the section is compressed.
+ // In order to reduce memory allocation, we assume that mergeable
+ // sections are smaller than 4 GiB, which is not an unreasonable
+ // assumption as of 2017.
+ if (SectionKind == SectionBase::Merge && RawData.size() > UINT32_MAX)
+ error(toString(this) + ": section too large");
+ NumRelocations = 0;
+ AreRelocsRela = false;
+ // The ELF spec states that a value of 0 means the section has
+ // no alignment constraints.
+ uint32_t V = std::max<uint64_t>(Alignment, 1); // An alignment of 0 is normalized to 1.
+ if (!isPowerOf2_64(V))
+ fatal(toString(File) + ": section sh_addralign is not a power of 2");
+ this->Alignment = V;
+ // In ELF, each section can be compressed by zlib, and if compressed,
+ // section name may be mangled by appending "z" (e.g. ".zdebug_info").
+ // If that's the case, demangle section name so that we can handle a
+ // section as if it weren't compressed.
+ if ((Flags & SHF_COMPRESSED) || Name.startswith(".zdebug")) {
+ if (!zlib::isAvailable()) // Reported with error(), so the header is still parsed below.
+ error(toString(File) + ": contains a compressed section, " +
+ "but zlib is not available");
+ parseCompressedHeader();
+ }
+// Normalizes the flags of an input section. SHF_INFO_LINK is always cleared.
+// SHF_GROUP, which marks a section as a member of some comdat group, is
+// cleared too unless we are producing a re-linkable object file, because
+// that flag doesn't make sense in an executable.
+static uint64_t getFlags(uint64_t Flags) {
+  uint64_t Dropped = SHF_INFO_LINK;
+  if (!Config->Relocatable)
+    Dropped |= SHF_GROUP;
+  return Flags & ~Dropped;
+}
+// GNU assembler 2.24 and LLVM 4.0.0's MC (the newest release as of
+// March 2017) do not infer the section type for sections whose names start
+// with ".init_array." or ".fini_array."; they emit SHT_PROGBITS instead of
+// SHT_{INIT,FINI}_ARRAY. For example, this directive yields
+// ".init_array.100" with type SHT_PROGBITS:
+// .section .init_array.100, "aw"
+// This function forces the right type so that such inputs are handled as
+// if they had been correct from the beginning. Any other type or name is
+// returned unchanged.
+static uint64_t getType(uint64_t Type, StringRef Name) {
+  // Only SHT_PROGBITS sections are candidates for correction.
+  if (Type != SHT_PROGBITS)
+    return Type;
+  if (Name.startswith(".init_array."))
+    return SHT_INIT_ARRAY;
+  if (Name.startswith(".fini_array."))
+    return SHT_FINI_ARRAY;
+  return Type;
+}
+template <class ELFT>
+InputSectionBase::InputSectionBase(ObjFile<ELFT> &File,
+ const typename ELFT::Shdr &Hdr,
+ StringRef Name, Kind SectionKind)
+ : InputSectionBase(&File, getFlags(Hdr.sh_flags),
+ getType(Hdr.sh_type, Name), Hdr.sh_entsize, Hdr.sh_link,
+ Hdr.sh_info, Hdr.sh_addralign,
+ getSectionContents(File, Hdr), Name, SectionKind) { // Builds the section from an ELF section header, with flags and type passed through getFlags()/getType().
+ // We reject object files having insanely large alignments even though
+ // they are allowed by the spec. I think 4GB is a reasonable limitation.
+ // We might want to relax this in the future.
+ if (Hdr.sh_addralign > UINT32_MAX) // NOTE(review): runs after the delegated constructor already took sh_addralign as a uint32_t.
+ fatal(toString(&File) + ": section sh_addralign is too large");
+// Returns the size of this section's contents. Synthetic sections report
+// their own size. Otherwise a non-negative UncompressedSize wins over the
+// size of the raw data.
+size_t InputSectionBase::getSize() const {
+  const auto *Synth = dyn_cast<SyntheticSection>(this);
+  if (Synth)
+    return Synth->getSize();
+  // A negative UncompressedSize means no uncompressed size has been recorded.
+  if (UncompressedSize < 0)
+    return RawData.size();
+  return UncompressedSize;
+}
+void InputSectionBase::uncompress() const { // Inflates RawData into a newly allocated buffer and repoints RawData at it.
+ size_t Size = UncompressedSize;
+ UncompressedBuf.reset(new char[Size]);
+ if (Error E =
+ zlib::uncompress(toStringRef(RawData), UncompressedBuf.get(), Size))
+ fatal(toString(this) +
+ ": uncompress failed: " + llvm::toString(std::move(E)));
+ RawData = makeArrayRef((uint8_t *)UncompressedBuf.get(), Size); // RawData now refers to the uncompressed bytes owned by UncompressedBuf.
+// Returns the distance in bytes from the start of the file's buffer to the
+// first byte of this section's data.
+uint64_t InputSectionBase::getOffsetInFile() const {
+  auto *BufBegin = (const uint8_t *)File->MB.getBufferStart();
+  return data().begin() - BufBegin;
+}
+uint64_t SectionBase::getOffset(uint64_t Offset) const { // Translates Offset according to the kind of this section.
+ switch (kind()) {
+ case Output: {
+ auto *OS = cast<OutputSection>(this);
+ // For output sections we treat offset -1 as the end of the section.
+ return Offset == uint64_t(-1) ? OS->Size : Offset;
+ }
+ case Regular:
+ case Synthetic:
+ return cast<InputSection>(this)->getOffset(Offset);
+ case EHFrame:
+ // The file crtbeginT.o has relocations pointing to the start of an empty
+ // .eh_frame that is known to be the first in the link. It does that to
+ // identify the start of the output .eh_frame.
+ return Offset;
+ case Merge:
+ const MergeInputSection *MS = cast<MergeInputSection>(this);
+ if (InputSection *IS = MS->getParent()) // With a parent, the parent-relative offset is translated again through the parent.
+ return IS->getOffset(MS->getParentOffset(Offset));
+ return MS->getParentOffset(Offset);
+ }
+ llvm_unreachable("invalid section kind");
+// Returns the virtual address of Offset within this section: the address of
+// the containing output section (0 when there is none) plus the translated
+// offset.
+uint64_t SectionBase::getVA(uint64_t Offset) const {
+  uint64_t Base = 0;
+  if (const OutputSection *OS = getOutputSection())
+    Base = OS->Addr;
+  return Base + getOffset(Offset);
+}
+OutputSection *SectionBase::getOutputSection() { // Returns the output section this section ends up in, or nullptr if there is none.
+ InputSection *Sec;
+ if (auto *IS = dyn_cast<InputSection>(this))
+ Sec = IS;
+ else if (auto *MS = dyn_cast<MergeInputSection>(this)) // Merge and EH sections are reached through their parent section.
+ Sec = MS->getParent();
+ else if (auto *EH = dyn_cast<EhInputSection>(this))
+ Sec = EH->getParent();
+ else
+ return cast<OutputSection>(this); // Anything else must itself be an OutputSection.
+ return Sec ? Sec->getParent() : nullptr;
+// When a section is compressed, `RawData` consists of a header followed
+// by zlib-compressed data. This function parses a header to initialize
+// `UncompressedSize` member and remove the header from `RawData`.
+void InputSectionBase::parseCompressedHeader() {
+ typedef typename ELF64LE::Chdr Chdr64;
+ typedef typename ELF32LE::Chdr Chdr32;
+ // Old-style header
+ if (Name.startswith(".zdebug")) {
+ if (!toStringRef(RawData).startswith("ZLIB")) {
+ error(toString(this) + ": corrupted compressed section header");
+ return;
+ }
+ RawData = RawData.slice(4); // Skip the 4-byte "ZLIB" magic.
+ if (RawData.size() < 8) {
+ error(toString(this) + ": corrupted compressed section header");
+ return;
+ }
+ UncompressedSize = read64be(RawData.data()); // The next 8 bytes hold the size, big-endian.
+ RawData = RawData.slice(8);
+ // Restore the original section name.
+ // (e.g. ".zdebug_info" -> ".debug_info")
+ Name = Saver.save("." + Name.substr(2));
+ return;
+ }
+ assert(Flags & SHF_COMPRESSED);
+ Flags &= ~(uint64_t)SHF_COMPRESSED; // The flag is cleared once the header is being consumed.
+ // New-style 64-bit header
+ if (Config->Is64) { // NOTE(review): both branches read the header through the little-endian Chdr typedefs above — confirm handling of big-endian inputs.
+ if (RawData.size() < sizeof(Chdr64)) {
+ error(toString(this) + ": corrupted compressed section");
+ return;
+ }
+ auto *Hdr = reinterpret_cast<const Chdr64 *>(RawData.data());
+ if (Hdr->ch_type != ELFCOMPRESS_ZLIB) {
+ error(toString(this) + ": unsupported compression type");
+ return;
+ }
+ UncompressedSize = Hdr->ch_size;
+ RawData = RawData.slice(sizeof(*Hdr));
+ return;
+ }
+ // New-style 32-bit header
+ if (RawData.size() < sizeof(Chdr32)) {
+ error(toString(this) + ": corrupted compressed section");
+ return;
+ }
+ auto *Hdr = reinterpret_cast<const Chdr32 *>(RawData.data());
+ if (Hdr->ch_type != ELFCOMPRESS_ZLIB) {
+ error(toString(this) + ": unsupported compression type");
+ return;
+ }
+ UncompressedSize = Hdr->ch_size;
+ RawData = RawData.slice(sizeof(*Hdr));
+InputSection *InputSectionBase::getLinkOrderDep() const { // Returns the section of the same file at index Link; requires SHF_LINK_ORDER and a non-zero Link.
+ assert(Link);
+ assert(Flags & SHF_LINK_ORDER);
+ return cast<InputSection>(File->getSections()[Link]);
+// Looks through the symbols of this section's file for a defined STT_FUNC
+// symbol in this section whose [Value, Value + Size) range contains Offset.
+// Returns the first match, or nullptr when no function encloses Offset.
+template <class ELFT>
+Defined *InputSectionBase::getEnclosingFunction(uint64_t Offset) {
+  for (Symbol *Sym : File->getSymbols()) {
+    auto *Def = dyn_cast<Defined>(Sym);
+    if (!Def)
+      continue;
+    if (Def->Section != this || Def->Type != STT_FUNC)
+      continue;
+    if (Def->Value <= Offset && Offset < Def->Value + Def->Size)
+      return Def;
+  }
+  return nullptr;
+}
+// Returns a source location string. Used to construct an error message. Falls back from debug info to the enclosing function, then to section+offset.
+template <class ELFT>
+std::string InputSectionBase::getLocation(uint64_t Offset) {
+ std::string SecAndOffset = (Name + "+0x" + utohexstr(Offset)).str();
+ // We don't have file for synthetic sections.
+ if (getFile<ELFT>() == nullptr)
+ return (Config->OutputFile + ":(" + SecAndOffset + ")")
+ .str();
+ // First check if we can get desired values from debugging information.
+ if (Optional<DILineInfo> Info = getFile<ELFT>()->getDILineInfo(this, Offset))
+ return Info->FileName + ":" + std::to_string(Info->Line) + ":(" +
+ SecAndOffset + ")";
+ // File->SourceFile contains STT_FILE symbol that contains a
+ // source file name. If it's missing, we use an object file name.
+ std::string SrcFile = getFile<ELFT>()->SourceFile;
+ if (SrcFile.empty())
+ SrcFile = toString(File);
+ if (Defined *D = getEnclosingFunction<ELFT>(Offset))
+ return SrcFile + ":(function " + toString(*D) + ": " + SecAndOffset + ")";
+ // If there's no symbol, print out the offset in the section.
+ return (SrcFile + ":(" + SecAndOffset + ")");
+// This function is intended to be used for constructing an error message.
+// The returned message looks like this:
+// foo.c:42 (/home/alice/possibly/very/long/path/foo.c:42)
+// Returns an empty string if there's no way to get line info.
+std::string InputSectionBase::getSrcMsg(const Symbol &Sym, uint64_t Offset) {
+ return File->getSrcMsg(Sym, *this, Offset); // Forwards to the owning file, passing this section.
+// Returns a filename string along with an optional section name. This
+// function is intended to be used for constructing an error
+// message. The returned message looks like this:
+// path/to/foo.o:(function bar)
+// or
+// path/to/foo.o:(function bar) in archive path/to/bar.a
+std::string InputSectionBase::getObjMsg(uint64_t Off) {
+ std::string Filename = File->getName();
+ std::string Archive; // Stays empty unless the file came from an archive.
+ if (!File->ArchiveName.empty())
+ Archive = " in archive " + File->ArchiveName;
+ // Find a symbol that encloses a given location.
+ for (Symbol *B : File->getSymbols())
+ if (auto *D = dyn_cast<Defined>(B))
+ if (D->Section == this && D->Value <= Off && Off < D->Value + D->Size)
+ return Filename + ":(" + toString(*D) + ")" + Archive;
+ // If there's no symbol, print out the offset in the section.
+ return (Filename + ":(" + Name + "+0x" + utohexstr(Off) + ")" + Archive)
+ .str();
+InputSection InputSection::Discarded(nullptr, 0, 0, 0, ArrayRef<uint8_t>(), ""); // Built with no file, zero flags/type/alignment, empty data and an empty name.
+InputSection::InputSection(InputFile *F, uint64_t Flags, uint32_t Type,
+ uint32_t Alignment, ArrayRef<uint8_t> Data,
+ StringRef Name, Kind K)
+ : InputSectionBase(F, Flags, Type,
+ /*Entsize*/ 0, /*Link*/ 0, /*Info*/ 0, Alignment, Data,
+ Name, K) {} // Entsize, Link and Info are fixed to 0 by this overload.
+template <class ELFT>
+InputSection::InputSection(ObjFile<ELFT> &F, const typename ELFT::Shdr &Header,
+ StringRef Name)
+ : InputSectionBase(F, Header, Name, InputSectionBase::Regular) {} // Header-based sections are always created with kind Regular.
+// LLVM-style RTTI hook: a SectionBase is an InputSection when its kind is
+// Regular or Synthetic.
+bool InputSection::classof(const SectionBase *S) {
+  const auto K = S->kind();
+  if (K == SectionBase::Regular)
+    return true;
+  return K == SectionBase::Synthetic;
+}
+OutputSection *InputSection::getParent() const { // Returns Parent as an OutputSection; a null Parent yields nullptr.
+ return cast_or_null<OutputSection>(Parent);
+// Copy SHT_GROUP section contents. Used only for the -r option.
+template <class ELFT> void InputSection::copyShtGroup(uint8_t *Buf) {
+ // ELFT::Word is the 32-bit integral type in the target endianness.
+ typedef typename ELFT::Word u32;
+ ArrayRef<u32> From = getDataAs<u32>();
+ auto *To = reinterpret_cast<u32 *>(Buf);
+ // The first entry is not a section number but a flag.
+ *To++ = From[0];
+ // Adjust section numbers because section numbers in an input object
+ // files are different in the output.
+ ArrayRef<InputSectionBase *> Sections = File->getSections();
+ for (uint32_t Idx : From.slice(1))
+ *To++ = Sections[Idx]->getOutputSection()->SectionIndex; // Each input index is replaced by the index of the output section it maps to.
+// For a relocation section (SHT_REL or SHT_RELA) that belongs to a file,
+// returns the section of that file at index Info. Returns nullptr for
+// sections without a file and for any other section type.
+InputSectionBase *InputSection::getRelocatedSection() const {
+  const bool IsRelocSec = Type == SHT_RELA || Type == SHT_REL;
+  if (File && IsRelocSec) {
+    ArrayRef<InputSectionBase *> FileSections = File->getSections();
+    return FileSections[Info];
+  }
+  return nullptr;
+}
+// This is used for -r and --emit-relocs. We can't use memcpy to copy
+// relocations because we need to update symbol table offset and section index
+// for each relocation. So we copy relocations one by one.
+template <class ELFT, class RelTy>
+void InputSection::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) { // Buf: output location for the rewritten records; Rels: the input relocation records.
+ InputSectionBase *Sec = getRelocatedSection(); // The section these relocations apply to.
+ for (const RelTy &Rel : Rels) {
+ RelType Type = Rel.getType(Config->IsMips64EL);
+ Symbol &Sym = getFile<ELFT>()->getRelocTargetSym(Rel);
+ auto *P = reinterpret_cast<typename ELFT::Rela *>(Buf); // Addressed as Rela for both record kinds; r_addend is only written when RelTy::IsRela.
+ Buf += sizeof(RelTy); // Step by the size of the record type actually being emitted.
+ if (RelTy::IsRela)
+ P->r_addend = getAddend<ELFT>(Rel);
+ // Output section VA is zero for -r, so r_offset is an offset within the
+ // section, but for --emit-relocs it is a virtual address.
+ P->r_offset = Sec->getVA(Rel.r_offset);
+ P->setSymbolAndType(In.SymTab->getSymbolIndex(&Sym), Type,
+ Config->IsMips64EL);
+ if (Sym.Type == STT_SECTION) {
+ // We combine multiple section symbols into only one per
+ // section. This means we have to update the addend. That is
+ // trivial for Elf_Rela, but for Elf_Rel we have to write to the
+ // section data. We do that by adding to the Relocation vector.
+ // .eh_frame is horribly special and can reference discarded sections. To
+ // avoid having to parse and recreate .eh_frame, we just replace any
+ // relocation in it pointing to discarded sections with R_*_NONE, which
+ // hopefully creates a frame that is ignored at runtime.
+ auto *D = dyn_cast<Defined>(&Sym);
+ if (!D) {
+ error("STT_SECTION symbol should be defined");
+ continue; // Report and move on to the next relocation.
+ }
+ SectionBase *Section = D->Section->Repl; // Follow Repl to the representative section.
+ if (!Section->Live) {
+ P->setSymbolAndType(0, 0, false); // Target section is not live: rewrite the record with symbol 0 and type 0.
+ continue;
+ }
+ int64_t Addend = getAddend<ELFT>(Rel);
+ const uint8_t *BufLoc = Sec->data().begin() + Rel.r_offset; // Location of the relocated bytes in the input section data.
+ if (!RelTy::IsRela)
+ Addend = Target->getImplicitAddend(BufLoc, Type); // REL: the addend is read from the section contents.
+ if (Config->EMachine == EM_MIPS && Config->Relocatable &&
+ Target->getRelExpr(Type, Sym, BufLoc) == R_MIPS_GOTREL) {
+ // Some MIPS relocations depend on "gp" value. By default,
+ // this value has 0x7ff0 offset from a .got section. But
+ // relocatable files produced by a compiler or a linker
+ // might redefine this default value and we must use it
+ // for a calculation of the relocation result. When we
+ // generate EXE or DSO it's trivial. Generating a relocatable
+ // output is more difficult case because the linker does
+ // not calculate relocations in this mode and loses
+ // individual "gp" values used by each input object file.
+ // As a workaround we add the "gp" value to the relocation
+ // addend and save it back to the file.
+ Addend += Sec->getFile<ELFT>()->MipsGp0;
+ }
+ if (RelTy::IsRela)
+ P->r_addend = Sym.getVA(Addend) - Section->getOutputSection()->Addr; // RELA: store the addend relative to the output section's address.
+ else if (Config->Relocatable)
+ Sec->Relocations.push_back({R_ABS, Type, Rel.r_offset, Addend, &Sym}); // REL with -r: queue an R_ABS entry on the relocated section so the addend is written into its data later.
+ }
+ }
+// The ARM and AArch64 ABI handle pc-relative relocations to undefined weak
+// references specially. The general rule is that the value of the symbol in
+// this context is the address of the place P. A further special case is that
+// branch relocations to an undefined weak reference resolve to the next
+// instruction.
+static uint32_t getARMUndefinedRelativeWeakVA(RelType Type, uint32_t A, // Type: relocation type; A: addend; P: address of the place. Returns the substitute target address.
+ uint32_t P) {
+ switch (Type) {
+ // Unresolved branch relocations to weak references resolve to next
+ // instruction, this will be either 2 or 4 bytes on from P.
+ case R_ARM_THM_JUMP11:
+ return P + 2 + A; // 2 bytes on from P.
+ case R_ARM_CALL:
+ case R_ARM_JUMP24:
+ case R_ARM_PC24:
+ case R_ARM_PLT32:
+ case R_ARM_PREL31:
+ case R_ARM_THM_JUMP19:
+ case R_ARM_THM_JUMP24:
+ return P + 4 + A; // 4 bytes on from P.
+ case R_ARM_THM_CALL:
+ // We don't want an interworking BLX to ARM
+ return P + 5 + A; // NOTE(review): 4 bytes on plus 1; the extra 1 presumably keeps the Thumb bit set -- confirm.
+ // Unresolved non branch pc-relative relocations
+ // R_ARM_TARGET2 which can be resolved relatively is not present as it never
+ // targets a weak-reference.
+ case R_ARM_REL32:
+ return P + A; // The place itself plus the addend.
+ }
+ llvm_unreachable("ARM pc-relative relocation expected\n"); // Reached only for a relocation type not listed above.
+// The comment above getARMUndefinedRelativeWeakVA applies to this function.
+static uint64_t getAArch64UndefinedRelativeWeakVA(uint64_t Type, uint64_t A, // Type: relocation type; A: addend; P: address of the place. Returns the substitute target address.
+ uint64_t P) {
+ switch (Type) {
+ // Unresolved branch relocations to weak references resolve to next
+ // instruction, this is 4 bytes on from P.
+ case R_AARCH64_CALL26:
+ case R_AARCH64_CONDBR19:
+ case R_AARCH64_JUMP26:
+ case R_AARCH64_TSTBR14:
+ return P + 4 + A;
+ // Unresolved non branch pc-relative relocations
+ case R_AARCH64_PREL16:
+ case R_AARCH64_PREL32:
+ case R_AARCH64_PREL64:
+ case R_AARCH64_ADR_PREL_LO21:
+ case R_AARCH64_LD_PREL_LO19:
+ return P + A; // The place itself plus the addend.
+ }
+ llvm_unreachable("AArch64 pc-relative relocation expected\n"); // Reached only for a relocation type not listed above.
+// ARM SBREL relocations are of the form S + A - B where B is the static base.
+// The ARM ABI defines base to be "addressing origin of the output segment
+// defining the symbol S". We defined the "addressing origin"/static base to be
+// the base of the PT_LOAD segment containing the Sym.
+// The procedure call standard only defines a Read Write Position Independent
+// RWPI variant so in practice we should expect the static base to be the base
+// of the RW segment.
+static uint64_t getARMStaticBase(const Symbol &Sym) { // Returns the address of the first section of the PT_LOAD that holds Sym's output section.
+ OutputSection *OS = Sym.getOutputSection();
+ if (!OS || !OS->PtLoad || !OS->PtLoad->FirstSec)
+ fatal("SBREL relocation to " + Sym.getName() + " without static base"); // No output section, no PT_LOAD, or a PT_LOAD without a first section; fatal() presumably does not return -- confirm.
+ return OS->PtLoad->FirstSec->Addr;
+// For R_RISCV_PC_INDIRECT (R_RISCV_PCREL_LO12_{I,S}), the symbol actually
+// points to the corresponding R_RISCV_PCREL_HI20 relocation, and the target VA
+// is calculated using PCREL_HI20's symbol.
+// This function returns the R_RISCV_PCREL_HI20 relocation from
+// R_RISCV_PCREL_LO12's symbol and addend.
+static Relocation *getRISCVPCRelHi20(const Symbol *Sym, uint64_t Addend) { // Returns the matching relocation, or nullptr after reporting an error.
+ const Defined *D = cast<Defined>(Sym); // Sym is expected to be a Defined symbol...
+ InputSection *IS = cast<InputSection>(D->Section); // ...that lives in an InputSection.
+ if (Addend != 0)
+ warn("Non-zero addend in R_RISCV_PCREL_LO12 relocation to " +
+ IS->getObjMsg(D->Value) + " is ignored"); // The addend is only diagnosed; it takes no part in the lookup below.
+ // Relocations are sorted by offset, so we can use std::equal_range to do
+ // binary search.
+ auto Range = std::equal_range(IS->Relocations.begin(), IS->Relocations.end(),
+ D->Value, RelocationOffsetComparator{}); // All relocations located at the symbol's value.
+ for (auto It = std::get<0>(Range); It != std::get<1>(Range); ++It)
+ if (isRelExprOneOf<R_PC>(It->Expr)) // Take the first one whose expression is R_PC.
+ return &*It;
+ error("R_RISCV_PCREL_LO12 relocation points to " + IS->getObjMsg(D->Value) +
+ " without an associated R_RISCV_PCREL_HI20 relocation");
+ return nullptr;
+// A TLS symbol's virtual address is relative to the TLS segment. Add a
+// target-specific adjustment to produce a thread-pointer-relative offset.
+static int64_t getTlsTpOffset() { // Returns the adjustment for Config->EMachine; reads Out::TlsPhdr in the ARM/AArch64 and x86 cases.
+ switch (Config->EMachine) {
+ case EM_ARM:
+ case EM_AARCH64:
+ // Variant 1. The thread pointer points to a TCB with a fixed 2-word size,
+ // followed by a variable amount of alignment padding, followed by the TLS
+ // segment.
+ //
+ // NB: While the ARM/AArch64 ABI formally has a 2-word TCB size, lld
+ // effectively increases the TCB size to 8 words for Android compatibility.
+ // It accomplishes this by increasing the segment's alignment.
+ return alignTo(Config->Wordsize * 2, Out::TlsPhdr->p_align); // Two words, rounded up to the TLS segment's alignment.
+ case EM_386:
+ case EM_X86_64:
+ // Variant 2. The TLS segment is located just before the thread pointer.
+ return -Out::TlsPhdr->p_memsz; // Negative offset of the segment's full in-memory size.
+ case EM_PPC64:
+ // The thread pointer points to a fixed offset from the start of the
+ // executable's TLS segment. An offset of 0x7000 allows a signed 16-bit
+ // offset to reach 0x1000 of TCB/thread-library data and 0xf000 of the
+ // program's TLS segment.
+ return -0x7000;
+ default:
+ llvm_unreachable("unhandled Config->EMachine"); // Other machines are not expected to reach this function.
+ }
+static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A, // Computes the value for one relocation, selected by Expr. A is the addend, P the address of the place, Sym the target symbol.
+ uint64_t P, const Symbol &Sym, RelExpr Expr) {
+ switch (Expr) {
+ case R_INVALID:
+ return 0;
+ case R_ABS:
+ return Sym.getVA(A);
+ case R_ADDEND:
+ return A;
+ case R_ARM_SBREL:
+ return Sym.getVA(A) - getARMStaticBase(Sym);
+ case R_GOT:
+ case R_GOT_PLT:
+ return Sym.getGotVA() + A;
+ case R_GOTONLY_PC:
+ return In.Got->getVA() + A - P;
+ return In.Got->getVA() + A - P + In.Got->getSize(); // NOTE(review): unreachable as shown -- a case label appears to be missing above this line in this extract.
+ case R_GOTREL:
+ return Sym.getVA(A) - In.Got->getVA();
+ return Sym.getVA(A) - In.Got->getVA() - In.Got->getSize(); // NOTE(review): unreachable as shown -- a case label appears to be missing above this line in this extract.
+ case R_GOT_FROM_END:
+ return Sym.getGotOffset() + A - In.Got->getSize();
+ case R_GOT_OFF:
+ return Sym.getGotOffset() + A;
+ return getAArch64Page(Sym.getGotVA() + A) - getAArch64Page(P); // NOTE(review): unreachable as shown -- a case label appears to be missing above this line in this extract.
+ case R_GOT_PC:
+ return Sym.getGotVA() + A - P;
+ return Sym.getGotVA() - In.GotPlt->getVA(); // NOTE(review): unreachable as shown -- a case label appears to be missing above this line in this extract.
+ return Sym.getVA(A) - In.MipsGot->getGp(File); // NOTE(review): unreachable as shown -- a case label appears to be missing above this line in this extract.
+ case R_MIPS_GOT_GP:
+ return In.MipsGot->getGp(File) + A;
+ case R_MIPS_GOT_GP_PC: {
+ // R_MIPS_LO16 expression has R_MIPS_GOT_GP_PC type iff the target
+ // is _gp_disp symbol. In that case we should use the following
+ // formula for calculation "AHL + GP - P + 4". For details see p. 4-19 at
+ // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
+ // microMIPS variants of these relocations use slightly different
+ // expressions: AHL + GP - P + 3 for %lo() and AHL + GP - P - 1 for %hi()
+ // to correctly handle less-significant bit of the microMIPS symbol.
+ uint64_t V = In.MipsGot->getGp(File) + A - P;
+ if (Type == R_MIPS_LO16 || Type == R_MICROMIPS_LO16)
+ V += 4;
+ if (Type == R_MICROMIPS_LO16 || Type == R_MICROMIPS_HI16)
+ V -= 1; // Together with the +4 above this yields +3 for microMIPS %lo() and -1 for %hi().
+ return V;
+ }
+ // If relocation against MIPS local symbol requires GOT entry, this entry
+ // should be initialized by 'page address'. This address is high 16-bits
+ // of sum the symbol's value and the addend.
+ return In.MipsGot->getVA() + In.MipsGot->getPageEntryOffset(File, Sym, A) - // NOTE(review): unreachable as shown -- a case label appears to be missing above this statement in this extract.
+ In.MipsGot->getGp(File);
+ case R_MIPS_GOT_OFF:
+ case R_MIPS_GOT_OFF32:
+ // In case of MIPS if a GOT relocation has non-zero addend this addend
+ // should be applied to the GOT entry content not to the GOT entry offset.
+ // That is why we use separate expression type.
+ return In.MipsGot->getVA() + In.MipsGot->getSymEntryOffset(File, Sym, A) -
+ In.MipsGot->getGp(File);
+ case R_MIPS_TLSGD:
+ return In.MipsGot->getVA() + In.MipsGot->getGlobalDynOffset(File, Sym) -
+ In.MipsGot->getGp(File);
+ case R_MIPS_TLSLD:
+ return In.MipsGot->getVA() + In.MipsGot->getTlsIndexOffset(File) -
+ In.MipsGot->getGp(File);
+ case R_AARCH64_PAGE_PC: {
+ uint64_t Val = Sym.isUndefWeak() ? P + A : Sym.getVA(A); // An undefined weak symbol is treated as if located at the place.
+ return getAArch64Page(Val) - getAArch64Page(P);
+ }
+ case R_AARCH64_PLT_PAGE_PC: {
+ uint64_t Val = Sym.isUndefWeak() ? P + A : Sym.getPltVA() + A; // Same rule, with the PLT entry as the normal target.
+ return getAArch64Page(Val) - getAArch64Page(P);
+ }
+ if (const Relocation *HiRel = getRISCVPCRelHi20(&Sym, A)) // NOTE(review): a "case ...: {" line appears to be missing above this statement in this extract (the closing brace below has no visible opener).
+ return getRelocTargetVA(File, HiRel->Type, HiRel->Addend, Sym.getVA(), // Re-evaluate using the HI20 relocation's own symbol and addend, with this symbol's address as the place.
+ *HiRel->Sym, HiRel->Expr);
+ return 0; // No matching HI20 relocation; getRISCVPCRelHi20 has already reported the error.
+ }
+ case R_PC: {
+ uint64_t Dest;
+ if (Sym.isUndefWeak()) {
+ // On ARM and AArch64 a branch to an undefined weak resolves to the
+ // next instruction, otherwise the place.
+ if (Config->EMachine == EM_ARM)
+ Dest = getARMUndefinedRelativeWeakVA(Type, A, P);
+ else if (Config->EMachine == EM_AARCH64)
+ Dest = getAArch64UndefinedRelativeWeakVA(Type, A, P);
+ else
+ Dest = Sym.getVA(A);
+ } else {
+ Dest = Sym.getVA(A);
+ }
+ return Dest - P;
+ }
+ case R_PLT:
+ return Sym.getPltVA() + A;
+ case R_PLT_PC:
+ case R_PPC_CALL_PLT:
+ return Sym.getPltVA() + A - P;
+ case R_PPC_CALL: {
+ uint64_t SymVA = Sym.getVA(A);
+ // If we have an undefined weak symbol, we might get here with a symbol
+ // address of zero. That could overflow, but the code must be unreachable,
+ // so don't bother doing anything at all.
+ if (!SymVA)
+ return 0;
+ // PPC64 V2 ABI describes two entry points to a function. The global entry
+ // point is used for calls where the caller and callee (may) have different
+ // TOC base pointers and r2 needs to be modified to hold the TOC base for
+ // the callee. For local calls the caller and callee share the same
+ // TOC base and so the TOC pointer initialization code should be skipped by
+ // branching to the local entry point.
+ return SymVA - P + getPPC64GlobalEntryToLocalEntryOffset(Sym.StOther);
+ }
+ case R_PPC_TOC:
+ return getPPC64TocBase() + A;
+ case R_RELAX_GOT_PC:
+ return Sym.getVA(A) - P;
+ case R_TLS:
+ // A weak undefined TLS symbol resolves to the base of the TLS
+ // block, i.e. gets a value of zero. If we pass --gc-sections to
+ // lld and .tbss is not referenced, it gets reclaimed and we don't
+ // create a TLS program header. Therefore, we resolve this
+ // statically to zero.
+ if (Sym.isTls() && Sym.isUndefWeak())
+ return 0;
+ return Sym.getVA(A) + getTlsTpOffset();
+ case R_NEG_TLS:
+ return Out::TlsPhdr->p_memsz - Sym.getVA(A);
+ case R_SIZE:
+ return Sym.getSize() + A;
+ case R_TLSDESC:
+ return In.Got->getGlobalDynAddr(Sym) + A;
+ return getAArch64Page(In.Got->getGlobalDynAddr(Sym) + A) - // NOTE(review): unreachable as shown -- a case label appears to be missing above this statement in this extract.
+ getAArch64Page(P);
+ case R_TLSGD_GOT:
+ return In.Got->getGlobalDynOffset(Sym) + A;
+ return In.Got->getGlobalDynOffset(Sym) + A - In.Got->getSize(); // NOTE(review): unreachable as shown -- a case label appears to be missing above this line in this extract.
+ case R_TLSGD_PC:
+ return In.Got->getGlobalDynAddr(Sym) + A - P;
+ return In.Got->getTlsIndexOff() + A - In.Got->getSize(); // NOTE(review): unreachable as shown -- a case label appears to be missing above this line in this extract.
+ case R_TLSLD_GOT:
+ return In.Got->getTlsIndexOff() + A;
+ case R_TLSLD_PC:
+ return In.Got->getTlsIndexVA() + A - P;
+ default:
+ llvm_unreachable("invalid expression"); // Every expression kind that can reach this function is expected to be handled above.
+ }
+// This function applies relocations to sections without SHF_ALLOC bit.
+// Such sections are never mapped to memory at runtime. Debug sections are
+// an example. Relocations in non-alloc sections are much easier to
+// handle than in allocated sections because it will never need complex
+// treatment such as GOT or PLT (because at runtime no one refers to them).
+// So, we handle relocations for non-alloc sections directly in this
+// function as a performance optimization.
+template <class ELFT, class RelTy>
+void InputSection::relocateNonAlloc(uint8_t *Buf, ArrayRef<RelTy> Rels) { // Buf: buffer that Offset (from getOffset) is added to; Rels: the raw relocation records to apply.
+ const unsigned Bits = sizeof(typename ELFT::uint) * 8; // Width used for sign extension of the computed values.
+ for (const RelTy &Rel : Rels) {
+ RelType Type = Rel.getType(Config->IsMips64EL);
+ // GCC 8.0 or earlier have a bug that they emit R_386_GOTPC relocations
+ // against _GLOBAL_OFFSET_TABLE_ for .debug_info. The bug has been fixed
+ // in 2017 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82630), but we
+ // need to keep this bug-compatible code for a while.
+ if (Config->EMachine == EM_386 && Type == R_386_GOTPC)
+ continue;
+ uint64_t Offset = getOffset(Rel.r_offset);
+ uint8_t *BufLoc = Buf + Offset;
+ int64_t Addend = getAddend<ELFT>(Rel);
+ if (!RelTy::IsRela)
+ Addend += Target->getImplicitAddend(BufLoc, Type); // REL: add the addend stored in the section contents.
+ Symbol &Sym = getFile<ELFT>()->getRelocTargetSym(Rel);
+ RelExpr Expr = Target->getRelExpr(Type, Sym, BufLoc);
+ if (Expr == R_NONE)
+ continue;
+ if (Expr != R_ABS) {
+ std::string Msg = getLocation<ELFT>(Offset) +
+ ": has non-ABS relocation " + toString(Type) +
+ " against symbol '" + toString(Sym) + "'";
+ if (Expr != R_PC) {
+ error(Msg);
+ return; // Stops here: the remaining relocations of this section are not applied.
+ }
+ // If the control reaches here, we found a PC-relative relocation in a
+ // non-ALLOC section. Since non-ALLOC section is not loaded into memory
+ // at runtime, the notion of PC-relative doesn't make sense here. So,
+ // this is a usage error. However, GNU linkers historically accept such
+ // relocations without any errors and relocate them as if they were at
+ // address 0. For bug-compatibility, we accept them with warnings. We
+ // know Steel Bank Common Lisp as of 2018 have this bug.
+ warn(Msg);
+ Target->relocateOne(BufLoc, Type,
+ SignExtend64<Bits>(Sym.getVA(Addend - Offset))); // The place is taken to be Offset itself, i.e. the section is treated as based at address 0.
+ continue;
+ }
+ if (Sym.isTls() && !Out::TlsPhdr)
+ Target->relocateOne(BufLoc, Type, 0); // TLS symbol but no TLS program header: write zero.
+ else
+ Target->relocateOne(BufLoc, Type, SignExtend64<Bits>(Sym.getVA(Addend)));
+ }
+// This is used when '-r' is given.
+// For REL targets, InputSection::copyRelocations() may store artificial
+// relocations aimed to update addends. They are handled in relocateAlloc()
+// for allocatable sections, and this function does the same for
+// non-allocatable sections, such as sections with debug information.
+static void relocateNonAllocForRelocatable(InputSection *Sec, uint8_t *Buf) { // Sec: section whose Relocations list is applied; Buf: buffer that Sec->OutSecOff is added to.
+ const unsigned Bits = Config->Is64 ? 64 : 32; // Width used for sign extension.
+ for (const Relocation &Rel : Sec->Relocations) {
+ // InputSection::copyRelocations() adds only R_ABS relocations.
+ assert(Rel.Expr == R_ABS);
+ uint8_t *BufLoc = Buf + Rel.Offset + Sec->OutSecOff; // Relocation offset within the section, shifted by OutSecOff.
+ uint64_t TargetVA = SignExtend64(Rel.Sym->getVA(Rel.Addend), Bits);
+ Target->relocateOne(BufLoc, Rel.Type, TargetVA);
+ }
+template <class ELFT>
+void InputSectionBase::relocate(uint8_t *Buf, uint8_t *BufEnd) { // Dispatches to the right relocation routine for this section; Buf/BufEnd are forwarded to the helpers.
+ if (Flags & SHF_EXECINSTR)
+ adjustSplitStackFunctionPrologues<ELFT>(Buf, BufEnd); // Executable sections get split-stack prologue fix-ups first.
+ if (Flags & SHF_ALLOC) {
+ relocateAlloc(Buf, BufEnd);
+ return;
+ }
+ auto *Sec = cast<InputSection>(this); // Past this point the section is treated as an InputSection.
+ if (Config->Relocatable)
+ relocateNonAllocForRelocatable(Sec, Buf); // -r: apply only the entries queued in Sec->Relocations.
+ else if (Sec->AreRelocsRela)
+ Sec->relocateNonAlloc<ELFT>(Buf, Sec->template relas<ELFT>());
+ else
+ Sec->relocateNonAlloc<ELFT>(Buf, Sec->template rels<ELFT>());
+void InputSectionBase::relocateAlloc(uint8_t *Buf, uint8_t *BufEnd) { // Applies every entry of Relocations to an SHF_ALLOC section; BufEnd is used for the bounds check in the R_PPC_CALL case.
+ assert(Flags & SHF_ALLOC);
+ const unsigned Bits = Config->Wordsize * 8; // Width used for sign extension.
+ for (const Relocation &Rel : Relocations) {
+ uint64_t Offset = Rel.Offset;
+ if (auto *Sec = dyn_cast<InputSection>(this))
+ Offset += Sec->OutSecOff; // Only InputSections add OutSecOff to the relocation offset.
+ uint8_t *BufLoc = Buf + Offset;
+ RelType Type = Rel.Type;
+ uint64_t AddrLoc = getOutputSection()->Addr + Offset; // Address of the place, passed as P below.
+ RelExpr Expr = Rel.Expr;
+ uint64_t TargetVA = SignExtend64(
+ getRelocTargetVA(File, Type, Rel.Addend, AddrLoc, *Rel.Sym, Expr),
+ Bits);
+ switch (Expr) {
+ case R_RELAX_GOT_PC:
+ Target->relaxGot(BufLoc, TargetVA);
+ break;
+ Target->relaxTlsIeToLe(BufLoc, Type, TargetVA); // NOTE(review): unreachable as shown -- a case label appears to be missing above this line in this extract.
+ break;
+ Target->relaxTlsLdToLe(BufLoc, Type, TargetVA); // NOTE(review): unreachable as shown -- a case label appears to be missing above this line in this extract.
+ break;
+ Target->relaxTlsGdToLe(BufLoc, Type, TargetVA); // NOTE(review): unreachable as shown -- a case label appears to be missing above this line in this extract.
+ break;
+ Target->relaxTlsGdToIe(BufLoc, Type, TargetVA); // NOTE(review): unreachable as shown -- a case label appears to be missing above this line in this extract.
+ break;
+ case R_PPC_CALL:
+ // If this is a call to __tls_get_addr, it may be part of a TLS
+ // sequence that has been relaxed and turned into a nop. In this
+ // case, we don't want to handle it as a call.
+ if (read32(BufLoc) == 0x60000000) // nop
+ break;
+ // Patch a nop (0x60000000) to a ld.
+ if (Rel.Sym->NeedsTocRestore) {
+ if (BufLoc + 8 > BufEnd || read32(BufLoc + 4) != 0x60000000) { // The word after the call must lie inside the buffer and be a nop.
+ error(getErrorLocation(BufLoc) + "call lacks nop, can't restore toc");
+ break;
+ }
+ write32(BufLoc + 4, 0xe8410018); // ld %r2, 24(%r1)
+ }
+ Target->relocateOne(BufLoc, Type, TargetVA);
+ break;
+ default:
+ Target->relocateOne(BufLoc, Type, TargetVA);
+ break;
+ }
+ }
+// For each function-defining prologue, find any calls to __morestack,
+// and replace them with calls to __morestack_non_split.
+static void switchMorestackCallsToMorestackNonSplit( // Prologues: functions whose prologue adjustment was attempted; MorestackCalls: relocations that target __morestack (reordered in place).
+ DenseSet<Defined *> &Prologues, std::vector<Relocation *> &MorestackCalls) {
+ // If the target adjusted a function's prologue, all calls to
+ // __morestack inside that function should be switched to
+ // __morestack_non_split.
+ Symbol *MoreStackNonSplit = Symtab->find("__morestack_non_split");
+ if (!MoreStackNonSplit) {
+ error("Mixing split-stack objects requires a definition of "
+ "__morestack_non_split");
+ return;
+ }
+ // Sort both collections to compare addresses efficiently.
+ llvm::sort(MorestackCalls, [](const Relocation *L, const Relocation *R) {
+ return L->Offset < R->Offset;
+ });
+ std::vector<Defined *> Functions(Prologues.begin(), Prologues.end()); // Copy the set into a vector so it can be ordered by Value.
+ llvm::sort(Functions, [](const Defined *L, const Defined *R) {
+ return L->Value < R->Value;
+ });
+ auto It = MorestackCalls.begin(); // Single forward cursor shared across all functions.
+ for (Defined *F : Functions) {
+ // Find the first call to __morestack within the function.
+ while (It != MorestackCalls.end() && (*It)->Offset < F->Value)
+ ++It;
+ // Adjust all calls inside the function.
+ while (It != MorestackCalls.end() && (*It)->Offset < F->Value + F->Size) {
+ (*It)->Sym = MoreStackNonSplit; // Retarget the relocation to __morestack_non_split.
+ ++It;
+ }
+ }
+static bool enclosingPrologueAttempted(uint64_t Offset, // True if Offset falls inside any function recorded in Prologues.
+ const DenseSet<Defined *> &Prologues) {
+ for (Defined *F : Prologues)
+ if (F->Value <= Offset && Offset < F->Value + F->Size) // Half-open range [Value, Value + Size).
+ return true;
+ return false;
+// If a function compiled for split stack calls a function not
+// compiled for split stack, then the caller needs its prologue
+// adjusted to ensure that the called function will have enough stack
+// available. Find those functions, and adjust their prologues.
+template <class ELFT>
+void InputSectionBase::adjustSplitStackFunctionPrologues(uint8_t *Buf, // Buf: base that getOffset(F->Value) is added to; End: limit handed to the target hook.
+ uint8_t *End) {
+ if (!getFile<ELFT>()->SplitStack)
+ return; // Nothing to do unless the owning file has SplitStack set.
+ DenseSet<Defined *> Prologues; // Functions for which an adjustment has been attempted.
+ std::vector<Relocation *> MorestackCalls; // Relocations whose symbol is exactly "__morestack".
+ for (Relocation &Rel : Relocations) {
+ // Local symbols can't possibly be cross-calls, and should have been
+ // resolved long before this line.
+ if (Rel.Sym->isLocal())
+ continue;
+ // Ignore calls into the split-stack api.
+ if (Rel.Sym->getName().startswith("__morestack")) {
+ if (Rel.Sym->getName().equals("__morestack"))
+ MorestackCalls.push_back(&Rel);
+ continue;
+ }
+ // A relocation to non-function isn't relevant. Sometimes
+ // __morestack is not marked as a function, so this check comes
+ // after the name check.
+ if (Rel.Sym->Type != STT_FUNC)
+ continue;
+ // If the callee's-file was compiled with split stack, nothing to do. In
+ // this context, a "Defined" symbol is one "defined by the binary currently
+ // being produced". So an "undefined" symbol might be provided by a shared
+ // library. It is not possible to tell how such symbols were compiled, so be
+ // conservative.
+ if (Defined *D = dyn_cast<Defined>(Rel.Sym))
+ if (InputSection *IS = cast_or_null<InputSection>(D->Section))
+ if (!IS || !IS->getFile<ELFT>() || IS->getFile<ELFT>()->SplitStack) // NOTE(review): IS is already non-null inside the enclosing if, so the !IS test never fires.
+ continue;
+ if (enclosingPrologueAttempted(Rel.Offset, Prologues))
+ continue; // The enclosing function was already handled for an earlier relocation.
+ if (Defined *F = getEnclosingFunction<ELFT>(Rel.Offset)) {
+ Prologues.insert(F); // Recorded before the attempt, so a failed attempt is not retried.
+ if (Target->adjustPrologueForCrossSplitStack(Buf + getOffset(F->Value),
+ End, F->StOther))
+ continue;
+ if (!getFile<ELFT>()->SomeNoSplitStack)
+ error(lld::toString(this) + ": " + F->getName() +
+ " (with -fsplit-stack) calls " + Rel.Sym->getName() +
+ " (without -fsplit-stack), but couldn't adjust its prologue");
+ }
+ }
+ if (Target->NeedsMoreStackNonSplit)
+ switchMorestackCallsToMorestackNonSplit(Prologues, MorestackCalls);
+template <class ELFT> void InputSection::writeTo(uint8_t *Buf) { // Writes this section at Buf + OutSecOff, choosing the path by section type.
+ if (Type == SHT_NOBITS)
+ return; // SHT_NOBITS: nothing is written.
+ if (auto *S = dyn_cast<SyntheticSection>(this)) {
+ S->writeTo(Buf + OutSecOff); // Synthetic sections write their own contents.
+ return;
+ }
+ // If -r or --emit-relocs is given, then an InputSection
+ // may be a relocation section.
+ if (Type == SHT_RELA) {
+ copyRelocations<ELFT>(Buf + OutSecOff, getDataAs<typename ELFT::Rela>());
+ return;
+ }
+ if (Type == SHT_REL) {
+ copyRelocations<ELFT>(Buf + OutSecOff, getDataAs<typename ELFT::Rel>());
+ return;
+ }
+ // If -r is given, we may have a SHT_GROUP section.
+ if (Type == SHT_GROUP) {
+ copyShtGroup<ELFT>(Buf + OutSecOff);
+ return;
+ }
+ // If this is a compressed section, uncompress section contents directly
+ // to the buffer.
+ if (UncompressedSize >= 0 && !UncompressedBuf) { // A size is recorded but no uncompressed copy is held.
+ size_t Size = UncompressedSize;
+ if (Error E = zlib::uncompress(toStringRef(RawData),
+ (char *)(Buf + OutSecOff), Size))
+ fatal(toString(this) +
+ ": uncompress failed: " + llvm::toString(std::move(E)));
+ uint8_t *BufEnd = Buf + OutSecOff + Size;
+ relocate<ELFT>(Buf, BufEnd); // relocate() receives the base Buf, not Buf + OutSecOff.
+ return;
+ }
+ // Copy section contents from source object file to output file
+ // and then apply relocations.
+ memcpy(Buf + OutSecOff, data().data(), data().size());
+ uint8_t *BufEnd = Buf + OutSecOff + data().size();
+ relocate<ELFT>(Buf, BufEnd);
+void InputSection::replace(InputSection *Other) { // Makes Other a non-live section that points at this section's representative.
+ Alignment = std::max(Alignment, Other->Alignment); // Keep the larger alignment of the two.
+ Other->Repl = Repl; // Redirect Other to this section's Repl.
+ Other->Live = false;
+template <class ELFT>
+EhInputSection::EhInputSection(ObjFile<ELFT> &F, // Builds a section of kind EHFrame from a section header of object file F.
+ const typename ELFT::Shdr &Header,
+ StringRef Name)
+ : InputSectionBase(F, Header, Name, InputSectionBase::EHFrame) {}
+SyntheticSection *EhInputSection::getParent() const { // Returns Parent viewed as a SyntheticSection.
+ return cast_or_null<SyntheticSection>(Parent); // cast_or_null: a null Parent is passed through as null.
+// Returns the index of the first relocation that points to a region between
+// Begin and Begin+Size, or -1 (converted to unsigned) if there is none.
+template <class IntTy, class RelTy>
+static unsigned getReloc(IntTy Begin, IntTy Size, const ArrayRef<RelTy> &Rels,
+ unsigned &RelocI) { // RelocI: in/out search cursor; it is advanced past relocations that precede Begin.
+ // Start search from RelocI for fast access. That works because the
+ // relocations are sorted in .eh_frame.
+ for (unsigned N = Rels.size(); RelocI < N; ++RelocI) {
+ const RelTy &Rel = Rels[RelocI];
+ if (Rel.r_offset < Begin)
+ continue; // Before the region: skip it and keep the cursor moving.
+ if (Rel.r_offset < Begin + Size)
+ return RelocI; // Inside [Begin, Begin + Size).
+ return -1; // First relocation at or after Begin is past the region; the cursor stays on it.
+ }
+ return -1; // Ran out of relocations.
+// .eh_frame is a sequence of CIE or FDE records.
+// This function splits an input section into records and returns them.
+template <class ELFT> void EhInputSection::split() { // Forwards to the typed overload with this section's RELA or REL records.
+ if (AreRelocsRela)
+ split<ELFT>(relas<ELFT>());
+ else
+ split<ELFT>(rels<ELFT>());
+template <class ELFT, class RelTy>
+void EhInputSection::split(ArrayRef<RelTy> Rels) { // Appends one entry to Pieces per record, each tagged with the index of its first relocation.
+ unsigned RelI = 0; // Relocation cursor carried across records.
+ for (size_t Off = 0, End = data().size(); Off != End;) {
+ size_t Size = readEhRecordSize(this, Off);
+ Pieces.emplace_back(Off, this, Size, getReloc(Off, Size, Rels, RelI));
+ // The empty record is the end marker.
+ if (Size == 4)
+ break;
+ Off += Size;
+ }
+static size_t findNull(StringRef S, size_t EntSize) { // Returns the offset of the first all-zero EntSize-wide element of S, or StringRef::npos.
+ // Optimize the common case.
+ if (EntSize == 1)
+ return S.find(0);
+ for (unsigned I = 0, N = S.size(); I != N; I += EntSize) { // NOTE(review): ends only if S.size() is a multiple of EntSize; presumably validated by the caller -- confirm.
+ const char *B = S.begin() + I;
+ if (std::all_of(B, B + EntSize, [](char C) { return C == 0; })) // Every byte of this element is zero.
+ return I;
+ }
+ return StringRef::npos;
+SyntheticSection *MergeInputSection::getParent() const { // Returns Parent viewed as a SyntheticSection.
+ return cast_or_null<SyntheticSection>(Parent); // cast_or_null: a null Parent is passed through as null.
+// Split SHF_STRINGS section. Such section is a sequence of
+// null-terminated strings.
+void MergeInputSection::splitStrings(ArrayRef<uint8_t> Data, size_t EntSize) { // Appends one entry to Pieces per string; EntSize is the character width in bytes.
+ size_t Off = 0; // Offset of the current string within Data.
+ bool IsAlloc = Flags & SHF_ALLOC;
+ StringRef S = toStringRef(Data);
+ while (!S.empty()) {
+ size_t End = findNull(S, EntSize);
+ if (End == StringRef::npos)
+ fatal(toString(this) + ": string is not null terminated");
+ size_t Size = End + EntSize; // The piece includes its terminator.
+ Pieces.emplace_back(Off, xxHash64(S.substr(0, Size)), !IsAlloc); // Offset, content hash, and a flag that is true only for non-SHF_ALLOC sections (presumably the initial liveness -- confirm).
+ S = S.substr(Size);
+ Off += Size;
+ }
+// Split non-SHF_STRINGS section. Such section is a sequence of
+// fixed size records.
+void MergeInputSection::splitNonStrings(ArrayRef<uint8_t> Data, // Appends one entry to Pieces per EntSize-byte record of Data.
+ size_t EntSize) {
+ size_t Size = Data.size();
+ assert((Size % EntSize) == 0); // The data must divide evenly into records.
+ bool IsAlloc = Flags & SHF_ALLOC;
+ for (size_t I = 0; I != Size; I += EntSize)
+ Pieces.emplace_back(I, xxHash64(Data.slice(I, EntSize)), !IsAlloc); // Offset, content hash, and a flag that is true only for non-SHF_ALLOC sections (presumably the initial liveness -- confirm).
+template <class ELFT>
+MergeInputSection::MergeInputSection(ObjFile<ELFT> &F, // Builds a section of kind Merge from a section header of object file F.
+ const typename ELFT::Shdr &Header,
+ StringRef Name)
+ : InputSectionBase(F, Header, Name, InputSectionBase::Merge) {}
+MergeInputSection::MergeInputSection(uint64_t Flags, uint32_t Type, // Builds a Merge section that has no owning input file.
+ uint64_t Entsize, ArrayRef<uint8_t> Data,
+ StringRef Name)
+ : InputSectionBase(nullptr, Flags, Type, Entsize, /*Link*/ 0, /*Info*/ 0, // File is null; Link and Info are zero.
+ /*Alignment*/ Entsize, Data, Name, SectionBase::Merge) {} // The alignment is set equal to Entsize.
+// This function is called after we obtain a complete list of input sections
+// that need to be linked. This is responsible to split section contents
+// into small chunks for further processing.
+// Note that this function is called from parallelForEach. This must be
+// thread-safe (i.e. no memory allocation from the pools).
+void MergeInputSection::splitIntoPieces() {
+ assert(Pieces.empty()); // Expected to run once, before any pieces exist.
+ if (Flags & SHF_STRINGS)
+ splitStrings(data(), Entsize); // String section: split at terminators.
+ else
+ splitNonStrings(data(), Entsize); // Otherwise: split into fixed-size records.
+SectionPiece *MergeInputSection::getSectionPiece(uint64_t Offset) { // Returns the piece that contains input offset Offset; fatal if Offset is outside the section.
+ if (this->data().size() <= Offset)
+ fatal(toString(this) + ": offset is outside the section");
+ // If Offset is not at beginning of a section piece, it is not in the map.
+ // In that case we need to do a binary search of the original section piece vector.
+ auto It2 =
+ llvm::upper_bound(Pieces, Offset, [](uint64_t Offset, SectionPiece P) {
+ return Offset < P.InputOff;
+ });
+ return &It2[-1]; // upper_bound finds the first piece starting after Offset; the piece before it is the one containing Offset.
+// Returns the offset in an output section for a given input offset.
+// Because contents of a mergeable section is not contiguous in output,
+// it is not just an addition to a base output offset.
+uint64_t MergeInputSection::getParentOffset(uint64_t Offset) const {
+ // If Offset is not at beginning of a section piece, it is not in the map.
+ // In that case we need to search from the original section piece vector.
+ const SectionPiece &Piece =
+ *(const_cast<MergeInputSection *>(this)->getSectionPiece (Offset)); // const_cast is needed because getSectionPiece is a non-const member.
+ uint64_t Addend = Offset - Piece.InputOff; // Distance of Offset from the start of its piece.
+ return Piece.OutputOff + Addend; // Same distance from the piece's output offset.
+template InputSection::InputSection(ObjFile<ELF32LE> &, const ELF32LE::Shdr &, // Explicit instantiations follow, each for the four ELF variants: ELF32LE, ELF32BE, ELF64LE, ELF64BE.
+ StringRef);
+template InputSection::InputSection(ObjFile<ELF32BE> &, const ELF32BE::Shdr &,
+ StringRef);
+template InputSection::InputSection(ObjFile<ELF64LE> &, const ELF64LE::Shdr &,
+ StringRef);
+template InputSection::InputSection(ObjFile<ELF64BE> &, const ELF64BE::Shdr &,
+ StringRef);
+template std::string InputSectionBase::getLocation<ELF32LE>(uint64_t); // InputSectionBase::getLocation
+template std::string InputSectionBase::getLocation<ELF32BE>(uint64_t);
+template std::string InputSectionBase::getLocation<ELF64LE>(uint64_t);
+template std::string InputSectionBase::getLocation<ELF64BE>(uint64_t);
+template void InputSection::writeTo<ELF32LE>(uint8_t *); // InputSection::writeTo
+template void InputSection::writeTo<ELF32BE>(uint8_t *);
+template void InputSection::writeTo<ELF64LE>(uint8_t *);
+template void InputSection::writeTo<ELF64BE>(uint8_t *);
+template MergeInputSection::MergeInputSection(ObjFile<ELF32LE> &, // MergeInputSection constructor taking a section header
+ const ELF32LE::Shdr &, StringRef);
+template MergeInputSection::MergeInputSection(ObjFile<ELF32BE> &,
+ const ELF32BE::Shdr &, StringRef);
+template MergeInputSection::MergeInputSection(ObjFile<ELF64LE> &,
+ const ELF64LE::Shdr &, StringRef);
+template MergeInputSection::MergeInputSection(ObjFile<ELF64BE> &,
+ const ELF64BE::Shdr &, StringRef);
+template EhInputSection::EhInputSection(ObjFile<ELF32LE> &, // EhInputSection constructor
+ const ELF32LE::Shdr &, StringRef);
+template EhInputSection::EhInputSection(ObjFile<ELF32BE> &,
+ const ELF32BE::Shdr &, StringRef);
+template EhInputSection::EhInputSection(ObjFile<ELF64LE> &,
+ const ELF64LE::Shdr &, StringRef);
+template EhInputSection::EhInputSection(ObjFile<ELF64BE> &,
+ const ELF64BE::Shdr &, StringRef);
+template void EhInputSection::split<ELF32LE>(); // EhInputSection::split
+template void EhInputSection::split<ELF32BE>();
+template void EhInputSection::split<ELF64LE>();
+template void EhInputSection::split<ELF64BE>();
diff --git a/contrib/llvm/tools/lld/ELF/InputSection.h b/contrib/llvm/tools/lld/ELF/InputSection.h
new file mode 100644
index 000000000000..34f411e87200
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/InputSection.h
@@ -0,0 +1,369 @@
+//===- InputSection.h -------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Config.h"
+#include "Relocations.h"
+#include "Thunks.h"
+#include "lld/Common/LLVM.h"
+#include "llvm/ADT/CachedHashString.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Object/ELF.h"
+namespace lld {
+namespace elf {
+class Symbol;
+struct SectionPiece;
+class Defined;
+class SyntheticSection;
+class MergeSyntheticSection;
+template <class ELFT> class ObjFile;
+class OutputSection;
+// This is the base class of all sections that lld handles. Some are sections in
+// input files, some are sections in the produced output file and some exist
+// just as a convenience for implementing special ways of combining some
+// sections.
+class SectionBase {
+ enum Kind { Regular, EHFrame, Merge, Synthetic, Output }; // Discriminator values; stored in the SectionKind bit-field.
+ Kind kind() const { return (Kind)SectionKind; } // Converts the stored bit-field back to the enum.
+ StringRef Name;
+ // This pointer points to the "real" instance of this section.
+ // Usually Repl == this. However, if ICF merges two sections,
+ // Repl pointer of one section points to another section. So,
+ // if you need to get a pointer to this instance, do not use
+ // this but instead this->Repl.
+ SectionBase *Repl;
+ unsigned SectionKind : 3; // Holds a Kind value; three bits are enough for the five enumerators.
+ // The next two bit fields are only used by InputSectionBase, but we
+ // put them here so the struct packs better.
+ // The garbage collector sets sections' Live bits.
+ // If GC is disabled, all sections are considered live by default.
+ unsigned Live : 1;
+ unsigned Bss : 1;
+ // Set for sections that should not be folded by ICF.
+ unsigned KeepUnique : 1;
+ // These correspond to the fields in Elf_Shdr.
+ uint32_t Alignment;
+ uint64_t Flags;
+ uint64_t Entsize;
+ uint32_t Type;
+ uint32_t Link;
+ uint32_t Info;
+ OutputSection *getOutputSection();
+ const OutputSection *getOutputSection() const { // Const overload; forwards to the non-const version.
+ return const_cast<SectionBase *>(this)->getOutputSection();
+ }
+ // Translate an offset in the input section to an offset in the output
+ // section.
+ uint64_t getOffset(uint64_t Offset) const;
+ uint64_t getVA(uint64_t Offset = 0) const;
+ SectionBase(Kind SectionKind, StringRef Name, uint64_t Flags,
+ uint64_t Entsize, uint64_t Alignment, uint32_t Type, // NOTE(review): Alignment arrives as uint64_t but the member is uint32_t; confirm the narrowing is intended.
+ uint32_t Info, uint32_t Link)
+ : Name(Name), Repl(this), SectionKind(SectionKind), Live(false), // Repl starts out pointing at this object.
+ Bss(false), KeepUnique(false), Alignment(Alignment), Flags(Flags), // Live, Bss and KeepUnique all start out false.
+ Entsize(Entsize), Type(Type), Link(Link), Info(Info) {}
+// This corresponds to a section of an input file.
+class InputSectionBase : public SectionBase {
+ template <class ELFT>
+ InputSectionBase(ObjFile<ELFT> &File, const typename ELFT::Shdr &Header,
+ StringRef Name, Kind SectionKind);
+ InputSectionBase(InputFile *File, uint64_t Flags, uint32_t Type,
+ uint64_t Entsize, uint32_t Link, uint32_t Info,
+ uint32_t Alignment, ArrayRef<uint8_t> Data, StringRef Name,
+ Kind SectionKind);
+ static bool classof(const SectionBase *S) { return S->kind() != Output; } // Every kind except Output counts as an InputSectionBase.
+ // The file which contains this section. Its dynamic type is always
+ // ObjFile<ELFT>, but in order to avoid ELFT, we use InputFile as
+ // its static type.
+ InputFile *File;
+ template <class ELFT> ObjFile<ELFT> *getFile() const { // Typed view of File; cast_or_null lets a null File through.
+ return cast_or_null<ObjFile<ELFT>>(File);
+ }
+ ArrayRef<uint8_t> data() const { // Section contents; calls uncompress() first when needed.
+ if (UncompressedSize >= 0 && !UncompressedBuf) // A size is recorded (default is -1) but no buffer exists yet.
+ uncompress();
+ return RawData;
+ }
+ uint64_t getOffsetInFile() const;
+ // True if this section has already been placed to a linker script
+ // output section. This is needed because, in a linker script, you
+ // can refer to the same section more than once. For example, in
+ // the following linker script,
+ //
+ // .foo : { *(.text) }
+ // .bar : { *(.text) }
+ //
+ // .foo takes all .text sections, and .bar becomes empty. To achieve
+ // this, we need to memorize whether a section has been placed or
+ // not for each input section.
+ bool Assigned = false;
+ // Input sections are part of an output section. Special sections
+ // like .eh_frame and merge sections are first combined into a
+ // synthetic section that is then added to an output section. In all
+ // cases this points one level up.
+ SectionBase *Parent = nullptr;
+ // Relocations that refer to this section.
+ const void *FirstRelocation = nullptr; // Untyped; rels()/relas() cast it to the matching record type.
+ unsigned NumRelocations : 31;
+ unsigned AreRelocsRela : 1; // Selects which of rels()/relas() may be called (each asserts on it).
+ template <class ELFT> ArrayRef<typename ELFT::Rel> rels() const { // View of the relocations as ELFT::Rel records.
+ assert(!AreRelocsRela);
+ return llvm::makeArrayRef(
+ static_cast<const typename ELFT::Rel *>(FirstRelocation),
+ NumRelocations);
+ }
+ template <class ELFT> ArrayRef<typename ELFT::Rela> relas() const { // View of the relocations as ELFT::Rela records.
+ assert(AreRelocsRela);
+ return llvm::makeArrayRef(
+ static_cast<const typename ELFT::Rela *>(FirstRelocation),
+ NumRelocations);
+ }
+ // InputSections that are dependent on us (reverse dependency for GC)
+ llvm::TinyPtrVector<InputSection *> DependentSections;
+ // Returns the size of this section (even if this is a common or BSS section).
+ size_t getSize() const;
+ InputSection *getLinkOrderDep() const;
+ // Get the function symbol that encloses this offset from within the
+ // section.
+ template <class ELFT>
+ Defined *getEnclosingFunction(uint64_t Offset);
+ // Returns a source location string. Used to construct an error message.
+ template <class ELFT> std::string getLocation(uint64_t Offset);
+ std::string getSrcMsg(const Symbol &Sym, uint64_t Offset);
+ std::string getObjMsg(uint64_t Offset);
+ // Each section knows how to relocate itself. These functions apply
+ // relocations, assuming that Buf points to this section's copy in
+ // the mmap'ed output buffer.
+ template <class ELFT> void relocate(uint8_t *Buf, uint8_t *BufEnd);
+ void relocateAlloc(uint8_t *Buf, uint8_t *BufEnd);
+ // The native ELF reloc data type is not very convenient to handle.
+ // So we convert ELF reloc records to our own records in Relocations.cpp.
+ // This vector contains such "cooked" relocations.
+ std::vector<Relocation> Relocations;
+ // A function compiled with -fsplit-stack calling a function
+ // compiled without -fsplit-stack needs its prologue adjusted. Find
+ // such functions and adjust their prologues. This is very similar
+ // to relocation. See https://gcc.gnu.org/wiki/SplitStacks for more
+ // information.
+ template <typename ELFT>
+ void adjustSplitStackFunctionPrologues(uint8_t *Buf, uint8_t *End);
+ template <typename T> llvm::ArrayRef<T> getDataAs() const { // Reinterprets the section contents as an array of T.
+ size_t S = data().size();
+ assert(S % sizeof(T) == 0); // The contents must be a whole number of T elements.
+ return llvm::makeArrayRef<T>((const T *)data().data(), S / sizeof(T));
+ }
+ void parseCompressedHeader();
+ void uncompress() const;
+ mutable ArrayRef<uint8_t> RawData; // mutable: presumably re-pointed by uncompress() const; confirm in InputSection.cpp.
+ // A pointer that owns uncompressed data if a section is compressed by zlib.
+ // Since the feature is not used often, this is usually a nullptr.
+ mutable std::unique_ptr<char[]> UncompressedBuf;
+ int64_t UncompressedSize = -1; // Negative means data() never calls uncompress().
+// SectionPiece represents a piece of splittable section contents.
+// We allocate a lot of these and binary search on them. This means that they
+// have to be as compact as possible, which is why we don't store the size (can
+// be found by looking at the next one).
+struct SectionPiece {
+ SectionPiece(size_t Off, uint32_t Hash, bool Live)
+ : InputOff(Off), Hash(Hash), OutputOff(0), // NOTE(review): Off is a size_t stored in a 32-bit field; confirm offsets always fit.
+ Live(Live || !Config->GcSections) {} // Every piece is live when Config->GcSections is off.
+ uint32_t InputOff;
+ uint32_t Hash;
+ int64_t OutputOff : 63; // Shares one 64-bit word with Live.
+ uint64_t Live : 1;
+static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big"); // Two 32-bit fields plus one 64-bit word of bit-fields.
+// This corresponds to a SHF_MERGE section of an input file.
+class MergeInputSection : public InputSectionBase {
+ template <class ELFT>
+ MergeInputSection(ObjFile<ELFT> &F, const typename ELFT::Shdr &Header,
+ StringRef Name);
+ MergeInputSection(uint64_t Flags, uint32_t Type, uint64_t Entsize,
+ ArrayRef<uint8_t> Data, StringRef Name);
+ static bool classof(const SectionBase *S) { return S->kind() == Merge; } // Matches only sections whose kind is Merge.
+ void splitIntoPieces();
+ // Translate an offset in the input section to an offset in the parent
+ // MergeSyntheticSection.
+ uint64_t getParentOffset(uint64_t Offset) const;
+ // Splittable sections are handled as a sequence of data
+ // rather than a single large blob of data.
+ std::vector<SectionPiece> Pieces;
+ // Returns I'th piece's data. This function is very hot when
+ // string merging is enabled, so we want to inline.
+ llvm::CachedHashStringRef getData(size_t I) const {
+ size_t Begin = Pieces[I].InputOff;
+ size_t End = // The piece ends where the next one starts; the last piece ends at the end of the data.
+ (Pieces.size() - 1 == I) ? data().size() : Pieces[I + 1].InputOff;
+ return {toStringRef(data().slice(Begin, End - Begin)), Pieces[I].Hash}; // Pairs the bytes with the hash stored in the piece.
+ }
+ // Returns the SectionPiece at a given input section offset.
+ SectionPiece *getSectionPiece(uint64_t Offset);
+ const SectionPiece *getSectionPiece(uint64_t Offset) const { // Const overload; forwards to the non-const version.
+ return const_cast<MergeInputSection *>(this)->getSectionPiece(Offset);
+ }
+ SyntheticSection *getParent() const;
+ void splitStrings(ArrayRef<uint8_t> A, size_t Size);
+ void splitNonStrings(ArrayRef<uint8_t> A, size_t Size);
+struct EhSectionPiece { // One piece of a split EhInputSection (stored in EhInputSection::Pieces).
+ EhSectionPiece(size_t Off, InputSectionBase *Sec, uint32_t Size,
+ unsigned FirstRelocation)
+ : InputOff(Off), Sec(Sec), Size(Size), FirstRelocation(FirstRelocation) {}
+ ArrayRef<uint8_t> data() { // The Size bytes that start at InputOff inside Sec's data.
+ return {Sec->data().data() + this->InputOff, Size};
+ }
+ size_t InputOff;
+ ssize_t OutputOff = -1; // Starts at -1; presumably meaning "no output offset assigned yet" (confirm at the use sites).
+ InputSectionBase *Sec;
+ uint32_t Size;
+ unsigned FirstRelocation;
+// This corresponds to a .eh_frame section of an input file.
+class EhInputSection : public InputSectionBase {
+ template <class ELFT>
+ EhInputSection(ObjFile<ELFT> &F, const typename ELFT::Shdr &Header,
+ StringRef Name);
+ static bool classof(const SectionBase *S) { return S->kind() == EHFrame; } // Matches only sections whose kind is EHFrame.
+ template <class ELFT> void split();
+ template <class ELFT, class RelTy> void split(ArrayRef<RelTy> Rels); // Overload taking the relocation records explicitly.
+ // Splittable sections are handled as a sequence of data
+ // rather than a single large blob of data.
+ std::vector<EhSectionPiece> Pieces;
+ SyntheticSection *getParent() const;
+// This is a section that is added directly to an output section
+// instead of needing special combination via a synthetic section. This
+// includes all input sections with the exceptions of SHF_MERGE and
+// .eh_frame. It also includes the synthetic sections themselves.
+class InputSection : public InputSectionBase {
+ InputSection(InputFile *F, uint64_t Flags, uint32_t Type, uint32_t Alignment,
+ ArrayRef<uint8_t> Data, StringRef Name, Kind K = Regular); // The kind defaults to Regular.
+ template <class ELFT>
+ InputSection(ObjFile<ELFT> &F, const typename ELFT::Shdr &Header,
+ StringRef Name);
+ // Write this section to a mmap'ed file, assuming Buf is pointing to
+ // beginning of the output section.
+ template <class ELFT> void writeTo(uint8_t *Buf);
+ uint64_t getOffset(uint64_t Offset) const { return OutSecOff + Offset; } // Input-section offset shifted by OutSecOff.
+ OutputSection *getParent() const;
+ // This variable has two usages. Initially, it represents an index in the
+ // OutputSection's InputSection list, and is used when ordering SHF_LINK_ORDER
+ // sections. After assignAddresses is called, it represents the offset from
+ // the beginning of the output section this section was assigned to.
+ uint64_t OutSecOff = 0;
+ static bool classof(const SectionBase *S);
+ InputSectionBase *getRelocatedSection() const;
+ template <class ELFT, class RelTy>
+ void relocateNonAlloc(uint8_t *Buf, llvm::ArrayRef<RelTy> Rels);
+ // Used by ICF.
+ uint32_t Class[2] = {0, 0};
+ // Called by ICF to merge two input sections.
+ void replace(InputSection *Other);
+ static InputSection Discarded; // Single shared instance; its role is not visible in this header.
+ template <class ELFT, class RelTy>
+ void copyRelocations(uint8_t *Buf, llvm::ArrayRef<RelTy> Rels);
+ template <class ELFT> void copyShtGroup(uint8_t *Buf);
+// The list of all input sections.
+extern std::vector<InputSectionBase *> InputSections; // Declaration only; the definition lives in another translation unit.
+} // namespace elf
+std::string toString(const elf::InputSectionBase *);
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/LTO.cpp b/contrib/llvm/tools/lld/ELF/LTO.cpp
new file mode 100644
index 000000000000..ca44581780e4
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/LTO.cpp
@@ -0,0 +1,296 @@
+//===- LTO.cpp ------------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "LTO.h"
+#include "Config.h"
+#include "InputFiles.h"
+#include "LinkerScript.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/TargetOptionsCommandFlags.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/LTO/Caching.h"
+#include "llvm/LTO/Config.h"
+#include "llvm/LTO/LTO.h"
+#include "llvm/Object/SymbolicFile.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <algorithm>
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <vector>
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::ELF;
+using namespace lld;
+using namespace lld::elf;
+// Opens File through a raw_fd_ostream and hands back the owning stream.
+// If the open fails, an error is reported and a null pointer is returned.
+// Used to create the files that list objects for the final link of
+// distributed ThinLTO.
+static std::unique_ptr<raw_fd_ostream> openFile(StringRef File) {
+  std::error_code Err;
+  std::unique_ptr<raw_fd_ostream> Stream =
+      llvm::make_unique<raw_fd_ostream>(File, Err, sys::fs::OpenFlags::F_None);
+  if (!Err)
+    return Stream;
+  error("cannot open " + File + ": " + Err.message());
+  return nullptr;
+static std::string getThinLTOOutputFile(StringRef ModulePath) { // Forwards to lto::getThinLTOOutputFile with the prefix pair from Config.
+ return lto::getThinLTOOutputFile(ModulePath,
+ Config->ThinLTOPrefixReplace.first,
+ Config->ThinLTOPrefixReplace.second);
+static lto::Config createConfig() { // Builds the lto::Config for this link from the global Config and the codegen flags.
+ lto::Config C;
+ // LLD supports the new relocations and address-significance tables.
+ C.Options = InitTargetOptionsFromCodeGenFlags();
+ C.Options.RelaxELFRelocations = true;
+ C.Options.EmitAddrsig = true;
+ // Always emit a section per function/datum with LTO.
+ C.Options.FunctionSections = true;
+ C.Options.DataSections = true;
+ if (Config->Relocatable)
+ C.RelocModel = None; // Relocatable output: the relocation model is left unset.
+ else if (Config->Pic)
+ C.RelocModel = Reloc::PIC_;
+ else
+ C.RelocModel = Reloc::Static;
+ C.CodeModel = GetCodeModelFromCMModel();
+ C.DisableVerify = Config->DisableVerify;
+ C.DiagHandler = diagnosticHandler;
+ C.OptLevel = Config->LTOO;
+ C.CPU = GetCPUStr();
+ C.MAttrs = GetMAttrs();
+ // Set up a custom pipeline if we've been asked to.
+ C.OptPipeline = Config->LTONewPmPasses;
+ C.AAPipeline = Config->LTOAAPipeline;
+ // Set up optimization remarks if we've been asked to.
+ C.RemarksFilename = Config->OptRemarksFilename;
+ C.RemarksWithHotness = Config->OptRemarksWithHotness;
+ C.SampleProfile = Config->LTOSampleProfile;
+ C.UseNewPM = Config->LTONewPassManager;
+ C.DebugPassManager = Config->LTODebugPassManager;
+ C.DwoDir = Config->DwoDir;
+ if (Config->EmitLLVM) { // Write the module as bitcode to the output file instead of generating native code.
+ C.PostInternalizeModuleHook = [](size_t Task, const Module &M) {
+ if (std::unique_ptr<raw_fd_ostream> OS = openFile(Config->OutputFile))
+ WriteBitcodeToFile(M, *OS, false);
+ return false; // NOTE(review): false presumably tells LTO to stop after this hook; confirm against lto::Config.
+ };
+ }
+ if (Config->SaveTemps)
+ checkError(C.addSaveTemps(Config->OutputFile.str() + ".", // Temp file names are prefixed with "<output file>.".
+ /*UseInputModulePath*/ true));
+ return C;
+BitcodeCompiler::BitcodeCompiler() { // Sets up IndexFile, LTOObj and UsedStartStop from the global Config and symbol table.
+ // Initialize IndexFile.
+ if (!Config->ThinLTOIndexOnlyArg.empty())
+ IndexFile = openFile(Config->ThinLTOIndexOnlyArg); // Stays null if the file cannot be opened (openFile reports the error).
+ // Initialize LTOObj.
+ lto::ThinBackend Backend; // Left default-constructed when neither branch below applies.
+ if (Config->ThinLTOIndexOnly) {
+ auto OnIndexWrite = [&](StringRef S) { ThinIndices.erase(S); }; // Drops a module from ThinIndices once its index has been written.
+ Backend = lto::createWriteIndexesThinBackend(
+ Config->ThinLTOPrefixReplace.first, Config->ThinLTOPrefixReplace.second,
+ Config->ThinLTOEmitImportsFiles, IndexFile.get(), OnIndexWrite);
+ } else if (Config->ThinLTOJobs != -1U) { // -1U appears to mean "no explicit job count"; TODO confirm.
+ Backend = lto::createInProcessThinBackend(Config->ThinLTOJobs);
+ }
+ LTOObj = llvm::make_unique<lto::LTO>(createConfig(), Backend,
+ Config->LTOPartitions);
+ // Initialize UsedStartStop.
+ for (Symbol *Sym : Symtab->getSymbols()) {
+ StringRef S = Sym->getName();
+ for (StringRef Prefix : {"__start_", "__stop_"})
+ if (S.startswith(Prefix))
+ UsedStartStop.insert(S.substr(Prefix.size())); // Records the name with the prefix stripped.
+ }
+BitcodeCompiler::~BitcodeCompiler() = default; // Defaulted here, where llvm/LTO/LTO.h is included; LTO.h only forward-declares lto::LTO for its unique_ptr member.
+static void undefine(Symbol *S) { // Replaces S in place with an Undefined symbol (no file, STB_GLOBAL, STV_DEFAULT), keeping its name and type.
+ replaceSymbol<Undefined>(S, nullptr, S->getName(), STB_GLOBAL, STV_DEFAULT,
+ S->Type);
+void BitcodeCompiler::add(BitcodeFile &F) { // Computes a resolution for every symbol of F and hands the file to LTOObj.
+ lto::InputFile &Obj = *F.Obj;
+ bool IsExec = !Config->Shared && !Config->Relocatable; // True when the output is neither a shared object nor relocatable.
+ if (Config->ThinLTOIndexOnly)
+ ThinIndices.insert(Obj.getName());
+ ArrayRef<Symbol *> Syms = F.getSymbols();
+ ArrayRef<lto::InputFile::Symbol> ObjSyms = Obj.symbols(); // Indexed in parallel with Syms below.
+ std::vector<lto::SymbolResolution> Resols(Syms.size());
+ // Provide a resolution to the LTO API for each symbol.
+ for (size_t I = 0, E = Syms.size(); I != E; ++I) {
+ Symbol *Sym = Syms[I];
+ const lto::InputFile::Symbol &ObjSym = ObjSyms[I];
+ lto::SymbolResolution &R = Resols[I];
+ // Ideally we shouldn't check for SF_Undefined but currently IRObjectFile
+ // reports two symbols for module ASM defined. Without this check, lld
+ // flags an undefined in IR with a definition in ASM as prevailing.
+ // Once IRObjectFile is fixed to report only one symbol this hack can
+ // be removed.
+ R.Prevailing = !ObjSym.isUndefined() && Sym->File == &F;
+ // We ask LTO to preserve the following global symbols:
+ // 1) All symbols when doing a relocatable link, so that they can be used
+ // for doing the final link.
+ // 2) Symbols that are used in regular objects.
+ // 3) C named sections if we have corresponding __start_/__stop_ symbol.
+ // 4) Symbols that are defined in bitcode files and used for dynamic linking.
+ R.VisibleToRegularObj = Config->Relocatable || Sym->IsUsedInRegularObj ||
+ (R.Prevailing && Sym->includeInDynsym()) ||
+ UsedStartStop.count(ObjSym.getSectionName());
+ const auto *DR = dyn_cast<Defined>(Sym);
+ R.FinalDefinitionInLinkageUnit =
+ (IsExec || Sym->Visibility != STV_DEFAULT) && DR &&
+ // Skip absolute symbols from ELF objects, otherwise PC-rel relocations
+ // will be generated for them, triggering linker errors.
+ // Symbol section is always null for bitcode symbols, hence the check
+ // for isElf(). Skip linker script defined symbols as well: they have
+ // no File defined.
+ !(DR->Section == nullptr && (!Sym->File || Sym->File->isElf()));
+ if (R.Prevailing)
+ undefine(Sym); // The symbol is turned back into an Undefined once it has been marked prevailing.
+ // We tell LTO to not apply interprocedural optimization for wrapped
+ // (with --wrap) symbols because otherwise LTO would inline them while
+ // their values are still not final.
+ R.LinkerRedefined = !Sym->CanInline;
+ }
+ checkError(LTOObj->add(std::move(F.Obj), Resols)); // F.Obj is moved from here; Obj must not be used afterwards.
+static void createEmptyIndex(StringRef ModulePath) { // Writes a ".thinlto.bc" index marked as skipped for ModulePath, plus an ".imports" file if requested.
+ std::string Path = replaceThinLTOSuffix(getThinLTOOutputFile(ModulePath));
+ std::unique_ptr<raw_fd_ostream> OS = openFile(Path + ".thinlto.bc");
+ if (!OS) // openFile has already reported the error.
+ return;
+ ModuleSummaryIndex M(/*HaveGVs*/ false);
+ M.setSkipModuleByDistributedBackend();
+ WriteIndexToFile(M, *OS);
+ if (Config->ThinLTOEmitImportsFiles)
+ openFile(Path + ".imports"); // The stream is discarded at once; only the file itself is wanted.
+// Merge all the bitcode files we have seen, codegen the result
+// and return the resulting ObjectFile(s).
+std::vector<InputFile *> BitcodeCompiler::compile() {
+ unsigned MaxTasks = LTOObj->getMaxTasks();
+ Buf.resize(MaxTasks); // One in-memory output buffer per task.
+ Files.resize(MaxTasks); // One slot per task for buffers handed back by the cache.
+ // The --thinlto-cache-dir option specifies the path to a directory in which
+ // to cache native object files for ThinLTO incremental builds. If a path was
+ // specified, configure LTO to use it as the cache directory.
+ lto::NativeObjectCache Cache;
+ if (!Config->ThinLTOCacheDir.empty())
+ Cache = check(
+ lto::localCache(Config->ThinLTOCacheDir,
+ [&](size_t Task, std::unique_ptr<MemoryBuffer> MB) {
+ Files[Task] = std::move(MB);
+ }));
+ checkError(LTOObj->run(
+ [&](size_t Task) { // Stream factory: native code for Task is written into Buf[Task].
+ return llvm::make_unique<lto::NativeObjectStream>(
+ llvm::make_unique<raw_svector_ostream>(Buf[Task]));
+ },
+ Cache));
+ // Emit empty index files for non-indexed files
+ for (StringRef S : ThinIndices) { // Only modules never erased by the OnIndexWrite callback remain here.
+ std::string Path = getThinLTOOutputFile(S);
+ openFile(Path + ".thinlto.bc"); // The stream is discarded at once; only the file itself is wanted.
+ if (Config->ThinLTOEmitImportsFiles)
+ openFile(Path + ".imports");
+ }
+ // If LazyObjFile has not been added to link, emit empty index files.
+ // This is needed because this is what GNU gold plugin does and we have a
+ // distributed build system that depends on that behavior.
+ if (Config->ThinLTOIndexOnly) {
+ for (LazyObjFile *F : LazyObjFiles)
+ if (!F->AddedToLink && isBitcode(F->MB))
+ createEmptyIndex(F->getName());
+ if (!Config->LTOObjPath.empty())
+ saveBuffer(Buf[0], Config->LTOObjPath); // NOTE(review): reads Buf[0], so this assumes MaxTasks is at least 1; confirm.
+ // ThinLTO with index only option is required to generate only the index
+ // files. After that, we exit from linker and ThinLTO backend runs in a
+ // distributed environment.
+ if (IndexFile)
+ IndexFile->close();
+ return {};
+ }
+ if (!Config->ThinLTOCacheDir.empty())
+ pruneCache(Config->ThinLTOCacheDir, Config->ThinLTOCachePolicy);
+ std::vector<InputFile *> Ret;
+ for (unsigned I = 0; I != MaxTasks; ++I) {
+ if (Buf[I].empty()) // Nothing was written for this task.
+ continue;
+ if (Config->SaveTemps) {
+ if (I == 0) // Task 0 is saved without a number in its file name.
+ saveBuffer(Buf[I], Config->OutputFile + ".lto.o");
+ else
+ saveBuffer(Buf[I], Config->OutputFile + Twine(I) + ".lto.o");
+ }
+ InputFile *Obj = createObjectFile(MemoryBufferRef(Buf[I], "lto.tmp"));
+ Ret.push_back(Obj);
+ }
+ for (std::unique_ptr<MemoryBuffer> &File : Files) // Objects stored by the cache callback are appended after the Buf ones.
+ if (File)
+ Ret.push_back(createObjectFile(*File));
+ return Ret;
diff --git a/contrib/llvm/tools/lld/ELF/LTO.h b/contrib/llvm/tools/lld/ELF/LTO.h
new file mode 100644
index 000000000000..a190da3e5996
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/LTO.h
@@ -0,0 +1,63 @@
+//===- LTO.h ----------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file provides a way to combine bitcode files into one ELF
+// file by compiling them using LLVM.
+// If LTO is in use, your input files are not regular ELF files
+// but LLVM bitcode files instead. In that case, the linker has to
+// convert the bitcode files into the native format so that we can create
+// an ELF file that contains native code. This file provides that
+// functionality.
+#ifndef LLD_ELF_LTO_H
+#define LLD_ELF_LTO_H
+#include "lld/Common/LLVM.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+#include <memory>
+#include <vector>
+namespace llvm {
+namespace lto {
+class LTO;
+} // namespace llvm
+namespace lld {
+namespace elf {
+class BitcodeFile;
+class InputFile;
+class LazyObjFile;
+class BitcodeCompiler { // Collects bitcode files and compiles them through llvm::lto::LTO.
+ BitcodeCompiler();
+ ~BitcodeCompiler();
+ void add(BitcodeFile &F); // Registers one bitcode file for compilation.
+ std::vector<InputFile *> compile(); // Runs LTO and returns the resulting object files.
+ std::unique_ptr<llvm::lto::LTO> LTOObj;
+ std::vector<SmallString<0>> Buf; // Per-task native code written during compile().
+ std::vector<std::unique_ptr<MemoryBuffer>> Files; // Per-task buffers handed back by the ThinLTO cache.
+ llvm::DenseSet<StringRef> UsedStartStop; // Symbol names with their __start_/__stop_ prefix stripped.
+ std::unique_ptr<llvm::raw_fd_ostream> IndexFile;
+ llvm::DenseSet<StringRef> ThinIndices; // Index-only mode: modules whose index has not been written yet.
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/LinkerScript.cpp b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp
new file mode 100644
index 000000000000..fbc025416205
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp
@@ -0,0 +1,1144 @@
+//===- LinkerScript.cpp ---------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file contains the parser/evaluator of the linker script.
+#include "LinkerScript.h"
+#include "Config.h"
+#include "InputSection.h"
+#include "OutputSections.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+#include "Writer.h"
+#include "lld/Common/Memory.h"
+#include "lld/Common/Strings.h"
+#include "lld/Common/Threads.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <string>
+#include <vector>
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::object;
+using namespace llvm::support::endian;
+using namespace lld;
+using namespace lld::elf;
+LinkerScript *elf::Script;
+// Returns the address of the output section that holds InputSec. If no
+// output section has been assigned, reports an error at Loc and yields 0.
+static uint64_t getOutputSectionVA(SectionBase *InputSec, StringRef Loc) {
+  OutputSection *Out = InputSec->getOutputSection();
+  if (Out)
+    return Out->Addr;
+  error(Loc + ": unable to evaluate expression: input section " +
+        InputSec->Name + " has no output section assigned");
+  return 0;
+// Final value of the expression result, rounded up to Alignment.
+// Section-relative values are converted to an address first.
+uint64_t ExprValue::getValue() const {
+  if (!Sec)
+    return alignTo(Val, Alignment);
+  uint64_t Addr = Sec->getOffset(Val) + getOutputSectionVA(Sec, Loc);
+  return alignTo(Addr, Alignment);
+// Address of the start of Sec, or 0 when the value has no section.
+uint64_t ExprValue::getSecAddr() const {
+  if (!Sec)
+    return 0;
+  return Sec->getOffset(0) + getOutputSectionVA(Sec, Loc);
+uint64_t ExprValue::getSectionOffset() const {
+  // With a non-trivial alignment the full value is always required.
+  if (Alignment != 1)
+    return getValue() - getSecAddr();
+  // Same when the output section is already known.
+  if (Sec && Sec->getOutputSection())
+    return getValue() - getSecAddr();
+  // Otherwise the offset is simply Val. Taking this shortcut lets the
+  // function succeed before the output section has been determined.
+  return Val;
+OutputSection *LinkerScript::createOutputSection(StringRef Name, // Returns the section for an output section description at Location.
+ StringRef Location) {
+ OutputSection *&SecRef = NameToOutputSection[Name]; // Reference into the map; presumably a null entry is created for a new name.
+ OutputSection *Sec;
+ if (SecRef && SecRef->Location.empty()) {
+ // There was a forward reference.
+ Sec = SecRef; // Reuse the section created earlier, which has no Location yet.
+ } else {
+ Sec = make<OutputSection>(Name, SHT_NOBITS, 0);
+ if (!SecRef) // Only the first section created for a name is recorded in the map.
+ SecRef = Sec;
+ }
+ Sec->Location = Location;
+ return Sec;
+// Looks up the output section registered under Name, creating and
+// registering a new SHT_PROGBITS one if there is none yet.
+OutputSection *LinkerScript::getOrCreateOutputSection(StringRef Name) {
+  OutputSection *&Slot = NameToOutputSection[Name];
+  if (Slot)
+    return Slot;
+  Slot = make<OutputSection>(Name, SHT_PROGBITS, 0);
+  return Slot;
+// Expands the memory region by the specified size.
+static void expandMemoryRegion(MemoryRegion *MemRegion, uint64_t Size,
+ StringRef RegionName, StringRef SecName) {
+ MemRegion->CurPos += Size;
+ uint64_t NewSize = MemRegion->CurPos - MemRegion->Origin;
+ if (NewSize > MemRegion->Length)
+ error("section '" + SecName + "' will not fit in region '" + RegionName +
+ "': overflowed by " + Twine(NewSize - MemRegion->Length) + " bytes");
+void LinkerScript::expandMemoryRegions(uint64_t Size) { // Grows the current context's memory region and LMA region by Size.
+ if (Ctx->MemRegion)
+ expandMemoryRegion(Ctx->MemRegion, Size, Ctx->MemRegion->Name,
+ Ctx->OutSec->Name);
+ // Only expand the LMARegion if it is different from MemRegion.
+ if (Ctx->LMARegion && Ctx->MemRegion != Ctx->LMARegion) // Avoids counting Size twice when both point at the same region.
+ expandMemoryRegion(Ctx->LMARegion, Size, Ctx->LMARegion->Name,
+ Ctx->OutSec->Name);
+void LinkerScript::expandOutputSection(uint64_t Size) { // Grows the current output section and its memory regions by Size.
+ Ctx->OutSec->Size += Size;
+ expandMemoryRegions(Size);
+void LinkerScript::setDot(Expr E, const Twine &Loc, bool InSec) { // Evaluates E and makes the result the new location counter.
+ uint64_t Val = E().getValue();
+ if (Val < Dot && InSec) // Moving backward is only diagnosed inside an output section.
+ error(Loc + ": unable to move location counter backward for: " +
+ Ctx->OutSec->Name);
+ // Update to location counter means update to section size.
+ if (InSec)
+ expandOutputSection(Val - Dot); // NOTE(review): still runs after the error above, with Val - Dot wrapped as unsigned.
+ else
+ expandMemoryRegions(Val - Dot); // NOTE(review): if Val < Dot this wraps; CurPos += Size then effectively moves backward. Confirm intended.
+ Dot = Val;
+// Tells whether a symbol assignment should produce a symbol defined by the
+// linker script. Shared by the early-declaration pass and the pass that
+// finalizes symbol values.
+static bool shouldDefineSym(SymbolAssignment *Cmd) {
+  // An assignment to the location counter never defines a symbol.
+  if (Cmd->Name == ".")
+    return false;
+  // A PROVIDE()d symbol is defined only when it is already present in the
+  // symbol table and not defined there; any other assignment always is.
+  if (Cmd->Provide) {
+    Symbol *Existing = Symtab->find(Cmd->Name);
+    return Existing && !Existing->isDefined();
+  }
+  return true;
+// This function is called from processSectionCommands,
+// while we are fixing the output section layout.
+void LinkerScript::addSymbol(SymbolAssignment *Cmd) {
+ if (!shouldDefineSym(Cmd))
+ return;
+ // Define a symbol.
+ Symbol *Sym;
+ uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT;
+ std::tie(Sym, std::ignore) = Symtab->insert(Cmd->Name, Visibility, // The second element of the returned pair is ignored.
+ /*CanOmitFromDynSym*/ false,
+ /*File*/ nullptr);
+ ExprValue Value = Cmd->Expression();
+ SectionBase *Sec = Value.isAbsolute() ? nullptr : Value.Sec; // Absolute values are not tied to any section.
+ // When this function is called, section addresses have not been
+ // fixed yet. So, we may or may not know the value of the RHS
+ // expression.
+ //
+ // For example, if an expression is `x = 42`, we know x is always 42.
+ // However, if an expression is `x = .`, there's no way to know its
+ // value at the moment.
+ //
+ // We want to set symbol values early if we can. This allows us to
+ // use symbols as variables in linker scripts. Doing so allows us to
+ // write expressions like this: `alignment = 16; . = ALIGN(., alignment)`.
+ uint64_t SymValue = Value.Sec ? 0 : Value.getValue(); // Section-relative values get 0 for now.
+ replaceSymbol<Defined>(Sym, nullptr, Cmd->Name, STB_GLOBAL, Visibility,
+ STT_NOTYPE, SymValue, 0, Sec);
+ Cmd->Sym = cast<Defined>(Sym); // Remembered so assignSymbol() can set the final value later.
+// This function is called from LinkerScript::declareSymbols.
+// It creates a placeholder symbol if needed.
+static void declareSymbol(SymbolAssignment *Cmd) {
+ if (!shouldDefineSym(Cmd))
+ return;
+ // We can't calculate final value right now.
+ Symbol *Sym;
+ uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT;
+ std::tie(Sym, std::ignore) = Symtab->insert(Cmd->Name, Visibility,
+ /*CanOmitFromDynSym*/ false,
+ /*File*/ nullptr);
+ replaceSymbol<Defined>(Sym, nullptr, Cmd->Name, STB_GLOBAL, Visibility, // Placeholder: value 0, size 0, no section.
+ STT_NOTYPE, 0, 0, nullptr);
+ Cmd->Sym = cast<Defined>(Sym);
+ Cmd->Provide = false; // From now on shouldDefineSym() treats this assignment as unconditional.
+ Sym->ScriptDefined = true;
+// This method is used to handle INSERT AFTER and INSERT BEFORE statements.
+// Here we rebuild the list of script commands to mix in the inserted sections.
+void LinkerScript::processInsertCommands() {
+ std::vector<BaseCommand *> V; // The rebuilt command list.
+ auto Insert = [&](std::vector<BaseCommand *> &From) { // Appends From to V, then empties From so it counts as consumed.
+ V.insert(V.end(), From.begin(), From.end());
+ From.clear();
+ };
+ for (BaseCommand *Base : SectionCommands) {
+ if (auto *OS = dyn_cast<OutputSection>(Base)) {
+ Insert(InsertBeforeCommands[OS->Name]);
+ V.push_back(Base);
+ Insert(InsertAfterCommands[OS->Name]);
+ continue;
+ }
+ V.push_back(Base);
+ }
+ for (auto &Cmds : {InsertBeforeCommands, InsertAfterCommands}) // NOTE(review): the braced list copies both containers; confirm that is acceptable.
+ for (const std::pair<StringRef, std::vector<BaseCommand *>> &P : Cmds)
+ if (!P.second.empty()) // Anything left over named a section that never appeared above.
+ error("unable to INSERT AFTER/BEFORE " + P.first +
+ ": section not defined");
+ SectionCommands = std::move(V);
+// Symbols defined in script should not be inlined by LTO. At the same time
+// we don't know their final values until late stages of link. Here we scan
+// over symbol assignment commands and create placeholder symbols if needed.
+void LinkerScript::declareSymbols() {
+ assert(!Ctx);
+ for (BaseCommand *Base : SectionCommands) {
+ if (auto *Cmd = dyn_cast<SymbolAssignment>(Base)) { // An assignment outside any output section.
+ declareSymbol(Cmd);
+ continue;
+ }
+ // If the output section directive has constraints,
+ // we can't say for sure if it is going to be included or not.
+ // Skip such sections for now. Improve the checks if we ever
+ // need symbols from those sections to be declared early.
+ auto *Sec = cast<OutputSection>(Base); // cast<> asserts that every other top-level command is an OutputSection.
+ if (Sec->Constraint != ConstraintKind::NoConstraint)
+ continue;
+ for (BaseCommand *Base2 : Sec->SectionCommands)
+ if (auto *Cmd = dyn_cast<SymbolAssignment>(Base2))
+ declareSymbol(Cmd);
+ }
+// This function is called from assignAddresses, while we are
+// fixing the output section addresses. This function is supposed
+// to set the final value for a given symbol assignment.
+void LinkerScript::assignSymbol(SymbolAssignment *Cmd, bool InSec) {
+ if (Cmd->Name == ".") { // An assignment to the location counter is handled by setDot().
+ setDot(Cmd->Expression, Cmd->Location, InSec);
+ return;
+ }
+ if (!Cmd->Sym) // No symbol was created for this assignment; nothing to update.
+ return;
+ ExprValue V = Cmd->Expression();
+ if (V.isAbsolute()) {
+ Cmd->Sym->Section = nullptr;
+ Cmd->Sym->Value = V.getValue();
+ } else {
+ Cmd->Sym->Section = V.Sec;
+ Cmd->Sym->Value = V.getSectionOffset(); // Stored relative to the section rather than as an address.
+ }
+// Returns the name used to match File against file patterns: empty for a null
+// file, "archive(member)" for an archive member, the plain name otherwise.
+static std::string getFilename(InputFile *File) {
+  if (File == nullptr)
+    return "";
+  if (!File->ArchiveName.empty())
+    return (File->ArchiveName + "(" + File->getName() + ")").str();
+  return File->getName();
+// Returns true if S matches both the file pattern and one of the section
+// patterns of any description recorded in KeptSections.
+bool LinkerScript::shouldKeep(InputSectionBase *S) {
+  if (KeptSections.empty())
+    return false;
+  std::string Path = getFilename(S->File);
+  for (InputSectionDescription *Desc : KeptSections) {
+    if (!Desc->FilePat.match(Path))
+      continue;
+    for (SectionPattern &Pat : Desc->SectionPatterns) {
+      if (Pat.SectionPat.match(S->Name))
+        return true;
+    }
+  }
+  return false;
+// Returns the ordering predicate that implements one SORT() policy.
+static std::function<bool(InputSectionBase *, InputSectionBase *)>
+getComparator(SortSectionPolicy K) {
+  if (K == SortSectionPolicy::Alignment) {
+    // ">" is not a mistake. Sections with larger alignments are placed
+    // before sections with smaller alignments in order to reduce the
+    // amount of padding necessary. This is compatible with GNU.
+    return [](InputSectionBase *L, InputSectionBase *R) {
+      return L->Alignment > R->Alignment;
+    };
+  }
+  if (K == SortSectionPolicy::Name) {
+    return [](InputSectionBase *L, InputSectionBase *R) {
+      return L->Name < R->Name;
+    };
+  }
+  if (K == SortSectionPolicy::Priority) {
+    return [](InputSectionBase *L, InputSectionBase *R) {
+      return getPriority(L->Name) < getPriority(R->Name);
+    };
+  }
+  llvm_unreachable("unknown sort policy");
+// Checks an output section constraint against its member input sections.
+// With no constraint everything matches. Otherwise the group counts as
+// read-write as soon as one member has SHF_WRITE, and as read-only if none do.
+static bool matchConstraints(ArrayRef<InputSection *> Sections,
+                             ConstraintKind Kind) {
+  if (Kind == ConstraintKind::NoConstraint)
+    return true;
+  bool HasWritable = false;
+  for (InputSection *IS : Sections) {
+    if (IS->Flags & SHF_WRITE) {
+      HasWritable = true;
+      break;
+    }
+  }
+  if (HasWritable)
+    return Kind == ConstraintKind::ReadWrite;
+  return Kind == ConstraintKind::ReadOnly;
+// Stable-sorts Vec by policy K. The Default and None policies leave the
+// order untouched.
+static void sortSections(MutableArrayRef<InputSection *> Vec,
+                         SortSectionPolicy K) {
+  if (K == SortSectionPolicy::Default || K == SortSectionPolicy::None)
+    return;
+  std::stable_sort(Vec.begin(), Vec.end(), getComparator(K));
+// Sorts sections as instructed by the SORT-family commands and by the
+// --sort-section option. SORT commands nest at most two deep
+// (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and the command line
+// option still counts when a SORT command is present, so the rules are:
+// 1. If two SORT commands are given, --sort-section is ignored.
+// 2. If one SORT command is given, and if it is not SORT_NONE,
+//    --sort-section is handled as an inner SORT command.
+// 3. If one SORT command is given, and if it is SORT_NONE, don't sort.
+// 4. If no SORT command is given, sort according to --sort-section.
+static void sortInputSections(MutableArrayRef<InputSection *> Vec,
+                              const SectionPattern &Pat) {
+  if (Pat.SortOuter == SortSectionPolicy::None)
+    return;
+  // The inner sort runs first; the stable outer sort then keeps its order
+  // among elements the outer policy considers equal.
+  SortSectionPolicy Inner = Pat.SortInner;
+  if (Inner == SortSectionPolicy::Default)
+    Inner = Config->SortSection;
+  sortSections(Vec, Inner);
+  sortSections(Vec, Pat.SortOuter);
+// Returns the input sections that Cmd matches and marks each one as Assigned.
+std::vector<InputSection *>
+LinkerScript::computeInputSections(const InputSectionDescription *Cmd) {
+ std::vector<InputSection *> Ret;
+ // Collects all sections that satisfy constraints of Cmd.
+ for (const SectionPattern &Pat : Cmd->SectionPatterns) {
+ size_t SizeBefore = Ret.size(); // where this pattern's matches start in Ret
+ for (InputSectionBase *Sec : InputSections) {
+ if (!Sec->Live || Sec->Assigned) // dead, or already claimed earlier
+ continue;
+ // For -emit-relocs we have to ignore entries like
+ // .rela.dyn : { *(.rela.data) }
+ // which are common because they are in the default bfd script.
+ // We do not ignore SHT_REL[A] linker-synthesized sections here because
+ // we want to support scripts that do custom layout for them.
+ if (auto *IS = dyn_cast<InputSection>(Sec))
+ if (IS->getRelocatedSection())
+ continue;
+ std::string Filename = getFilename(Sec->File);
+ if (!Cmd->FilePat.match(Filename) ||
+ Pat.ExcludedFilePat.match(Filename) ||
+ !Pat.SectionPat.match(Sec->Name))
+ continue;
+ // It is safe to assume that Sec is an InputSection
+ // because mergeable or EH input sections have already been
+ // handled and eliminated.
+ Ret.push_back(cast<InputSection>(Sec));
+ Sec->Assigned = true; // the check at the top of the loop now skips it
+ }
+ sortInputSections(MutableArrayRef<InputSection *>(Ret).slice(SizeBefore),
+ Pat); // sorts only the sections added for this pattern
+ }
+ return Ret;
+void LinkerScript::discard(ArrayRef<InputSection *> V) { // marks V and its dependents dead
+ for (InputSection *S : V) {
+ if (S == In.ShStrTab || S == In.RelaDyn || S == In.RelrDyn)
+ error("discarding " + S->Name + " section is not allowed"); // reported; the code below still runs
+ // You can discard .hash and .gnu.hash sections by linker scripts. Since
+ // they are synthesized sections, we need to handle them differently than
+ // other regular sections.
+ if (S == In.GnuHashTab)
+ In.GnuHashTab = nullptr;
+ if (S == In.HashTab)
+ In.HashTab = nullptr;
+ S->Assigned = false;
+ S->Live = false;
+ discard(S->DependentSections); // recurse into S->DependentSections
+ }
+// Computes the matches of every input section description in OutCmd, stores
+// them on the description, and returns all of them in command order.
+std::vector<InputSection *>
+LinkerScript::createInputSectionList(OutputSection &OutCmd) {
+  std::vector<InputSection *> All;
+  for (BaseCommand *Entry : OutCmd.SectionCommands) {
+    auto *Desc = dyn_cast<InputSectionDescription>(Entry);
+    if (!Desc)
+      continue;
+    Desc->Sections = computeInputSections(Desc);
+    All.insert(All.end(), Desc->Sections.begin(), Desc->Sections.end());
+  }
+  return All;
+void LinkerScript::processSectionCommands() {
+ // A symbol can be assigned before any section is mentioned in the linker
+ // script. In a DSO, the symbol values are addresses, so the only important
+ // section values are:
+ // * SHN_UNDEF
+ // * SHN_ABS
+ // * Any value meaning a regular section.
+ // To handle that, create a dummy aether section that fills the void before
+ // the linker script switches to another section. It has an index of one
+ // which will map to whatever the first actual section is.
+ Aether = make<OutputSection>("", 0, SHF_ALLOC);
+ Aether->SectionIndex = 1;
+ // Ctx captures the local AddressState and makes it accessible deliberately.
+ // This is needed as there are some cases where we cannot just
+ // thread the current state through to a lambda function created by the
+ // script parser.
+ auto Deleter = make_unique<AddressState>(); // owns the state Ctx points to
+ Ctx = Deleter.get();
+ Ctx->OutSec = Aether;
+ size_t I = 0; // next SectionIndex to hand out
+ // Add input sections to output sections.
+ for (BaseCommand *Base : SectionCommands) {
+ // Handle symbol assignments outside of any output section.
+ if (auto *Cmd = dyn_cast<SymbolAssignment>(Base)) {
+ addSymbol(Cmd);
+ continue;
+ }
+ if (auto *Sec = dyn_cast<OutputSection>(Base)) {
+ std::vector<InputSection *> V = createInputSectionList(*Sec);
+ // The output section name `/DISCARD/' is special.
+ // Any input section assigned to it is discarded.
+ if (Sec->Name == "/DISCARD/") {
+ discard(V);
+ Sec->SectionCommands.clear();
+ continue;
+ }
+ // This is for ONLY_IF_RO and ONLY_IF_RW. An output section directive
+ // ".foo : ONLY_IF_R[OW] { ... }" is handled only if all member input
+ // sections satisfy a given constraint. If not, a directive is handled
+ // as if it wasn't present from the beginning.
+ //
+ // Because we'll iterate over SectionCommands many more times, the easy
+ // way to "make it as if it wasn't present" is to make it empty.
+ if (!matchConstraints(V, Sec->Constraint)) {
+ for (InputSectionBase *S : V)
+ S->Assigned = false; // release them so a later directive can claim them
+ Sec->SectionCommands.clear();
+ continue;
+ }
+ // A directive may contain symbol definitions like this:
+ // ".foo : { ...; bar = .; }". Handle them.
+ for (BaseCommand *Base : Sec->SectionCommands) // shadows the outer Base
+ if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base))
+ addSymbol(OutCmd);
+ // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign
+ // is given, input sections are aligned to that value, whether the
+ // given value is larger or smaller than the original section alignment.
+ if (Sec->SubalignExpr) {
+ uint32_t Subalign = Sec->SubalignExpr().getValue();
+ for (InputSectionBase *S : V)
+ S->Alignment = Subalign;
+ }
+ // Add input sections to an output section.
+ for (InputSection *S : V)
+ Sec->addSection(S);
+ Sec->SectionIndex = I++;
+ if (Sec->Noload)
+ Sec->Type = SHT_NOBITS;
+ if (Sec->NonAlloc)
+ Sec->Flags &= ~(uint64_t)SHF_ALLOC;
+ }
+ }
+ Ctx = nullptr; // the state it pointed to is owned by Deleter
+// Returns the first output section in Vec called Name, or nullptr if there
+// is none. Commands that are not output sections are ignored.
+static OutputSection *findByName(ArrayRef<BaseCommand *> Vec,
+                                 StringRef Name) {
+  for (BaseCommand *Entry : Vec) {
+    auto *Candidate = dyn_cast<OutputSection>(Entry);
+    if (!Candidate)
+      continue;
+    if (Candidate->Name == Name)
+      return Candidate;
+  }
+  return nullptr;
+// Creates a new output section called OutsecName, with "<internal>" as its
+// location, and adds IS to it as the first member.
+static OutputSection *createSection(InputSectionBase *IS,
+                                    StringRef OutsecName) {
+  OutputSection *Fresh = Script->createOutputSection(OutsecName, "<internal>");
+  Fresh->addSection(cast<InputSection>(IS));
+  return Fresh;
+static OutputSection *addInputSec(StringMap<OutputSection *> &Map,
+ InputSectionBase *IS, StringRef OutsecName) { // returns a newly created section, or nullptr
+ // Sections with SHT_GROUP or SHF_GROUP attributes reach here only when the -r
+ // option is given. A section with SHT_GROUP defines a "section group", and
+ // its members have SHF_GROUP attribute. Usually these flags have already been
+ // stripped by InputFiles.cpp as section groups are processed and uniquified.
+ // However, for the -r option, we want to pass through all section groups
+ // as-is because adding/removing members or merging them with other groups
+ // change their semantics.
+ if (IS->Type == SHT_GROUP || (IS->Flags & SHF_GROUP))
+ return createSection(IS, OutsecName);
+ // Imagine .zed : { *(.foo) *(.bar) } script. Both foo and bar may have
+ // relocation sections .rela.foo and .rela.bar for example. Most tools do
+ // not allow multiple REL[A] sections for output section. Hence we
+ // should combine these relocation sections into single output.
+ // We skip synthetic sections because it can be .rela.dyn/.rela.plt or any
+ // other REL[A] sections created by linker itself.
+ if (!isa<SyntheticSection>(IS) &&
+ (IS->Type == SHT_REL || IS->Type == SHT_RELA)) {
+ auto *Sec = cast<InputSection>(IS);
+ OutputSection *Out = Sec->getRelocatedSection()->getOutputSection();
+ if (Out->RelocationSection) {
+ Out->RelocationSection->addSection(Sec);
+ return nullptr; // merged into an existing section; nothing new was created
+ }
+ Out->RelocationSection = createSection(IS, OutsecName);
+ return Out->RelocationSection;
+ }
+ // When control reaches here, mergeable sections have already been merged into
+ // synthetic sections. For relocatable case we want to create one output
+ // section per synthetic section so that they have a valid sh_entsize.
+ if (Config->Relocatable && (IS->Flags & SHF_MERGE))
+ return createSection(IS, OutsecName);
+ // The ELF spec just says
+ // ----------------------------------------------------------------
+ // In the first phase, input sections that match in name, type and
+ // attribute flags should be concatenated into single sections.
+ // ----------------------------------------------------------------
+ //
+ // However, it is clear that at least some flags have to be ignored for
+ // section merging. At the very least SHF_GROUP and SHF_COMPRESSED have to be
+ // ignored. We should not have two output .text sections just because one was
+ // in a group and another was not for example.
+ //
+ // It also seems that wording was a late addition and didn't get the
+ // necessary scrutiny.
+ //
+ // Merging sections with different flags is expected by some users. One
+ // reason is that if one file has
+ //
+ // int *const bar __attribute__((section(".foo"))) = (int *)0;
+ //
+ // gcc with -fPIC will produce a read only .foo section. But if another
+ // file has
+ //
+ // int zed;
+ // int *const bar __attribute__((section(".foo"))) = (int *)&zed;
+ //
+ // gcc with -fPIC will produce a read write section.
+ //
+ // Last but not least, when using linker script the merge rules are forced by
+ // the script. Unfortunately, linker scripts are name based. This means that
+ // expressions like *(.foo*) can refer to multiple input sections with
+ // different flags. We cannot put them in different output sections or we
+ // would produce wrong results for
+ //
+ // start = .; *(.foo.*) end = .; *(.bar)
+ //
+ // and a mapping of .foo1 and .bar1 to one section and .foo2 and .bar2 to
+ // another. The problem is that there is no way to layout those output
+ // sections such that the .foo sections are the only thing between the start
+ // and end symbols.
+ //
+ // Given the above issues, we instead merge sections by name and error on
+ // incompatible types and flags.
+ OutputSection *&Sec = Map[OutsecName]; // reference into Map; assigning below records the new section
+ if (Sec) {
+ Sec->addSection(cast<InputSection>(IS));
+ return nullptr; // merged into an existing section; nothing new was created
+ }
+ Sec = createSection(IS, OutsecName);
+ return Sec;
+// Add sections that didn't match any sections command.
+void LinkerScript::addOrphanSections() {
+ unsigned End = SectionCommands.size(); // command count on entry
+ StringMap<OutputSection *> Map;
+ std::vector<OutputSection *> V; // output sections newly created for orphans
+ auto Add = [&](InputSectionBase *S) {
+ if (!S->Live || S->Parent) // dead, or already placed in an output section
+ return;
+ StringRef Name = getOutputSectionName(S);
+ if (Config->OrphanHandling == OrphanHandlingPolicy::Error)
+ error(toString(S) + " is being placed in '" + Name + "'");
+ else if (Config->OrphanHandling == OrphanHandlingPolicy::Warn)
+ warn(toString(S) + " is being placed in '" + Name + "'");
+ if (OutputSection *Sec =
+ findByName(makeArrayRef(SectionCommands).slice(0, End), Name)) {
+ Sec->addSection(cast<InputSection>(S));
+ return;
+ }
+ if (OutputSection *OS = addInputSec(Map, S, Name))
+ V.push_back(OS);
+ assert(S->getOutputSection()->SectionIndex == UINT32_MAX);
+ };
+ // For further --emit-reloc handling code we need target output section
+ // to be created before we create relocation output section, so we want
+ // to create target sections first. We do not want priority handling
+ // for synthetic sections because they are special.
+ for (InputSectionBase *IS : InputSections) {
+ if (auto *Sec = dyn_cast<InputSection>(IS))
+ if (InputSectionBase *Rel = Sec->getRelocatedSection())
+ if (auto *RelIS = dyn_cast_or_null<InputSectionBase>(Rel->Parent))
+ Add(RelIS);
+ Add(IS);
+ }
+ // If no SECTIONS command was given, we should insert sections commands
+ // before others, so that we can handle scripts which refer to them,
+ // for example: "foo = ABSOLUTE(ADDR(.text));".
+ // When SECTIONS command is present we just add all orphans to the end.
+ if (HasSectionsCommand)
+ SectionCommands.insert(SectionCommands.end(), V.begin(), V.end());
+ else
+ SectionCommands.insert(SectionCommands.begin(), V.begin(), V.end());
+// Aligns the current position to Alignment, reserves Size bytes there, and
+// returns the end of the reserved range. For a TLS SHT_NOBITS output section
+// only Ctx->ThreadBssOffset is updated; Dot itself stays where it was.
+uint64_t LinkerScript::advance(uint64_t Size, unsigned Alignment) {
+  bool IsTbss =
+      (Ctx->OutSec->Flags & SHF_TLS) && Ctx->OutSec->Type == SHT_NOBITS;
+  uint64_t Begin = Dot;
+  if (IsTbss)
+    Begin += Ctx->ThreadBssOffset;
+  uint64_t Stop = alignTo(Begin, Alignment) + Size;
+  if (!IsTbss) {
+    Dot = Stop;
+    return Stop;
+  }
+  Ctx->ThreadBssOffset = Stop - Dot;
+  return Stop;
+void LinkerScript::output(InputSection *S) { // places S at the current position in Ctx->OutSec
+ assert(Ctx->OutSec == S->getParent());
+ uint64_t Before = advance(0, 1); // zero size, alignment 1: just reads the current position
+ uint64_t Pos = advance(S->getSize(), S->Alignment); // end address of S after alignment
+ S->OutSecOff = Pos - S->getSize() - Ctx->OutSec->Addr; // start of S relative to the output section
+ // Update output section size after adding each section. This is so that
+ // SIZEOF works correctly in the case below:
+ // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) }
+ expandOutputSection(Pos - Before); // padding plus the size of S
+void LinkerScript::switchTo(OutputSection *Sec) { // makes Sec current and sets its address
+ Ctx->OutSec = Sec;
+ uint64_t Before = advance(0, 1); // position before any alignment padding
+ Ctx->OutSec->Addr = advance(0, Ctx->OutSec->Alignment); // aligned start address
+ expandMemoryRegions(Ctx->OutSec->Addr - Before); // pass on the padding just inserted
+// Looks for the memory region that the given output section should be placed
+// in. Returns the region, or nullptr when none applies or an error was
+// reported.
+MemoryRegion *LinkerScript::findMemoryRegion(OutputSection *Sec) {
+  // A region named in the output section command takes precedence. Naming a
+  // region that was never declared is an error.
+  if (!Sec->MemoryRegionName.empty()) {
+    MemoryRegion *Named = MemoryRegions.lookup(Sec->MemoryRegionName);
+    if (!Named)
+      error("memory region '" + Sec->MemoryRegionName + "' not declared");
+    return Named;
+  }
+  // Sections only have to belong to a region once at least one region has
+  // been defined; with none there is nothing to do.
+  if (MemoryRegions.empty())
+    return nullptr;
+  // Pick the first region that shares a flag with the section and whose
+  // negative flags reject none of the section's flags.
+  for (auto &Entry : MemoryRegions) {
+    MemoryRegion *Candidate = Entry.second;
+    bool Accepts = (Candidate->Flags & Sec->Flags) != 0;
+    bool Rejects = (Candidate->NegFlags & Sec->Flags) != 0;
+    if (Accepts && !Rejects)
+      return Candidate;
+  }
+  // Nothing matched. That is only an error for allocated sections.
+  if (Sec->Flags & SHF_ALLOC)
+    error("no memory region specified for section '" + Sec->Name + "'");
+  return nullptr;
+// Returns the first entry of OutputSections whose PtLoad is Load, or nullptr
+// when no output section has it.
+static OutputSection *findFirstSection(PhdrEntry *Load) {
+  auto It = llvm::find_if(
+      OutputSections, [=](OutputSection *Sec) { return Sec->PtLoad == Load; });
+  if (It == OutputSections.end())
+    return nullptr;
+  return *It;
+// This function assigns offsets to input sections and an output section
+// for a single sections command (e.g. ".text { *(.text); }").
+void LinkerScript::assignOffsets(OutputSection *Sec) {
+ if (!(Sec->Flags & SHF_ALLOC))
+ Dot = 0; // sections without SHF_ALLOC are laid out from address 0
+ else if (Sec->AddrExpr)
+ setDot(Sec->AddrExpr, Sec->Location, false);
+ Ctx->MemRegion = Sec->MemRegion;
+ Ctx->LMARegion = Sec->LMARegion;
+ if (Ctx->MemRegion)
+ Dot = Ctx->MemRegion->CurPos; // NOTE(review): runs after the AddrExpr branch above, so it replaces that Dot — confirm intended
+ switchTo(Sec);
+ if (Sec->LMAExpr)
+ Ctx->LMAOffset = Sec->LMAExpr().getValue() - Dot;
+ if (MemoryRegion *MR = Sec->LMARegion)
+ Ctx->LMAOffset = MR->CurPos - Dot; // overrides the LMAExpr-based offset when both are set
+ // If neither AT nor AT> is specified for an allocatable section, the linker
+ // will set the LMA such that the difference between VMA and LMA for the
+ // section is the same as the preceding output section in the same region
+ // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html
+ // This, however, should only be done by the first "non-header" section
+ // in the segment.
+ if (PhdrEntry *L = Ctx->OutSec->PtLoad)
+ if (Sec == findFirstSection(L))
+ L->LMAOffset = Ctx->LMAOffset;
+ // We can call this method multiple times during the creation of
+ // thunks and want to start over calculation each time.
+ Sec->Size = 0;
+ // We visited SectionsCommands from processSectionCommands to
+ // layout sections. Now, we visit SectionsCommands again to fix
+ // section offsets.
+ for (BaseCommand *Base : Sec->SectionCommands) {
+ // This handles the assignments to symbol or to the dot.
+ if (auto *Cmd = dyn_cast<SymbolAssignment>(Base)) {
+ Cmd->Addr = Dot;
+ assignSymbol(Cmd, true);
+ Cmd->Size = Dot - Cmd->Addr; // how far this assignment moved Dot
+ continue;
+ }
+ // Handle BYTE(), SHORT(), LONG(), or QUAD().
+ if (auto *Cmd = dyn_cast<ByteCommand>(Base)) {
+ Cmd->Offset = Dot - Ctx->OutSec->Addr;
+ Dot += Cmd->Size;
+ expandOutputSection(Cmd->Size);
+ continue;
+ }
+ // Handle a single input section description command.
+ // It calculates and assigns the offsets for each section and also
+ // updates the output section size.
+ for (InputSection *Sec : cast<InputSectionDescription>(Base)->Sections) // shadows the Sec parameter
+ output(Sec);
+ }
+static bool isDiscardable(OutputSection &Sec) { // true if Sec holds nothing that forces it to be kept
+ // We do not remove empty sections that are explicitly
+ // assigned to any segment.
+ if (!Sec.Phdrs.empty())
+ return false;
+ // We do not want to remove sections that reference symbols in address and
+ // other expressions. We add script symbols as undefined, and want to ensure
+ // all of them are defined in the output, hence have to keep them.
+ if (Sec.ExpressionsUseSymbols)
+ return false;
+ for (BaseCommand *Base : Sec.SectionCommands) {
+ if (auto Cmd = dyn_cast<SymbolAssignment>(Base))
+ // Don't create empty output sections just for unreferenced PROVIDE
+ // symbols.
+ if (Cmd->Name != "." && !Cmd->Sym)
+ continue; // this assignment does not keep the section alive
+ if (!isa<InputSectionDescription>(*Base))
+ return false; // any other command that is not an input description keeps it
+ }
+ return true;
+void LinkerScript::adjustSectionsBeforeSorting() {
+ // If the output section contains only symbol assignments, create a
+ // corresponding output section. The issue is what to do with linker script
+ // like ".foo : { symbol = 42; }". One option would be to convert it to
+ // "symbol = 42;". That is, move the symbol out of the empty section
+ // description. That seems to be what bfd does for this simple case. The
+ // problem is that this is not completely general. bfd will give up and
+ // create a dummy section too if there is a ". = . + 1" inside the section
+ // for example.
+ // Given that we want to create the section, we have to worry what impact
+ // it will have on the link. For example, if we just create a section with
+ // 0 for flags, it would change which PT_LOADs are created.
+ // We could remember that particular section is dummy and ignore it in
+ // other parts of the linker, but unfortunately there are quite a few places
+ // that would need to change:
+ // * The program header creation.
+ // * The orphan section placement.
+ // * The address assignment.
+ // The other option is to pick flags that minimize the impact the section
+ // will have on the rest of the linker. That is why we copy the flags from
+ // the previous sections. Only a few flags are needed to keep the impact low.
+ uint64_t Flags = SHF_ALLOC; // flags of the most recent live section seen so far
+ for (BaseCommand *&Cmd : SectionCommands) { // by reference: entries may be nulled below
+ auto *Sec = dyn_cast<OutputSection>(Cmd);
+ if (!Sec)
+ continue;
+ // Handle align (e.g. ".foo : ALIGN(16) { ... }").
+ if (Sec->AlignExpr)
+ Sec->Alignment =
+ std::max<uint32_t>(Sec->Alignment, Sec->AlignExpr().getValue());
+ // A live output section means that some input section was added to it. It
+ // might have been removed (if it was empty synthetic section), but we at
+ // least know the flags.
+ if (Sec->Live)
+ Flags = Sec->Flags;
+ // We do not want to keep any special flags for output section
+ // in case it is empty.
+ bool IsEmpty = getInputSections(Sec).empty();
+ if (IsEmpty)
+ Sec->Flags = Flags & (SHF_ALLOC | SHF_WRITE | SHF_EXECINSTR);
+ if (IsEmpty && isDiscardable(*Sec)) {
+ Sec->Live = false;
+ Cmd = nullptr; // marked for removal by the erase_if below
+ }
+ }
+ // It is common practice to use very generic linker scripts. So for any
+ // given run some of the output sections in the script will be empty.
+ // We could create corresponding empty output sections, but that would
+ // clutter the output.
+ // We instead remove trivially empty sections. The bfd linker seems even
+ // more aggressive at removing them.
+ llvm::erase_if(SectionCommands, [&](BaseCommand *Base) { return !Base; });
+void LinkerScript::adjustSectionsAfterSorting() {
+ // Try and find an appropriate memory region to assign offsets in.
+ for (BaseCommand *Base : SectionCommands) {
+ if (auto *Sec = dyn_cast<OutputSection>(Base)) {
+ if (!Sec->LMARegionName.empty()) {
+ if (MemoryRegion *M = MemoryRegions.lookup(Sec->LMARegionName))
+ Sec->LMARegion = M;
+ else
+ error("memory region '" + Sec->LMARegionName + "' not declared");
+ }
+ Sec->MemRegion = findMemoryRegion(Sec); // may be nullptr
+ }
+ }
+ // If output section command doesn't specify any segments,
+ // and we haven't previously assigned any section to segment,
+ // then we simply assign section to the very first load segment.
+ // Below is an example of such linker script:
+ // PHDRS { seg PT_LOAD; }
+ // SECTIONS { .aaa : { *(.aaa) } }
+ std::vector<StringRef> DefPhdrs; // headers inherited by sections that name none
+ auto FirstPtLoad = llvm::find_if(PhdrsCommands, [](const PhdrsCommand &Cmd) {
+ return Cmd.Type == PT_LOAD;
+ });
+ if (FirstPtLoad != PhdrsCommands.end())
+ DefPhdrs.push_back(FirstPtLoad->Name);
+ // Walk the commands and propagate the program headers to commands that don't
+ // explicitly specify them.
+ for (BaseCommand *Base : SectionCommands) {
+ auto *Sec = dyn_cast<OutputSection>(Base);
+ if (!Sec)
+ continue;
+ if (Sec->Phdrs.empty()) {
+ // To match the bfd linker script behaviour, only propagate program
+ // headers to sections that are allocated.
+ if (Sec->Flags & SHF_ALLOC)
+ Sec->Phdrs = DefPhdrs;
+ } else {
+ DefPhdrs = Sec->Phdrs; // an explicit list becomes the new default
+ }
+ }
+// Returns the lowest address the headers may be placed at, given that the
+// lowest allocated section address is Min.
+static uint64_t computeBase(uint64_t Min, bool AllocateHeaders) {
+  // With no SECTIONS command, or when the linker script is explicit about
+  // program headers, do our best to allocate them.
+  bool TryHard = !Script->HasSectionsCommand || AllocateHeaders;
+  if (TryHard)
+    return 0;
+  // Otherwise only allocate program headers if that would not add a page.
+  uint64_t PageStart = alignDown(Min, Config->MaxPageSize);
+  return PageStart;
+// Try to find an address for the file and program headers output sections,
+// which were unconditionally added to the first PT_LOAD segment earlier.
+// When using the default layout, we check if the headers fit below the first
+// allocated section. When using a linker script, we also check if the headers
+// are covered by the output section. This allows omitting the headers by not
+// leaving enough space for them in the linker script; this pattern is common
+// in embedded systems.
+// If there isn't enough space for these sections, we'll remove them from the
+// PT_LOAD segment, and we'll also remove the PT_PHDR segment.
+void LinkerScript::allocateHeaders(std::vector<PhdrEntry *> &Phdrs) {
+ uint64_t Min = std::numeric_limits<uint64_t>::max(); // lowest address of any SHF_ALLOC section
+ for (OutputSection *Sec : OutputSections)
+ if (Sec->Flags & SHF_ALLOC)
+ Min = std::min<uint64_t>(Min, Sec->Addr);
+ auto It = llvm::find_if(
+ Phdrs, [](const PhdrEntry *E) { return E->p_type == PT_LOAD; });
+ if (It == Phdrs.end())
+ return; // no PT_LOAD segment at all
+ PhdrEntry *FirstPTLoad = *It;
+ bool HasExplicitHeaders =
+ llvm::any_of(PhdrsCommands, [](const PhdrsCommand &Cmd) {
+ return Cmd.HasPhdrs || Cmd.HasFilehdr;
+ });
+ uint64_t HeaderSize = getHeaderSize();
+ if (HeaderSize <= Min - computeBase(Min, HasExplicitHeaders)) { // the headers fit below Min
+ Min = alignDown(Min - HeaderSize, Config->MaxPageSize);
+ Out::ElfHeader->Addr = Min;
+ Out::ProgramHeaders->Addr = Min + Out::ElfHeader->Size; // directly after the ELF header
+ return;
+ }
+ // Error if we were explicitly asked to allocate headers.
+ if (HasExplicitHeaders)
+ error("could not allocate headers");
+ Out::ElfHeader->PtLoad = nullptr;
+ Out::ProgramHeaders->PtLoad = nullptr;
+ FirstPTLoad->FirstSec = findFirstSection(FirstPTLoad); // recomputed now that both headers are detached
+ llvm::erase_if(Phdrs,
+ [](const PhdrEntry *E) { return E->p_type == PT_PHDR; });
+// Starting a new address state resets the current position of every memory
+// region known to the script back to the region's origin.
+LinkerScript::AddressState::AddressState() {
+  for (auto &Entry : Script->MemoryRegions)
+    Entry.second->CurPos = Entry.second->Origin;
+// Returns the value '.' starts out with before any address is assigned.
+static uint64_t getInitialDot() {
+  // By default linker scripts use an initial value of 0 for '.',
+  // but prefer -image-base if set.
+  if (Script->HasSectionsCommand) {
+    if (Config->ImageBase)
+      return *Config->ImageBase;
+    return 0;
+  }
+  // The sections given with -T<section> have been sorted in order of
+  // ascending address. Calls to setDot() must be monotonically increasing, so
+  // the start address has to be lowered to the lowest -T<section> address
+  // when that address is below the default.
+  uint64_t Lowest = UINT64_MAX;
+  for (auto &Entry : Config->SectionStartMap)
+    if (Entry.second < Lowest)
+      Lowest = Entry.second;
+  uint64_t Default = Target->getImageBase() + elf::getHeaderSize();
+  return Default < Lowest ? Default : Lowest;
+// Here we assign addresses as instructed by linker script SECTIONS
+// sub-commands. Doing that allows us to use final VA values, so here
+// we also handle the remaining commands, such as symbol assignments and ASSERTs.
+void LinkerScript::assignAddresses() {
+ Dot = getInitialDot();
+ auto Deleter = make_unique<AddressState>(); // owns the state Ctx points to
+ Ctx = Deleter.get();
+ ErrorOnMissingSection = true;
+ switchTo(Aether);
+ for (BaseCommand *Base : SectionCommands) {
+ if (auto *Cmd = dyn_cast<SymbolAssignment>(Base)) {
+ Cmd->Addr = Dot;
+ assignSymbol(Cmd, false);
+ Cmd->Size = Dot - Cmd->Addr; // how far this assignment moved Dot
+ continue;
+ }
+ assignOffsets(cast<OutputSection>(Base)); // every other command must be an output section
+ }
+ Ctx = nullptr; // the state it pointed to is owned by Deleter
+// Creates program headers as instructed by PHDRS linker script command.
+std::vector<PhdrEntry *> LinkerScript::createPhdrs() {
+ std::vector<PhdrEntry *> Ret; // Ret[I] corresponds to PhdrsCommands[I]
+ // Process PHDRS and FILEHDR keywords because they are not
+ // real output sections and cannot be added in the following loop.
+ for (const PhdrsCommand &Cmd : PhdrsCommands) {
+ PhdrEntry *Phdr = make<PhdrEntry>(Cmd.Type, Cmd.Flags ? *Cmd.Flags : PF_R); // PF_R when Cmd.Flags is unset
+ if (Cmd.HasFilehdr)
+ Phdr->add(Out::ElfHeader);
+ if (Cmd.HasPhdrs)
+ Phdr->add(Out::ProgramHeaders);
+ if (Cmd.LMAExpr) {
+ Phdr->p_paddr = Cmd.LMAExpr().getValue();
+ Phdr->HasLMA = true;
+ }
+ Ret.push_back(Phdr);
+ }
+ // Add output sections to program headers.
+ for (OutputSection *Sec : OutputSections) {
+ // Assign headers specified by linker script
+ for (size_t Id : getPhdrIndices(Sec)) {
+ Ret[Id]->add(Sec);
+ if (!PhdrsCommands[Id].Flags.hasValue()) // no flags given: take them from the member sections
+ Ret[Id]->p_flags |= Sec->getPhdrFlags();
+ }
+ }
+ return Ret;
+// Tells whether an .interp section should be emitted. Normally it is. When
+// PHDRS commands are given and none of them is PT_INTERP there is no segment
+// to hold .interp, so it is left out in that case.
+bool LinkerScript::needsInterpSection() {
+  if (PhdrsCommands.empty())
+    return true;
+  return llvm::any_of(PhdrsCommands, [](const PhdrsCommand &Cmd) {
+    return Cmd.Type == PT_INTERP;
+  });
+// Evaluates the symbol Name for an expression located at Loc. "." yields the
+// location counter relative to the current output section and needs an
+// active address state. Any other name is looked up in the symbol table.
+ExprValue LinkerScript::getSymbolValue(StringRef Name, const Twine &Loc) {
+  if (Name == ".") {
+    if (!Ctx) {
+      error(Loc + ": unable to get location counter value");
+      return 0;
+    }
+    return {Ctx->OutSec, false, Dot - Ctx->OutSec->Addr, Loc};
+  }
+  Symbol *Found = Symtab->find(Name);
+  if (auto *DS = dyn_cast_or_null<Defined>(Found))
+    return {DS->Section, false, DS->Value, Loc};
+  // A shared symbol evaluates to zero for as long as ErrorOnMissingSection
+  // is off; once it is on, the lookup is reported as a failure below.
+  if (Found && isa<SharedSymbol>(Found) && !ErrorOnMissingSection)
+    return {nullptr, false, 0, Loc};
+  error(Loc + ": symbol not found: " + Name);
+  return 0;
+// Returns the position in Vec of the first segment called Name, or None when
+// no entry has that name.
+static Optional<size_t> getPhdrIndex(ArrayRef<PhdrsCommand> Vec,
+                                     StringRef Name) {
+  size_t Pos = 0;
+  for (const PhdrsCommand &Cmd : Vec) {
+    if (Cmd.Name == Name)
+      return Pos;
+    ++Pos;
+  }
+  return None;
+// Returns the indices of the program headers that the output section Cmd is
+// assigned to. Each index is the zero-based position of the header within the
+// PHDRS {} script block. The special name NONE is skipped silently; any other
+// name that PHDRS does not list is reported as an error.
+std::vector<size_t> LinkerScript::getPhdrIndices(OutputSection *Cmd) {
+  std::vector<size_t> Ret;
+  for (StringRef S : Cmd->Phdrs) {
+    if (Optional<size_t> Idx = getPhdrIndex(PhdrsCommands, S))
+      Ret.push_back(*Idx);
+    else if (S != "NONE")
+      // PHDRS declares program headers (segments), not section headers, so
+      // the diagnostic names the right kind of entity.
+      error(Cmd->Location + ": program header '" + S +
+            "' is not listed in PHDRS");
+  }
+  return Ret;
diff --git a/contrib/llvm/tools/lld/ELF/LinkerScript.h b/contrib/llvm/tools/lld/ELF/LinkerScript.h
new file mode 100644
index 000000000000..51161981efc8
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/LinkerScript.h
@@ -0,0 +1,314 @@
+//===- LinkerScript.h -------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Config.h"
+#include "Writer.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Common/Strings.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <vector>
+namespace lld {
+namespace elf {
+// Forward declarations of types that this header only refers to through
+// pointers or references.
+class Defined;
+class InputSection;
+class InputSectionBase;
+class OutputSection;
+class SectionBase;
+class Symbol;
+class ThunkSection;
+// This represents an r-value in the linker script.
+struct ExprValue {
+ // Builds a value from all of its parts. Loc is stored as an owning
+ // std::string copy; Alignment keeps its default of 1.
+ ExprValue(SectionBase *Sec, bool ForceAbsolute, uint64_t Val,
+ const Twine &Loc)
+ : Sec(Sec), ForceAbsolute(ForceAbsolute), Val(Val), Loc(Loc.str()) {}
+ // Implicit conversion from a plain integer: no section, not forced
+ // absolute, and an empty source location.
+ ExprValue(uint64_t Val) : ExprValue(nullptr, false, Val, "") {}
+ // A value is absolute if ABSOLUTE() was requested or it has no section.
+ bool isAbsolute() const { return ForceAbsolute || Sec == nullptr; }
+ uint64_t getValue() const;
+ uint64_t getSecAddr() const;
+ uint64_t getSectionOffset() const;
+ // If a value is relative to a section, it has a non-null Sec.
+ SectionBase *Sec;
+ // True if this expression is enclosed in ABSOLUTE().
+ // This flag affects the return value of getValue().
+ bool ForceAbsolute;
+ // The raw number. LinkerScript::getSymbolValue stores a section-relative
+ // quantity here when Sec is non-null (e.g. Dot - OutSec->Addr).
+ uint64_t Val;
+ uint64_t Alignment = 1;
+ // Original source location. Used for error messages.
+ std::string Loc;
+// This represents an expression in the linker script.
+// ScriptParser::readExpr reads an expression and returns an Expr.
+// Later, we evaluate the expression by calling the function.
+typedef std::function<ExprValue()> Expr;
+// This enum is used to implement linker script SECTIONS command.
+// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
+// Each value tags one kind of BaseCommand and is what the classof()
+// helpers of the command structs compare against.
+enum SectionsCommandKind {
+ AssignmentKind, // . = expr or <sym> = expr
+ OutputSectionKind, // presumably the tag used by OutputSection -- defined elsewhere
+ InputSectionKind, // input section description (InputSectionDescription)
+ ByteKind // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
+// Common base of the script command structs. Kind holds a
+// SectionsCommandKind value that the derived classes' classof() functions
+// test, which is what lets isa<>/dyn_cast<>/cast<> work on BaseCommand
+// pointers.
+struct BaseCommand {
+ BaseCommand(int K) : Kind(K) {}
+ int Kind;
+// This represents ". = <expr>" or "<symbol> = <expr>".
+struct SymbolAssignment : BaseCommand {
+  SymbolAssignment(StringRef Name, Expr E, std::string Loc)
+      : BaseCommand(AssignmentKind), Name(Name), Expression(E), Location(Loc) {}
+  // Support for isa<>/dyn_cast<> on BaseCommand pointers.
+  static bool classof(const BaseCommand *C) {
+    return C->Kind == AssignmentKind;
+  }
+  // The LHS of an expression. Name is either a symbol name or ".".
+  StringRef Name;
+  // The symbol defined by this assignment, if any. The -Map writer skips
+  // PROVIDE assignments whose Sym is still null.
+  Defined *Sym = nullptr;
+  // The RHS of an expression.
+  Expr Expression;
+  // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
+  bool Provide = false;
+  bool Hidden = false;
+  // Holds file name and line number for error reporting.
+  std::string Location;
+  // A string representation of this command. We use this for -Map.
+  std::string CommandString;
+  // Address of this assignment command. This is a full 64-bit quantity
+  // (the -Map writer prints it with a 64-bit format), so it must not be
+  // narrowed to unsigned; it is zero until an address has been assigned.
+  uint64_t Addr = 0;
+  // Size of this assignment command. This is usually 0, but if
+  // you move '.' this may be greater than 0. Kept 64-bit for the same
+  // reason as Addr.
+  uint64_t Size = 0;
+// Linker scripts allow additional constraints to be put on output sections.
+// If an output section is marked as ONLY_IF_RO, the section is created
+// only if its input sections are read-only. Likewise, an output section
+// with ONLY_IF_RW is created if all input sections are RW.
+enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite };
+// This struct is used to represent the location and size of regions of
+// target memory. Instances of the struct are created by parsing the
+// MEMORY command.
+struct MemoryRegion {
+ // Plain member-wise initialization; CurPos keeps its default of 0.
+ MemoryRegion(StringRef Name, uint64_t Origin, uint64_t Length, uint32_t Flags,
+ uint32_t NegFlags)
+ : Name(Name), Origin(Origin), Length(Length), Flags(Flags),
+ NegFlags(NegFlags) {}
+ // Region name, stored as an owning std::string copy of the StringRef.
+ std::string Name;
+ // Location and size of the region.
+ uint64_t Origin;
+ uint64_t Length;
+ // NOTE(review): presumably the section flags a section must have (Flags)
+ // and must not have (NegFlags) to be placed in this region -- confirm
+ // against findMemoryRegion().
+ uint32_t Flags;
+ uint32_t NegFlags;
+ // NOTE(review): looks like the running allocation position inside the
+ // region; the code that updates it is not visible in this header.
+ uint64_t CurPos = 0;
+// This struct represents one section match pattern in the SECTIONS command.
+// It can optionally have a negative match pattern for the EXCLUDE_FILE
+// keyword. It may also be surrounded by a SORT() command, so it carries
+// sorting rules as well.
+struct SectionPattern {
+ // Pat1 is the excluded-file pattern, Pat2 the section-name pattern.
+ // Both sort policies start out as SortSectionPolicy::Default.
+ // NOTE(review): Pat1/Pat2 are named rvalue references, so the member
+ // initializers below copy them rather than move them.
+ SectionPattern(StringMatcher &&Pat1, StringMatcher &&Pat2)
+ : ExcludedFilePat(Pat1), SectionPat(Pat2),
+ SortOuter(SortSectionPolicy::Default),
+ SortInner(SortSectionPolicy::Default) {}
+ StringMatcher ExcludedFilePat;
+ StringMatcher SectionPat;
+ SortSectionPolicy SortOuter;
+ SortSectionPolicy SortInner;
+// A command tagged InputSectionKind: a file pattern plus a list of section
+// patterns, together with the input sections associated with it.
+struct InputSectionDescription : BaseCommand {
+ InputSectionDescription(StringRef FilePattern)
+ : BaseCommand(InputSectionKind), FilePat(FilePattern) {}
+ // Support for isa<>/dyn_cast<> on BaseCommand pointers.
+ static bool classof(const BaseCommand *C) {
+ return C->Kind == InputSectionKind;
+ }
+ // Pattern built from the file-name part of the description.
+ StringMatcher FilePat;
+ // Input sections that match at least one of SectionPatterns
+ // will be associated with this InputSectionDescription.
+ std::vector<SectionPattern> SectionPatterns;
+ // The associated input sections. The -Map writer walks this list to
+ // print one line per input section.
+ std::vector<InputSection *> Sections;
+ // Temporary record of synthetic ThunkSection instances and the pass that
+ // they were created in. This is used to insert newly created ThunkSections
+ // into Sections at the end of a createThunks() pass.
+ std::vector<std::pair<ThunkSection *, uint32_t>> ThunkSections;
+// Represents BYTE(), SHORT(), LONG(), or QUAD().
+struct ByteCommand : BaseCommand {
+ // Offset is not set here; it is left uninitialized by this constructor.
+ ByteCommand(Expr E, unsigned Size, std::string CommandString)
+ : BaseCommand(ByteKind), CommandString(CommandString), Expression(E),
+ Size(Size) {}
+ // Support for isa<>/dyn_cast<> on BaseCommand pointers.
+ static bool classof(const BaseCommand *C) { return C->Kind == ByteKind; }
+ // Keeps the string representing the command. Used for -Map output.
+ std::string CommandString;
+ // The expression whose value supplies the data for this command.
+ Expr Expression;
+ // This is just an offset of this data command in the output section.
+ unsigned Offset;
+ // Size of this data command.
+ unsigned Size;
+// One entry of the PHDRS command list (see LinkerScript::PhdrsCommands).
+struct PhdrsCommand {
+ // Name that output sections use to refer to this entry; getPhdrIndex()
+ // compares against it.
+ StringRef Name;
+ // Segment type (a PT_* value); needsInterpSection() looks for PT_INTERP.
+ unsigned Type = llvm::ELF::PT_NULL;
+ // NOTE(review): presumably set by the FILEHDR and PHDRS keywords of an
+ // entry -- confirm against the script parser.
+ bool HasFilehdr = false;
+ bool HasPhdrs = false;
+ // Explicit segment flags. When unset, createPhdrs() ORs in the flags of
+ // every section it adds to the segment instead.
+ llvm::Optional<unsigned> Flags;
+ // NOTE(review): presumably the optional load-address expression of the
+ // entry; null when not given.
+ Expr LMAExpr = nullptr;
+// Holds the state collected from linker scripts (SECTIONS, PHDRS, MEMORY and
+// INSERT commands, KEEP patterns, referenced symbols) together with the
+// routines that process those commands and assign addresses.
+class LinkerScript final {
+ // Temporary state used in processSectionCommands() and assignAddresses()
+ // that must be reinitialized for each call to the above functions, and must
+ // not be used outside of the scope of a call to the above functions.
+ struct AddressState {
+ AddressState();
+ uint64_t ThreadBssOffset = 0;
+ OutputSection *OutSec = nullptr;
+ MemoryRegion *MemRegion = nullptr;
+ MemoryRegion *LMARegion = nullptr;
+ uint64_t LMAOffset = 0;
+ };
+ llvm::DenseMap<StringRef, OutputSection *> NameToOutputSection;
+ void addSymbol(SymbolAssignment *Cmd);
+ void assignSymbol(SymbolAssignment *Cmd, bool InSec);
+ void setDot(Expr E, const Twine &Loc, bool InSec);
+ void expandOutputSection(uint64_t Size);
+ void expandMemoryRegions(uint64_t Size);
+ std::vector<InputSection *>
+ computeInputSections(const InputSectionDescription *);
+ std::vector<InputSection *> createInputSectionList(OutputSection &Cmd);
+ std::vector<size_t> getPhdrIndices(OutputSection *Sec);
+ MemoryRegion *findMemoryRegion(OutputSection *Sec);
+ void switchTo(OutputSection *Sec);
+ uint64_t advance(uint64_t Size, unsigned Align);
+ void output(InputSection *Sec);
+ void assignOffsets(OutputSection *Sec);
+ // Ctx captures the local AddressState and makes it accessible
+ // deliberately. This is needed as there are some cases where we cannot just
+ // thread the current state through to a lambda function created by the
+ // script parser.
+ // This should remain a plain pointer as its lifetime is smaller than
+ // LinkerScript.
+ AddressState *Ctx = nullptr;
+ OutputSection *Aether;
+ // Current value of the location counter "."; see getDot() and
+ // getSymbolValue().
+ uint64_t Dot;
+ OutputSection *createOutputSection(StringRef Name, StringRef Location);
+ OutputSection *getOrCreateOutputSection(StringRef Name);
+ bool hasPhdrsCommands() { return !PhdrsCommands.empty(); }
+ uint64_t getDot() { return Dot; }
+ void discard(ArrayRef<InputSection *> V);
+ ExprValue getSymbolValue(StringRef Name, const Twine &Loc);
+ void addOrphanSections();
+ void adjustSectionsBeforeSorting();
+ void adjustSectionsAfterSorting();
+ std::vector<PhdrEntry *> createPhdrs();
+ bool needsInterpSection();
+ bool shouldKeep(InputSectionBase *S);
+ void assignAddresses();
+ void allocateHeaders(std::vector<PhdrEntry *> &Phdrs);
+ void processSectionCommands();
+ void declareSymbols();
+ // Used to handle INSERT [AFTER|BEFORE] statements.
+ void processInsertCommands();
+ // SECTIONS command list.
+ std::vector<BaseCommand *> SectionCommands;
+ // PHDRS command list.
+ std::vector<PhdrsCommand> PhdrsCommands;
+ bool HasSectionsCommand = false;
+ // When true, getSymbolValue() reports an error for a shared symbol
+ // instead of evaluating it to 0.
+ bool ErrorOnMissingSection = false;
+ // List of section patterns specified with KEEP commands. They will
+ // be kept even if they are unused and --gc-sections is specified.
+ std::vector<InputSectionDescription *> KeptSections;
+ // A map from memory region name to a memory region descriptor.
+ llvm::MapVector<llvm::StringRef, MemoryRegion *> MemoryRegions;
+ // A list of symbols referenced by the script.
+ std::vector<llvm::StringRef> ReferencedSymbols;
+ // Used to implement INSERT [AFTER|BEFORE]. Contains commands that need
+ // to be inserted into SECTIONS commands list.
+ llvm::DenseMap<StringRef, std::vector<BaseCommand *>> InsertAfterCommands;
+ llvm::DenseMap<StringRef, std::vector<BaseCommand *>> InsertBeforeCommands;
+extern LinkerScript *Script;
+} // end namespace elf
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/MapFile.cpp b/contrib/llvm/tools/lld/ELF/MapFile.cpp
new file mode 100644
index 000000000000..b0dc6203008d
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/MapFile.cpp
@@ -0,0 +1,263 @@
+//===- MapFile.cpp --------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file implements the -Map option. It shows lists in order and
+// hierarchically the output sections, input sections, input files and
+// symbol:
+// Address Size Align Out In Symbol
+// 00201000 00000015 4 .text
+// 00201000 0000000e 4 test.o:(.text)
+// 0020100e 00000000 0 local
+// 00201005 00000000 0 f(int)
+#include "MapFile.h"
+#include "InputFiles.h"
+#include "LinkerScript.h"
+#include "OutputSections.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "lld/Common/Strings.h"
+#include "lld/Common/Threads.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace llvm::object;
+using namespace lld;
+using namespace lld::elf;
+typedef DenseMap<const SectionBase *, SmallVector<Defined *, 4>> SymbolMapTy;
+static const std::string Indent8 = " "; // 8 spaces
+static const std::string Indent16 = " "; // 16 spaces
+// Print out the first four columns of a line: VMA, LMA, size and alignment.
+// The two address columns are 16 hex digits wide for 64-bit outputs
+// (Config->Is64) and 8 hex digits wide otherwise.
+static void writeHeader(raw_ostream &OS, uint64_t VMA, uint64_t LMA,
+ uint64_t Size, uint64_t Align) {
+ if (Config->Is64)
+ OS << format("%16llx %16llx %8llx %5lld ", VMA, LMA, Size, Align);
+ else
+ OS << format("%8llx %8llx %8llx %5lld ", VMA, LMA, Size, Align);
+// Collects every symbol that should show up in the map file: defined,
+// non-section symbols that live in a live section and that either belong
+// to the file being scanned, need a PLT address, or sit in a Bss section.
+static std::vector<Defined *> getSymbols() {
+  std::vector<Defined *> Result;
+  for (InputFile *File : ObjectFiles) {
+    for (Symbol *Sym : File->getSymbols()) {
+      auto *D = dyn_cast<Defined>(Sym);
+      if (!D || D->isSection())
+        continue;
+      if (!D->Section || !D->Section->Live)
+        continue;
+      bool Wanted = D->File == File || D->NeedsPltAddr || D->Section->Bss;
+      if (Wanted)
+        Result.push_back(D);
+    }
+  }
+  return Result;
+// Groups the given symbols by the section that defines them.
+static SymbolMapTy getSectionSyms(ArrayRef<Defined *> Syms) {
+  SymbolMapTy BySection;
+  for (Defined *Sym : Syms)
+    BySection[Sym->Section].push_back(Sym);
+  // Within each section, order the symbols by address so that the map file
+  // follows the layout of the output file instead of the order in which the
+  // symbols were seen in the inputs. The sort is stable, so symbols at the
+  // same address keep their relative order.
+  auto LowerAddress = [](Defined *A, Defined *B) {
+    return A->getVA() < B->getVA();
+  };
+  for (auto &Entry : BySection)
+    std::stable_sort(Entry.second.begin(), Entry.second.end(), LowerAddress);
+  return BySection;
+// Construct a map from symbols to their stringified representations.
+// Demangling symbols (which is what toString() does) is slow, so
+// we do that in batch using parallel-for.
+// Each string is a complete map-file line without the trailing newline:
+// the header columns (with alignment printed as 1) followed by the
+// indented symbol name.
+static DenseMap<Symbol *, std::string>
+getSymbolStrings(ArrayRef<Defined *> Syms) {
+ // One pre-allocated slot per symbol, so that each parallel task writes
+ // only to its own string.
+ std::vector<std::string> Str(Syms.size());
+ parallelForEachN(0, Syms.size(), [&](size_t I) {
+ raw_string_ostream OS(Str[I]);
+ OutputSection *OSec = Syms[I]->getOutputSection();
+ uint64_t VMA = Syms[I]->getVA();
+ // The LMA is the section's LMA plus the symbol's offset from the start
+ // of the section; symbols without an output section get 0.
+ uint64_t LMA = OSec ? OSec->getLMA() + VMA - OSec->getVA(0) : 0;
+ writeHeader(OS, VMA, LMA, Syms[I]->getSize(), 1);
+ OS << Indent16 << toString(*Syms[I]);
+ });
+ // Build the map sequentially once all strings have been produced.
+ DenseMap<Symbol *, std::string> Ret;
+ for (size_t I = 0, E = Syms.size(); I < E; ++I)
+ Ret[Syms[I]] = std::move(Str[I]);
+ return Ret;
+// Print .eh_frame contents. Since the section consists of EhSectionPieces,
+// we need a specialized printer for that section.
+// .eh_frame tend to contain a lot of section pieces that are contiguous
+// both in input file and output file. Such pieces are squashed before
+// being displayed to make output compact.
+// OSec is the output section containing .eh_frame; its Addr and LMA are the
+// bases to which each piece's output offset is added.
+static void printEhFrame(raw_ostream &OS, OutputSection *OSec) {
+ std::vector<EhSectionPiece> Pieces;
+ auto Add = [&](const EhSectionPiece &P) {
+ // If P is adjacent to Last, squash the two.
+ // "Adjacent" means same input section and contiguous in both the input
+ // and the output.
+ if (!Pieces.empty()) {
+ EhSectionPiece &Last = Pieces.back();
+ if (Last.Sec == P.Sec && Last.InputOff + Last.Size == P.InputOff &&
+ Last.OutputOff + Last.Size == P.OutputOff) {
+ Last.Size += P.Size;
+ return;
+ }
+ }
+ Pieces.push_back(P);
+ };
+ // Gather section pieces.
+ // Each CIE is added first, followed by the FDEs recorded for it.
+ for (const CieRecord *Rec : In.EhFrame->getCieRecords()) {
+ Add(*Rec->Cie);
+ for (const EhSectionPiece *Fde : Rec->Fdes)
+ Add(*Fde);
+ }
+ // Print out section pieces.
+ for (EhSectionPiece &P : Pieces) {
+ writeHeader(OS, OSec->Addr + P.OutputOff, OSec->getLMA() + P.OutputOff,
+ P.Size, 1);
+ OS << Indent8 << toString(P.Sec->File) << ":(" << P.Sec->Name << "+0x"
+ << Twine::utohexstr(P.InputOff) + ")\n";
+ }
+// Writes the link map to Config->MapFile. Does nothing when no map file was
+// requested, and reports an error (without writing anything) when the file
+// cannot be opened. The output lists every top-level script command; under
+// each output section it lists the input sections with their symbols, data
+// commands and symbol assignments.
+void elf::writeMapFile() {
+ if (Config->MapFile.empty())
+ return;
+ // Open a map file for writing.
+ std::error_code EC;
+ raw_fd_ostream OS(Config->MapFile, EC, sys::fs::F_None);
+ if (EC) {
+ error("cannot open " + Config->MapFile + ": " + EC.message());
+ return;
+ }
+ // Collect symbol info that we want to print out.
+ std::vector<Defined *> Syms = getSymbols();
+ SymbolMapTy SectionSyms = getSectionSyms(Syms);
+ DenseMap<Symbol *, std::string> SymStr = getSymbolStrings(Syms);
+ // Print out the header line.
+ // The address columns are as wide as the hex fields that writeHeader()
+ // prints.
+ int W = Config->Is64 ? 16 : 8;
+ OS << right_justify("VMA", W) << ' ' << right_justify("LMA", W)
+ << " Size Align Out In Symbol\n";
+ // OSec tracks the most recently printed output section so that a
+ // top-level assignment can derive its LMA from it.
+ OutputSection* OSec = nullptr;
+ for (BaseCommand *Base : Script->SectionCommands) {
+ if (auto *Cmd = dyn_cast<SymbolAssignment>(Base)) {
+ // Skip PROVIDE assignments that did not end up defining a symbol.
+ if (Cmd->Provide && !Cmd->Sym)
+ continue;
+ uint64_t LMA = OSec ? OSec->getLMA() + Cmd->Addr - OSec->getVA(0) : 0;
+ writeHeader(OS, Cmd->Addr, LMA, Cmd->Size, 1);
+ OS << Cmd->CommandString << '\n';
+ continue;
+ }
+ // Any top-level command that is not an assignment is treated as an
+ // output section (cast<> asserts on anything else).
+ OSec = cast<OutputSection>(Base);
+ writeHeader(OS, OSec->Addr, OSec->getLMA(), OSec->Size, OSec->Alignment);
+ OS << OSec->Name << '\n';
+ // Dump symbols for each input section.
+ for (BaseCommand *Base : OSec->SectionCommands) {
+ if (auto *ISD = dyn_cast<InputSectionDescription>(Base)) {
+ for (InputSection *IS : ISD->Sections) {
+ // .eh_frame is printed piece by piece by a dedicated printer.
+ if (IS == In.EhFrame) {
+ printEhFrame(OS, OSec);
+ continue;
+ }
+ writeHeader(OS, IS->getVA(0), OSec->getLMA() + IS->getOffset(0),
+ IS->getSize(), IS->Alignment);
+ OS << Indent8 << toString(IS) << '\n';
+ for (Symbol *Sym : SectionSyms[IS])
+ OS << SymStr[Sym] << '\n';
+ }
+ continue;
+ }
+ if (auto *Cmd = dyn_cast<ByteCommand>(Base)) {
+ writeHeader(OS, OSec->Addr + Cmd->Offset, OSec->getLMA() + Cmd->Offset,
+ Cmd->Size, 1);
+ OS << Indent8 << Cmd->CommandString << '\n';
+ continue;
+ }
+ if (auto *Cmd = dyn_cast<SymbolAssignment>(Base)) {
+ // Same PROVIDE filter as for top-level assignments.
+ if (Cmd->Provide && !Cmd->Sym)
+ continue;
+ writeHeader(OS, Cmd->Addr, OSec->getLMA() + Cmd->Addr - OSec->getVA(0),
+ Cmd->Size, 1);
+ OS << Indent8 << Cmd->CommandString << '\n';
+ continue;
+ }
+ }
+ }
+// Prints one row of the cross reference table to stdout: A left-justified
+// in a 49-character column, followed by B.
+static void print(StringRef A, StringRef B) {
+ outs() << left_justify(A, 49) << " " << B << "\n";
+// Output a cross reference table to stdout. This is for --cref.
+// For each global symbol, we print out a file that defines the symbol
+// followed by files that use that symbol. Here is an example.
+// strlen /lib/x86_64-linux-gnu/libc.so.6
+// tools/lld/tools/lld/CMakeFiles/lld.dir/lld.cpp.o
+// lib/libLLVMSupport.a(PrettyStackTrace.cpp.o)
+// In this case, strlen is defined by libc.so.6 and used by other two
+// files.
+// Does nothing unless Config->Cref is set.
+void elf::writeCrossReferenceTable() {
+  if (!Config->Cref)
+    return;
+  // Collect symbols and files.
+  // Shared symbols are always recorded; defined symbols only when they are
+  // not local and either have no section or live in a live section.
+  MapVector<Symbol *, SetVector<InputFile *>> Map;
+  for (InputFile *File : ObjectFiles) {
+    for (Symbol *Sym : File->getSymbols()) {
+      if (isa<SharedSymbol>(Sym))
+        Map[Sym].insert(File);
+      if (auto *D = dyn_cast<Defined>(Sym))
+        if (!D->isLocal() && (!D->Section || D->Section->Live))
+          Map[D].insert(File);
+    }
+  }
+  // Print out a header.
+  outs() << "Cross Reference Table\n\n";
+  print("Symbol", "File");
+  // Print out a table.
+  // Iterate by reference: taking each entry by value would copy the
+  // SetVector of files for every symbol.
+  for (auto &KV : Map) {
+    Symbol *Sym = KV.first;
+    SetVector<InputFile *> &Files = KV.second;
+    // The first row names the file that Sym itself points at; the remaining
+    // rows list every other file in which the symbol was seen.
+    print(toString(*Sym), toString(Sym->File));
+    for (InputFile *File : Files)
+      if (File != Sym->File)
+        print("", toString(File));
+  }
diff --git a/contrib/llvm/tools/lld/ELF/MapFile.h b/contrib/llvm/tools/lld/ELF/MapFile.h
new file mode 100644
index 000000000000..0282425888b7
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/MapFile.h
@@ -0,0 +1,20 @@
+//===- MapFile.h ------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+namespace lld {
+namespace elf {
+void writeMapFile();
+void writeCrossReferenceTable();
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/MarkLive.cpp b/contrib/llvm/tools/lld/ELF/MarkLive.cpp
new file mode 100644
index 000000000000..8d0ec091c327
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/MarkLive.cpp
@@ -0,0 +1,324 @@
+//===- MarkLive.cpp -------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file implements --gc-sections, which is a feature to remove unused
+// sections from output. Unused sections are sections that are not reachable
+// from known GC-root symbols or sections. Naturally the feature is
+// implemented as a mark-sweep garbage collector.
+// Here's how it works. Each InputSectionBase has a "Live" bit. The bit is off
+// by default. Starting with GC-root symbols or sections, markLive function
+// defined in this file visits all reachable sections to set their Live
+// bits. Writer will then ignore sections whose Live bits are off, so that
+// such sections are not included into output.
+#include "MarkLive.h"
+#include "InputSection.h"
+#include "LinkerScript.h"
+#include "OutputSections.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "Target.h"
+#include "lld/Common/Memory.h"
+#include "lld/Common/Strings.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Object/ELF.h"
+#include <functional>
+#include <vector>
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::object;
+using namespace llvm::support::endian;
+using namespace lld;
+using namespace lld::elf;
+// Returns the addend of a REL-style relocation. REL records carry no
+// explicit addend, so it is read from the section contents at r_offset via
+// the target's getImplicitAddend().
+template <class ELFT>
+static typename ELFT::uint getAddend(InputSectionBase &Sec,
+ const typename ELFT::Rel &Rel) {
+ return Target->getImplicitAddend(Sec.data().begin() + Rel.r_offset,
+ Rel.getType(Config->IsMips64EL));
+// Returns the addend of a RELA-style relocation, which is stored in the
+// record itself. Sec is unused; it only keeps the two overloads parallel.
+template <class ELFT>
+static typename ELFT::uint getAddend(InputSectionBase &Sec,
+ const typename ELFT::Rela &Rel) {
+ return Rel.r_addend;
+// There are normally few input sections whose names are valid C
+// identifiers, so we just store a std::vector instead of a multimap.
+static DenseMap<StringRef, std::vector<InputSectionBase *>> CNamedSections;
+// Resolves the symbol targeted by relocation Rel of section Sec and calls Fn
+// with the input section (and offset into it) that the symbol refers to.
+// As side effects, the symbol is marked Used and, for a non-weak shared
+// symbol, the file that provides it is flagged as needed.
+template <class ELFT, class RelT>
+static void
+resolveReloc(InputSectionBase &Sec, RelT &Rel,
+ llvm::function_ref<void(InputSectionBase *, uint64_t)> Fn) {
+ Symbol &B = Sec.getFile<ELFT>()->getRelocTargetSym(Rel);
+ // If a symbol is referenced in a live section, it is used.
+ B.Used = true;
+ if (auto *SS = dyn_cast<SharedSymbol>(&B))
+ if (!SS->isWeak())
+ SS->getFile<ELFT>().IsNeeded = true;
+ if (auto *D = dyn_cast<Defined>(&B)) {
+ // Defined symbols that are not backed by an input section have nothing
+ // to keep alive.
+ auto *RelSec = dyn_cast_or_null<InputSectionBase>(D->Section);
+ if (!RelSec)
+ return;
+ // For section symbols the addend is added to the symbol value to form
+ // the offset passed to Fn.
+ uint64_t Offset = D->Value;
+ if (D->isSection())
+ Offset += getAddend<ELFT>(Sec, Rel);
+ Fn(RelSec, Offset);
+ return;
+ }
+ // A symbol that is not defined may be one of the __start_<name> /
+ // __stop_<name> names registered in CNamedSections; if so, report every
+ // section recorded under that name.
+ if (!B.isDefined())
+ for (InputSectionBase *Sec : CNamedSections.lookup(B.getName()))
+ Fn(Sec, 0);
+// Calls Fn for each section that Sec refers to via relocations, and then,
+// with offset 0, for each of Sec's DependentSections.
+template <class ELFT>
+static void
+forEachSuccessor(InputSection &Sec,
+ llvm::function_ref<void(InputSectionBase *, uint64_t)> Fn) {
+ // A section has either RELA or REL relocations; walk whichever applies.
+ if (Sec.AreRelocsRela) {
+ for (const typename ELFT::Rela &Rel : Sec.template relas<ELFT>())
+ resolveReloc<ELFT>(Sec, Rel, Fn);
+ } else {
+ for (const typename ELFT::Rel &Rel : Sec.template rels<ELFT>())
+ resolveReloc<ELFT>(Sec, Rel, Fn);
+ }
+ for (InputSectionBase *IS : Sec.DependentSections)
+ Fn(IS, 0);
+// The .eh_frame section is an unfortunate special case.
+// The section is divided in CIEs and FDEs and the relocations it can have are
+// * CIEs can refer to a personality function.
+// * FDEs can refer to a LSDA
+// * FDEs refer to the function they contain information about
+// The last kind of relocation cannot keep the referred section alive, or they
+// would keep everything alive in a common object file. In fact, each FDE is
+// alive if the section it refers to is alive.
+// To keep things simple, in here we just ignore the last relocation kind. The
+// other two keep the referred section alive.
+// A possible improvement would be to fully process .eh_frame in the middle of
+// the gc pass. With that we would be able to also gc some sections holding
+// LSDAs and personality functions if we found that they were unused.
+template <class ELFT, class RelTy>
+static void
+scanEhFrameSection(EhInputSection &EH, ArrayRef<RelTy> Rels,
+ llvm::function_ref<void(InputSectionBase *, uint64_t)> Fn) {
+ const endianness E = ELFT::TargetEndianness;
+ for (unsigned I = 0, N = EH.Pieces.size(); I < N; ++I) {
+ EhSectionPiece &Piece = EH.Pieces[I];
+ // A FirstRelocation of (unsigned)-1 marks a piece without relocations.
+ unsigned FirstRelI = Piece.FirstRelocation;
+ if (FirstRelI == (unsigned)-1)
+ continue;
+ // The 32-bit word at offset 4 of a piece is zero for a CIE.
+ if (read32<E>(Piece.data().data() + 4) == 0) {
+ // This is a CIE, we only need to worry about the first relocation. It is
+ // known to point to the personality function.
+ resolveReloc<ELFT>(EH, Rels[FirstRelI], Fn);
+ continue;
+ }
+ // This is a FDE. The relocations point to the described function or to
+ // a LSDA. We only need to keep the LSDA alive, so ignore anything that
+ // points to executable sections.
+ typename ELFT::uint PieceEnd = Piece.InputOff + Piece.Size;
+ for (unsigned I2 = FirstRelI, N2 = Rels.size(); I2 < N2; ++I2) {
+ const RelTy &Rel = Rels[I2];
+ // Stop at the first relocation that lies beyond this piece.
+ if (Rel.r_offset >= PieceEnd)
+ break;
+ // The target is reported with offset 0 regardless of the offset the
+ // relocation resolved to.
+ resolveReloc<ELFT>(EH, Rels[I2],
+ [&](InputSectionBase *Sec, uint64_t Offset) {
+ if (Sec && Sec != &InputSection::Discarded &&
+ !(Sec->Flags & SHF_EXECINSTR))
+ Fn(Sec, 0);
+ });
+ }
+ }
+// Convenience overload: does nothing for an .eh_frame section without
+// relocations, otherwise forwards to the worker above with the section's
+// RELA or REL records, whichever it has.
+template <class ELFT>
+static void
+scanEhFrameSection(EhInputSection &EH,
+ llvm::function_ref<void(InputSectionBase *, uint64_t)> Fn) {
+ if (!EH.NumRelocations)
+ return;
+ if (EH.AreRelocsRela)
+ scanEhFrameSection<ELFT>(EH, EH.template relas<ELFT>(), Fn);
+ else
+ scanEhFrameSection<ELFT>(EH, EH.template rels<ELFT>(), Fn);
+// Tells whether Sec must survive garbage collection because the loader
+// uses it directly: every SHT_NOTE section, plus any section whose name
+// begins with one of the prefixes listed below.
+template <class ELFT> static bool isReserved(InputSectionBase *Sec) {
+  if (Sec->Type == SHT_NOTE)
+    return true;
+  static const char *const Prefixes[] = {".ctors", ".dtors", ".init", ".fini",
+                                         ".jcr"};
+  StringRef Name = Sec->Name;
+  for (const char *Prefix : Prefixes)
+    if (Name.startswith(Prefix))
+      return true;
+  return false;
+// This is the main function of the garbage collector.
+// Starting from GC-root sections, this function visits all reachable
+// sections to set their "Live" bits.
+// The traversal uses an explicit worklist (Q); the order in which sections
+// are visited does not affect which sections end up live.
+template <class ELFT> static void doGcSections() {
+ SmallVector<InputSection *, 256> Q;
+ CNamedSections.clear();
+ // Marks Sec live and, if it is a regular InputSection seen for the first
+ // time, schedules it so that its own successors are visited later.
+ auto Enqueue = [&](InputSectionBase *Sec, uint64_t Offset) {
+ // Skip over discarded sections. This in theory shouldn't happen, because
+ // the ELF spec doesn't allow a relocation to point to a deduplicated
+ // COMDAT section directly. Unfortunately this happens in practice (e.g.
+ // .eh_frame) so we need to add a check.
+ if (Sec == &InputSection::Discarded)
+ return;
+ // Usually, a whole section is marked as live or dead, but in mergeable
+ // (splittable) sections, each piece of data has independent liveness bit.
+ // So we explicitly tell it which offset is in use.
+ if (auto *MS = dyn_cast<MergeInputSection>(Sec))
+ MS->getSectionPiece(Offset)->Live = true;
+ if (Sec->Live)
+ return;
+ Sec->Live = true;
+ // Add input section to the queue.
+ if (InputSection *S = dyn_cast<InputSection>(Sec))
+ Q.push_back(S);
+ };
+ // Enqueues the section that defines Sym, if Sym is non-null, is a Defined
+ // symbol and is backed by an input section.
+ auto MarkSymbol = [&](Symbol *Sym) {
+ if (auto *D = dyn_cast_or_null<Defined>(Sym))
+ if (auto *IS = dyn_cast_or_null<InputSectionBase>(D->Section))
+ Enqueue(IS, D->Value);
+ };
+ // Add GC root symbols.
+ MarkSymbol(Symtab->find(Config->Entry));
+ MarkSymbol(Symtab->find(Config->Init));
+ MarkSymbol(Symtab->find(Config->Fini));
+ for (StringRef S : Config->Undefined)
+ MarkSymbol(Symtab->find(S));
+ for (StringRef S : Script->ReferencedSymbols)
+ MarkSymbol(Symtab->find(S));
+ // Preserve externally-visible symbols if the symbols defined by this
+ // file can interpose other ELF files' symbols at runtime.
+ for (Symbol *S : Symtab->getSymbols())
+ if (S->includeInDynsym())
+ MarkSymbol(S);
+ // Preserve special sections and those which are specified in linker
+ // script KEEP command.
+ for (InputSectionBase *Sec : InputSections) {
+ // Mark .eh_frame sections as live because there are usually no relocations
+ // that point to .eh_frames. Otherwise, the garbage collector would drop
+ // all of them. We also want to preserve personality routines and LSDA
+ // referenced by .eh_frame sections, so we scan them for that here.
+ if (auto *EH = dyn_cast<EhInputSection>(Sec)) {
+ EH->Live = true;
+ scanEhFrameSection<ELFT>(*EH, Enqueue);
+ }
+ // SHF_LINK_ORDER sections are never treated as roots here.
+ if (Sec->Flags & SHF_LINK_ORDER)
+ continue;
+ if (isReserved<ELFT>(Sec) || Script->shouldKeep(Sec)) {
+ Enqueue(Sec, 0);
+ } else if (isValidCIdentifier(Sec->Name)) {
+ // Remember sections whose names are valid C identifiers so that a
+ // reference to __start_<name> or __stop_<name> can keep them alive
+ // (see resolveReloc).
+ CNamedSections[Saver.save("__start_" + Sec->Name)].push_back(Sec);
+ CNamedSections[Saver.save("__stop_" + Sec->Name)].push_back(Sec);
+ }
+ }
+ // Mark all reachable sections.
+ while (!Q.empty())
+ forEachSuccessor<ELFT>(*Q.pop_back_val(), Enqueue);
+// Before calling this function, Live bits are off for all
+// input sections. This function turns some or all of them on
+// so that they are emitted to the output file.
+template <class ELFT> void elf::markLive() {
+ if (!Config->GcSections) {
+ // If -gc-sections is missing, no sections are removed.
+ for (InputSectionBase *Sec : InputSections)
+ Sec->Live = true;
+ // If a DSO defines a symbol referenced in a regular object, it is needed.
+ for (Symbol *Sym : Symtab->getSymbols())
+ if (auto *S = dyn_cast<SharedSymbol>(Sym))
+ if (S->IsUsedInRegularObj && !S->isWeak())
+ S->getFile<ELFT>().IsNeeded = true;
+ return;
+ }
+ // The -gc-sections option works only for SHF_ALLOC sections
+ // (sections that are memory-mapped at runtime). So we can
+ // unconditionally make non-SHF_ALLOC sections alive except
+ // SHF_LINK_ORDER and SHT_REL/SHT_RELA sections.
+ //
+ // Usually, non-SHF_ALLOC sections are not removed even if they are
+ // unreachable through relocations because reachability is not
+ // a good signal whether they are garbage or not (e.g. there is
+ // usually no section referring to a .comment section, but we
+ // want to keep it.).
+ //
+ // Note on SHF_LINK_ORDER: Such sections contain metadata and they
+ // have a reverse dependency on the InputSection they are linked with.
+ // We are able to garbage collect them.
+ //
+ // Note on SHT_REL{,A}: Such sections reach here only when -r
+ // or -emit-reloc were given. And they are subject of garbage
+ // collection because, if we remove a text section, we also
+ // remove its relocation section.
+ for (InputSectionBase *Sec : InputSections) {
+ bool IsAlloc = (Sec->Flags & SHF_ALLOC);
+ bool IsLinkOrder = (Sec->Flags & SHF_LINK_ORDER);
+ bool IsRel = (Sec->Type == SHT_REL || Sec->Type == SHT_RELA);
+ if (!IsAlloc && !IsLinkOrder && !IsRel)
+ Sec->Live = true;
+ }
+ // Follow the graph to mark all live sections.
+ doGcSections<ELFT>();
+ // Report garbage-collected sections.
+ if (Config->PrintGcSections)
+ for (InputSectionBase *Sec : InputSections)
+ if (!Sec->Live)
+ message("removing unused section " + toString(Sec));
+template void elf::markLive<ELF32LE>();
+template void elf::markLive<ELF32BE>();
+template void elf::markLive<ELF64LE>();
+template void elf::markLive<ELF64BE>();
diff --git a/contrib/llvm/tools/lld/ELF/MarkLive.h b/contrib/llvm/tools/lld/ELF/MarkLive.h
new file mode 100644
index 000000000000..c9b99add34de
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/MarkLive.h
@@ -0,0 +1,21 @@
+//===- MarkLive.h -----------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+namespace lld {
+namespace elf {
+template <class ELFT> void markLive();
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/Options.td b/contrib/llvm/tools/lld/ELF/Options.td
new file mode 100644
index 000000000000..bc203193661b
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Options.td
@@ -0,0 +1,521 @@
+include "llvm/Option/OptParser.td"
+// For options whose names are multiple letters, either one dash or
+// two can precede the option name except those that start with 'o'.
+// F: a flag (takes no value) spelled with either "--" or "-".
+class F<string name>: Flag<["--", "-"], name>;
+// J: an option whose value is joined directly to its name, spelled with
+// either "--" or "-".
+class J<string name>: Joined<["--", "-"], name>;
+// Eq: defines two records for one option. NAME takes its value as a separate
+// argument ("--name value"); NAME_eq is the joined "--name=value" spelling,
+// aliased to NAME. The help text is attached to the "=" record.
+multiclass Eq<string name, string help> {
+ def NAME: Separate<["--", "-"], name>;
+ def NAME # _eq: Joined<["--", "-"], name # "=">, Alias<!cast<Separate>(NAME)>,
+ HelpText<help>;
+// B: defines a boolean pair of flags. NAME is "--name" documented by help1;
+// no_NAME is "--no-name" documented by help2.
+multiclass B<string name, string help1, string help2> {
+ def NAME: Flag<["--", "-"], name>, HelpText<help1>;
+ def no_ # NAME: Flag<["--", "-"], "no-" # name>, HelpText<help2>;
+// Each `defm X: Eq<...>` below yields both the "--x value" and "--x=value"
+// spellings; each `defm X: B<...>` yields both "--x" and "--no-x".
+defm auxiliary: Eq<"auxiliary", "Set DT_AUXILIARY field to the specified name">;
+def Bsymbolic: F<"Bsymbolic">, HelpText<"Bind defined symbols locally">;
+def Bsymbolic_functions: F<"Bsymbolic-functions">,
+ HelpText<"Bind defined function symbols locally">;
+def Bdynamic: F<"Bdynamic">, HelpText<"Link against shared libraries (default)">;
+def Bstatic: F<"Bstatic">, HelpText<"Do not link against shared libraries">;
+// Bare --build-id and the --build-id=<style> form are two separate records.
+def build_id: F<"build-id">, HelpText<"Alias for --build-id=fast">;
+def build_id_eq: J<"build-id=">, HelpText<"Generate build ID note">,
+ MetaVarName<"[fast,md5,sha,uuid,0x<hexstring>]">;
+defm check_sections: B<"check-sections",
+ "Check section addresses for overlaps (default)",
+ "Do not check section addresses for overlaps">;
+defm compress_debug_sections:
+ Eq<"compress-debug-sections", "Compress DWARF debug sections">,
+ MetaVarName<"[none,zlib]">;
+defm defsym: Eq<"defsym", "Define a symbol alias">, MetaVarName<"<symbol>=<value>">;
+defm split_stack_adjust_size
+ : Eq<"split-stack-adjust-size",
+ "Specify adjustment to stack size when a split-stack function calls a "
+ "non-split-stack function">,
+ MetaVarName<"<value>">;
+defm library_path:
+ Eq<"library-path", "Add a directory to the library search path">, MetaVarName<"<dir>">;
+// -O is accepted with a single dash only; its value may be joined or separate.
+def O: JoinedOrSeparate<["-"], "O">, HelpText<"Optimize output file size">;
+// Shorthands for --section-start on the .bss, .data and .text sections.
+defm Tbss: Eq<"Tbss", "Same as --section-start with .bss as the sectionname">;
+defm Tdata: Eq<"Tdata", "Same as --section-start with .data as the sectionname">;
+defm Ttext: Eq<"Ttext", "Same as --section-start with .text as the sectionname">;
+// The remaining general options follow in roughly alphabetical order.
+defm allow_multiple_definition: B<"allow-multiple-definition",
+ "Allow multiple definitions",
+ "Do not allow multiple definitions (default)">;
+defm apply_dynamic_relocs: B<"apply-dynamic-relocs",
+ "Apply link-time values for dynamic relocations",
+ "Do not apply link-time values for dynamic relocations (default)">;
+defm as_needed: B<"as-needed",
+ "Only set DT_NEEDED for shared libraries if used",
+ "Always set DT_NEEDED for shared libraries (default)">;
+defm call_graph_ordering_file:
+ Eq<"call-graph-ordering-file", "Layout sections to optimize the given callgraph">;
+defm call_graph_profile_sort: B<"call-graph-profile-sort",
+ "Reorder sections with call graph profile (default)",
+ "Do not reorder sections with call graph profile">;
+// -chroot doesn't have a help text because it is an internal option.
+def chroot: Separate<["--", "-"], "chroot">;
+// Bare --color-diagnostics and the --color-diagnostics=<mode> form are two
+// separate records.
+def color_diagnostics: F<"color-diagnostics">,
+ HelpText<"Alias for --color-diagnostics=always">;
+def color_diagnostics_eq: J<"color-diagnostics=">,
+ HelpText<"Use colors in diagnostics">,
+ MetaVarName<"[auto,always,never]">;
+defm cref: B<"cref",
+ "Output cross reference table",
+ "Do not output cross reference table">;
+defm define_common: B<"define-common",
+ "Assign space to common symbols",
+ "Do not assign space to common symbols">;
+defm demangle: B<"demangle",
+ "Demangle symbol names (default)",
+ "Do not demangle symbol names">;
+def disable_new_dtags: F<"disable-new-dtags">,
+ HelpText<"Disable new dynamic tags">;
+def discard_all: F<"discard-all">, HelpText<"Delete all local symbols">;
+def discard_locals: F<"discard-locals">,
+ HelpText<"Delete temporary local symbols">;
+def discard_none: F<"discard-none">,
+ HelpText<"Keep all symbols in the symbol table">;
+defm dynamic_linker: Eq<"dynamic-linker", "Which dynamic linker to use">;
+defm dynamic_list: Eq<"dynamic-list", "Read a list of dynamic symbols">;
+defm eh_frame_hdr: B<"eh-frame-hdr",
+ "Request creation of .eh_frame_hdr section and PT_GNU_EH_FRAME segment header",
+ "Do not create .eh_frame_hdr section">;
+def emit_relocs: F<"emit-relocs">, HelpText<"Generate relocations in output">;
+def enable_new_dtags: F<"enable-new-dtags">,
+ HelpText<"Enable new dynamic tags (default)">;
+def end_group: F<"end-group">,
+ HelpText<"Ignored for compatibility with GNU unless you pass --warn-backrefs">;
+def end_lib: F<"end-lib">,
+ HelpText<"End a grouping of objects that should be treated as if they were together in an archive">;
+defm entry: Eq<"entry", "Name of entry point symbol">,
+ MetaVarName<"<entry>">;
+defm error_limit:
+ Eq<"error-limit", "Maximum number of errors to emit before stopping (0 = no limit)">;
+def error_unresolved_symbols: F<"error-unresolved-symbols">,
+ HelpText<"Report unresolved symbols as errors">;
+defm exclude_libs: Eq<"exclude-libs", "Exclude static libraries from automatic export">;
+defm execute_only: B<"execute-only",
+ "Mark executable sections unreadable",
+ "Mark executable sections readable (default)">;
+defm export_dynamic: B<"export-dynamic",
+ "Put symbols in the dynamic symbol table",
+ "Do not put symbols in the dynamic symbol table (default)">;
+defm export_dynamic_symbol:
+ Eq<"export-dynamic-symbol", "Put a symbol in the dynamic symbol table">;
+defm fatal_warnings: B<"fatal-warnings",
+ "Treat warnings as errors",
+ "Do not treat warnings as errors (default)">;
+defm filter: Eq<"filter", "Set DT_FILTER field to the specified name">;
+defm fini: Eq<"fini", "Specify a finalizer function">, MetaVarName<"<symbol>">;
+def fix_cortex_a53_843419: F<"fix-cortex-a53-843419">,
+ HelpText<"Apply fixes for AArch64 Cortex-A53 erratum 843419">;
+defm format: Eq<"format", "Change the input format of the inputs following this option">,
+ MetaVarName<"[default,elf,binary]">;
+defm gc_sections: B<"gc-sections",
+ "Enable garbage collection of unused sections",
+ "Disable garbage collection of unused sections (default)">;
+defm gdb_index: B<"gdb-index",
+ "Generate .gdb_index section",
+ "Do not generate .gdb_index section (default)">;
+defm gnu_unique: B<"gnu-unique",
+ "Enable STB_GNU_UNIQUE symbol binding (default)",
+ "Disable STB_GNU_UNIQUE symbol binding">;
+defm hash_style: Eq<"hash-style", "Specify hash style (sysv, gnu or both)">;
+def help: F<"help">, HelpText<"Print option help">;
+// Each --icf mode is declared as its own complete flag; there is no generic
+// "--icf=<value>" record in this group.
+def icf_all: F<"icf=all">, HelpText<"Enable identical code folding">;
+def icf_safe: F<"icf=safe">, HelpText<"Enable safe identical code folding">;
+def icf_none: F<"icf=none">, HelpText<"Disable identical code folding (default)">;
+def ignore_function_address_equality: F<"ignore-function-address-equality">,
+ HelpText<"lld can break the address equality of functions">;
+def ignore_data_address_equality: F<"ignore-data-address-equality">,
+ HelpText<"lld can break the address equality of data">;
+defm image_base: Eq<"image-base", "Set the base address">;
+defm init: Eq<"init", "Specify an initializer function">,
+ MetaVarName<"<symbol>">;
+defm just_symbols: Eq<"just-symbols", "Just link symbols">;
+defm keep_unique: Eq<"keep-unique", "Do not fold this symbol during ICF">;
+defm library: Eq<"library", "Root name of library to use">,
+ MetaVarName<"<libName>">;
+// -m is accepted with a single dash only; its value may be joined or separate.
+def m: JoinedOrSeparate<["-"], "m">, HelpText<"Set target emulation">;
+defm Map: Eq<"Map", "Print a link map to the specified file">;
+defm merge_exidx_entries: B<"merge-exidx-entries",
+ "Enable merging .ARM.exidx entries (default)",
+ "Disable merging .ARM.exidx entries">;
+def nostdlib: F<"nostdlib">,
+ HelpText<"Only search directories specified on the command line">;
+def no_color_diagnostics: F<"no-color-diagnostics">,
+ HelpText<"Do not use colors in diagnostics">;
+def no_dynamic_linker: F<"no-dynamic-linker">,
+ HelpText<"Inhibit output of .interp section">;
+def noinhibit_exec: F<"noinhibit-exec">,
+ HelpText<"Retain the executable output file whenever it is still usable">;
+def no_omagic: F<"no-omagic">, MetaVarName<"<magic>">,
+ HelpText<"Do not set the text data sections to be writable">;
+def no_rosegment: F<"no-rosegment">,
+ HelpText<"Do not put read-only non-executable sections in their own segment">;
+def no_undefined: F<"no-undefined">,
+ HelpText<"Report unresolved symbols even if the linker is creating a shared library">;
+// Options starting with 'o' do not accept both dash counts: -o takes a single
+// dash only, while --oformat and --omagic require two dashes.
+def o: JoinedOrSeparate<["-"], "o">, MetaVarName<"<path>">,
+ HelpText<"Path to file to write output">;
+def oformat: Separate<["--"], "oformat">, MetaVarName<"<format>">,
+ HelpText<"Specify the binary format for the output object file">;
+def omagic: Flag<["--"], "omagic">, MetaVarName<"<magic>">,
+ HelpText<"Set the text and data sections to be readable and writable">;
+defm orphan_handling:
+ Eq<"orphan-handling", "Control how orphan sections are handled when linker script used">;
+defm pack_dyn_relocs:
+ Eq<"pack-dyn-relocs", "Pack dynamic relocations in the given format">,
+ MetaVarName<"[none,android,relr,android+relr]">;
+// B<> takes two help strings: the first documents --use-android-relr-tags,
+// the second documents --no-use-android-relr-tags (the default).
+defm use_android_relr_tags: B<"use-android-relr-tags",
+    "Use SHT_ANDROID_RELR / DT_ANDROID_RELR* tags instead of SHT_RELR / DT_RELR*",
+    "Use SHT_RELR / DT_RELR* tags (default)">;
+def pic_veneer: F<"pic-veneer">,
+ HelpText<"Always generate position independent thunks (veneers)">;
+defm pie: B<"pie",
+ "Create a position independent executable",
+ "Do not create a position independent executable (default)">;
+defm print_gc_sections: B<"print-gc-sections",
+ "List removed unused sections",
+ "Do not list removed unused sections (default)">;
+defm print_icf_sections: B<"print-icf-sections",
+ "List identical folded sections",
+ "Do not list identical folded sections (default)">;
+def pop_state: F<"pop-state">,
+ HelpText<"Undo the effect of -push-state">;
+def push_state: F<"push-state">,
+ HelpText<"Save the current state of -as-needed, -static and -whole-archive">;
+def print_map: F<"print-map">,
+ HelpText<"Print a link map to the standard output">;
+defm reproduce: Eq<"reproduce", "Dump linker invocation and input files for debugging">;
+defm rpath: Eq<"rpath", "Add a DT_RUNPATH to the output">;
+def relocatable: F<"relocatable">, HelpText<"Create relocatable object file">;
+defm retain_symbols_file:
+ Eq<"retain-symbols-file", "Retain only the symbols listed in the file">,
+ MetaVarName<"<file>">;
+defm script: Eq<"script", "Read linker script">;
+defm section_start: Eq<"section-start", "Set address of section">,
+ MetaVarName<"<address>">;
+def shared: F<"shared">, HelpText<"Build a shared object">;
+defm soname: Eq<"soname", "Set DT_SONAME">;
+defm sort_section:
+ Eq<"sort-section", "Specifies sections sorting rule when linkerscript is used">;
+def start_group: F<"start-group">,
+ HelpText<"Ignored for compatibility with GNU unless you pass --warn-backrefs">;
+def start_lib: F<"start-lib">,
+ HelpText<"Start a grouping of objects that should be treated as if they were together in an archive">;
+def strip_all: F<"strip-all">, HelpText<"Strip all symbols">;
+def strip_debug: F<"strip-debug">, HelpText<"Strip debugging information">;
+defm symbol_ordering_file:
+ Eq<"symbol-ordering-file", "Layout sections to place symbols in the order specified by symbol ordering file">;
+defm sysroot: Eq<"sysroot", "Set the system root">;
+// Options selecting how the R_ARM_TARGET1 / R_ARM_TARGET2 relocations are
+// interpreted.
+def target1_rel: F<"target1-rel">, HelpText<"Interpret R_ARM_TARGET1 as R_ARM_REL32">;
+def target1_abs: F<"target1-abs">, HelpText<"Interpret R_ARM_TARGET1 as R_ARM_ABS32 (default)">;
+defm target2:
+ Eq<"target2", "Interpret R_ARM_TARGET2 as <type>, where <type> is one of rel, abs, or got-rel">,
+ MetaVarName<"<type>">;
+defm threads: B<"threads",
+ "Run the linker multi-threaded (default)",
+ "Do not run the linker multi-threaded">;
+defm toc_optimize : B<"toc-optimize",
+ "(PowerPC64) Enable TOC related optimizations (default)",
+ "(PowerPC64) Disable TOC related optimizations">;
+def trace: F<"trace">, HelpText<"Print the names of the input files">;
+defm trace_symbol: Eq<"trace-symbol", "Trace references to symbols">;
+defm undefined: Eq<"undefined", "Force undefined symbol during linking">,
+ MetaVarName<"<symbol>">;
+defm unresolved_symbols:
+ Eq<"unresolved-symbols", "Determine how to handle unresolved symbols">;
+defm undefined_version: B<"undefined-version",
+ "Allow unused version in version script (default)",
+ "Report version scripts that refer undefined symbols">;
+defm rsp_quoting: Eq<"rsp-quoting", "Quoting style for response files">,
+ MetaVarName<"[posix,windows]">;
+// -v is single-dash only and is a separate record from --version.
+def v: Flag<["-"], "v">, HelpText<"Display the version number">;
+def verbose: F<"verbose">, HelpText<"Verbose mode">;
+def version: F<"version">, HelpText<"Display the version number and exit">;
+defm version_script: Eq<"version-script", "Read a version script">;
+defm warn_backrefs: B<"warn-backrefs",
+ "Warn about backward symbol references to fetch archive members",
+ "Do not warn about backward symbol references to fetch archive members (default)">;
+defm warn_common: B<"warn-common",
+ "Warn about duplicate common symbols",
+ "Do not warn about duplicate common symbols (default)">;
+defm warn_ifunc_textrel: B<"warn-ifunc-textrel",
+ "Warn about using ifunc symbols with text relocations",
+ "Do not warn about using ifunc symbols with text relocations (default)">;
+defm warn_symbol_ordering: B<"warn-symbol-ordering",
+ "Warn about problems with the symbol ordering file (default)",
+ "Do not warn about problems with the symbol ordering file">;
+def warn_unresolved_symbols: F<"warn-unresolved-symbols">,
+ HelpText<"Report unresolved symbols as warnings">;
+defm whole_archive: B<"whole-archive",
+ "Force load of all members in a static library",
+ "Do not force load of all members in a static library (default)">;
+defm wrap: Eq<"wrap", "Use wrapper functions for symbol">,
+ MetaVarName<"<symbol>=<symbol>">;
+// -z is single-dash only; its value may be joined or separate.
+def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">,
+ HelpText<"Linker option extensions">;
+// Aliases: anonymous (`def:`) records that forward, via Alias<>, to one of the
+// named options defined above. Most are single-letter or alternative
+// spellings; the help text of each names the option it forwards to.
+def: Separate<["-"], "f">, Alias<auxiliary>, HelpText<"Alias for --auxiliary">;
+def: F<"call_shared">, Alias<Bdynamic>, HelpText<"Alias for --Bdynamic">;
+def: F<"dy">, Alias<Bdynamic>, HelpText<"Alias for --Bdynamic">;
+def: F<"dn">, Alias<Bstatic>, HelpText<"Alias for --Bstatic">;
+def: F<"non_shared">, Alias<Bstatic>, HelpText<"Alias for --Bstatic">;
+def: F<"static">, Alias<Bstatic>, HelpText<"Alias for --Bstatic">;
+def: Flag<["-"], "d">, Alias<define_common>, HelpText<"Alias for --define-common">;
+def: F<"dc">, Alias<define_common>, HelpText<"Alias for --define-common">;
+def: F<"dp">, Alias<define_common>, HelpText<"Alias for --define-common">;
+def: Flag<["-"], "x">, Alias<discard_all>, HelpText<"Alias for --discard-all">;
+def: Flag<["-"], "X">, Alias<discard_locals>, HelpText<"Alias for --discard-locals">;
+def: Flag<["-"], "q">, Alias<emit_relocs>, HelpText<"Alias for --emit-relocs">;
+def: Flag<["-"], ")">, Alias<end_group>, HelpText<"Alias for --end-group">;
+def: JoinedOrSeparate<["-"], "e">, Alias<entry>, HelpText<"Alias for --entry">;
+def: Flag<["-"], "E">, Alias<export_dynamic>, HelpText<"Alias for --export-dynamic">;
+def: Separate<["-"], "F">, Alias<filter>, HelpText<"Alias for --filter">;
+def: Separate<["-"], "b">, Alias<format>, HelpText<"Alias for --format">;
+def: JoinedOrSeparate<["-"], "l">, Alias<library>, HelpText<"Alias for --library">;
+def: JoinedOrSeparate<["-"], "L">, Alias<library_path>, HelpText<"Alias for --library-path">;
+def: F<"no-pic-executable">, Alias<no_pie>, HelpText<"Alias for --no-pie">;
+def: Flag<["-"], "N">, Alias<omagic>, HelpText<"Alias for --omagic">;
+// Long spellings of -o; like the other options starting with 'o', they
+// require two dashes.
+def: Joined<["--"], "output=">, Alias<o>, HelpText<"Alias for -o">;
+def: Separate<["--"], "output">, Alias<o>, HelpText<"Alias for -o">;
+def: F<"pic-executable">, Alias<pie>, HelpText<"Alias for --pie">;
+def: Flag<["-"], "M">, Alias<print_map>, HelpText<"Alias for --print-map">;
+def: Flag<["-"], "r">, Alias<relocatable>, HelpText<"Alias for --relocatable">;
+def: JoinedOrSeparate<["-"], "R">, Alias<rpath>, HelpText<"Alias for --rpath">;
+def: JoinedOrSeparate<["-"], "T">, Alias<script>, HelpText<"Alias for --script">;
+def: F<"Bshareable">, Alias<shared>, HelpText<"Alias for --shared">;
+def: JoinedOrSeparate<["-"], "h">, Alias<soname>, HelpText<"Alias for --soname">;
+def: Flag<["-"], "(">, Alias<start_group>, HelpText<"Alias for --start-group">;
+def: Flag<["-"], "s">, Alias<strip_all>, HelpText<"Alias for --strip-all">;
+def: Flag<["-"], "S">, Alias<strip_debug>, HelpText<"Alias for --strip-debug">;
+def: Flag<["-"], "t">, Alias<trace>, HelpText<"Alias for --trace">;
+def: JoinedOrSeparate<["-"], "y">, Alias<trace_symbol>, HelpText<"Alias for --trace-symbol">;
+// Both the separate and the "=" spellings of -Ttext-segment forward to Ttext.
+def: Separate<["-", "--"], "Ttext-segment">, Alias<Ttext>, HelpText<"Alias for --Ttext">;
+def: Joined<["-", "--"], "Ttext-segment=">, Alias<Ttext>, HelpText<"Alias for --Ttext">;
+def: JoinedOrSeparate<["-"], "u">, Alias<undefined>, HelpText<"Alias for --undefined">;
+def: Flag<["-"], "V">, Alias<version>, HelpText<"Alias for --version">;
+// LTO-related options.
+def lto_aa_pipeline: J<"lto-aa-pipeline=">,
+ HelpText<"AA pipeline to run during LTO. Used in conjunction with -lto-newpm-passes">;
+def lto_debug_pass_manager: F<"lto-debug-pass-manager">,
+ HelpText<"Debug new pass manager">;
+def lto_new_pass_manager: F<"lto-new-pass-manager">,
+ HelpText<"Use new pass manager">;
+def lto_newpm_passes: J<"lto-newpm-passes=">,
+ HelpText<"Passes to run during LTO">;
+// Note: the level is joined directly to the name ("-lto-O2"), without "=".
+def lto_O: J<"lto-O">, MetaVarName<"<opt-level>">,
+ HelpText<"Optimization level for LTO">;
+def lto_partitions: J<"lto-partitions=">,
+ HelpText<"Number of LTO codegen partitions">;
+def lto_sample_profile: J<"lto-sample-profile=">,
+ HelpText<"Sample profile file path">;
+// Declared without help text.
+def disable_verify: F<"disable-verify">;
+defm mllvm: Eq<"mllvm", "Additional arguments to forward to LLVM's option processing">;
+def opt_remarks_filename: Separate<["--"], "opt-remarks-filename">,
+ HelpText<"YAML output file for optimization remarks">;
+def opt_remarks_with_hotness: Flag<["--"], "opt-remarks-with-hotness">,
+ HelpText<"Include hotness information in the optimization remarks file">;
+defm plugin_opt: Eq<"plugin-opt", "specifies LTO options for compatibility with GNU linkers">;
+// Declared without help text.
+def save_temps: F<"save-temps">;
+def thinlto_cache_dir: J<"thinlto-cache-dir=">,
+ HelpText<"Path to ThinLTO cached object file directory">;
+defm thinlto_cache_policy: Eq<"thinlto-cache-policy", "Pruning policy for the ThinLTO cache">;
+def thinlto_jobs: J<"thinlto-jobs=">, HelpText<"Number of ThinLTO jobs">;
+// "-plugin-opt=<...>" spellings. The anonymous records alias the options
+// declared above; the named plugin_opt_* records are options of their own
+// that exist only in plugin-opt form and carry no help text, except for
+// plugin_opt_dwo_dir_eq, which has one.
+def: J<"plugin-opt=O">, Alias<lto_O>, HelpText<"Alias for -lto-O">;
+def: F<"plugin-opt=debug-pass-manager">,
+ Alias<lto_debug_pass_manager>, HelpText<"Alias for -lto-debug-pass-manager">;
+def: F<"plugin-opt=disable-verify">, Alias<disable_verify>, HelpText<"Alias for -disable-verify">;
+def plugin_opt_dwo_dir_eq: J<"plugin-opt=dwo_dir=">,
+ HelpText<"Directory to store .dwo files when LTO and debug fission are used">;
+def plugin_opt_emit_llvm: F<"plugin-opt=emit-llvm">;
+def: J<"plugin-opt=jobs=">, Alias<thinlto_jobs>, HelpText<"Alias for -thinlto-jobs">;
+def: J<"plugin-opt=lto-partitions=">, Alias<lto_partitions>, HelpText<"Alias for -lto-partitions">;
+def plugin_opt_mcpu_eq: J<"plugin-opt=mcpu=">;
+def: F<"plugin-opt=new-pass-manager">,
+ Alias<lto_new_pass_manager>, HelpText<"Alias for -lto-new-pass-manager">;
+def plugin_opt_obj_path_eq: J<"plugin-opt=obj-path=">;
+def: J<"plugin-opt=sample-profile=">,
+ Alias<lto_sample_profile>, HelpText<"Alias for -lto-sample-profile">;
+def: F<"plugin-opt=save-temps">, Alias<save_temps>, HelpText<"Alias for -save-temps">;
+def plugin_opt_thinlto_emit_imports_files: F<"plugin-opt=thinlto-emit-imports-files">;
+def plugin_opt_thinlto_index_only: F<"plugin-opt=thinlto-index-only">;
+def plugin_opt_thinlto_index_only_eq: J<"plugin-opt=thinlto-index-only=">;
+def plugin_opt_thinlto_object_suffix_replace_eq: J<"plugin-opt=thinlto-object-suffix-replace=">;
+def plugin_opt_thinlto_prefix_replace_eq: J<"plugin-opt=thinlto-prefix-replace=">;
+// Ignore LTO plugin-related options.
+// clang -flto passes -plugin and -plugin-opt to the linker. This is required
+// for ld.gold and ld.bfd to get LTO working. But it's not for lld which doesn't
+// rely on a plugin. Instead of detecting which linker is used on clang side we
+// just ignore the option on lld side as it's easier. In fact, the linker could
+// be called 'ld' and understanding which linker is used would require parsing of
+// --version output.
+defm plugin: Eq<"plugin", "Ignored for compatibility with GNU linkers">;
+// The records below are Joined options, so each one matches any argument that
+// starts with the listed prefix. None has help text.
+def plugin_opt_fresolution_eq: J<"plugin-opt=-fresolution=">;
+def plugin_opt_pass_through_eq: J<"plugin-opt=-pass-through=">;
+def plugin_opt_thinlto: J<"plugin-opt=thinlto">;
+def plugin_opt_slash: J<"plugin-opt=/">;
+// Options listed below are silently ignored for now for compatibility.
+// They are anonymous records without help text; the ones that take a value
+// (rpath-link, -G) are declared so that the value is consumed as well.
+def: F<"allow-shlib-undefined">;
+def: F<"detect-odr-violations">;
+def: Flag<["-"], "g">;
+def: F<"long-plt">;
+def: F<"no-add-needed">;
+def: F<"no-allow-shlib-undefined">;
+def: F<"no-copy-dt-needed-entries">;
+def: F<"no-ctors-in-init-array">;
+def: F<"no-keep-memory">;
+def: F<"no-mmap-output-file">;
+def: F<"no-warn-mismatch">;
+def: Separate<["--", "-"], "rpath-link">;
+def: J<"rpath-link=">;
+def: F<"sort-common">;
+def: F<"stats">;
+def: F<"warn-execstack">;
+def: F<"warn-once">;
+def: F<"warn-shared-textrel">;
+def: F<"EB">;
+def: F<"EL">;
+def: JoinedOrSeparate<["-"], "G">;
+def: F<"Qy">;
+// Hidden option used for testing MIPS multi-GOT implementation.
+// It keeps a help string but is tagged HelpHidden.
+defm mips_got_size:
+ Eq<"mips-got-size", "Max size of a single MIPS GOT. 0x10000 by default.">,
+ Flags<[HelpHidden]>;
diff --git a/contrib/llvm/tools/lld/ELF/OutputSections.cpp b/contrib/llvm/tools/lld/ELF/OutputSections.cpp
new file mode 100644
index 000000000000..c1442c078736
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/OutputSections.cpp
@@ -0,0 +1,437 @@
+//===- OutputSections.cpp -------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "OutputSections.h"
+#include "Config.h"
+#include "LinkerScript.h"
+#include "SymbolTable.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+#include "lld/Common/Memory.h"
+#include "lld/Common/Strings.h"
+#include "lld/Common/Threads.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/Support/Compression.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/SHA1.h"
+using namespace llvm;
+using namespace llvm::dwarf;
+using namespace llvm::object;
+using namespace llvm::support::endian;
+using namespace llvm::ELF;
+using namespace lld;
+using namespace lld::elf;
+// Definitions of the Out:: members and of the global list of output
+// sections. None of them has an explicit initializer here.
+uint8_t Out::First;
+PhdrEntry *Out::TlsPhdr;
+OutputSection *Out::ElfHeader;
+OutputSection *Out::ProgramHeaders;
+OutputSection *Out::PreinitArray;
+OutputSection *Out::InitArray;
+OutputSection *Out::FiniArray;
+std::vector<OutputSection *> elf::OutputSections;
+// Translates this section's SHF_* flags into program-header (PF_*) permission
+// bits and returns them.
+uint32_t OutputSection::getPhdrFlags() const {
+  // Readable unless this is an ARM section marked SHF_ARM_PURECODE.
+  bool PureCode = Config->EMachine == EM_ARM && (Flags & SHF_ARM_PURECODE);
+  uint32_t Perms = 0;
+  if (!PureCode)
+    Perms |= PF_R;
+  if (Flags & SHF_WRITE)
+    Perms |= PF_W;
+  if (Flags & SHF_EXECINSTR)
+    Perms |= PF_X;
+  return Perms;
+// Copies this output section's attributes into the ELF section header Shdr.
+template <class ELFT>
+void OutputSection::writeHeaderTo(typename ELFT::Shdr *Shdr) {
+  // The assignments are independent; they are listed in the order the fields
+  // appear in an ELF section header.
+  Shdr->sh_name = ShName;
+  Shdr->sh_type = Type;
+  Shdr->sh_flags = Flags;
+  Shdr->sh_addr = Addr;
+  Shdr->sh_offset = Offset;
+  Shdr->sh_size = Size;
+  Shdr->sh_link = Link;
+  Shdr->sh_info = Info;
+  Shdr->sh_addralign = Alignment;
+  Shdr->sh_entsize = Entsize;
+// Creates an output section with the given name, type and flags. Entry size,
+// info and link start at 0 and alignment at 1.
+OutputSection::OutputSection(StringRef Name, uint32_t Type, uint64_t Flags)
+ : BaseCommand(OutputSectionKind),
+ SectionBase(Output, Name, Flags, /*Entsize*/ 0, /*Alignment*/ 1, Type,
+ /*Info*/ 0, /*Link*/ 0) {
+ // A new output section is not live; addSection() sets Live when the first
+ // input section is added.
+ Live = false;
+// We allow sections of the types listed below to be merged into a
+// single progbits section. This is typically done by linker
+// scripts. Merging nobits and progbits will force disk space
+// to be allocated for nobits sections. Other ones don't require
+// any special treatment on top of progbits, so there doesn't
+// seem to be a harm in merging them.
+//
+// All three array section types (.init_array, .preinit_array and
+// .fini_array) are accepted, not just SHT_INIT_ARRAY.
+static bool canMergeToProgbits(unsigned Type) {
+  return Type == SHT_NOBITS || Type == SHT_PROGBITS || Type == SHT_INIT_ARRAY ||
+         Type == SHT_PREINIT_ARRAY || Type == SHT_FINI_ARRAY ||
+         Type == SHT_NOTE;
+// Adds the input section IS to this output section: checks that IS is
+// compatible with what is already there, merges its type, flags, alignment
+// and entry size into the output section's attributes, and appends it to the
+// last InputSectionDescription unless it was already assigned.
+void OutputSection::addSection(InputSection *IS) {
+  if (!Live) {
+    // If IS is the first section to be added to this section,
+    // initialize Type, Entsize and flags from IS.
+    Live = true;
+    Type = IS->Type;
+    Entsize = IS->Entsize;
+    Flags = IS->Flags;
+  } else {
+    // Otherwise, check if new type or flags are compatible with existing ones.
+    unsigned Mask = SHF_TLS | SHF_LINK_ORDER;
+    if ((Flags & Mask) != (IS->Flags & Mask))
+      error("incompatible section flags for " + Name + "\n>>> " + toString(IS) +
+            ": 0x" + utohexstr(IS->Flags) + "\n>>> output section " + Name +
+            ": 0x" + utohexstr(Flags));
+    if (Type != IS->Type) {
+      if (!canMergeToProgbits(Type) || !canMergeToProgbits(IS->Type))
+        error("section type mismatch for " + IS->Name + "\n>>> " +
+              toString(IS) + ": " +
+              getELFSectionTypeName(Config->EMachine, IS->Type) +
+              "\n>>> output section " + Name + ": " +
+              getELFSectionTypeName(Config->EMachine, Type));
+      // Sections of different but mergeable types are combined into a
+      // progbits section. Without this, an output section whose first input
+      // was SHT_NOBITS would stay SHT_NOBITS and writeTo() would skip the
+      // contents of everything merged into it.
+      Type = SHT_PROGBITS;
+    }
+  }
+  IS->Parent = this;
+  // SHF_ARM_PURECODE (ARM only) is kept only if both the output section and
+  // IS have it; every other flag is kept if either side has it.
+  uint64_t AndMask =
+      Config->EMachine == EM_ARM ? (uint64_t)SHF_ARM_PURECODE : 0;
+  uint64_t OrMask = ~AndMask;
+  uint64_t AndFlags = (Flags & IS->Flags) & AndMask;
+  uint64_t OrFlags = (Flags | IS->Flags) & OrMask;
+  Flags = AndFlags | OrFlags;
+  Alignment = std::max(Alignment, IS->Alignment);
+  // If this section contains a table of fixed-size entries, sh_entsize
+  // holds the element size. If it contains elements of different size we
+  // set sh_entsize to 0.
+  if (Entsize != IS->Entsize)
+    Entsize = 0;
+  if (!IS->Assigned) {
+    IS->Assigned = true;
+    // Append to the trailing InputSectionDescription, creating an anonymous
+    // one if the command list is empty or ends with another kind of command.
+    if (SectionCommands.empty() ||
+        !isa<InputSectionDescription>(SectionCommands.back()))
+      SectionCommands.push_back(make<InputSectionDescription>(""));
+    auto *ISD = cast<InputSectionDescription>(SectionCommands.back());
+    ISD->Sections.push_back(IS);
+  }
+// Stably reorders In in place by the integer key that Order returns for each
+// section. Order is invoked once per section, before sorting starts.
+static void sortByOrder(MutableArrayRef<InputSection *> In,
+                        llvm::function_ref<int(InputSectionBase *S)> Order) {
+  using KeyedSection = std::pair<int, InputSection *>;
+  // Pair every section with its key so the comparator never calls Order.
+  std::vector<KeyedSection> Keyed;
+  for (InputSection *Sec : In)
+    Keyed.push_back({Order(Sec), Sec});
+  std::stable_sort(Keyed.begin(), Keyed.end(),
+                   [](const KeyedSection &L, const KeyedSection &R) {
+                     return L.first < R.first;
+                   });
+  // Write the sections back in their new order.
+  size_t Idx = 0;
+  for (const KeyedSection &Entry : Keyed)
+    In[Idx++] = Entry.second;
+// Returns the combined size of the ELF header and the program headers, or 0
+// when Config->OFormatBinary is set.
+uint64_t elf::getHeaderSize() {
+  if (!Config->OFormatBinary)
+    return Out::ElfHeader->Size + Out::ProgramHeaders->Size;
+  return 0;
+// Returns true if the command C is an OutputSection, judged by its Kind tag.
+// This is what lets isa<>/dyn_cast<>/cast<> be used with BaseCommand pointers.
+bool OutputSection::classof(const BaseCommand *C) {
+ return C->Kind == OutputSectionKind;
+// Sorts the input sections of every InputSectionDescription in this output
+// section by the key Order returns. Each description is sorted on its own;
+// sections never move between descriptions.
+void OutputSection::sort(llvm::function_ref<int(InputSectionBase *S)> Order) {
+  assert(Live);
+  for (BaseCommand *Cmd : SectionCommands) {
+    auto *Desc = dyn_cast<InputSectionDescription>(Cmd);
+    if (!Desc)
+      continue;
+    sortByOrder(Desc->Sections, Order);
+  }
+// Fill [Buf, Buf + Size) with repeated copies of the 4-byte Filler pattern.
+// This is used for linker script "=fillexp" command.
+static void fill(uint8_t *Buf, size_t Size,
+                 const std::array<uint8_t, 4> &Filler) {
+  size_t Off = 0;
+  // Copy whole 4-byte patterns while more than 4 bytes remain.
+  while (Off + 4 < Size) {
+    memcpy(Buf + Off, Filler.data(), 4);
+    Off += 4;
+  }
+  // Copy the tail, which is at most 4 bytes (and 0 bytes when Size is 0).
+  memcpy(Buf + Off, Filler.data(), Size - Off);
+// Compress section contents if this section contains debug info.
+//
+// On success the section's Size becomes the size of the compression header
+// plus the compressed data, and the section is flagged SHF_COMPRESSED so that
+// consumers know its contents start with an Elf_Chdr.
+template <class ELFT> void OutputSection::maybeCompress() {
+  typedef typename ELFT::Chdr Elf_Chdr;
+  // Compress only DWARF debug sections.
+  if (!Config->CompressDebugSections || (Flags & SHF_ALLOC) ||
+      !Name.startswith(".debug_"))
+    return;
+  // Create a section header.
+  ZDebugHeader.resize(sizeof(Elf_Chdr));
+  auto *Hdr = reinterpret_cast<Elf_Chdr *>(ZDebugHeader.data());
+  Hdr->ch_type = ELFCOMPRESS_ZLIB;
+  Hdr->ch_size = Size; // uncompressed size
+  Hdr->ch_addralign = Alignment;
+  // Write section contents to a temporary buffer and compress it.
+  std::vector<uint8_t> Buf(Size);
+  writeTo<ELFT>(Buf.data());
+  if (Error E = zlib::compress(toStringRef(Buf), CompressedData))
+    fatal("compress failed: " + llvm::toString(std::move(E)));
+  // Update section headers: the on-disk size changes and the section must be
+  // marked as compressed.
+  Size = sizeof(Elf_Chdr) + CompressedData.size();
+  Flags |= SHF_COMPRESSED;
+// Stores the low Size bytes of Data at Buf. Size must be 1, 2, 4 or 8; any
+// other value is a programming error.
+static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) {
+  switch (Size) {
+  case 1:
+    *Buf = Data;
+    return;
+  case 2:
+    write16(Buf, Data);
+    return;
+  case 4:
+    write32(Buf, Data);
+    return;
+  case 8:
+    write64(Buf, Data);
+    return;
+  default:
+    llvm_unreachable("unsupported Size argument");
+  }
+// Writes the contents of this output section to Buf: either the previously
+// compressed data, or every input section plus filler and BYTE()-family data.
+// Nothing is written for SHT_NOBITS sections.
+template <class ELFT> void OutputSection::writeTo(uint8_t *Buf) {
+ if (Type == SHT_NOBITS)
+ return;
+ // Remember where this section was written.
+ Loc = Buf;
+ // If --compress-debug-sections is specified and if this is a debug section,
+ // we've already compressed section contents. If that's the case,
+ // just write it down: compression header first, then the compressed data.
+ if (!CompressedData.empty()) {
+ memcpy(Buf, ZDebugHeader.data(), ZDebugHeader.size());
+ memcpy(Buf + ZDebugHeader.size(), CompressedData.data(),
+ CompressedData.size());
+ return;
+ }
+ // Write leading padding. Filling is skipped entirely when the filler is
+ // all zeros.
+ std::vector<InputSection *> Sections = getInputSections(this);
+ std::array<uint8_t, 4> Filler = getFiller();
+ bool NonZeroFiller = read32(Filler.data()) != 0;
+ if (NonZeroFiller)
+ fill(Buf, Sections.empty() ? Size : Sections[0]->OutSecOff, Filler);
+ // Each iteration writes one input section and the gap that follows it.
+ parallelForEachN(0, Sections.size(), [&](size_t I) {
+ InputSection *IS = Sections[I];
+ IS->writeTo<ELFT>(Buf);
+ // Fill gaps between sections.
+ if (NonZeroFiller) {
+ uint8_t *Start = Buf + IS->OutSecOff + IS->getSize();
+ uint8_t *End;
+ // The gap after the last section extends to the end of the output section.
+ if (I + 1 == Sections.size())
+ End = Buf + Size;
+ else
+ End = Buf + Sections[I + 1]->OutSecOff;
+ fill(Start, End - Start, Filler);
+ }
+ });
+ // Linker scripts may have BYTE()-family commands with which you
+ // can write arbitrary bytes to the output. Process them if any.
+ // This runs after the input sections and filler have been written.
+ for (BaseCommand *Base : SectionCommands)
+ if (auto *Data = dyn_cast<ByteCommand>(Base))
+ writeInt(Buf + Data->Offset, Data->Expression().getValue(), Data->Size);
+// Fills in sh_link and sh_info of the SHT_GROUP output section OS, using
+// Section as the input group section. Only used for relocatable output.
+template <class ELFT>
+static void finalizeShtGroup(OutputSection *OS,
+                             InputSection *Section) {
+  assert(Config->Relocatable);
+  // sh_link field for SHT_GROUP sections should contain the section index of
+  // the symbol table.
+  OS->Link = In.SymTab->getParent()->SectionIndex;
+  // sh_info then contains the index of the symbol table entry that provides
+  // the signature of the section group. The input section's Info field
+  // indexes the symbols of the file it came from.
+  ArrayRef<Symbol *> FileSymbols = Section->getFile<ELFT>()->getSymbols();
+  Symbol *Signature = FileSymbols[Section->Info];
+  OS->Info = In.SymTab->getSymbolIndex(Signature);
+// Finalizes the section header fields that depend on other sections: the
+// section type (when BYTE()-family commands are present), sh_link for
+// SHF_LINK_ORDER sections, and sh_link/sh_info for SHT_GROUP and copied
+// SHT_REL[A] sections.
+template <class ELFT> void OutputSection::finalize() {
+  // A SHT_NOBITS section occupies no file space, and writeTo() writes nothing
+  // for it. If the section has BYTE()-family commands it has real contents,
+  // so it must become SHT_PROGBITS.
+  if (Type == SHT_NOBITS)
+    for (BaseCommand *Base : SectionCommands)
+      if (isa<ByteCommand>(Base))
+        Type = SHT_PROGBITS;
+  // NOTE(review): First is null when the section has no input sections; the
+  // code below dereferences it without a check, as before -- confirm that
+  // callers never reach those paths with an empty section.
+  std::vector<InputSection *> V = getInputSections(this);
+  InputSection *First = V.empty() ? nullptr : V[0];
+  if (Flags & SHF_LINK_ORDER) {
+    // We must preserve the link order dependency of sections with the
+    // SHF_LINK_ORDER flag. The dependency is indicated by the sh_link field. We
+    // need to translate the InputSection sh_link to the OutputSection sh_link,
+    // all InputSections in the OutputSection have the same dependency.
+    if (auto *D = First->getLinkOrderDep())
+      Link = D->getParent()->SectionIndex;
+  }
+  if (Type == SHT_GROUP) {
+    finalizeShtGroup<ELFT>(this, First);
+    return;
+  }
+  // The rest applies only to relocation sections that are copied to the
+  // output (Config->CopyRelocs), not to synthetic ones.
+  if (!Config->CopyRelocs || (Type != SHT_RELA && Type != SHT_REL))
+    return;
+  if (isa<SyntheticSection>(First))
+    return;
+  Link = In.SymTab->getParent()->SectionIndex;
+  // sh_info for SHT_REL[A] sections should contain the section header index of
+  // the section to which the relocation applies.
+  InputSectionBase *S = First->getRelocatedSection();
+  Info = S->getOutputSection()->SectionIndex;
+  Flags |= SHF_INFO_LINK;
+// Returns true if S matches /Filename.?\.o$/, i.e. S ends with Filename,
+// optionally followed by one arbitrary character, followed by ".o".
+static bool isCrtBeginEnd(StringRef S, StringRef Filename) {
+  if (!S.endswith(".o"))
+    return false;
+  StringRef Stem = S.drop_back(2);
+  // "<Filename>.o"
+  if (Stem.endswith(Filename))
+    return true;
+  // "<Filename><one char>.o"
+  if (Stem.empty())
+    return false;
+  return Stem.drop_back().endswith(Filename);
+static bool isCrtbegin(StringRef S) { return isCrtBeginEnd(S, "crtbegin"); }
+static bool isCrtend(StringRef S) { return isCrtBeginEnd(S, "crtend"); }
+// .ctors and .dtors are sorted by this priority from highest to lowest.
+// 1. The section was contained in crtbegin (crtbegin contains
+// some sentinel value in its .ctors and .dtors so that the runtime
+// can find the beginning of the sections.)
+// 2. The section has an optional priority value in the form of ".ctors.N"
+// or ".dtors.N" where N is a number. Unlike .{init,fini}_array,
+// they are compared as strings rather than numbers.
+// 3. The section is just ".ctors" or ".dtors".
+// 4. The section was contained in crtend, which contains an end marker.
+// In an ideal world, we don't need this function because .init_array and
+// .ctors are duplicate features (and .init_array is newer.) However, there
+// are too many real-world use cases of .ctors, so we had no choice but to
+// support that with these rather ad-hoc semantics.
+static bool compCtors(const InputSection *A, const InputSection *B) {
+ bool BeginA = isCrtbegin(A->File->getName());
+ bool BeginB = isCrtbegin(B->File->getName());
+ if (BeginA != BeginB)
+ return BeginA;
+ bool EndA = isCrtend(A->File->getName());
+ bool EndB = isCrtend(B->File->getName());
+ if (EndA != EndB)
+ return EndB;
+ StringRef X = A->Name;
+ StringRef Y = B->Name;
+ assert(X.startswith(".ctors") || X.startswith(".dtors"));
+ assert(Y.startswith(".ctors") || Y.startswith(".dtors"));
+ X = X.substr(6);
+ Y = Y.substr(6);
+ return X < Y;
+// Sorts input sections by the special rules for .ctors and .dtors.
+// Unfortunately, the rules are different from the one for .{init,fini}_array.
+// Read the comment above.
+void OutputSection::sortCtorsDtors() {
+ assert(SectionCommands.size() == 1);
+ auto *ISD = cast<InputSectionDescription>(SectionCommands[0]);
+ std::stable_sort(ISD->Sections.begin(), ISD->Sections.end(), compCtors);
+// If an input string is in the form of "foo.N" where N is a number,
+// return N. Otherwise, returns 65536, which is one greater than the
+// lowest priority.
+int elf::getPriority(StringRef S) {
+ size_t Pos = S.rfind('.');
+ if (Pos == StringRef::npos)
+ return 65536;
+ int V;
+ if (!to_integer(S.substr(Pos + 1), V, 10))
+ return 65536;
+ return V;
+std::vector<InputSection *> elf::getInputSections(OutputSection *OS) {
+ std::vector<InputSection *> Ret;
+ for (BaseCommand *Base : OS->SectionCommands)
+ if (auto *ISD = dyn_cast<InputSectionDescription>(Base))
+ Ret.insert(Ret.end(), ISD->Sections.begin(), ISD->Sections.end());
+ return Ret;
+// Sorts input sections by section name suffixes, so that .foo.N comes
+// before .foo.M if N < M. Used to sort .{init,fini}_array.N sections.
+// We want to keep the original order if the priorities are the same
+// because the compiler keeps the original initialization order in a
+// translation unit and we need to respect that.
+// For more detail, read the section of the GCC's manual about init_priority.
+void OutputSection::sortInitFini() {
+ // Sort sections by priority.
+ sort([](InputSectionBase *S) { return getPriority(S->Name); });
+std::array<uint8_t, 4> OutputSection::getFiller() {
+ if (Filler)
+ return *Filler;
+ if (Flags & SHF_EXECINSTR)
+ return Target->TrapInstr;
+ return {0, 0, 0, 0};
+template void OutputSection::writeHeaderTo<ELF32LE>(ELF32LE::Shdr *Shdr);
+template void OutputSection::writeHeaderTo<ELF32BE>(ELF32BE::Shdr *Shdr);
+template void OutputSection::writeHeaderTo<ELF64LE>(ELF64LE::Shdr *Shdr);
+template void OutputSection::writeHeaderTo<ELF64BE>(ELF64BE::Shdr *Shdr);
+template void OutputSection::writeTo<ELF32LE>(uint8_t *Buf);
+template void OutputSection::writeTo<ELF32BE>(uint8_t *Buf);
+template void OutputSection::writeTo<ELF64LE>(uint8_t *Buf);
+template void OutputSection::writeTo<ELF64BE>(uint8_t *Buf);
+template void OutputSection::maybeCompress<ELF32LE>();
+template void OutputSection::maybeCompress<ELF32BE>();
+template void OutputSection::maybeCompress<ELF64LE>();
+template void OutputSection::maybeCompress<ELF64BE>();
+template void OutputSection::finalize<ELF32LE>();
+template void OutputSection::finalize<ELF32BE>();
+template void OutputSection::finalize<ELF64LE>();
+template void OutputSection::finalize<ELF64BE>();
diff --git a/contrib/llvm/tools/lld/ELF/OutputSections.h b/contrib/llvm/tools/lld/ELF/OutputSections.h
new file mode 100644
index 000000000000..113bf6836926
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/OutputSections.h
@@ -0,0 +1,153 @@
+//===- OutputSections.h -----------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Config.h"
+#include "InputSection.h"
+#include "LinkerScript.h"
+#include "Relocations.h"
+#include "lld/Common/LLVM.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Object/ELF.h"
+#include <array>
+namespace lld {
+namespace elf {
+struct PhdrEntry;
+class Symbol;
+struct EhSectionPiece;
+class EhInputSection;
+class InputSection;
+class InputSectionBase;
+class MergeInputSection;
+class OutputSection;
+template <class ELFT> class ObjFile;
+template <class ELFT> class SharedFile;
+class SharedSymbol;
+class Defined;
+// This represents a section in an output file.
+// It is composed of multiple InputSections.
+// The writer creates multiple OutputSections and assigns them unique,
+// non-overlapping file offsets and VAs.
+class OutputSection final : public BaseCommand, public SectionBase {
+ OutputSection(StringRef Name, uint32_t Type, uint64_t Flags);
+ static bool classof(const SectionBase *S) {
+ return S->kind() == SectionBase::Output;
+ }
+ static bool classof(const BaseCommand *C);
+ uint64_t getLMA() const { return PtLoad ? Addr + PtLoad->LMAOffset : Addr; }
+ template <typename ELFT> void writeHeaderTo(typename ELFT::Shdr *SHdr);
+ uint32_t SectionIndex = UINT32_MAX;
+ unsigned SortRank;
+ uint32_t getPhdrFlags() const;
+ // Pointer to the PT_LOAD segment, which this section resides in. This field
+ // is used to correctly compute file offset of a section. When two sections
+ // share the same load segment, difference between their file offsets should
+ // be equal to difference between their virtual addresses. To compute some
+ // section offset we use the following formula: Off = Off_first + VA -
+ // VA_first, where Off_first and VA_first are the file offset and VA of the
+ // first section in PT_LOAD.
+ PhdrEntry *PtLoad = nullptr;
+ // Pointer to a relocation section for this section. Usually nullptr because
+ // we consume relocations, but if --emit-relocs is specified (which is rare),
+ // it may have a non-null value.
+ OutputSection *RelocationSection = nullptr;
+ // Initially this field is the number of InputSections that have been added to
+ // the OutputSection so far. Later on, after a call to assignAddresses, it
+ // corresponds to the Elf_Shdr member.
+ uint64_t Size = 0;
+ // The following fields correspond to Elf_Shdr members.
+ uint64_t Offset = 0;
+ uint64_t Addr = 0;
+ uint32_t ShName = 0;
+ void addSection(InputSection *IS);
+ // Location in the output buffer.
+ uint8_t *Loc = nullptr;
+ // The following members are normally only used in linker scripts.
+ MemoryRegion *MemRegion = nullptr;
+ MemoryRegion *LMARegion = nullptr;
+ Expr AddrExpr;
+ Expr AlignExpr;
+ Expr LMAExpr;
+ Expr SubalignExpr;
+ std::vector<BaseCommand *> SectionCommands;
+ std::vector<StringRef> Phdrs;
+ llvm::Optional<std::array<uint8_t, 4>> Filler;
+ ConstraintKind Constraint = ConstraintKind::NoConstraint;
+ std::string Location;
+ std::string MemoryRegionName;
+ std::string LMARegionName;
+ bool NonAlloc = false;
+ bool Noload = false;
+ bool ExpressionsUseSymbols = false;
+ bool InOverlay = false;
+ template <class ELFT> void finalize();
+ template <class ELFT> void writeTo(uint8_t *Buf);
+ template <class ELFT> void maybeCompress();
+ void sort(llvm::function_ref<int(InputSectionBase *S)> Order);
+ void sortInitFini();
+ void sortCtorsDtors();
+ // Used for implementation of --compress-debug-sections option.
+ std::vector<uint8_t> ZDebugHeader;
+ llvm::SmallVector<char, 1> CompressedData;
+ std::array<uint8_t, 4> getFiller();
+int getPriority(StringRef S);
+std::vector<InputSection *> getInputSections(OutputSection* OS);
+// All output sections that are handled by the linker specially are
+// globally accessible. Writer initializes them, so don't use them
+// until Writer is initialized.
+struct Out {
+ static uint8_t First;
+ static PhdrEntry *TlsPhdr;
+ static OutputSection *ElfHeader;
+ static OutputSection *ProgramHeaders;
+ static OutputSection *PreinitArray;
+ static OutputSection *InitArray;
+ static OutputSection *FiniArray;
+} // namespace elf
+} // namespace lld
+namespace lld {
+namespace elf {
+uint64_t getHeaderSize();
+extern std::vector<OutputSection *> OutputSections;
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/README.md b/contrib/llvm/tools/lld/ELF/README.md
new file mode 100644
index 000000000000..f1bfc9c15263
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/README.md
@@ -0,0 +1 @@
+See docs/NewLLD.rst
diff --git a/contrib/llvm/tools/lld/ELF/Relocations.cpp b/contrib/llvm/tools/lld/ELF/Relocations.cpp
new file mode 100644
index 000000000000..1aa58d4356bf
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Relocations.cpp
@@ -0,0 +1,1520 @@
+//===- Relocations.cpp ----------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file contains platform-independent functions to process relocations.
+// I'll describe the overview of this file here.
+// Simple relocations are easy to handle for the linker. For example,
+// for R_X86_64_PC64 relocs, the linker just has to fix up locations
+// with the relative offsets to the target symbols. It would just be
+// reading records from relocation sections and applying them to output.
+// But not all relocations are that easy to handle. For example, for
+// R_386_GOTOFF relocs, the linker has to create new GOT entries for
+// symbols if they don't exist, and fix up locations with GOT entry
+// offsets from the beginning of GOT section. So there is more than
+// fixing addresses in relocation processing.
+// ELF defines a large number of complex relocations.
+// The functions in this file analyze relocations and do whatever needs
+// to be done. This includes, but is not limited to, the following.
+// - create GOT/PLT entries
+// - create new relocations in .dynsym to let the dynamic linker resolve
+// them at runtime (since ELF supports dynamic linking, not all
+// relocations can be resolved at link-time)
+// - create COPY relocs and reserve space in .bss
+// - replace expensive relocs (in terms of runtime cost) with cheap ones
+// - error out infeasible combinations such as PIC and non-relative relocs
+// Note that the functions in this file don't actually apply relocations
+// because they don't know about the output file or the output file buffer.
+// They instead store Relocation objects in InputSection's Relocations
+// vector so that they can be applied later in InputSection::writeTo.
+#include "Relocations.h"
+#include "Config.h"
+#include "LinkerScript.h"
+#include "OutputSections.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+#include "Thunks.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+#include "lld/Common/Strings.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::object;
+using namespace llvm::support::endian;
+using namespace lld;
+using namespace lld::elf;
+static Optional<std::string> getLinkerScriptLocation(const Symbol &Sym) {
+ for (BaseCommand *Base : Script->SectionCommands)
+ if (auto *Cmd = dyn_cast<SymbolAssignment>(Base))
+ if (Cmd->Sym == &Sym)
+ return Cmd->Location;
+ return None;
+// Construct a message in the following format.
+// >>> defined in /home/alice/src/foo.o
+// >>> referenced by bar.c:12 (/home/alice/src/bar.c:12)
+// >>> /home/alice/src/bar.o:(.text+0x1)
+static std::string getLocation(InputSectionBase &S, const Symbol &Sym,
+ uint64_t Off) {
+ std::string Msg = "\n>>> defined in ";
+ if (Sym.File)
+ Msg += toString(Sym.File);
+ else if (Optional<std::string> Loc = getLinkerScriptLocation(Sym))
+ Msg += *Loc;
+ Msg += "\n>>> referenced by ";
+ std::string Src = S.getSrcMsg(Sym, Off);
+ if (!Src.empty())
+ Msg += Src + "\n>>> ";
+ return Msg + S.getObjMsg(Off);
+// This function is similar to `handleTlsRelocation`. MIPS does not support
+// any relaxations for TLS relocations, so by factoring out MIPS handling into
+// a separate function we can simplify the code and avoid polluting
+// `handleTlsRelocation` with MIPS-specific `if` statements.
+// Mips has a custom MipsGotSection that handles the writing of GOT entries
+// without dynamic relocations.
+static unsigned handleMipsTlsRelocation(RelType Type, Symbol &Sym,
+ InputSectionBase &C, uint64_t Offset,
+ int64_t Addend, RelExpr Expr) {
+ if (Expr == R_MIPS_TLSLD) {
+ In.MipsGot->addTlsIndex(*C.File);
+ C.Relocations.push_back({Expr, Type, Offset, Addend, &Sym});
+ return 1;
+ }
+ if (Expr == R_MIPS_TLSGD) {
+ In.MipsGot->addDynTlsEntry(*C.File, Sym);
+ C.Relocations.push_back({Expr, Type, Offset, Addend, &Sym});
+ return 1;
+ }
+ return 0;
+// This function is similar to the `handleMipsTlsRelocation`. ARM also does not
+// support any relaxations for TLS relocations. ARM is logically similar to Mips
+// in how it handles TLS, but Mips uses its own custom GOT which handles some
+// of the cases that ARM uses GOT relocations for.
+// We look for TLS global dynamic and local dynamic relocations, these may
+// require the generation of a pair of GOT entries that have associated
+// dynamic relocations. When the results of the dynamic relocations can be
+// resolved at static link time we do so. This is necessary for static linking
+// as there will be no dynamic loader to resolve them at load-time.
+// The pair of GOT entries created are of the form
+// GOT[e0] Module Index (Used to find pointer to TLS block at run-time)
+// GOT[e1] Offset of symbol in TLS block
+template <class ELFT>
+static unsigned handleARMTlsRelocation(RelType Type, Symbol &Sym,
+ InputSectionBase &C, uint64_t Offset,
+ int64_t Addend, RelExpr Expr) {
+ // The Dynamic TLS Module Index Relocation for a symbol defined in an
+ // executable is always 1. If the target Symbol is not preemptible then
+ // we know the offset into the TLS block at static link time.
+ bool NeedDynId = Sym.IsPreemptible || Config->Shared;
+ bool NeedDynOff = Sym.IsPreemptible;
+ auto AddTlsReloc = [&](uint64_t Off, RelType Type, Symbol *Dest, bool Dyn) {
+ if (Dyn)
+ In.RelaDyn->addReloc(Type, In.Got, Off, Dest);
+ else
+ In.Got->Relocations.push_back({R_ABS, Type, Off, 0, Dest});
+ };
+ // Local Dynamic is for access to module local TLS variables, while still
+ // being suitable for being dynamically loaded via dlopen.
+ // GOT[e0] is the module index, with a special value of 0 for the current
+ // module. GOT[e1] is unused. There only needs to be one module index entry.
+ if (Expr == R_TLSLD_PC && In.Got->addTlsIndex()) {
+ AddTlsReloc(In.Got->getTlsIndexOff(), Target->TlsModuleIndexRel,
+ NeedDynId ? nullptr : &Sym, NeedDynId);
+ C.Relocations.push_back({Expr, Type, Offset, Addend, &Sym});
+ return 1;
+ }
+ // Global Dynamic is the most general purpose access model. When we know
+ // the module index and offset of symbol in TLS block we can fill these in
+ // using static GOT relocations.
+ if (Expr == R_TLSGD_PC) {
+ if (In.Got->addDynTlsEntry(Sym)) {
+ uint64_t Off = In.Got->getGlobalDynOffset(Sym);
+ AddTlsReloc(Off, Target->TlsModuleIndexRel, &Sym, NeedDynId);
+ AddTlsReloc(Off + Config->Wordsize, Target->TlsOffsetRel, &Sym,
+ NeedDynOff);
+ }
+ C.Relocations.push_back({Expr, Type, Offset, Addend, &Sym});
+ return 1;
+ }
+ return 0;
+// Returns the number of relocations processed.
+template <class ELFT>
+static unsigned
+handleTlsRelocation(RelType Type, Symbol &Sym, InputSectionBase &C,
+ typename ELFT::uint Offset, int64_t Addend, RelExpr Expr) {
+ if (!Sym.isTls())
+ return 0;
+ if (Config->EMachine == EM_ARM)
+ return handleARMTlsRelocation<ELFT>(Type, Sym, C, Offset, Addend, Expr);
+ if (Config->EMachine == EM_MIPS)
+ return handleMipsTlsRelocation(Type, Sym, C, Offset, Addend, Expr);
+ Config->Shared) {
+ if (In.Got->addDynTlsEntry(Sym)) {
+ uint64_t Off = In.Got->getGlobalDynOffset(Sym);
+ In.RelaDyn->addReloc(
+ {Target->TlsDescRel, In.Got, Off, !Sym.IsPreemptible, &Sym, 0});
+ }
+ if (Expr != R_TLSDESC_CALL)
+ C.Relocations.push_back({Expr, Type, Offset, Addend, &Sym});
+ return 1;
+ }
+ R_TLSLD_HINT>(Expr)) {
+ // Local-Dynamic relocs can be relaxed to Local-Exec.
+ if (!Config->Shared) {
+ C.Relocations.push_back(
+ {Target->adjustRelaxExpr(Type, nullptr, R_RELAX_TLS_LD_TO_LE), Type,
+ Offset, Addend, &Sym});
+ return Target->TlsGdRelaxSkip;
+ }
+ if (Expr == R_TLSLD_HINT)
+ return 1;
+ if (In.Got->addTlsIndex())
+ In.RelaDyn->addReloc(Target->TlsModuleIndexRel, In.Got,
+ In.Got->getTlsIndexOff(), nullptr);
+ C.Relocations.push_back({Expr, Type, Offset, Addend, &Sym});
+ return 1;
+ }
+ // Local-Dynamic relocs can be relaxed to Local-Exec.
+ if (Expr == R_ABS && !Config->Shared) {
+ C.Relocations.push_back(
+ {Target->adjustRelaxExpr(Type, nullptr, R_RELAX_TLS_LD_TO_LE), Type,
+ Offset, Addend, &Sym});
+ return 1;
+ }
+ // Local-Dynamic sequence where offset of tls variable relative to dynamic
+ // thread pointer is stored in the got.
+ if (Expr == R_TLSLD_GOT_OFF) {
+ // Local-Dynamic relocs can be relaxed to local-exec
+ if (!Config->Shared) {
+ C.Relocations.push_back({R_RELAX_TLS_LD_TO_LE, Type, Offset, Addend, &Sym});
+ return 1;
+ }
+ if (!Sym.isInGot()) {
+ In.Got->addEntry(Sym);
+ uint64_t Off = Sym.getGotOffset();
+ In.Got->Relocations.push_back(
+ {R_ABS, Target->TlsOffsetRel, Off, 0, &Sym});
+ }
+ C.Relocations.push_back({Expr, Type, Offset, Addend, &Sym});
+ return 1;
+ }
+ if (Config->Shared) {
+ if (In.Got->addDynTlsEntry(Sym)) {
+ uint64_t Off = In.Got->getGlobalDynOffset(Sym);
+ In.RelaDyn->addReloc(Target->TlsModuleIndexRel, In.Got, Off, &Sym);
+ // If the symbol is preemptible we need the dynamic linker to write
+ // the offset too.
+ uint64_t OffsetOff = Off + Config->Wordsize;
+ if (Sym.IsPreemptible)
+ In.RelaDyn->addReloc(Target->TlsOffsetRel, In.Got, OffsetOff, &Sym);
+ else
+ In.Got->Relocations.push_back(
+ {R_ABS, Target->TlsOffsetRel, OffsetOff, 0, &Sym});
+ }
+ C.Relocations.push_back({Expr, Type, Offset, Addend, &Sym});
+ return 1;
+ }
+ // Global-Dynamic relocs can be relaxed to Initial-Exec or Local-Exec
+ // depending on the symbol being locally defined or not.
+ if (Sym.IsPreemptible) {
+ C.Relocations.push_back(
+ {Target->adjustRelaxExpr(Type, nullptr, R_RELAX_TLS_GD_TO_IE), Type,
+ Offset, Addend, &Sym});
+ if (!Sym.isInGot()) {
+ In.Got->addEntry(Sym);
+ In.RelaDyn->addReloc(Target->TlsGotRel, In.Got, Sym.getGotOffset(),
+ &Sym);
+ }
+ } else {
+ C.Relocations.push_back(
+ {Target->adjustRelaxExpr(Type, nullptr, R_RELAX_TLS_GD_TO_LE), Type,
+ Offset, Addend, &Sym});
+ }
+ return Target->TlsGdRelaxSkip;
+ }
+ // Initial-Exec relocs can be relaxed to Local-Exec if the symbol is locally
+ // defined.
+ !Config->Shared && !Sym.IsPreemptible) {
+ C.Relocations.push_back({R_RELAX_TLS_IE_TO_LE, Type, Offset, Addend, &Sym});
+ return 1;
+ }
+ if (Expr == R_TLSIE_HINT)
+ return 1;
+ return 0;
+static RelType getMipsPairType(RelType Type, bool IsLocal) {
+ switch (Type) {
+ case R_MIPS_HI16:
+ return R_MIPS_LO16;
+ case R_MIPS_GOT16:
+ // In case of global symbol, the R_MIPS_GOT16 relocation does not
+ // have a pair. Each global symbol has a unique entry in the GOT
+ // and a corresponding instruction with help of the R_MIPS_GOT16
+ // relocation loads an address of the symbol. In case of local
+ // symbol, the R_MIPS_GOT16 relocation creates a GOT entry to hold
+ // the high 16 bits of the symbol's value. A paired R_MIPS_LO16
+ // relocations handle low 16 bits of the address. That allows
+ // to allocate only one GOT entry for every 64 KBytes of local data.
+ return IsLocal ? R_MIPS_LO16 : R_MIPS_NONE;
+ return IsLocal ? R_MICROMIPS_LO16 : R_MIPS_NONE;
+ case R_MIPS_PCHI16:
+ return R_MIPS_PCLO16;
+ case R_MICROMIPS_HI16:
+ return R_MICROMIPS_LO16;
+ default:
+ return R_MIPS_NONE;
+ }
+// True if a non-preemptible symbol always has the same value regardless of
+// where the DSO is loaded.
+static bool isAbsolute(const Symbol &Sym) {
+ if (Sym.isUndefWeak())
+ return true;
+ if (const auto *DR = dyn_cast<Defined>(&Sym))
+ return DR->Section == nullptr; // Absolute symbol.
+ return false;
+static bool isAbsoluteValue(const Symbol &Sym) {
+ return isAbsolute(Sym) || Sym.isTls();
+// Returns true if Expr refers to a PLT entry.
+static bool needsPlt(RelExpr Expr) {
+// Returns true if Expr refers to a GOT entry. Note that this function
+// returns false for TLS variables even though they need GOT, because
+// TLS variables use GOT differently than regular variables.
+static bool needsGot(RelExpr Expr) {
+ R_GOT_PLT>(Expr);
+// True if this expression is of the form Sym - X, where X is a position in the
+// file (PC, or GOT for example).
+static bool isRelExpr(RelExpr Expr) {
+// Returns true if a given relocation can be computed at link-time.
+// For instance, we know the offset from a relocation to its target at
+// link-time if the relocation is PC-relative and refers to a
+// non-interposable function in the same executable. This function
+// will return true for such relocation.
+// If this function returns false, that means we need to emit a
+// dynamic relocation so that the relocation will be fixed at load-time.
+static bool isStaticLinkTimeConstant(RelExpr E, RelType Type, const Symbol &Sym,
+ InputSectionBase &S, uint64_t RelOff) {
+ // These expressions always compute a constant
+ return true;
+ // The computation involves output from the ifunc resolver.
+ if (Sym.isGnuIFunc() && Config->ZIfuncnoplt)
+ return false;
+ // These never do, except if the entire file is position dependent or if
+ // only the low bits are used.
+ if (E == R_GOT || E == R_GOT_PLT || E == R_PLT || E == R_TLSDESC)
+ return Target->usesOnlyLowPageBits(Type) || !Config->Pic;
+ if (Sym.IsPreemptible)
+ return false;
+ if (!Config->Pic)
+ return true;
+ // The size of a non preemptible symbol is a constant.
+ if (E == R_SIZE)
+ return true;
+ // For the target and the relocation, we want to know if they are
+ // absolute or relative.
+ bool AbsVal = isAbsoluteValue(Sym);
+ bool RelE = isRelExpr(E);
+ if (AbsVal && !RelE)
+ return true;
+ if (!AbsVal && RelE)
+ return true;
+ if (!AbsVal && !RelE)
+ return Target->usesOnlyLowPageBits(Type);
+ // Relative relocation to an absolute value. This is normally unrepresentable,
+ // but if the relocation refers to a weak undefined symbol, we allow it to
+ // resolve to the image base. This is a little strange, but it allows us to
+ // link function calls to such symbols. Normally such a call will be guarded
+ // with a comparison, which will load a zero from the GOT.
+ // Another special case is MIPS _gp_disp symbol which represents offset
+ // between start of a function and '_gp' value and defined as absolute just
+ // to simplify the code.
+ assert(AbsVal && RelE);
+ if (Sym.isUndefWeak())
+ return true;
+ error("relocation " + toString(Type) + " cannot refer to absolute symbol: " +
+ toString(Sym) + getLocation(S, Sym, RelOff));
+ return true;
+static RelExpr toPlt(RelExpr Expr) {
+ switch (Expr) {
+ case R_PPC_CALL:
+ return R_PPC_CALL_PLT;
+ case R_PC:
+ return R_PLT_PC;
+ case R_AARCH64_PAGE_PC:
+ return R_AARCH64_PLT_PAGE_PC;
+ case R_ABS:
+ return R_PLT;
+ case R_GOT:
+ return R_GOT_PLT;
+ default:
+ return Expr;
+ }
+static RelExpr fromPlt(RelExpr Expr) {
+ // We decided not to use a plt. Optimize a reference to the plt to a
+ // reference to the symbol itself.
+ switch (Expr) {
+ case R_PLT_PC:
+ return R_PC;
+ case R_PPC_CALL_PLT:
+ return R_PPC_CALL;
+ case R_PLT:
+ return R_ABS;
+ default:
+ return Expr;
+ }
+// Returns true if a given shared symbol is in a read-only segment in a DSO.
+template <class ELFT> static bool isReadOnly(SharedSymbol &SS) {
+ typedef typename ELFT::Phdr Elf_Phdr;
+ // Determine if the symbol is read-only by scanning the DSO's program headers.
+ const SharedFile<ELFT> &File = SS.getFile<ELFT>();
+ for (const Elf_Phdr &Phdr : check(File.getObj().program_headers()))
+ if ((Phdr.p_type == ELF::PT_LOAD || Phdr.p_type == ELF::PT_GNU_RELRO) &&
+ !(Phdr.p_flags & ELF::PF_W) && SS.Value >= Phdr.p_vaddr &&
+ SS.Value < Phdr.p_vaddr + Phdr.p_memsz)
+ return true;
+ return false;
+// Returns symbols at the same offset as a given symbol, including SS itself.
+// If two or more symbols are at the same offset, and at least one of
+// them is copied by a copy relocation, all of them need to be copied.
+// Otherwise, they would refer to different places at runtime.
+template <class ELFT>
+static SmallSet<SharedSymbol *, 4> getSymbolsAt(SharedSymbol &SS) {
+ typedef typename ELFT::Sym Elf_Sym;
+ SharedFile<ELFT> &File = SS.getFile<ELFT>();
+ SmallSet<SharedSymbol *, 4> Ret;
+ for (const Elf_Sym &S : File.getGlobalELFSyms()) {
+ if (S.st_shndx == SHN_UNDEF || S.st_shndx == SHN_ABS ||
+ S.getType() == STT_TLS || S.st_value != SS.Value)
+ continue;
+ StringRef Name = check(S.getName(File.getStringTable()));
+ Symbol *Sym = Symtab->find(Name);
+ if (auto *Alias = dyn_cast_or_null<SharedSymbol>(Sym))
+ Ret.insert(Alias);
+ }
+ return Ret;
+// When a symbol is copy relocated or we create a canonical plt entry, it is
+// effectively a defined symbol. In the case of copy relocation the symbol is
+// in .bss and in the case of a canonical plt entry it is in .plt. This function
+// replaces the existing symbol with a Defined pointing to the appropriate
+// location.
+static void replaceWithDefined(Symbol &Sym, SectionBase *Sec, uint64_t Value,
+ uint64_t Size) {
+ Symbol Old = Sym;
+ replaceSymbol<Defined>(&Sym, Sym.File, Sym.getName(), Sym.Binding,
+ Sym.StOther, Sym.Type, Value, Size, Sec);
+ Sym.PltIndex = Old.PltIndex;
+ Sym.GotIndex = Old.GotIndex;
+ Sym.VerdefIndex = Old.VerdefIndex;
+ Sym.PPC64BranchltIndex = Old.PPC64BranchltIndex;
+ Sym.IsPreemptible = true;
+ Sym.ExportDynamic = true;
+ Sym.IsUsedInRegularObj = true;
+ Sym.Used = true;
+// Reserve space in .bss or .bss.rel.ro for copy relocation.
+// The copy relocation is pretty much a hack. If you use a copy relocation
+// in your program, not only the symbol name but the symbol's size, RW/RO
+// bit and alignment become part of the ABI. In addition to that, if the
+// symbol has aliases, the aliases become part of the ABI. That's subtle,
+// but if you violate that implicit ABI, that can cause very counter-
+// intuitive consequences.
+// So, what is the copy relocation? It's for linking non-position
+// independent code to DSOs. In an ideal world, all references to data
+// exported by DSOs should go indirectly through GOT. But if object files
+// are compiled as non-PIC, all data references are direct. There is no
+// way for the linker to transform the code to use GOT, as machine
+// instructions are already set in stone in object files. This is where
+// the copy relocation takes a role.
+// A copy relocation instructs the dynamic linker to copy data from a DSO
+// to a specified address (which is usually in .bss) at load-time. If the
+// static linker (that's us) finds a direct data reference to a DSO
+// symbol, it creates a copy relocation, so that the symbol can be
+// resolved as if it were in .bss rather than in a DSO.
+// As you can see in this function, we create a copy relocation for the
+// dynamic linker, and the relocation contains not only symbol name but
+// various other information about the symbol. So, such attributes become a
+// part of the ABI.
+// Note for application developers: I can give you a piece of advice if
+// you are writing a shared library. You probably should export only
+// functions from your library. You shouldn't export variables.
+// As an example of what can happen when you export variables without knowing
+// the semantics of copy relocations, assume that you have an exported
+// variable of type T. It is an ABI-breaking change to add new members at
+// end of T even though doing that doesn't change the layout of the
+// existing members. That's because the space for the new members is not
+// reserved in .bss unless you recompile the main program. That means they
+// are likely to overlap with other data that happens to be laid out next
+// to the variable in .bss. This kind of issue is sometimes very hard to
+// debug. What's a solution? Instead of exporting a variable V from a DSO,
+// define an accessor getV().
+template <class ELFT> static void addCopyRelSymbol(SharedSymbol &SS) {
+ // Copy relocation against zero-sized symbol doesn't make sense.
+ uint64_t SymSize = SS.getSize();
+ if (SymSize == 0 || SS.Alignment == 0)
+ fatal("cannot create a copy relocation for symbol " + toString(SS));
+ // See if this symbol is in a read-only segment. If so, preserve the symbol's
+ // memory protection by reserving space in the .bss.rel.ro section.
+ bool IsReadOnly = isReadOnly<ELFT>(SS);
+ BssSection *Sec = make<BssSection>(IsReadOnly ? ".bss.rel.ro" : ".bss",
+ SymSize, SS.Alignment);
+ if (IsReadOnly)
+ In.BssRelRo->getParent()->addSection(Sec);
+ else
+ In.Bss->getParent()->addSection(Sec);
+ // Look through the DSO's dynamic symbol table for aliases and create a
+ // dynamic symbol for each one. This causes the copy relocation to correctly
+ // interpose any aliases.
+ for (SharedSymbol *Sym : getSymbolsAt<ELFT>(SS))
+ replaceWithDefined(*Sym, Sec, 0, Sym->Size);
+ In.RelaDyn->addReloc(Target->CopyRel, Sec, 0, &SS);
+// MIPS has an odd notion of "paired" relocations to calculate addends.
+// For example, if a relocation is of R_MIPS_HI16, there must be a
+// R_MIPS_LO16 relocation after that, and an addend is calculated using
+// the two relocations.
+template <class ELFT, class RelTy>
+static int64_t computeMipsAddend(const RelTy &Rel, const RelTy *End,
+ InputSectionBase &Sec, RelExpr Expr,
+ bool IsLocal) {
+ if (Expr == R_MIPS_GOTREL && IsLocal)
+ return Sec.getFile<ELFT>()->MipsGp0;
+ // The ABI says that the paired relocation is used only for REL.
+ // See p. 4-17 at ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
+ if (RelTy::IsRela)
+ return 0;
+ RelType Type = Rel.getType(Config->IsMips64EL);
+ uint32_t PairTy = getMipsPairType(Type, IsLocal);
+ if (PairTy == R_MIPS_NONE)
+ return 0;
+ const uint8_t *Buf = Sec.data().data();
+ uint32_t SymIndex = Rel.getSymbol(Config->IsMips64EL);
+ // To make things worse, paired relocations might not be contiguous in
+ // the relocation table, so we need to do linear search. *sigh*
+ for (const RelTy *RI = &Rel; RI != End; ++RI)
+ if (RI->getType(Config->IsMips64EL) == PairTy &&
+ RI->getSymbol(Config->IsMips64EL) == SymIndex)
+ return Target->getImplicitAddend(Buf + RI->r_offset, PairTy);
+ warn("can't find matching " + toString(PairTy) + " relocation for " +
+ toString(Type));
+ return 0;
+// Returns an addend of a given relocation. If it is RELA, an addend
+// is in a relocation itself. If it is REL, we need to read it from an
+// input section.
+template <class ELFT, class RelTy>
+static int64_t computeAddend(const RelTy &Rel, const RelTy *End,
+ InputSectionBase &Sec, RelExpr Expr,
+ bool IsLocal) {
+ int64_t Addend;
+ RelType Type = Rel.getType(Config->IsMips64EL);
+ if (RelTy::IsRela) {
+ // RELA: the addend is stored in the relocation record.
+ Addend = getAddend<ELFT>(Rel);
+ } else {
+ // REL: the addend is decoded from the section contents at r_offset.
+ const uint8_t *Buf = Sec.data().data();
+ Addend = Target->getImplicitAddend(Buf + Rel.r_offset, Type);
+ }
+ // Target-specific adjustments are added on top of the base addend.
+ // PPC64: R_PPC64_TOC in position-independent output also gets the TOC base.
+ if (Config->EMachine == EM_PPC64 && Config->Pic && Type == R_PPC64_TOC)
+ Addend += getPPC64TocBase();
+ // MIPS: add whatever computeMipsAddend derives (GP0 or a paired relocation).
+ if (Config->EMachine == EM_MIPS)
+ Addend += computeMipsAddend<ELFT>(Rel, End, Sec, Expr, IsLocal);
+ return Addend;
+// Report an undefined symbol if necessary.
+// Returns true if this function printed out an error message.
+static bool maybeReportUndefined(Symbol &Sym, InputSectionBase &Sec,
+ uint64_t Offset) {
+ // Only non-local, non-weak undefined symbols are candidates for a report.
+ if (Sym.isLocal() || !Sym.isUndefined() || Sym.isWeak())
+ return false;
+ // The symbol "can be external" when its computed binding is not STB_LOCAL
+ // and its visibility is default.
+ bool CanBeExternal =
+ Sym.computeBinding() != STB_LOCAL && Sym.Visibility == STV_DEFAULT;
+ // Under the Ignore policy such symbols are silently accepted.
+ if (Config->UnresolvedSymbols == UnresolvedPolicy::Ignore && CanBeExternal)
+ return false;
+ // Build the diagnostic: symbol name, then source location (if available),
+ // then the object-file location of the reference.
+ std::string Msg =
+ "undefined symbol: " + toString(Sym) + "\n>>> referenced by ";
+ std::string Src = Sec.getSrcMsg(Sym, Offset);
+ if (!Src.empty())
+ Msg += Src + "\n>>> ";
+ Msg += Sec.getObjMsg(Offset);
+ // Names starting with "_ZTV" get an extra hint about missing key functions.
+ if (Sym.getName().startswith("_ZTV"))
+ Msg += "\nthe vtable symbol may be undefined because the class is missing "
+ "its key function (see https://lld.llvm.org/missingkeyfunction)";
+ // Downgrade to a warning under the Warn policy (for symbols that can be
+ // external) or whenever NoinhibitExec is set; the caller then proceeds as
+ // if nothing was reported.
+ if ((Config->UnresolvedSymbols == UnresolvedPolicy::Warn && CanBeExternal) ||
+ Config->NoinhibitExec) {
+ warn(Msg);
+ return false;
+ }
+ error(Msg);
+ return true;
+// The MIPS N32 ABI treats a series of successive relocations with the same
+// offset as a single relocation. The N64 ABI uses a similar approach, but it
+// packs all the relocations into a single relocation record. Here we emulate
+// this for the N32 ABI: iterate over the relocations with the same offset and
+// pack their types into a single bit-set.
+// Rel is taken by reference: on return it points just past the last
+// relocation that shares the offset of the relocation it pointed to on entry.
+template <class RelTy> static RelType getMipsN32RelType(RelTy *&Rel, RelTy *End) {
+ RelType Type = 0;
+ uint64_t Offset = Rel->r_offset;
+ // N counts the relocations consumed so far; the N-th type is shifted into
+ // bits [8*N, 8*N+8) of the result.
+ int N = 0;
+ while (Rel != End && Rel->r_offset == Offset)
+ Type |= (Rel++)->getType(Config->IsMips64EL) << (8 * N++);
+ return Type;
+// .eh_frame sections are mergeable input sections, so their input
+// offsets are not linearly mapped to output section. For each input
+// offset, we need to find a section piece containing the offset and
+// add the piece's base address to the input offset to compute the
+// output offset. That isn't cheap.
+// This class is to speed up the offset computation. When we process
+// relocations, we access offsets in the monotonically increasing
+// order. So we can optimize for that access pattern.
+// For sections other than .eh_frame, this class doesn't do anything.
+namespace {
+class OffsetGetter {
+ // Pieces is only populated when Sec is an EhInputSection; for every other
+ // section kind it stays empty and get() is the identity function.
+ explicit OffsetGetter(InputSectionBase &Sec) {
+ if (auto *Eh = dyn_cast<EhInputSection>(&Sec))
+ Pieces = Eh->Pieces;
+ }
+ // Translates offsets in input sections to offsets in output sections.
+ // Given offset must increase monotonically. We assume that Piece is
+ // sorted by InputOff.
+ uint64_t get(uint64_t Off) {
+ if (Pieces.empty())
+ return Off;
+ // Advance past every piece that ends at or before Off. I is never reset,
+ // which is why callers must query offsets in increasing order.
+ while (I != Pieces.size() && Pieces[I].InputOff + Pieces[I].Size <= Off)
+ ++I;
+ if (I == Pieces.size())
+ fatal(".eh_frame: relocation is not in any piece");
+ // Pieces must be contiguous, so there must be no holes in between.
+ assert(Pieces[I].InputOff <= Off && "Relocation not in any piece");
+ // Offset -1 means that the piece is dead (i.e. garbage collected).
+ // The -1 returned below is converted to uint64_t by the return type.
+ if (Pieces[I].OutputOff == -1)
+ return -1;
+ // Rebase Off from the piece's input position to its output position.
+ return Pieces[I].OutputOff + Off - Pieces[I].InputOff;
+ }
+ // Pieces of the .eh_frame section being scanned (empty otherwise).
+ ArrayRef<EhSectionPiece> Pieces;
+ // Index of the piece matched by the most recent get() call.
+ size_t I = 0;
+} // namespace
+// Records a relative dynamic relocation at OffsetInSec within IS against Sym.
+static void addRelativeReloc(InputSectionBase *IS, uint64_t OffsetInSec,
+ Symbol *Sym, int64_t Addend, RelExpr Expr,
+ RelType Type) {
+ // Add a relative relocation. If RelrDyn section is enabled, and the
+ // relocation offset is guaranteed to be even, add the relocation to
+ // the RelrDyn section, otherwise add it to the RelaDyn section.
+ // RelrDyn sections don't support odd offsets. Also, RelrDyn sections
+ // don't store the addend values, so we must write it to the relocated
+ // address.
+ if (In.RelrDyn && IS->Alignment >= 2 && OffsetInSec % 2 == 0) {
+ // The entry pushed onto IS->Relocations carries Addend; the RelrDyn
+ // entry records only the (section, offset) location.
+ IS->Relocations.push_back({Expr, Type, OffsetInSec, Addend, Sym});
+ In.RelrDyn->Relocs.push_back({IS, OffsetInSec});
+ return;
+ }
+ // Fallback: emit a Target->RelativeRel entry in RelaDyn.
+ In.RelaDyn->addReloc(Target->RelativeRel, IS, OffsetInSec, Sym, Addend, Expr,
+ Type);
+// Adds Sym to the given PLT and GOT-PLT sections and appends to Rel a
+// relocation of type Type located at the symbol's GOT-PLT offset.
+template <class ELFT, class GotPltSection>
+static void addPltEntry(PltSection *Plt, GotPltSection *GotPlt,
+ RelocationBaseSection *Rel, RelType Type, Symbol &Sym) {
+ Plt->addEntry<ELFT>(Sym);
+ GotPlt->addEntry(Sym);
+ // NOTE(review): the fourth initializer is true for non-preemptible symbols;
+ // presumably it selects "use the symbol's address" over a dynamic symbol
+ // index -- confirm against the dynamic relocation record's definition.
+ Rel->addReloc(
+ {Type, GotPlt, Sym.getGotPltOffset(), !Sym.IsPreemptible, &Sym, 0});
+// Allocates a GOT slot for Sym and arranges for it to be filled in, either by
+// a static relocation (value known now) or by a dynamic relocation.
+template <class ELFT> static void addGotEntry(Symbol &Sym) {
+ In.Got->addEntry(Sym);
+ // Expression used if the slot is filled statically: R_TLS for TLS symbols,
+ // R_PLT for GNU ifuncs, R_ABS for everything else.
+ RelExpr Expr;
+ if (Sym.isTls())
+ Expr = R_TLS;
+ else if (Sym.isGnuIFunc())
+ Expr = R_PLT;
+ else
+ Expr = R_ABS;
+ uint64_t Off = Sym.getGotOffset();
+ // If a GOT slot value can be calculated at link-time, which is now,
+ // we can just fill that out.
+ //
+ // (We don't actually write a value to a GOT slot right now, but we
+ // add a static relocation to a Relocations vector so that
+ // InputSection::relocate will do the work for us. We may be able
+ // to just write a value now, but it is a TODO.)
+ bool IsLinkTimeConstant =
+ !Sym.IsPreemptible && (!Config->Pic || isAbsolute(Sym));
+ if (IsLinkTimeConstant) {
+ In.Got->Relocations.push_back({Expr, Target->GotRel, Off, 0, &Sym});
+ return;
+ }
+ // Otherwise, we emit a dynamic relocation to .rel[a].dyn so that
+ // the GOT slot will be fixed at load-time.
+ // Non-TLS, non-preemptible symbols get a relative relocation. Once the
+ // link-time-constant case above has returned, a non-preemptible symbol
+ // reaching this point already satisfies the Config->Pic and !isAbsolute
+ // tests (assuming isAbsolute has no side effects).
+ if (!Sym.isTls() && !Sym.IsPreemptible && Config->Pic && !isAbsolute(Sym)) {
+ addRelativeReloc(In.Got, Off, &Sym, 0, R_ABS, Target->GotRel);
+ return;
+ }
+ // Remaining cases: TLS symbols use Target->TlsGotRel, others Target->GotRel;
+ // preemptible symbols are recorded with R_ADDEND, the rest with R_ABS.
+ In.RelaDyn->addReloc(Sym.isTls() ? Target->TlsGotRel : Target->GotRel, In.Got,
+ Off, &Sym, 0, Sym.IsPreemptible ? R_ADDEND : R_ABS,
+ Target->GotRel);
+// Return true if we can define a symbol in the executable that
+// contains the value/function of a symbol defined in a shared
+// library.
+static bool canDefineSymbolInExecutable(Symbol &Sym) {
+ // If the symbol has default visibility the symbol defined in the
+ // executable will preempt it.
+ // Note that we want the visibility of the shared symbol itself, not
+ // the visibility of the symbol in the output file we are producing. That is
+ // why we use Sym.StOther.
+ // The low two bits of st_other hold the ELF symbol visibility.
+ if ((Sym.StOther & 0x3) == STV_DEFAULT)
+ return true;
+ // If we are allowed to break address equality of functions, defining
+ // a plt entry will allow the program to call the function in the
+ // .so, but the .so and the executable will not agree on the address
+ // of the function. Similar logic for objects.
+ return ((Sym.isFunc() && Config->IgnoreFunctionAddressEquality) ||
+ (Sym.isObject() && Config->IgnoreDataAddressEquality));
+// The reason we have to do this early scan is as follows
+// * To mmap the output file, we need to know the size
+// * For that, we need to know how many dynamic relocs we will have.
+// It might be possible to avoid this by outputting the file with write:
+// * Write the allocated output sections, computing addresses.
+// * Apply relocations, recording which ones require a dynamic reloc.
+// * Write the dynamic relocations.
+// * Write the rest of the file.
+// This would have some drawbacks. For example, we would only know if .rela.dyn
+// is needed after applying relocations. If it is, it will go after rw and rx
+// sections. Given that it is ro, we will need an extra PT_LOAD. This
+// complicates things for the dynamic linker and means we would have to reserve
+// space for the extra PT_LOAD even if we end up not using it.
+template <class ELFT, class RelTy>
+static void processRelocAux(InputSectionBase &Sec, RelExpr Expr, RelType Type,
+ uint64_t Offset, Symbol &Sym, const RelTy &Rel,
+ int64_t Addend) {
+ // Case 1: the value is fully known at link time, so a static relocation
+ // recorded on the section is all that is needed.
+ if (isStaticLinkTimeConstant(Expr, Type, Sym, Sec, Offset)) {
+ Sec.Relocations.push_back({Expr, Type, Offset, Addend, &Sym});
+ return;
+ }
+ // Case 2: with ZIfuncnoplt set, a reference to an ifunc symbol is emitted
+ // as a dynamic relocation of the same type.
+ if (Sym.isGnuIFunc() && Config->ZIfuncnoplt) {
+ In.RelaDyn->addReloc(Type, &Sec, Offset, &Sym, Addend, R_ADDEND, Type);
+ return;
+ }
+ // Case 3: a dynamic relocation may be placed here if the section is
+ // writable or if ZText is off.
+ bool CanWrite = (Sec.Flags & SHF_WRITE) || !Config->ZText;
+ if (CanWrite) {
+ // R_GOT refers to a position in the got, even if the symbol is preemptible.
+ bool IsPreemptibleValue = Sym.IsPreemptible && Expr != R_GOT;
+ if (!IsPreemptibleValue) {
+ addRelativeReloc(&Sec, Offset, &Sym, Addend, Expr, Type);
+ return;
+ // Note: the local Rel declared in the condition below is the dynamic
+ // relocation type; it shadows the function parameter of the same name.
+ // If getDynRel returns 0, control falls through to the code after this
+ // CanWrite block.
+ } else if (RelType Rel = Target->getDynRel(Type)) {
+ In.RelaDyn->addReloc(Rel, &Sec, Offset, &Sym, Addend, R_ADDEND, Type);
+ // The MIPS ABI turns the use of the GOT and dynamic relocations inside
+ // out. While the regular ABI uses dynamic relocations to fill up GOT
+ // entries, the MIPS ABI requires the dynamic linker to fill up GOT
+ // entries using a specially sorted dynamic symbol table. This affects
+ // even dynamic relocations against symbols which do not require GOT
+ // entry creation explicitly, i.e. do not have any GOT-relocations. So if
+ // a preemptible symbol has a dynamic relocation we have to create a GOT
+ // entry for it anyway.
+ // If a non-preemptible symbol has a dynamic relocation against it, the
+ // dynamic linker takes its st_value, adds the offset and writes down the
+ // result of the dynamic relocation. In the case of a preemptible symbol
+ // the dynamic linker performs symbol resolution, writes the symbol value
+ // to the GOT entry and reads the GOT entry when it needs to perform
+ // a dynamic relocation.
+ // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf p.4-19
+ if (Config->EMachine == EM_MIPS)
+ In.MipsGot->addEntry(*Sec.File, Sym, Addend, Expr);
+ return;
+ }
+ }
+ // If the relocation is to a weak undef, and we are producing
+ // executable, give up on it and produce a non preemptible 0.
+ if (!Config->Shared && Sym.isUndefWeak()) {
+ Sec.Relocations.push_back({Expr, Type, Offset, Addend, &Sym});
+ return;
+ }
+ // A non-relative expression in position-independent output needs a dynamic
+ // relocation, which cannot be placed in a location we may not write to.
+ if (!CanWrite && (Config->Pic && !isRelExpr(Expr))) {
+ error(
+ "can't create dynamic relocation " + toString(Type) + " against " +
+ (Sym.getName().empty() ? "local symbol" : "symbol: " + toString(Sym)) +
+ " in readonly segment; recompile object files with -fPIC "
+ "or pass '-Wl,-z,notext' to allow text relocations in the output" +
+ getLocation(Sec, Sym, Offset));
+ return;
+ }
+ // Copy relocations are only possible if we are creating an executable.
+ if (Config->Shared) {
+ errorOrWarn("relocation " + toString(Type) +
+ " cannot be used against symbol " + toString(Sym) +
+ "; recompile with -fPIC" + getLocation(Sec, Sym, Offset));
+ return;
+ }
+ // If the symbol is undefined we already reported any relevant errors.
+ if (Sym.isUndefined())
+ return;
+ if (!canDefineSymbolInExecutable(Sym)) {
+ error("cannot preempt symbol: " + toString(Sym) +
+ getLocation(Sec, Sym, Offset));
+ return;
+ }
+ if (Sym.isObject()) {
+ // Produce a copy relocation.
+ if (auto *SS = dyn_cast<SharedSymbol>(&Sym)) {
+ // The error is reported but processing continues, so the copy
+ // relocation symbol is still added below.
+ if (!Config->ZCopyreloc)
+ error("unresolvable relocation " + toString(Type) +
+ " against symbol '" + toString(*SS) +
+ "'; recompile with -fPIC or remove '-z nocopyreloc'" +
+ getLocation(Sec, Sym, Offset));
+ addCopyRelSymbol<ELFT>(*SS);
+ }
+ Sec.Relocations.push_back({Expr, Type, Offset, Addend, &Sym});
+ return;
+ }
+ if (Sym.isFunc()) {
+ // This handles a non PIC program call to function in a shared library. In
+ // an ideal world, we could just report an error saying the relocation can
+ // overflow at runtime. In the real world with glibc, crt1.o has a
+ // R_X86_64_PC32 pointing to libc.so.
+ //
+ // The general idea on how to handle such cases is to create a PLT entry and
+ // use that as the function value.
+ //
+ // For the static linking part, we just return a plt expr and everything
+ // else will use the PLT entry as the address.
+ //
+ // The remaining problem is making sure pointer equality still works. We
+ // need the help of the dynamic linker for that. We let it know that we have
+ // a direct reference to a so symbol by creating an undefined symbol with a
+ // non zero st_value. Seeing that, the dynamic linker resolves the symbol to
+ // the value of the symbol we created. This is true even for got entries, so
+ // pointer equality is maintained. To avoid an infinite loop, the only entry
+ // that points to the real function is a dedicated got entry used by the
+ // plt. That is identified by special relocation types (R_X86_64_JUMP_SLOT,
+ // R_386_JMP_SLOT, etc).
+ // For position independent executable on i386, the plt entry requires ebx
+ // to be set. This causes two problems:
+ // * If some code has a direct reference to a function, it was probably
+ // compiled without -fPIE/-fPIC and doesn't maintain ebx.
+ // * If a library definition gets preempted to the executable, it will have
+ // the wrong ebx value.
+ if (Config->Pie && Config->EMachine == EM_386)
+ errorOrWarn("symbol '" + toString(Sym) +
+ "' cannot be preempted; recompile with -fPIE" +
+ getLocation(Sec, Sym, Offset));
+ // Create the PLT entry if needed, then turn the symbol into a definition
+ // located at that PLT entry and mark it as needing a PLT address.
+ if (!Sym.isInPlt())
+ addPltEntry<ELFT>(In.Plt, In.GotPlt, In.RelaPlt, Target->PltRel, Sym);
+ if (!Sym.isDefined())
+ replaceWithDefined(Sym, In.Plt, getPltEntryOffset(Sym.PltIndex), 0);
+ Sym.NeedsPltAddr = true;
+ Sec.Relocations.push_back({Expr, Type, Offset, Addend, &Sym});
+ return;
+ }
+ // Neither an object nor a function: nothing sensible can be synthesized.
+ errorOrWarn("symbol '" + toString(Sym) + "' has no type" +
+ getLocation(Sec, Sym, Offset));
+// Scans the relocation at I in Sec and records whatever PLT/GOT entries and
+// static or dynamic relocations it requires. I is taken by reference and is
+// advanced past every relocation record consumed.
+template <class ELFT, class RelTy>
+static void scanReloc(InputSectionBase &Sec, OffsetGetter &GetOffset, RelTy *&I,
+ RelTy *End) {
+ // Rel keeps referring to the first record even after I is advanced below.
+ const RelTy &Rel = *I;
+ Symbol &Sym = Sec.getFile<ELFT>()->getRelocTargetSym(Rel);
+ RelType Type;
+ // Deal with MIPS oddity.
+ if (Config->MipsN32Abi) {
+ Type = getMipsN32RelType(I, End);
+ } else {
+ Type = Rel.getType(Config->IsMips64EL);
+ ++I;
+ }
+ // Get an offset in an output section this relocation is applied to.
+ // uint64_t(-1) is what GetOffset returns for a dead .eh_frame piece.
+ uint64_t Offset = GetOffset.get(Rel.r_offset);
+ if (Offset == uint64_t(-1))
+ return;
+ // Skip if the target symbol is an erroneous undefined symbol.
+ if (maybeReportUndefined(Sym, Sec, Rel.r_offset))
+ return;
+ const uint8_t *RelocatedAddr = Sec.data().begin() + Rel.r_offset;
+ RelExpr Expr = Target->getRelExpr(Type, Sym, RelocatedAddr);
+ // Ignore "hint" relocations because they are only markers for relaxation.
+ if (isRelExprOneOf<R_HINT, R_NONE>(Expr))
+ return;
+ // Strengthen or relax relocations.
+ //
+ // GNU ifunc symbols must be accessed via PLT because their addresses
+ // are determined by runtime.
+ //
+ // On the other hand, if we know that a PLT entry will be resolved within
+ // the same ELF module, we can skip PLT access and directly jump to the
+ // destination function. For example, if we are linking a main executable,
+ // all dynamic symbols that can be resolved within the executable will
+ // actually be resolved that way at runtime, because the main executable
+ // is always at the beginning of a search list. We can leverage that fact.
+ if (Sym.isGnuIFunc() && !Config->ZIfuncnoplt) {
+ if (!Config->ZText && Config->WarnIfuncTextrel) {
+ warn("using ifunc symbols when text relocations are allowed may produce "
+ "a binary that will segfault, if the object file is linked with "
+ "old version of glibc (glibc 2.28 and earlier). If this applies to "
+ "you, consider recompiling the object files without -fPIC and "
+ "without -Wl,-z,notext option. Use -no-warn-ifunc-textrel to "
+ "turn off this warning." +
+ getLocation(Sec, Sym, Offset));
+ }
+ Expr = toPlt(Expr);
+ } else if (!Sym.IsPreemptible && Expr == R_GOT_PC && !isAbsoluteValue(Sym)) {
+ Expr = Target->adjustRelaxExpr(Type, RelocatedAddr, Expr);
+ } else if (!Sym.IsPreemptible) {
+ Expr = fromPlt(Expr);
+ }
+ // This relocation does not require got entry, but it is relative to got and
+ // needs it to be created. Here we request for that.
+ // NOTE(review): as shown here the flag is set for every relocation that
+ // reaches this point, although the comment above describes only GOT-relative
+ // relocations -- confirm whether a guarding condition is missing from this
+ // rendering of the file.
+ In.Got->HasGotOffRel = true;
+ // Read an addend.
+ int64_t Addend = computeAddend<ELFT>(Rel, End, Sec, Expr, Sym.isLocal());
+ // Process some TLS relocations, including relaxing TLS relocations.
+ // Note that this function does not handle all TLS relocations.
+ // A non-zero result is treated as the number of relocation records handled;
+ // I was already advanced above, so only Processed - 1 more are skipped.
+ if (unsigned Processed =
+ handleTlsRelocation<ELFT>(Type, Sym, Sec, Offset, Addend, Expr)) {
+ I += (Processed - 1);
+ return;
+ }
+ // If a relocation needs PLT, we create PLT and GOTPLT slots for the symbol.
+ // Non-preemptible ifuncs go into the Iplt/IgotPlt/RelaIplt sections with
+ // Target->IRelativeRel; everything else uses the regular PLT sections.
+ if (needsPlt(Expr) && !Sym.isInPlt()) {
+ if (Sym.isGnuIFunc() && !Sym.IsPreemptible)
+ addPltEntry<ELFT>(In.Iplt, In.IgotPlt, In.RelaIplt, Target->IRelativeRel,
+ Sym);
+ else
+ addPltEntry<ELFT>(In.Plt, In.GotPlt, In.RelaPlt, Target->PltRel, Sym);
+ }
+ // Create a GOT slot if a relocation needs GOT.
+ if (needsGot(Expr)) {
+ if (Config->EMachine == EM_MIPS) {
+ // MIPS ABI has special rules to process GOT entries and doesn't
+ // require relocation entries for them. A special case is TLS
+ // relocations. In that case dynamic loader applies dynamic
+ // relocations to initialize TLS GOT entries.
+ // See "Global Offset Table" in Chapter 5 in the following document
+ // for detailed description:
+ // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
+ In.MipsGot->addEntry(*Sec.File, Sym, Addend, Expr);
+ } else if (!Sym.isInGot()) {
+ addGotEntry<ELFT>(Sym);
+ }
+ }
+ // Finally decide between a static relocation, a dynamic relocation, a copy
+ // relocation / PLT address, or an error.
+ processRelocAux<ELFT>(Sec, Expr, Type, Offset, Sym, Rel, Addend);
+// Scans every relocation in Rels, which belong to section Sec.
+template <class ELFT, class RelTy>
+static void scanRelocs(InputSectionBase &Sec, ArrayRef<RelTy> Rels) {
+ OffsetGetter GetOffset(Sec);
+ // Not all relocations end up in Sec.Relocations, but a lot do.
+ Sec.Relocations.reserve(Rels.size());
+ // scanReloc advances I itself (possibly by more than one record), so the
+ // loop header has no increment expression.
+ for (auto I = Rels.begin(), End = Rels.end(); I != End;)
+ scanReloc<ELFT>(Sec, GetOffset, I, End);
+ // Sort relocations by offset to binary search for R_RISCV_PCREL_HI20
+ if (Config->EMachine == EM_RISCV)
+ std::stable_sort(Sec.Relocations.begin(), Sec.Relocations.end(),
+ RelocationOffsetComparator{});
+// Entry point: scans the relocations of S, using its RELA view when
+// S.AreRelocsRela is set and its REL view otherwise.
+template <class ELFT> void elf::scanRelocations(InputSectionBase &S) {
+ if (S.AreRelocsRela)
+ scanRelocs<ELFT>(S, S.relas<ELFT>());
+ else
+ scanRelocs<ELFT>(S, S.rels<ELFT>());
+// Ordering predicate passed to std::merge by ThunkCreator::mergeThunks.
+// Returns true when A must be placed before B: primarily by OutSecOff, with
+// ThunkSections winning certain ties as described below.
+static bool mergeCmp(const InputSection *A, const InputSection *B) {
+ // std::merge requires a strict weak ordering.
+ if (A->OutSecOff < B->OutSecOff)
+ return true;
+ if (A->OutSecOff == B->OutSecOff) {
+ // TA / TB are null when A / B is not a ThunkSection.
+ auto *TA = dyn_cast<ThunkSection>(A);
+ auto *TB = dyn_cast<ThunkSection>(B);
+ // Check if Thunk is immediately before any specific Target
+ // InputSection for example Mips LA25 Thunks.
+ if (TA && TA->getTargetInputSection() == B)
+ return true;
+ // Place Thunk Sections without specific targets before
+ // non-Thunk Sections.
+ if (TA && !TB && !TA->getTargetInputSection())
+ return true;
+ }
+ return false;
+// Call Fn on every executable InputSection accessed via the linker script
+// InputSectionDescription::Sections.
+static void forEachInputSectionDescription(
+ ArrayRef<OutputSection *> OutputSections,
+ llvm::function_ref<void(OutputSection *, InputSectionDescription *)> Fn) {
+ for (OutputSection *OS : OutputSections) {
+ // Only output sections that are both allocated and executable are visited.
+ if (!(OS->Flags & SHF_ALLOC) || !(OS->Flags & SHF_EXECINSTR))
+ continue;
+ // Fn is invoked once per InputSectionDescription command; other kinds of
+ // section command are ignored.
+ for (BaseCommand *BC : OS->SectionCommands)
+ if (auto *ISD = dyn_cast<InputSectionDescription>(BC))
+ Fn(OS, ISD);
+ }
+// Thunk Implementation
+// Thunks (sometimes called stubs, veneers or branch islands) are small pieces
+// of code that the linker inserts in between a caller and a callee. The thunks
+// are added at link time rather than compile time as the decision on whether
+// a thunk is needed, such as the caller and callee being out of range, can only
+// be made at link time.
+// It is straightforward to tell given the current state of the program when a
+// thunk is needed for a particular call. The more difficult part is that
+// the thunk needs to be placed in the program such that the caller can reach
+// the thunk and the thunk can reach the callee; furthermore, adding thunks to
+// the program alters addresses, which can mean more thunks etc.
+// In lld we have a synthetic ThunkSection that can hold many Thunks.
+// The decision to have a ThunkSection act as a container means that we can
+// more easily handle the most common case of a single block of contiguous
+// Thunks by inserting just a single ThunkSection.
+// The implementation of Thunks in lld is split across these areas
+// Relocations.cpp : Framework for creating and placing thunks
+// Thunks.cpp : The code generated for each supported thunk
+// Target.cpp : Target specific hooks that the framework uses to decide when
+// a thunk is used
+// Synthetic.cpp : Implementation of ThunkSection
+// Writer.cpp : Iteratively call framework until no more Thunks added
+// Thunk placement requirements:
+// Mips LA25 thunks. These must be placed immediately before the callee section
+// We can assume that the caller is in range of the Thunk. These are modelled
+// by Thunks that return the section they must precede with
+// getTargetInputSection().
+// ARM interworking and range extension thunks. These thunks must be placed
+// within range of the caller. All implemented ARM thunks can always reach the
+// callee as they use an indirect jump via a register that has no range
+// restrictions.
+// Thunk placement algorithm:
+// For Mips LA25 ThunkSections; the placement is explicit, it has to be before
+// getTargetInputSection().
+// For thunks that must be placed within range of the caller there are many
+// possible choices given that the maximum range from the caller is usually
+// much larger than the average InputSection size. Desirable properties include:
+// - Maximize reuse of thunks by multiple callers
+// - Minimize number of ThunkSections to simplify insertion
+// - Handle impact of already added Thunks on addresses
+// - Simple to understand and implement
+// In lld for the first pass, we pre-create one or more ThunkSections per
+// InputSectionDescription at Target specific intervals. A ThunkSection is
+// placed so that the estimated end of the ThunkSection is within range of the
+// start of the InputSectionDescription or the previous ThunkSection. For
+// example:
+// InputSectionDescription
+// Section 0
+// ...
+// Section N
+// ThunkSection 0
+// Section N + 1
+// ...
+// Section N + K
+// Thunk Section 1
+// The intention is that we can add a Thunk to a ThunkSection that is well
+// spaced enough to service a number of callers without having to do a lot
+// of work. An important principle is that it is not an error if a Thunk cannot
+// be placed in a pre-created ThunkSection; when this happens we create a new
+// ThunkSection placed next to the caller. This allows us to handle the vast
+// majority of thunks simply, but also handle rare cases where the branch range
+// is smaller than the target specific spacing.
+// The algorithm is expected to create all the thunks that are needed in a
+// single pass, with a small number of programs needing a second pass due to
+// the insertion of thunks in the first pass increasing the offset between
+// callers and callees that were only just in range.
+// A consequence of allowing new ThunkSections to be created outside of the
+// pre-created ThunkSections is that in rare cases calls to Thunks that were in
+// range in pass K, are out of range in some pass > K due to the insertion of
+// more Thunks in between the caller and callee. When this happens we retarget
+// the relocation back to the original target and create another Thunk.
+// Remove ThunkSections that are empty, this should only be the initial set
+// precreated on pass 0.
+// Insert the Thunks for OutputSection OS into their designated place
+// in the Sections vector, and recalculate the InputSection output section
+// offsets.
+// This may invalidate any output section offsets stored outside of InputSection
+void ThunkCreator::mergeThunks(ArrayRef<OutputSection *> OutputSections) {
+ forEachInputSectionDescription(
+ OutputSections, [&](OutputSection *OS, InputSectionDescription *ISD) {
+ if (ISD->ThunkSections.empty())
+ return;
+ // Remove any zero sized precreated Thunks.
+ llvm::erase_if(ISD->ThunkSections,
+ [](const std::pair<ThunkSection *, uint32_t> &TS) {
+ return TS.first->getSize() == 0;
+ });
+ // ISD->ThunkSections contains all created ThunkSections, including
+ // those inserted in previous passes. Extract the Thunks created this
+ // pass and order them in ascending OutSecOff.
+ // The second member of each pair is the pass number stored by
+ // addThunkSection when the section was created.
+ std::vector<ThunkSection *> NewThunks;
+ for (const std::pair<ThunkSection *, uint32_t> TS : ISD->ThunkSections)
+ if (TS.second == Pass)
+ NewThunks.push_back(TS.first);
+ std::stable_sort(NewThunks.begin(), NewThunks.end(),
+ [](const ThunkSection *A, const ThunkSection *B) {
+ return A->OutSecOff < B->OutSecOff;
+ });
+ // Merge sorted vectors of Thunks and InputSections by OutSecOff
+ // mergeCmp decides how ties between a ThunkSection and an InputSection
+ // at the same offset are broken.
+ std::vector<InputSection *> Tmp;
+ Tmp.reserve(ISD->Sections.size() + NewThunks.size());
+ std::merge(ISD->Sections.begin(), ISD->Sections.end(),
+ NewThunks.begin(), NewThunks.end(), std::back_inserter(Tmp),
+ mergeCmp);
+ ISD->Sections = std::move(Tmp);
+ });
+// Find or create a ThunkSection within the InputSectionDescription (ISD) that
+// is in range of Src. An ISD maps to a range of InputSections described by a
+// linker script section pattern such as { .text .text.* }.
+ThunkSection *ThunkCreator::getISDThunkSec(OutputSection *OS, InputSection *IS,
+ InputSectionDescription *ISD,
+ uint32_t Type, uint64_t Src) {
+ // First look for an existing ThunkSection in this ISD that a branch of
+ // relocation type Type at address Src can reach.
+ for (std::pair<ThunkSection *, uint32_t> TP : ISD->ThunkSections) {
+ ThunkSection *TS = TP.first;
+ uint64_t TSBase = OS->Addr + TS->OutSecOff;
+ uint64_t TSLimit = TSBase + TS->getSize();
+ // If Src lies beyond the end of the ThunkSection the range check is made
+ // against its base address, otherwise against its limit.
+ if (Target->inBranchRange(Type, Src, (Src > TSLimit) ? TSBase : TSLimit))
+ return TS;
+ }
+ // No suitable ThunkSection exists. This can happen when there is a branch
+ // with lower range than the ThunkSection spacing or when there are too
+ // many Thunks. Create a new ThunkSection as close to the InputSection as
+ // possible. Error if InputSection is so large we cannot place ThunkSection
+ // anywhere in Range.
+ // Try the start of IS first, then the end of IS.
+ uint64_t ThunkSecOff = IS->OutSecOff;
+ if (!Target->inBranchRange(Type, Src, OS->Addr + ThunkSecOff)) {
+ ThunkSecOff = IS->OutSecOff + IS->getSize();
+ if (!Target->inBranchRange(Type, Src, OS->Addr + ThunkSecOff))
+ fatal("InputSection too large for range extension thunk " +
+ IS->getObjMsg(Src - (OS->Addr + IS->OutSecOff)));
+ }
+ return addThunkSection(OS, ISD, ThunkSecOff);
+// Add a Thunk that needs to be placed in a ThunkSection that immediately
+// precedes its Target.
+ThunkSection *ThunkCreator::getISThunkSec(InputSection *IS) {
+ // Reuse the ThunkSection already created for IS, if any.
+ ThunkSection *TS = ThunkedSections.lookup(IS);
+ if (TS)
+ return TS;
+ // Find InputSectionRange within Target Output Section (TOS) that the
+ // InputSection (IS) that we need to precede is in.
+ OutputSection *TOS = IS->getParent();
+ for (BaseCommand *BC : TOS->SectionCommands) {
+ auto *ISD = dyn_cast<InputSectionDescription>(BC);
+ if (!ISD || ISD->Sections.empty())
+ continue;
+ // An ISD is considered to contain IS when IS's offset lies between the
+ // offsets of the ISD's first and last sections (inclusive).
+ InputSection *First = ISD->Sections.front();
+ InputSection *Last = ISD->Sections.back();
+ if (IS->OutSecOff < First->OutSecOff || Last->OutSecOff < IS->OutSecOff)
+ continue;
+ // Create the ThunkSection at IS's own offset and remember it for reuse.
+ TS = addThunkSection(TOS, ISD, IS->OutSecOff);
+ ThunkedSections[IS] = TS;
+ return TS;
+ }
+ // No InputSectionDescription in the parent output section covers IS.
+ // NOTE(review): createThunks calls addThunk on this result without a null
+ // check -- confirm this path cannot be reached in practice.
+ return nullptr;
+// Create one or more ThunkSections per OS that can be used to place Thunks.
+// We attempt to place the ThunkSections using the following desirable
+// properties:
+// - Within range of the maximum number of callers
+// - Minimise the number of ThunkSections
+// We follow a simple but conservative heuristic to place ThunkSections at
+// offsets that are multiples of a Target specific branch range.
+// For an InputSectionDescription that is smaller than the range, a single
+// ThunkSection at the end of the range will do.
+// For an InputSectionDescription that is more than twice the size of the range,
+// we place the last ThunkSection at range bytes from the end of the
+// InputSectionDescription in order to increase the likelihood that the
+// distance from a thunk to its target will be sufficiently small to
+// allow for the creation of a short thunk.
+void ThunkCreator::createInitialThunkSections(
+ ArrayRef<OutputSection *> OutputSections) {
+ uint32_t ThunkSectionSpacing = Target->getThunkSectionSpacing();
+ forEachInputSectionDescription(
+ OutputSections, [&](OutputSection *OS, InputSectionDescription *ISD) {
+ if (ISD->Sections.empty())
+ return;
+ // [ISDBegin, ISDEnd) is the offset range covered by this ISD.
+ uint32_t ISDBegin = ISD->Sections.front()->OutSecOff;
+ uint32_t ISDEnd =
+ ISD->Sections.back()->OutSecOff + ISD->Sections.back()->getSize();
+ // uint32_t(-1) effectively disables the lower bound; it is only set to
+ // a real value when the ISD is more than twice the spacing in size.
+ uint32_t LastThunkLowerBound = -1;
+ if (ISDEnd - ISDBegin > ThunkSectionSpacing * 2)
+ LastThunkLowerBound = ISDEnd - ThunkSectionSpacing;
+ // ISLimit is assigned on the first loop iteration; Sections is known to
+ // be non-empty here, so it is always set before its use after the loop.
+ uint32_t ISLimit;
+ uint32_t PrevISLimit = ISDBegin;
+ uint32_t ThunkUpperBound = ISDBegin + ThunkSectionSpacing;
+ for (const InputSection *IS : ISD->Sections) {
+ ISLimit = IS->OutSecOff + IS->getSize();
+ // This section would cross the current upper bound: place a
+ // ThunkSection at the end of the previous section and start a new
+ // interval from there.
+ if (ISLimit > ThunkUpperBound) {
+ addThunkSection(OS, ISD, PrevISLimit);
+ ThunkUpperBound = PrevISLimit + ThunkSectionSpacing;
+ }
+ // Stop once past the lower bound for the final ThunkSection.
+ if (ISLimit > LastThunkLowerBound)
+ break;
+ PrevISLimit = ISLimit;
+ }
+ // The final ThunkSection goes at the end of the last section examined.
+ addThunkSection(OS, ISD, ISLimit);
+ });
+// Creates a ThunkSection for OS at offset Off and registers it with ISD,
+// tagged with the current pass number. Returns the new section.
+ThunkSection *ThunkCreator::addThunkSection(OutputSection *OS,
+ InputSectionDescription *ISD,
+ uint64_t Off) {
+ auto *TS = make<ThunkSection>(OS, Off);
+ ISD->ThunkSections.push_back({TS, Pass});
+ return TS;
+// Returns a Thunk for Sym usable by a relocation of type Type at address Src.
+// The bool is true when a new Thunk was created and false when an existing
+// one was reused.
+std::pair<Thunk *, bool> ThunkCreator::getThunk(Symbol &Sym, RelType Type,
+ uint64_t Src) {
+ std::vector<Thunk *> *ThunkVec = nullptr;
+ // We use (section, offset) pair to find the thunk position if possible so
+ // that we create only one thunk for aliased symbols or ICFed sections.
+ if (auto *D = dyn_cast<Defined>(&Sym))
+ if (!D->isInPlt() && D->Section)
+ ThunkVec = &ThunkedSymbolsBySection[{D->Section->Repl, D->Value}];
+ // Otherwise fall back to keying on the symbol itself.
+ if (!ThunkVec)
+ ThunkVec = &ThunkedSymbols[&Sym];
+ // Check existing Thunks for Sym to see if they can be reused
+ for (Thunk *T : *ThunkVec)
+ if (T->isCompatibleWith(Type) &&
+ Target->inBranchRange(Type, Src, T->getThunkTargetSym()->getVA()))
+ return std::make_pair(T, false);
+ // No existing compatible Thunk in range, create a new one
+ Thunk *T = addThunk(Type, Sym);
+ ThunkVec->push_back(T);
+ return std::make_pair(T, true);
+// Return true if the relocation target is an in range Thunk.
+// Return false if the relocation is not to a Thunk. If the relocation target
+// was originally to a Thunk, but is no longer in range we revert the
+// relocation back to its original non-Thunk target.
+bool ThunkCreator::normalizeExistingThunk(Relocation &Rel, uint64_t Src) {
+ // Thunks maps a thunk's target symbol to its Thunk; a hit means Rel was
+ // redirected to a Thunk on an earlier pass.
+ if (Thunk *T = Thunks.lookup(Rel.Sym)) {
+ if (Target->inBranchRange(Rel.Type, Src, Rel.Sym->getVA()))
+ return true;
+ // Out of range: point the relocation back at the Thunk's destination and
+ // restore the PLT form of the expression if that destination is in the
+ // PLT.
+ Rel.Sym = &T->Destination;
+ if (Rel.Sym->isInPlt())
+ Rel.Expr = toPlt(Rel.Expr);
+ }
+ return false;
+// Process all relocations from the InputSections that have been assigned
+// to InputSectionDescriptions and redirect through Thunks if needed. The
+// function should be called iteratively until it returns false.
+// PreConditions:
+// All InputSections that may need a Thunk are reachable from
+// OutputSectionCommands.
+// All OutputSections have an address and all InputSections have an offset
+// within the OutputSection.
+// The offsets between caller (relocation place) and callee
+// (relocation target) will not be modified outside of createThunks().
+// PostConditions:
+// If return value is true then ThunkSections have been inserted into
+// OutputSections. All relocations that needed a Thunk based on the information
+// available to createThunks() on entry have been redirected to a Thunk. Note
+// that adding Thunks changes offsets between caller and callee so more Thunks
+// may be required.
+// If return value is false then no more Thunks are needed, and createThunks has
+// made no changes. If the target requires range extension thunks, currently
+// ARM, then any future change in offset between caller and callee risks a
+// relocation out of range error.
+bool ThunkCreator::createThunks(ArrayRef<OutputSection *> OutputSections) {
+ bool AddressesChanged = false;
+ // On the first pass, pre-create ThunkSections if the target reports a
+ // non-zero ThunkSection spacing.
+ if (Pass == 0 && Target->getThunkSectionSpacing())
+ createInitialThunkSections(OutputSections);
+ // With Thunk Size much smaller than branch range we expect to
+ // converge quickly; if we get to 10 something has gone wrong.
+ if (Pass == 10)
+ fatal("thunk creation not converged");
+ // Create all the Thunks and insert them into synthetic ThunkSections. The
+ // ThunkSections are later inserted back into InputSectionDescriptions.
+ // We separate the creation of ThunkSections from the insertion of the
+ // ThunkSections as ThunkSections are not always inserted into the same
+ // InputSectionDescription as the caller.
+ forEachInputSectionDescription(
+ OutputSections, [&](OutputSection *OS, InputSectionDescription *ISD) {
+ for (InputSection *IS : ISD->Sections)
+ for (Relocation &Rel : IS->Relocations) {
+ // Src is the virtual address of the place being relocated.
+ uint64_t Src = IS->getVA(Rel.Offset);
+ // If we are a relocation to an existing Thunk, check if it is
+ // still in range. If not then Rel will be altered to point to its
+ // original target so another Thunk can be generated.
+ if (Pass > 0 && normalizeExistingThunk(Rel, Src))
+ continue;
+ if (!Target->needsThunk(Rel.Expr, Rel.Type, IS->File, Src,
+ *Rel.Sym))
+ continue;
+ Thunk *T;
+ bool IsNew;
+ std::tie(T, IsNew) = getThunk(*Rel.Sym, Rel.Type, Src);
+ if (IsNew) {
+ // Find or create a ThunkSection for the new Thunk
+ // Thunks with a target input section go immediately before that
+ // section; all others go into a ThunkSection within range of Src.
+ ThunkSection *TS;
+ if (auto *TIS = T->getTargetInputSection())
+ TS = getISThunkSec(TIS);
+ else
+ TS = getISDThunkSec(OS, IS, ISD, Rel.Type, Src);
+ TS->addThunk(T);
+ Thunks[T->getThunkTargetSym()] = T;
+ }
+ // Redirect relocation to Thunk, we never go via the PLT to a Thunk
+ Rel.Sym = T->getThunkTargetSym();
+ Rel.Expr = fromPlt(Rel.Expr);
+ }
+ // Lay out every ThunkSection of this ISD; any reported change makes the
+ // overall result true.
+ for (auto &P : ISD->ThunkSections)
+ AddressesChanged |= P.first->assignOffsets();
+ });
+ // Do the same for ThunkSections that precede a specific InputSection.
+ for (auto &P : ThunkedSections)
+ AddressesChanged |= P.second->assignOffsets();
+ // Merge all created synthetic ThunkSections back into OutputSection
+ mergeThunks(OutputSections);
+ ++Pass;
+ return AddressesChanged;
+// Explicit instantiations of scanRelocations for each supported ELF type.
+template void elf::scanRelocations<ELF32LE>(InputSectionBase &);
+template void elf::scanRelocations<ELF32BE>(InputSectionBase &);
+template void elf::scanRelocations<ELF64LE>(InputSectionBase &);
+template void elf::scanRelocations<ELF64BE>(InputSectionBase &);
diff --git a/contrib/llvm/tools/lld/ELF/Relocations.h b/contrib/llvm/tools/lld/ELF/Relocations.h
new file mode 100644
index 000000000000..d00e68bd36e6
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Relocations.h
@@ -0,0 +1,219 @@
+//===- Relocations.h -------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/LLVM.h"
+#include "llvm/ADT/DenseMap.h"
+#include <map>
+#include <vector>
+namespace lld {
+namespace elf {
+class Symbol;
+class InputSection;
+class InputSectionBase;
+class OutputSection;
+class SectionBase;
+// Represents a relocation type, such as R_X86_64_PC32 or R_ARM_THM_CALL.
+// Stored as a plain 32-bit unsigned integer.
+typedef uint32_t RelType;
+// List of target-independent relocation types. Relocations read
+// from files are converted to these types so that the main code
+// doesn't have to know about architecture-specific details.
+// Enumerator values are also used as bit positions in the 64-bit masks
+// built by RelExprMaskBuilder, so every value must stay below 64.
+enum RelExpr {
+ R_ABS,
+ // NOTE(review): the comment below describes the page address of a GOT
+ // entry used for IPLT redirection; confirm that it really belongs to the
+ // enumerator that follows it.
+ // The expression is used for IFUNC support. Describes PC-relative
+ // address of the memory page of GOT entry. This entry is used for
+ // a redirection to IPLT.
+ R_GOT,
+ // NOTE(review): the comment below describes a GOT entry, yet it precedes
+ // R_PC; confirm which enumerator it is meant to document.
+ // The expression is used for IFUNC support. Evaluates to GOT entry,
+ // containing redirection to the IPLT.
+ R_PC,
+ R_PLT,
+ R_TLS,
+// Build a bitmask with one bit set for each RelExpr.
+// Constexpr function arguments can't be used in static asserts, so we
+// use template arguments to build the mask.
+// But function template partial specializations don't exist (needed
+// for base case of the recursion), so we need a dummy struct.
+// This primary template is the base case: an empty pack yields an empty
+// mask (0).
+template <RelExpr... Exprs> struct RelExprMaskBuilder {
+ static inline uint64_t build() { return 0; }
+// Specialization for recursive case.
+// Sets the bit numbered `Head` and ORs in the mask built from `Tail...`.
+template <RelExpr Head, RelExpr... Tail>
+struct RelExprMaskBuilder<Head, Tail...> {
+ static inline uint64_t build() {
+ // Reject at compile time any enumerator that would not fit in the mask.
+ static_assert(0 <= Head && Head < 64,
+ "RelExpr is too large for 64-bit mask!");
+ return (uint64_t(1) << Head) | RelExprMaskBuilder<Tail...>::build();
+ }
+// Return true if `Expr` is one of `Exprs`.
+// There are fewer than 64 RelExpr's, so we can represent any set of
+// RelExpr's as a constant bit mask and test for membership with a
+// couple cheap bitwise operations.
+template <RelExpr... Exprs> bool isRelExprOneOf(RelExpr Expr) {
+ // Runtime counterpart of the static_assert in RelExprMaskBuilder: the
+ // shift below is only meaningful for values in [0, 64).
+ assert(0 <= Expr && (int)Expr < 64 &&
+ "RelExpr is too large for 64-bit mask!");
+ // Non-zero (true) exactly when the bit for Expr is present in the mask.
+ return (uint64_t(1) << Expr) & RelExprMaskBuilder<Exprs...>::build();
+// Architecture-neutral representation of relocation.
+struct Relocation {
+ // Target-independent expression kind (see RelExpr).
+ RelExpr Expr;
+ // Relocation type as a RelType value.
+ RelType Type;
+ // Location of the relocation; presumably an offset within the owning
+ // input section -- TODO confirm against callers.
+ uint64_t Offset;
+ // Signed addend of the relocation.
+ int64_t Addend;
+ // Symbol the relocation refers to.
+ Symbol *Sym;
+// Orders Relocations by their Offset field. The mixed overloads compare a
+// Relocation against a bare offset so that a range sorted by Offset can be
+// searched without building a dummy Relocation.
+// All call operators are const: the comparator has no state, and this lets
+// it be invoked through a const object or reference.
+struct RelocationOffsetComparator {
+  bool operator()(const Relocation &Lhs, const Relocation &Rhs) const {
+    return Lhs.Offset < Rhs.Offset;
+  }
+  // For std::lower_bound, std::upper_bound, std::equal_range.
+  bool operator()(const Relocation &Rel, uint64_t Val) const {
+    return Rel.Offset < Val;
+  }
+  bool operator()(uint64_t Val, const Relocation &Rel) const {
+    return Val < Rel.Offset;
+  }
+template <class ELFT> void scanRelocations(InputSectionBase &);
+class ThunkSection;
+class Thunk;
+struct InputSectionDescription;
+// Creates Thunks for relocations that need them and places them into
+// ThunkSections; createThunks is the entry point and the remaining members
+// are its helpers and bookkeeping.
+class ThunkCreator {
+ // Return true if Thunks have been added to OutputSections
+ bool createThunks(ArrayRef<OutputSection *> OutputSections);
+ // The number of completed passes of createThunks. This permits us
+ // to do one-time initialization on Pass 0 and to put a limit on the
+ // number of times it can be called to prevent infinite loops.
+ uint32_t Pass = 0;
+ void mergeThunks(ArrayRef<OutputSection *> OutputSections);
+ ThunkSection *getISDThunkSec(OutputSection *OS, InputSection *IS,
+ InputSectionDescription *ISD, uint32_t Type,
+ uint64_t Src);
+ ThunkSection *getISThunkSec(InputSection *IS);
+ void createInitialThunkSections(ArrayRef<OutputSection *> OutputSections);
+ // Returns the Thunk to use together with a flag that createThunks treats
+ // as "this Thunk was newly created".
+ std::pair<Thunk *, bool> getThunk(Symbol &Sym, RelType Type, uint64_t Src);
+ ThunkSection *addThunkSection(OutputSection *OS, InputSectionDescription *,
+ uint64_t Off);
+ bool normalizeExistingThunk(Relocation &Rel, uint64_t Src);
+ // Record all the available Thunks for a Symbol
+ llvm::DenseMap<std::pair<SectionBase *, uint64_t>, std::vector<Thunk *>>
+ ThunkedSymbolsBySection;
+ llvm::DenseMap<Symbol *, std::vector<Thunk *>> ThunkedSymbols;
+ // Find a Thunk from the Thunks symbol definition, we can use this to find
+ // the Thunk from a relocation to the Thunks symbol definition.
+ llvm::DenseMap<Symbol *, Thunk *> Thunks;
+ // Track InputSections that have an inline ThunkSection placed in front
+ // an inline ThunkSection may have control fall through to the section below
+ // so we need to make sure that there is only one of them.
+ // The Mips LA25 Thunk is an example of an inline ThunkSection.
+ llvm::DenseMap<InputSection *, ThunkSection *> ThunkedSections;
+// Return an int64_t to make sure we get the sign extension out of the way as
+// early as possible.
+// Overload for ELFT::Rel records: always returns 0, since this overload
+// reads nothing from the record.
+template <class ELFT>
+static inline int64_t getAddend(const typename ELFT::Rel &Rel) {
+ return 0;
+// Overload for ELFT::Rela records: returns the record's r_addend field,
+// converted to int64_t by the return type.
+template <class ELFT>
+static inline int64_t getAddend(const typename ELFT::Rela &Rel) {
+ return Rel.r_addend;
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/ScriptLexer.cpp b/contrib/llvm/tools/lld/ELF/ScriptLexer.cpp
new file mode 100644
index 000000000000..9a372c6d1c6f
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/ScriptLexer.cpp
@@ -0,0 +1,298 @@
+//===- ScriptLexer.cpp ----------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file defines a lexer for the linker script.
+// The linker script's grammar is not complex but is ambiguous due to the
+// lack of a formal specification of the language. What we are trying to
+// do in this and other files in LLD is to make a "reasonable" linker
+// script processor.
+// Among simplicity, compatibility and efficiency, we put the most
+// emphasis on simplicity when we wrote this lexer. Compatibility with the
+// GNU linkers is important, but we did not try to clone every tiny corner
+// case of their lexers, as even ld.bfd and ld.gold are subtly different
+// in various corner cases. We do not care much about efficiency because
+// the time spent in parsing linker scripts is usually negligible.
+// Our grammar of the linker script is LL(2), meaning that it needs at
+// most two-token lookahead to parse. The only place we need two-token
+// lookahead is labels in version scripts, where we need to parse "local :"
+// as if "local:".
+// Overall, this lexer works fine for most linker scripts. There might
+// be room for improving compatibility, but that's probably not at the
+// top of our todo list.
+#include "ScriptLexer.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+using namespace lld;
+using namespace lld::elf;
+// Returns a whole line containing the current token.
+// The "current token" is Tokens[Pos - 1], so the member Pos must be
+// non-zero when this is called.
+StringRef ScriptLexer::getLine() {
+ StringRef S = getCurrentMB().getBuffer();
+ StringRef Tok = Tokens[Pos - 1];
+ // Note: from here on the local Pos shadows the member Pos. It holds the
+ // index of the last '\n' found searching backwards from the token start.
+ size_t Pos = S.rfind('\n', Tok.data() - S.data());
+ // Drop everything up to and including that newline, if there is one.
+ if (Pos != StringRef::npos)
+ S = S.substr(Pos + 1);
+ // Cut the line at the first '\r' or '\n' that follows.
+ return S.substr(0, S.find_first_of("\r\n"));
+// Returns 1-based line number of the current token.
+// If no token has been consumed yet (Pos == 0) there is no current token,
+// so the first line is reported.
+size_t ScriptLexer::getLineNumber() {
+  // Tokens[Pos - 1] below would index out of bounds for Pos == 0; this can
+  // happen when an error is reported before the first token is read.
+  if (Pos == 0)
+    return 1;
+  StringRef S = getCurrentMB().getBuffer();
+  StringRef Tok = Tokens[Pos - 1];
+  // Count the newlines that precede the token within its buffer.
+  return S.substr(0, Tok.data() - S.data()).count('\n') + 1;
+// Returns 0-based column number of the current token.
+// Computed as the distance from the start of the line returned by
+// getLine() to the start of Tokens[Pos - 1]; requires Pos > 0.
+size_t ScriptLexer::getColumnNumber() {
+ StringRef Tok = Tokens[Pos - 1];
+ return Tok.data() - getLine().data();
+// Returns "<buffer identifier>:<line number>" for the current token, using
+// the buffer found by getCurrentMB() and the result of getLineNumber().
+std::string ScriptLexer::getCurrentLocation() {
+ std::string Filename = getCurrentMB().getBufferIdentifier();
+ return (Filename + ":" + Twine(getLineNumber())).str();
+// Constructs a lexer and immediately tokenizes the whole buffer MB.
+ScriptLexer::ScriptLexer(MemoryBufferRef MB) { tokenize(MB); }
+// We don't want to record cascading errors. Keep only the first one.
+// The message is prefixed with the current location. When at least one
+// token has been consumed, the offending source line and a caret under the
+// token's column are appended.
+void ScriptLexer::setError(const Twine &Msg) {
+ if (errorCount())
+ return;
+ std::string S = (getCurrentLocation() + ": " + Msg).str();
+ // getLine()/getColumnNumber() index Tokens[Pos - 1], hence the guard.
+ if (Pos)
+ S += "\n>>> " + getLine().str() + "\n>>> " +
+ std::string(getColumnNumber(), ' ') + "^";
+ error(S);
+// Split the contents of MB into linker script tokens.
+// MB is recorded in MBs, and the resulting tokens are inserted into Tokens
+// at the current position (member Pos), so they are read next. On an
+// unclosed quote an error is reported and no token from MB is inserted.
+void ScriptLexer::tokenize(MemoryBufferRef MB) {
+ std::vector<StringRef> Vec;
+ MBs.push_back(MB);
+ StringRef S = MB.getBuffer();
+ StringRef Begin = S;
+ for (;;) {
+ S = skipSpace(S);
+ if (S.empty())
+ break;
+ // Quoted token. Note that double-quote characters are parts of a token
+ // because, in a glob match context, only unquoted tokens are interpreted
+ // as glob patterns. Double-quoted tokens are literal patterns in that
+ // context.
+ if (S.startswith("\"")) {
+ size_t E = S.find("\"", 1);
+ if (E == StringRef::npos) {
+ StringRef Filename = MB.getBufferIdentifier();
+ // Count the newlines before the opening quote; +1 below makes the
+ // reported line number 1-based.
+ size_t Lineno = Begin.substr(0, S.data() - Begin.data()).count('\n');
+ error(Filename + ":" + Twine(Lineno + 1) + ": unclosed quote");
+ return;
+ }
+ Vec.push_back(S.take_front(E + 1));
+ S = S.substr(E + 1);
+ continue;
+ }
+ // ">foo" is parsed to ">" and "foo", but ">>" is parsed to ">>".
+ // "|", "||", "&" and "&&" are different operators.
+ if (S.startswith("<<") || S.startswith("<=") || S.startswith(">>") ||
+ S.startswith(">=") || S.startswith("||") || S.startswith("&&")) {
+ Vec.push_back(S.substr(0, 2));
+ S = S.substr(2);
+ continue;
+ }
+ // Unquoted token. This is more relaxed than tokens in C-like language,
+ // so that you can write "file-name.cpp" as one bare token, for example.
+ // Note: this local Pos shadows the member Pos inside the loop body.
+ size_t Pos = S.find_first_not_of(
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+ "0123456789_.$/\\~=+[]*?-!^:");
+ // A character that cannot start a word (which is usually a
+ // punctuation) forms a single character token.
+ if (Pos == 0)
+ Pos = 1;
+ Vec.push_back(S.substr(0, Pos));
+ S = S.substr(Pos);
+ }
+ // Here Pos is the member again: splice the new tokens in at the cursor.
+ Tokens.insert(Tokens.begin() + Pos, Vec.begin(), Vec.end());
+// Skip leading whitespace characters or comments.
+// Handles "/* ... */" block comments and "#" comments running to the end
+// of the line. Returns the remaining text, or an empty string after
+// reporting an error for an unclosed block comment.
+StringRef ScriptLexer::skipSpace(StringRef S) {
+ for (;;) {
+ if (S.startswith("/*")) {
+ size_t E = S.find("*/", 2);
+ if (E == StringRef::npos) {
+ error("unclosed comment in a linker script");
+ return "";
+ }
+ S = S.substr(E + 2);
+ continue;
+ }
+ if (S.startswith("#")) {
+ size_t E = S.find('\n', 1);
+ // No newline: the comment extends to the end of the buffer, so make
+ // E + 1 equal to S.size() and consume everything.
+ if (E == StringRef::npos)
+ E = S.size() - 1;
+ S = S.substr(E + 1);
+ continue;
+ }
+ size_t Size = S.size();
+ S = S.ltrim();
+ // Nothing was trimmed and no comment matched: S starts with a token.
+ if (S.size() == Size)
+ return S;
+ }
+// An erroneous token is handled as if it were the last token before EOF.
+// Returns true once any error has been recorded or every token has been
+// consumed.
+bool ScriptLexer::atEOF() { return errorCount() || Tokens.size() == Pos; }
+// Split a given string as an expression.
+// This function returns "3", "*" and "5" for "3*5" for example.
+// A string that starts with a double quote is returned unsplit.
+static std::vector<StringRef> tokenizeExpr(StringRef S) {
+ StringRef Ops = "+-*/:!~"; // List of operators
+ // Quoted strings are literal strings, so we don't want to split it.
+ if (S.startswith("\""))
+ return {S};
+ // Split S with operators as separators.
+ std::vector<StringRef> Ret;
+ while (!S.empty()) {
+ size_t E = S.find_first_of(Ops);
+ // No need to split if there is no operator.
+ if (E == StringRef::npos) {
+ Ret.push_back(S);
+ break;
+ }
+ // Get a token before the operator.
+ if (E != 0)
+ Ret.push_back(S.substr(0, E));
+ // Get the operator as a token. Keep != as one token.
+ if (S.substr(E).startswith("!=")) {
+ Ret.push_back(S.substr(E, 2));
+ S = S.substr(E + 2);
+ } else {
+ Ret.push_back(S.substr(E, 1));
+ S = S.substr(E + 1);
+ }
+ }
+ return Ret;
+// In contexts where expressions are expected, the lexer should apply
+// different tokenization rules than the default one. By default,
+// arithmetic operator characters are regular characters, but in the
+// expression context, they should be independent tokens.
+// For example, "foo*3" should be tokenized to "foo", "*" and "3" only
+// in the expression context.
+// This function may split the current token into multiple tokens.
+// It does nothing unless InExpr is set, no error is pending and a token
+// remains at Pos.
+void ScriptLexer::maybeSplitExpr() {
+ if (!InExpr || errorCount() || atEOF())
+ return;
+ std::vector<StringRef> V = tokenizeExpr(Tokens[Pos]);
+ // A single piece means there was nothing to split.
+ if (V.size() == 1)
+ return;
+ // Replace the token at Pos with its pieces, in place.
+ Tokens.erase(Tokens.begin() + Pos);
+ Tokens.insert(Tokens.begin() + Pos, V.begin(), V.end());
+// Returns the next token and advances Pos past it. Returns an empty string
+// if an error is already pending, or after reporting "unexpected EOF" when
+// no token is left.
+StringRef ScriptLexer::next() {
+ maybeSplitExpr();
+ if (errorCount())
+ return "";
+ if (atEOF()) {
+ setError("unexpected EOF");
+ return "";
+ }
+ return Tokens[Pos++];
+// Returns the next token without consuming it. Implemented as next()
+// followed by stepping Pos back, so it reports the same errors as next()
+// (including "unexpected EOF") and returns an empty string in that case.
+StringRef ScriptLexer::peek() {
+ StringRef Tok = next();
+ if (errorCount())
+ return "";
+ Pos = Pos - 1;
+ return Tok;
+// Returns the token after the next one without consuming anything. Two
+// tokens are read and Pos is then moved back by two; on error an empty
+// string is returned and Pos is not restored.
+StringRef ScriptLexer::peek2() {
+ skip();
+ StringRef Tok = next();
+ if (errorCount())
+ return "";
+ Pos = Pos - 2;
+ return Tok;
+// If the next token equals Tok, consumes it and returns true; otherwise
+// leaves the position unchanged and returns false.
+bool ScriptLexer::consume(StringRef Tok) {
+ if (peek() == Tok) {
+ skip();
+ return true;
+ }
+ return false;
+// Consumes Tok followed by ":". Space is allowed between Tok and ":".
+// Returns true if a label was consumed, false otherwise.
+bool ScriptLexer::consumeLabel(StringRef Tok) {
+ // Case 1: "Tok:" was lexed as a single token.
+ if (consume((Tok + ":").str()))
+ return true;
+ // Case 2: Tok and ":" are two separate tokens; check bounds first.
+ if (Tokens.size() >= Pos + 2 && Tokens[Pos] == Tok &&
+ Tokens[Pos + 1] == ":") {
+ Pos += 2;
+ return true;
+ }
+ return false;
+// Consumes one token and discards it (same error behavior as next()).
+void ScriptLexer::skip() { (void)next(); }
+// Consumes the next token and reports an error if it is not Expect.
+// Does nothing when an error is already pending.
+void ScriptLexer::expect(StringRef Expect) {
+ if (errorCount())
+ return;
+ StringRef Tok = next();
+ if (Tok != Expect)
+ setError(Expect + " expected, but got " + Tok);
+// Returns true if S encloses T.
+// "Encloses" is a pointer-range test: T's bytes must lie within
+// [S.bytes_begin(), S.bytes_end()]; contents are not compared.
+static bool encloses(StringRef S, StringRef T) {
+ return S.bytes_begin() <= T.bytes_begin() && T.bytes_end() <= S.bytes_end();
+// Returns the input buffer that contains the current token
+// (Tokens[Pos - 1]). Before any token has been consumed there is no
+// current token, so the most recently added buffer is returned instead.
+MemoryBufferRef ScriptLexer::getCurrentMB() {
+  // Find input buffer containing the current token.
+  assert(!MBs.empty());
+  // setError() asks for the location before checking Pos, so Pos == 0 is
+  // reachable here; Tokens[Pos - 1] must not be evaluated in that case.
+  if (Pos == 0)
+    return MBs.back();
+  for (MemoryBufferRef MB : MBs)
+    if (encloses(MB.getBuffer(), Tokens[Pos - 1]))
+      return MB;
+  llvm_unreachable("getCurrentMB: failed to find a token");
diff --git a/contrib/llvm/tools/lld/ELF/ScriptLexer.h b/contrib/llvm/tools/lld/ELF/ScriptLexer.h
new file mode 100644
index 000000000000..fc6b5b1008a7
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/ScriptLexer.h
@@ -0,0 +1,56 @@
+//===- ScriptLexer.h --------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/LLVM.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <utility>
+#include <vector>
+namespace lld {
+namespace elf {
+// Tokenizer for linker scripts and related inputs. It holds the token list
+// and a cursor (Pos) into it, and offers next/peek/consume/expect
+// primitives for a parser built on top.
+class ScriptLexer {
+ // Tokenizes MB on construction.
+ explicit ScriptLexer(MemoryBufferRef MB);
+ // Reports Msg with location information; only the first error is kept.
+ void setError(const Twine &Msg);
+ // Tokenizes MB and inserts its tokens at the current position.
+ void tokenize(MemoryBufferRef MB);
+ static StringRef skipSpace(StringRef S);
+ bool atEOF();
+ StringRef next();
+ StringRef peek();
+ StringRef peek2();
+ void skip();
+ bool consume(StringRef Tok);
+ void expect(StringRef Expect);
+ bool consumeLabel(StringRef Tok);
+ std::string getCurrentLocation();
+ // All buffers passed to tokenize(), used to map a token back to its file.
+ std::vector<MemoryBufferRef> MBs;
+ // Token list; entries are StringRefs pointing into the buffers above.
+ std::vector<StringRef> Tokens;
+ // When true, next() splits tokens at arithmetic operators.
+ bool InExpr = false;
+ // Index of the next token to read; Tokens[Pos - 1] is the current token.
+ size_t Pos = 0;
+ void maybeSplitExpr();
+ StringRef getLine();
+ size_t getLineNumber();
+ size_t getColumnNumber();
+ MemoryBufferRef getCurrentMB();
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/ScriptParser.cpp b/contrib/llvm/tools/lld/ELF/ScriptParser.cpp
new file mode 100644
index 000000000000..7dbe1641622b
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/ScriptParser.cpp
@@ -0,0 +1,1545 @@
+//===- ScriptParser.cpp ---------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file contains a recursive-descent parser for linker scripts.
+// Parsed results are stored to Config and Script global objects.
+#include "ScriptParser.h"
+#include "Config.h"
+#include "Driver.h"
+#include "InputSection.h"
+#include "LinkerScript.h"
+#include "OutputSections.h"
+#include "ScriptLexer.h"
+#include "Symbols.h"
+#include "Target.h"
+#include "lld/Common/Memory.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include <cassert>
+#include <limits>
+#include <vector>
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::support::endian;
+using namespace lld;
+using namespace lld::elf;
+static bool isUnderSysroot(StringRef Path);
+namespace {
+// Parser built on top of ScriptLexer (inherited privately). It has one
+// read* member per construct, with entry points for linker scripts,
+// version scripts, dynamic lists and --defsym style expressions.
+class ScriptParser final : ScriptLexer {
+ // Tokenizes MB and records whether the script lives under the sysroot.
+ ScriptParser(MemoryBufferRef MB)
+ : ScriptLexer(MB),
+ IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {}
+ // Top-level entry points.
+ void readLinkerScript();
+ void readVersionScript();
+ void readDynamicList();
+ void readDefsym(StringRef Name);
+ // Handlers for individual linker script commands.
+ void addFile(StringRef Path);
+ void readAsNeeded();
+ void readEntry();
+ void readExtern();
+ void readGroup();
+ void readInclude();
+ void readInput();
+ void readMemory();
+ void readOutput();
+ void readOutputArch();
+ void readOutputFormat();
+ void readPhdrs();
+ void readRegionAlias();
+ void readSearchDir();
+ void readSections();
+ void readTarget();
+ void readVersion();
+ void readVersionScriptCommand();
+ SymbolAssignment *readSymbolAssignment(StringRef Name);
+ ByteCommand *readByteCommand(StringRef Tok);
+ std::array<uint8_t, 4> readFill();
+ std::array<uint8_t, 4> parseFill(StringRef Tok);
+ bool readSectionDirective(OutputSection *Cmd, StringRef Tok1, StringRef Tok2);
+ void readSectionAddressType(OutputSection *Cmd);
+ OutputSection *readOverlaySectionDescription();
+ OutputSection *readOutputSectionDescription(StringRef OutSec);
+ std::vector<BaseCommand *> readOverlay();
+ std::vector<StringRef> readOutputSectionPhdrs();
+ InputSectionDescription *readInputSectionDescription(StringRef Tok);
+ StringMatcher readFilePatterns();
+ std::vector<SectionPattern> readInputSectionsList();
+ InputSectionDescription *readInputSectionRules(StringRef FilePattern);
+ unsigned readPhdrType();
+ SortSectionPolicy readSortKind();
+ SymbolAssignment *readProvideHidden(bool Provide, bool Hidden);
+ SymbolAssignment *readAssignment(StringRef Tok);
+ void readSort();
+ Expr readAssert();
+ Expr readConstant();
+ Expr getPageSize();
+ uint64_t readMemoryAssignment(StringRef, StringRef, StringRef);
+ std::pair<uint32_t, uint32_t> readMemoryAttributes();
+ // Expression parsing.
+ Expr combine(StringRef Op, Expr L, Expr R);
+ Expr readExpr();
+ Expr readExpr1(Expr Lhs, int MinPrec);
+ StringRef readParenLiteral();
+ Expr readPrimary();
+ Expr readTernary(Expr Cond);
+ Expr readParenExpr();
+ // For parsing version script.
+ std::vector<SymbolVersion> readVersionExtern();
+ void readAnonymousDeclaration();
+ void readVersionDeclaration(StringRef VerStr);
+ std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
+ readSymbols();
+ // True if a script being read is in a subdirectory specified by -sysroot.
+ bool IsUnderSysroot;
+ // A set to detect an INCLUDE() cycle.
+ StringSet<> Seen;
+} // namespace
+// If S starts with a double quote, returns S without its first and last
+// characters; otherwise returns S unchanged. The last character is not
+// checked, so S is assumed to be a well-formed quoted token.
+static StringRef unquote(StringRef S) {
+ if (S.startswith("\""))
+ return S.substr(1, S.size() - 2);
+ return S;
+// Returns true if Path or one of its parent directories is equivalent
+// (per sys::fs::equivalent) to Config->Sysroot. Always false when no
+// sysroot is configured.
+static bool isUnderSysroot(StringRef Path) {
+ if (Config->Sysroot == "")
+ return false;
+ // Walk up the directory chain until the path becomes empty.
+ for (; !Path.empty(); Path = sys::path::parent_path(Path))
+ if (sys::fs::equivalent(Config->Sysroot, Path))
+ return true;
+ return false;
+// Some operations only support one non-absolute value. Move the
+// absolute one to the right hand side for convenience.
+// A and B are swapped when A has no section, or when A is forced absolute
+// and B is not absolute. If B is still not absolute afterwards, an error
+// is reported at A's location.
+static void moveAbsRight(ExprValue &A, ExprValue &B) {
+ if (A.Sec == nullptr || (A.ForceAbsolute && !B.isAbsolute()))
+ std::swap(A, B);
+ if (!B.isAbsolute())
+ error(A.Loc + ": at least one side of the expression must be absolute");
+// Adds two expression values. After moveAbsRight, the result keeps A's
+// section, ForceAbsolute flag and location; its value is A's section
+// offset plus B's value.
+static ExprValue add(ExprValue A, ExprValue B) {
+ moveAbsRight(A, B);
+ return {A.Sec, A.ForceAbsolute, A.getSectionOffset() + B.getValue(), A.Loc};
+// Subtracts B from A. If neither operand is absolute, the result is the
+// plain difference of their values. Otherwise the result stays in A's
+// section with ForceAbsolute cleared.
+static ExprValue sub(ExprValue A, ExprValue B) {
+ // The distance between two symbols in sections is absolute.
+ if (!A.isAbsolute() && !B.isAbsolute())
+ return A.getValue() - B.getValue();
+ return {A.Sec, false, A.getSectionOffset() - B.getValue(), A.Loc};
+// Bitwise AND of two expression values. The operation is applied to the
+// full values; A's section address is then subtracted so the result is
+// stored relative to A's section.
+static ExprValue bitAnd(ExprValue A, ExprValue B) {
+ moveAbsRight(A, B);
+ return {A.Sec, A.ForceAbsolute,
+ (A.getValue() & B.getValue()) - A.getSecAddr(), A.Loc};
+// Bitwise OR of two expression values. The operation is applied to the
+// full values; A's section address is then subtracted so the result is
+// stored relative to A's section.
+static ExprValue bitOr(ExprValue A, ExprValue B) {
+ moveAbsRight(A, B);
+ return {A.Sec, A.ForceAbsolute,
+ (A.getValue() | B.getValue()) - A.getSecAddr(), A.Loc};
+// Parses a dynamic list of the form "{ <symbols> };" and appends its
+// global symbols to Config->DynamicList. Anything after the closing ";"
+// and any "local:" symbols are reported as errors.
+void ScriptParser::readDynamicList() {
+ Config->HasDynamicList = true;
+ expect("{");
+ std::vector<SymbolVersion> Locals;
+ std::vector<SymbolVersion> Globals;
+ std::tie(Locals, Globals) = readSymbols();
+ expect(";");
+ if (!atEOF()) {
+ setError("EOF expected, but got " + next());
+ return;
+ }
+ if (!Locals.empty()) {
+ setError("\"local:\" scope not supported in --dynamic-list");
+ return;
+ }
+ for (SymbolVersion V : Globals)
+ Config->DynamicList.push_back(V);
+// Parses a whole version script; any token left over afterwards is an
+// error.
+void ScriptParser::readVersionScript() {
+ readVersionScriptCommand();
+ if (!atEOF())
+ setError("EOF expected, but got " + next());
+// Parses either a single anonymous version declaration ("{ ... }") or a
+// sequence of named ones ("NAME { ... }"). Mixing the two forms is an
+// error. Parsing stops at EOF, on error, or before a "}" token.
+void ScriptParser::readVersionScriptCommand() {
+ if (consume("{")) {
+ readAnonymousDeclaration();
+ return;
+ }
+ while (!atEOF() && !errorCount() && peek() != "}") {
+ StringRef VerStr = next();
+ // A "{" where a version name is expected is an anonymous declaration.
+ if (VerStr == "{") {
+ setError("anonymous version definition is used in "
+ "combination with other version definitions");
+ return;
+ }
+ expect("{");
+ readVersionDeclaration(VerStr);
+ }
+// Parses the body of a VERSION command: "{" <version script> "}".
+void ScriptParser::readVersion() {
+ expect("{");
+ readVersionScriptCommand();
+ expect("}");
+// Top-level loop of the linker script parser. Reads tokens until EOF or
+// error and dispatches each keyword to its handler. A token that is not a
+// known keyword is tried as a symbol assignment; if that fails too, it is
+// reported as an unknown directive. Stray ";" tokens are ignored.
+void ScriptParser::readLinkerScript() {
+ while (!atEOF()) {
+ StringRef Tok = next();
+ if (Tok == ";")
+ continue;
+ if (Tok == "ENTRY") {
+ readEntry();
+ } else if (Tok == "EXTERN") {
+ readExtern();
+ } else if (Tok == "GROUP") {
+ readGroup();
+ } else if (Tok == "INCLUDE") {
+ readInclude();
+ } else if (Tok == "INPUT") {
+ readInput();
+ } else if (Tok == "MEMORY") {
+ readMemory();
+ } else if (Tok == "OUTPUT") {
+ readOutput();
+ } else if (Tok == "OUTPUT_ARCH") {
+ readOutputArch();
+ } else if (Tok == "OUTPUT_FORMAT") {
+ readOutputFormat();
+ } else if (Tok == "PHDRS") {
+ readPhdrs();
+ } else if (Tok == "REGION_ALIAS") {
+ readRegionAlias();
+ } else if (Tok == "SEARCH_DIR") {
+ readSearchDir();
+ } else if (Tok == "SECTIONS") {
+ readSections();
+ } else if (Tok == "TARGET") {
+ readTarget();
+ } else if (Tok == "VERSION") {
+ readVersion();
+ } else if (SymbolAssignment *Cmd = readAssignment(Tok)) {
+ Script->SectionCommands.push_back(Cmd);
+ } else {
+ setError("unknown directive: " + Tok);
+ }
+ }
+// Parses the whole input as one expression and records an assignment of
+// that expression to Name in Script->SectionCommands. Trailing tokens are
+// reported as an error, but the assignment is still recorded.
+void ScriptParser::readDefsym(StringRef Name) {
+ if (errorCount())
+ return;
+ Expr E = readExpr();
+ if (!atEOF())
+ setError("EOF expected, but got " + next());
+ SymbolAssignment *Cmd = make<SymbolAssignment>(Name, E, getCurrentLocation());
+ Script->SectionCommands.push_back(Cmd);
+// Adds the input file or library named S to the link. Resolution order:
+//  1. If the script is under the sysroot and S is absolute, try
+//     <sysroot><S> first.
+//  2. An absolute path is used as is.
+//  3. "=<path>" is resolved relative to the sysroot (or used without the
+//     "=" when no sysroot is set).
+//  4. "-l<name>" is handed to Driver->addLibrary.
+//  5. A path that exists is used as is.
+//  6. Otherwise the search paths are consulted; failure is an error.
+void ScriptParser::addFile(StringRef S) {
+ if (IsUnderSysroot && S.startswith("/")) {
+ SmallString<128> PathData;
+ StringRef Path = (Config->Sysroot + S).toStringRef(PathData);
+ if (sys::fs::exists(Path)) {
+ // Path points into the local PathData, so save a persistent copy.
+ Driver->addFile(Saver.save(Path), /*WithLOption=*/false);
+ return;
+ }
+ }
+ if (S.startswith("/")) {
+ Driver->addFile(S, /*WithLOption=*/false);
+ } else if (S.startswith("=")) {
+ if (Config->Sysroot.empty())
+ Driver->addFile(S.substr(1), /*WithLOption=*/false);
+ else
+ Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)),
+ /*WithLOption=*/false);
+ } else if (S.startswith("-l")) {
+ Driver->addLibrary(S.substr(2));
+ } else if (sys::fs::exists(S)) {
+ Driver->addFile(S, /*WithLOption=*/false);
+ } else {
+ if (Optional<std::string> Path = findFromSearchPaths(S))
+ Driver->addFile(Saver.save(*Path), /*WithLOption=*/true);
+ else
+ setError("unable to find " + S);
+ }
+// Parses "( file... )" after AS_NEEDED. Config->AsNeeded is set to true
+// while the listed files are added and restored to its previous value
+// afterwards.
+void ScriptParser::readAsNeeded() {
+ expect("(");
+ bool Orig = Config->AsNeeded;
+ Config->AsNeeded = true;
+ while (!errorCount() && !consume(")"))
+ addFile(unquote(next()));
+ Config->AsNeeded = Orig;
+// Parses "( symbol )" after ENTRY and stores the symbol in Config->Entry,
+// but only if no entry has been set already.
+void ScriptParser::readEntry() {
+ // -e <symbol> takes precedence over ENTRY(<symbol>).
+ expect("(");
+ StringRef Tok = next();
+ if (Config->Entry.empty())
+ Config->Entry = Tok;
+ expect(")");
+// Parses "( symbol... )" after EXTERN and appends every symbol to
+// Config->Undefined.
+void ScriptParser::readExtern() {
+ expect("(");
+ while (!errorCount() && !consume(")"))
+ Config->Undefined.push_back(next());
+// Parses GROUP(...). It uses the same syntax as INPUT but marks the files
+// as being in a group via InputFile::IsInGroup. The previous flag value is
+// restored afterwards, and the group id is advanced only when this call
+// opened the group (i.e. it was not already inside one).
+void ScriptParser::readGroup() {
+ bool Orig = InputFile::IsInGroup;
+ InputFile::IsInGroup = true;
+ readInput();
+ InputFile::IsInGroup = Orig;
+ if (!Orig)
+ ++InputFile::NextGroupId;
+// Handles "INCLUDE <file>": locates the script and splices its tokens in at
+// the current position. A file name that was already included by this
+// parser is reported as an include cycle.
+void ScriptParser::readInclude() {
+ StringRef Tok = unquote(next());
+ // insert() reports via .second whether the name was newly added.
+ if (!Seen.insert(Tok).second) {
+ setError("there is a cycle in linker script INCLUDEs");
+ return;
+ }
+ if (Optional<std::string> Path = searchScript(Tok)) {
+ // If the file is found but cannot be read, nothing is tokenized and no
+ // error is set here.
+ if (Optional<MemoryBufferRef> MB = readFile(*Path))
+ tokenize(*MB);
+ return;
+ }
+ setError("cannot find linker script " + Tok);
+// Parses "( item... )" after INPUT or GROUP. Each item is either an
+// AS_NEEDED(...) sub-list or a (possibly quoted) file name.
+void ScriptParser::readInput() {
+ expect("(");
+ while (!errorCount() && !consume(")")) {
+ if (consume("AS_NEEDED"))
+ readAsNeeded();
+ else
+ addFile(unquote(next()));
+ }
+// Parses "( file )" after OUTPUT and stores the unquoted name in
+// Config->OutputFile, but only if no output file has been set already.
+void ScriptParser::readOutput() {
+ // -o <file> takes precedence over OUTPUT(<file>).
+ expect("(");
+ StringRef Tok = next();
+ if (Config->OutputFile.empty())
+ Config->OutputFile = unquote(Tok);
+ expect(")");
+// Parses OUTPUT_ARCH(...) by discarding every token up to the closing ")".
+void ScriptParser::readOutputArch() {
+ // OUTPUT_ARCH is ignored for now.
+ expect("(");
+ while (!errorCount() && !consume(")"))
+ skip();
+// Maps a BFD target name to an (ELFKind, machine) pair. Names not listed
+// below yield {ELFNoneKind, EM_NONE}.
+static std::pair<ELFKind, uint16_t> parseBfdName(StringRef S) {
+ return StringSwitch<std::pair<ELFKind, uint16_t>>(S)
+ .Case("elf32-i386", {ELF32LEKind, EM_386})
+ .Case("elf32-iamcu", {ELF32LEKind, EM_IAMCU})
+ .Case("elf32-littlearm", {ELF32LEKind, EM_ARM})
+ .Case("elf32-x86-64", {ELF32LEKind, EM_X86_64})
+ .Case("elf64-aarch64", {ELF64LEKind, EM_AARCH64})
+ .Case("elf64-littleaarch64", {ELF64LEKind, EM_AARCH64})
+ .Case("elf32-powerpc", {ELF32BEKind, EM_PPC})
+ .Case("elf64-powerpc", {ELF64BEKind, EM_PPC64})
+ .Case("elf64-powerpcle", {ELF64LEKind, EM_PPC64})
+ .Case("elf64-x86-64", {ELF64LEKind, EM_X86_64})
+ .Cases("elf32-tradbigmips", "elf32-bigmips", {ELF32BEKind, EM_MIPS})
+ .Case("elf32-ntradbigmips", {ELF32BEKind, EM_MIPS})
+ .Case("elf32-tradlittlemips", {ELF32LEKind, EM_MIPS})
+ .Case("elf32-ntradlittlemips", {ELF32LEKind, EM_MIPS})
+ .Case("elf64-tradbigmips", {ELF64BEKind, EM_MIPS})
+ .Case("elf64-tradlittlemips", {ELF64LEKind, EM_MIPS})
+ .Default({ELFNoneKind, EM_NONE});
+// Parse OUTPUT_FORMAT(bfdname) or OUTPUT_FORMAT(bfdname, big, little).
+// Currently we ignore big and little parameters.
+// The BFD name selects Config->EKind and Config->EMachine; an unknown name
+// is reported as an error. A "-freebsd" suffix selects the FreeBSD OS ABI
+// and is stripped before the name is looked up.
+void ScriptParser::readOutputFormat() {
+  expect("(");
+  StringRef Name = unquote(next());
+  StringRef S = Name;
+  // consume_back strips the suffix from S; Name keeps the original spelling
+  // for diagnostics.
+  if (S.consume_back("-freebsd"))
+    Config->OSABI = ELFOSABI_FREEBSD;
+  // Look the name up unconditionally: guarding this on the "-freebsd"
+  // suffix would leave plain names such as "elf64-x86-64" unparsed.
+  std::tie(Config->EKind, Config->EMachine) = parseBfdName(S);
+  if (Config->EMachine == EM_NONE)
+    setError("unknown output format name: " + Name);
+  if (S == "elf32-ntradlittlemips" || S == "elf32-ntradbigmips")
+    Config->MipsN32Abi = true;
+  // One-argument form ends here.
+  if (consume(")"))
+    return;
+  // Three-argument form: skip the big- and little-endian names.
+  expect(",");
+  skip();
+  expect(",");
+  skip();
+  expect(")");
+// Parses the body of a PHDRS command: "{ name type [attribute...] ; ... }".
+// Recognized attributes are FILEHDR, PHDRS, AT(expr) and FLAGS(expr). Each
+// parsed header is appended to Script->PhdrsCommands.
+void ScriptParser::readPhdrs() {
+ expect("{");
+ while (!errorCount() && !consume("}")) {
+ PhdrsCommand Cmd;
+ Cmd.Name = next();
+ Cmd.Type = readPhdrType();
+ while (!errorCount() && !consume(";")) {
+ if (consume("FILEHDR"))
+ Cmd.HasFilehdr = true;
+ else if (consume("PHDRS"))
+ Cmd.HasPhdrs = true;
+ else if (consume("AT"))
+ Cmd.LMAExpr = readParenExpr();
+ else if (consume("FLAGS"))
+ // FLAGS is evaluated immediately; AT above is stored unevaluated.
+ Cmd.Flags = readParenExpr()().getValue();
+ else
+ setError("unexpected header attribute: " + next());
+ }
+ Script->PhdrsCommands.push_back(Cmd);
+ }
+// Parses REGION_ALIAS(alias, region) and registers `alias` as another name
+// for the existing memory region `region`. Redefining an existing name or
+// referring to an undefined region is reported as an error.
+void ScriptParser::readRegionAlias() {
+ expect("(");
+ StringRef Alias = unquote(next());
+ expect(",");
+ StringRef Name = next();
+ expect(")");
+ if (Script->MemoryRegions.count(Alias))
+ setError("redefinition of memory region '" + Alias + "'");
+ if (!Script->MemoryRegions.count(Name))
+ setError("memory region '" + Name + "' is not defined");
+ // NOTE(review): this line runs even after the errors above; if
+ // MemoryRegions[Name] default-inserts like a standard map, an undefined
+ // Name gets an empty entry here -- confirm this is harmless.
+ Script->MemoryRegions.insert({Alias, Script->MemoryRegions[Name]});
+// Parses SEARCH_DIR(path) and appends the unquoted path to
+// Config->SearchPaths unless Config->Nostdlib is set.
+void ScriptParser::readSearchDir() {
+ expect("(");
+ StringRef Tok = next();
+ if (!Config->Nostdlib)
+ Config->SearchPaths.push_back(unquote(Tok));
+ expect(")");
+// This reads an overlay description. Overlays are used to describe output
+// sections that use the same virtual memory range and normally would trigger
+// linker's sections sanity check failures.
+// https://sourceware.org/binutils/docs/ld/Overlay-Description.html#Overlay-Description
+// Returns the overlay's output sections followed by one assignment to "."
+// that moves the location counter past the overlay.
+std::vector<BaseCommand *> ScriptParser::readOverlay() {
+ // VA and LMA expressions are optional, though for simplicity of
+ // implementation we assume they are not. That is what OVERLAY was designed
+ // for first of all: to allow sections with overlapping VAs at different LMAs.
+ Expr AddrExpr = readExpr();
+ expect(":");
+ expect("AT");
+ Expr LMAExpr = readParenExpr();
+ expect("{");
+ std::vector<BaseCommand *> V;
+ OutputSection *Prev = nullptr;
+ while (!errorCount() && !consume("}")) {
+ // VA is the same for all sections. The LMAs are consecutive in memory
+ // starting from the base load address specified.
+ OutputSection *OS = readOverlaySectionDescription();
+ OS->AddrExpr = AddrExpr;
+ // The lambda captures Prev by value, so it refers to the section that
+ // preceded this one at parse time.
+ if (Prev)
+ OS->LMAExpr = [=] { return Prev->getLMA() + Prev->Size; };
+ else
+ OS->LMAExpr = LMAExpr;
+ V.push_back(OS);
+ Prev = OS;
+ }
+ // According to the specification, at the end of the overlay, the location
+ // counter should be equal to the overlay base address plus size of the
+ // largest section seen in the overlay.
+ // Here we want to create the Dot assignment command to achieve that.
+ // V is captured by value here, before the assignment itself is appended,
+ // so the cast<OutputSection> below only ever sees output sections.
+ Expr MoveDot = [=] {
+ uint64_t Max = 0;
+ for (BaseCommand *Cmd : V)
+ Max = std::max(Max, cast<OutputSection>(Cmd)->Size);
+ return AddrExpr().getValue() + Max;
+ };
+ V.push_back(make<SymbolAssignment>(".", MoveDot, getCurrentLocation()));
+ return V;
+// Parses the body of a SECTIONS command. Each element is an OVERLAY, an
+// INCLUDE, a symbol assignment or an output section description. If the
+// block is followed by "INSERT AFTER|BEFORE <name>", the parsed commands go
+// into the matching insert list; otherwise they are appended to
+// Script->SectionCommands.
+void ScriptParser::readSections() {
+ Script->HasSectionsCommand = true;
+ // -no-rosegment is used to avoid placing read only non-executable sections in
+ // their own segment. We do the same if SECTIONS command is present in linker
+ // script. See comment for computeFlags().
+ Config->SingleRoRx = true;
+ expect("{");
+ std::vector<BaseCommand *> V;
+ while (!errorCount() && !consume("}")) {
+ StringRef Tok = next();
+ if (Tok == "OVERLAY") {
+ for (BaseCommand *Cmd : readOverlay())
+ V.push_back(Cmd);
+ continue;
+ } else if (Tok == "INCLUDE") {
+ readInclude();
+ continue;
+ }
+ // Anything that is not an assignment is an output section description.
+ if (BaseCommand *Cmd = readAssignment(Tok))
+ V.push_back(Cmd);
+ else
+ V.push_back(readOutputSectionDescription(Tok));
+ }
+ // atEOF() is checked first so that consume() does not raise
+ // "unexpected EOF" when SECTIONS is the last command.
+ if (!atEOF() && consume("INSERT")) {
+ std::vector<BaseCommand *> *Dest = nullptr;
+ if (consume("AFTER"))
+ Dest = &Script->InsertAfterCommands[next()];
+ else if (consume("BEFORE"))
+ Dest = &Script->InsertBeforeCommands[next()];
+ else
+ setError("expected AFTER/BEFORE, but got '" + next() + "'");
+ if (Dest)
+ Dest->insert(Dest->end(), V.begin(), V.end());
+ return;
+ }
+ Script->SectionCommands.insert(Script->SectionCommands.end(), V.begin(),
+ V.end());
+// Parses TARGET(name) and sets Config->FormatBinary: false for a name that
+// starts with "elf", true for "binary". Any other name is an error.
+void ScriptParser::readTarget() {
+ // TARGET(foo) is an alias for "--format foo". Unlike GNU linkers,
+ // we accept only a limited set of BFD names (i.e. "elf" or "binary")
+ // for --format. We recognize only /^elf/ and "binary" in the linker
+ // script as well.
+ expect("(");
+ StringRef Tok = next();
+ expect(")");
+ if (Tok.startswith("elf"))
+ Config->FormatBinary = false;
+ else if (Tok == "binary")
+ Config->FormatBinary = true;
+ else
+ setError("unknown target: " + Tok);
+// Returns the precedence of the binary operator Op (a larger number is
+// given to "*" than to "||"), or -1 if Op is not a known binary operator.
+static int precedence(StringRef Op) {
+ return StringSwitch<int>(Op)
+ .Cases("*", "/", "%", 8)
+ .Cases("+", "-", 7)
+ .Cases("<<", ">>", 6)
+ .Cases("<", "<=", ">", ">=", "==", "!=", 5)
+ .Case("&", 4)
+ .Case("|", 3)
+ .Case("&&", 2)
+ .Case("||", 1)
+ .Default(-1);
+// Reads tokens up to and including the closing ")" and returns a
+// StringMatcher built from them. The opening "(" must already have been
+// consumed by the caller.
+StringMatcher ScriptParser::readFilePatterns() {
+ std::vector<StringRef> V;
+ while (!errorCount() && !consume(")"))
+ V.push_back(next());
+ return StringMatcher(V);
+// Consumes a sort keyword if one is next and returns the matching policy.
+// If the next token is not a sort keyword, nothing is consumed and
+// SortSectionPolicy::Default is returned.
+SortSectionPolicy ScriptParser::readSortKind() {
+ if (consume("SORT") || consume("SORT_BY_NAME"))
+ return SortSectionPolicy::Name;
+ if (consume("SORT_BY_ALIGNMENT"))
+ return SortSectionPolicy::Alignment;
+ if (consume("SORT_BY_INIT_PRIORITY"))
+ return SortSectionPolicy::Priority;
+ if (consume("SORT_NONE"))
+ return SortSectionPolicy::None;
+ return SortSectionPolicy::Default;
+// Parses the section patterns found inside an input section description:
+//   <contents> ::= <elem>*
+//   <elem>     ::= <exclude>? <glob-pattern>
+//   <exclude>  ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")"
+// An EXCLUDE_FILE clause applies to the patterns that follow it, up to the
+// next EXCLUDE_FILE or the closing ")". For example,
+//   *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz)
+// yields ".foo" (any file), ".bar" (any file but a.o) and ".baz" (any file
+// but b.o). The closing ")" is not consumed here.
+std::vector<SectionPattern> ScriptParser::readInputSectionsList() {
+  std::vector<SectionPattern> Result;
+  while (!errorCount() && peek() != ")") {
+    StringMatcher Excluded;
+    if (consume("EXCLUDE_FILE")) {
+      expect("(");
+      Excluded = readFilePatterns();
+    }
+    std::vector<StringRef> Names;
+    for (;;) {
+      if (errorCount() || peek() == ")" || peek() == "EXCLUDE_FILE")
+        break;
+      Names.push_back(next());
+    }
+    if (Names.empty()) {
+      // The loop condition sees the error and terminates.
+      setError("section pattern is expected");
+      continue;
+    }
+    Result.push_back({std::move(Excluded), StringMatcher(Names)});
+  }
+  return Result;
+// Reads the parenthesized pattern list of an input section description,
+// i.e. the "(...)" that follows FilePattern. The grammar is as follows.
+// <patterns> ::= <section-list>
+// | <sort> "(" <section-list> ")"
+// | <sort> "(" <sort> "(" <section-list> ")" ")"
+// <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" | "SORT_BY_INIT_PRIORITY" | "SORT_NONE"
+// <section-list> is parsed by readInputSectionsList().
+InputSectionDescription *
+ScriptParser::readInputSectionRules(StringRef FilePattern) {
+ auto *Cmd = make<InputSectionDescription>(FilePattern);
+ expect("(");
+ while (!errorCount() && !consume(")")) { // One iteration per (optionally sorted) pattern group.
+ SortSectionPolicy Outer = readSortKind();
+ SortSectionPolicy Inner = SortSectionPolicy::Default;
+ std::vector<SectionPattern> V;
+ if (Outer != SortSectionPolicy::Default) {
+ expect("(");
+ Inner = readSortKind(); // A second, nested sort keyword is optional.
+ if (Inner != SortSectionPolicy::Default) {
+ expect("(");
+ V = readInputSectionsList();
+ expect(")");
+ } else {
+ V = readInputSectionsList();
+ }
+ expect(")");
+ } else {
+ V = readInputSectionsList();
+ }
+ for (SectionPattern &Pat : V) { // Every pattern of the group shares the same sort policies.
+ Pat.SortInner = Inner;
+ Pat.SortOuter = Outer;
+ }
+ std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns));
+ }
+ return Cmd;
+// Parses an input section description whose first token is Tok. The
+// description may be wrapped in KEEP(...), in which case it is also
+// recorded in Script->KeptSections.
+// https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep
+InputSectionDescription *
+ScriptParser::readInputSectionDescription(StringRef Tok) {
+  if (Tok != "KEEP")
+    return readInputSectionRules(Tok);
+  expect("(");
+  InputSectionDescription *Kept = readInputSectionRules(next());
+  expect(")");
+  Script->KeptSections.push_back(Kept);
+  return Kept;
+void ScriptParser::readSort() { // Parses the "(CONSTRUCTORS)" that follows a SORT keyword; nothing is stored.
+ expect("(");
+ expect("CONSTRUCTORS");
+ expect(")");
+// Parses "(<expr>, <message>)" following ASSERT. When evaluated, the
+// returned expression reports <message> through error() if <expr> is zero,
+// and in every case yields Script->getDot().
+Expr ScriptParser::readAssert() {
+  expect("(");
+  Expr Cond = readExpr();
+  expect(",");
+  StringRef Text = unquote(next());
+  expect(")");
+  return [=] {
+    if (!Cond().getValue())
+      error(Text);
+    return Script->getDot();
+  };
+// Reads the "(value)" part of a FILL command. FILL is treated as an alias
+// for the "=fillexp" output section attribute, which differs from what GNU
+// linkers do.
+// https://sourceware.org/binutils/docs/ld/Output-Section-Data.html
+std::array<uint8_t, 4> ScriptParser::readFill() {
+  expect("(");
+  StringRef Value = next();
+  std::array<uint8_t, 4> Filler = parseFill(Value);
+  expect(")");
+  return Filler;
+// Tries to read an output section type directive: "(NOLOAD)", "(COPY)",
+// "(INFO)" or "(OVERLAY)". Tok1 and Tok2 are the next two tokens as peeked
+// by the caller. Returns false without consuming anything if they do not
+// form such a directive; otherwise consumes it, sets Noload or NonAlloc on
+// Cmd and returns true.
+bool ScriptParser::readSectionDirective(OutputSection *Cmd, StringRef Tok1,
+                                        StringRef Tok2) {
+  bool IsNoload = Tok2 == "NOLOAD";
+  bool IsNonAlloc = Tok2 == "COPY" || Tok2 == "INFO" || Tok2 == "OVERLAY";
+  if (Tok1 != "(" || (!IsNoload && !IsNonAlloc))
+    return false;
+  expect("(");
+  if (consume("NOLOAD")) {
+    Cmd->Noload = true;
+  } else {
+    skip(); // This is "COPY", "INFO" or "OVERLAY".
+    Cmd->NonAlloc = true;
+  }
+  expect(")");
+  return true;
+// Reads what may follow an output section name before the ":": an address
+// expression, a type directive ("(NOLOAD)", "(COPY)", "(INFO)" or
+// "(OVERLAY)"), or an expression followed by a directive. The grammar is
+// not LL(1) because "(" can open either an expression or a directive, so
+// two tokens of lookahead are used to tell them apart.
+// https://sourceware.org/binutils/docs/ld/Output-Section-Address.html
+// https://sourceware.org/binutils/docs/ld/Output-Section-Type.html
+void ScriptParser::readSectionAddressType(OutputSection *Cmd) {
+  // A leading directive means there is no address expression.
+  bool DirectiveOnly = readSectionDirective(Cmd, peek(), peek2());
+  if (DirectiveOnly)
+    return;
+  Cmd->AddrExpr = readExpr();
+  if (peek() != "(")
+    return;
+  if (!readSectionDirective(Cmd, "(", peek2()))
+    setError("unknown section directive: " + peek2());
+// Wraps E so that its value is checked as an alignment when evaluated.
+// Values below 1 are treated as 1. A value that is not a power of two is
+// reported through error(), prefixed with Loc, and replaced by 1.
+static Expr checkAlignment(Expr E, std::string &Loc) {
+  return [=] {
+    uint64_t Value = E().getValue();
+    uint64_t Alignment = Value < 1 ? 1 : Value;
+    if (isPowerOf2_64(Alignment))
+      return Alignment;
+    error(Loc + ": alignment must be power of 2");
+    return (uint64_t)1; // Dummy value so evaluation can continue.
+  };
+// Reads one "name { input-section-rules... } [:phdr ...]" entry and returns
+// the resulting output section with InOverlay set.
+OutputSection *ScriptParser::readOverlaySectionDescription() {
+  OutputSection *Sec =
+      Script->createOutputSection(next(), getCurrentLocation());
+  Sec->InOverlay = true;
+  expect("{");
+  for (;;) {
+    if (errorCount() || consume("}"))
+      break;
+    Sec->SectionCommands.push_back(readInputSectionRules(next()));
+  }
+  Sec->Phdrs = readOutputSectionPhdrs();
+  return Sec;
+OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) { // Parses the output section description that follows the name OutSec and returns the new section.
+ OutputSection *Cmd =
+ Script->createOutputSection(OutSec, getCurrentLocation());
+ size_t SymbolsReferenced = Script->ReferencedSymbols.size(); // Baseline used below to detect symbol references made while parsing this section.
+ if (peek() != ":")
+ readSectionAddressType(Cmd);
+ expect(":");
+ std::string Location = getCurrentLocation();
+ if (consume("AT"))
+ Cmd->LMAExpr = readParenExpr();
+ if (consume("ALIGN"))
+ Cmd->AlignExpr = checkAlignment(readParenExpr(), Location);
+ if (consume("SUBALIGN"))
+ Cmd->SubalignExpr = checkAlignment(readParenExpr(), Location);
+ // Parse constraints.
+ if (consume("ONLY_IF_RO"))
+ Cmd->Constraint = ConstraintKind::ReadOnly;
+ if (consume("ONLY_IF_RW"))
+ Cmd->Constraint = ConstraintKind::ReadWrite;
+ expect("{");
+ while (!errorCount() && !consume("}")) { // Body: one command per iteration until the closing brace.
+ StringRef Tok = next();
+ if (Tok == ";") {
+ // Empty commands are allowed. Do nothing here.
+ } else if (SymbolAssignment *Assign = readAssignment(Tok)) {
+ Cmd->SectionCommands.push_back(Assign);
+ } else if (ByteCommand *Data = readByteCommand(Tok)) {
+ Cmd->SectionCommands.push_back(Data);
+ } else if (Tok == "CONSTRUCTORS") {
+ // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors
+ // by name. This is for very old file formats such as ECOFF/XCOFF.
+ // For ELF, we should ignore.
+ } else if (Tok == "FILL") {
+ Cmd->Filler = readFill();
+ } else if (Tok == "SORT") {
+ readSort();
+ } else if (Tok == "INCLUDE") {
+ readInclude();
+ } else if (peek() == "(") {
+ Cmd->SectionCommands.push_back(readInputSectionDescription(Tok));
+ } else {
+ // We have a file name and no input sections description. It is not a
+ // commonly used syntax, but still acceptable. In that case, all sections
+ // from the file will be included.
+ auto *ISD = make<InputSectionDescription>(Tok);
+ ISD->SectionPatterns.push_back({{}, StringMatcher({"*"})});
+ Cmd->SectionCommands.push_back(ISD);
+ }
+ }
+ if (consume(">")) // Optional ">region".
+ Cmd->MemoryRegionName = next();
+ if (consume("AT")) { // Optional "AT>lma_region".
+ expect(">");
+ Cmd->LMARegionName = next();
+ }
+ if (Cmd->LMAExpr && !Cmd->LMARegionName.empty())
+ error("section can't have both LMA and a load region");
+ Cmd->Phdrs = readOutputSectionPhdrs();
+ if (consume("=")) // "= fill" arrives as two tokens, "=fill" as one.
+ Cmd->Filler = parseFill(next());
+ else if (peek().startswith("="))
+ Cmd->Filler = parseFill(next().drop_front());
+ // Consume optional comma following output section command.
+ consume(",");
+ if (Script->ReferencedSymbols.size() > SymbolsReferenced)
+ Cmd->ExpressionsUseSymbols = true;
+ return Cmd;
+// Parses Tok as an octal/decimal/hexadecimal number and returns it as four
+// big-endian bytes. Used for `=<fillexp>`.
+// https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html
+// ld.bfd treats a hex string as a blob of arbitrary size, whereas ld.gold
+// always treats it as a 32-bit big-endian number. This follows ld.gold
+// because that is easier to implement.
+std::array<uint8_t, 4> ScriptParser::parseFill(StringRef Tok) {
+  uint32_t Value = 0;
+  bool Parsed = to_integer(Tok, Value);
+  if (!Parsed)
+    setError("invalid filler expression: " + Tok);
+  std::array<uint8_t, 4> Bytes;
+  write32be(Bytes.data(), Value);
+  return Bytes;
+// Reads "(<symbol-assignment>)" and tags the resulting assignment with the
+// given Provide and Hidden flags.
+SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) {
+  expect("(");
+  StringRef Name = next();
+  SymbolAssignment *Assign = readSymbolAssignment(Name);
+  Assign->Provide = Provide;
+  Assign->Hidden = Hidden;
+  expect(")");
+  return Assign;
+SymbolAssignment *ScriptParser::readAssignment(StringRef Tok) { // Returns the parsed assignment, or nullptr if Tok does not begin one.
+ // Assert expression returns Dot, so this is equal to ".=."
+ if (Tok == "ASSERT")
+ return make<SymbolAssignment>(".", readAssert(), getCurrentLocation());
+ size_t OldPos = Pos; // Start of the tokens that are joined into CommandString below.
+ SymbolAssignment *Cmd = nullptr;
+ if (peek() == "=" || peek() == "+=")
+ Cmd = readSymbolAssignment(Tok);
+ else if (Tok == "PROVIDE")
+ Cmd = readProvideHidden(true, false);
+ else if (Tok == "HIDDEN")
+ Cmd = readProvideHidden(false, true);
+ else if (Tok == "PROVIDE_HIDDEN")
+ Cmd = readProvideHidden(true, true);
+ if (Cmd) {
+ Cmd->CommandString =
+ Tok.str() + " " +
+ llvm::join(Tokens.begin() + OldPos, Tokens.begin() + Pos, " ");
+ expect(";"); // The ASSERT form above returns early and does not require this.
+ }
+ return Cmd;
+// Reads "= <expr>" or "+= <expr>" for the symbol Name. For "+=", the stored
+// expression adds the symbol's current value to <expr> at evaluation time.
+SymbolAssignment *ScriptParser::readSymbolAssignment(StringRef Name) {
+  StringRef Op = next();
+  assert(Op == "=" || Op == "+=");
+  Expr Value = readExpr();
+  if (Op != "+=")
+    return make<SymbolAssignment>(Name, Value, getCurrentLocation());
+  std::string Loc = getCurrentLocation();
+  Expr Sum = [=] { return add(Script->getSymbolValue(Name, Loc), Value()); };
+  return make<SymbolAssignment>(Name, Sum, getCurrentLocation());
+// Entry point of the operator-precedence parser for linker script
+// expressions.
+Expr ScriptParser::readExpr() {
+  // The lexer is context-aware: while InExpr is set it applies different
+  // tokenization rules. Set it for the duration of the parse and restore
+  // the previous state afterwards.
+  const bool SavedInExpr = InExpr;
+  InExpr = true;
+  Expr Lhs = readPrimary();
+  Expr Result = readExpr1(Lhs, 0);
+  InExpr = SavedInExpr;
+  return Result;
+Expr ScriptParser::combine(StringRef Op, Expr L, Expr R) { // Builds an expression that applies the binary operator Op to L and R when evaluated.
+ if (Op == "+")
+ return [=] { return add(L(), R()); };
+ if (Op == "-")
+ return [=] { return sub(L(), R()); };
+ if (Op == "*")
+ return [=] { return L().getValue() * R().getValue(); };
+ if (Op == "/") {
+ std::string Loc = getCurrentLocation(); // Captured at parse time for the evaluation-time diagnostic.
+ return [=]() -> uint64_t {
+ if (uint64_t RV = R().getValue())
+ return L().getValue() / RV;
+ error(Loc + ": division by zero");
+ return 0;
+ };
+ }
+ if (Op == "%") {
+ std::string Loc = getCurrentLocation(); // Captured at parse time for the evaluation-time diagnostic.
+ return [=]() -> uint64_t {
+ if (uint64_t RV = R().getValue())
+ return L().getValue() % RV;
+ error(Loc + ": modulo by zero");
+ return 0;
+ };
+ }
+ if (Op == "<<")
+ return [=] { return L().getValue() << R().getValue(); };
+ if (Op == ">>")
+ return [=] { return L().getValue() >> R().getValue(); };
+ if (Op == "<")
+ return [=] { return L().getValue() < R().getValue(); };
+ if (Op == ">")
+ return [=] { return L().getValue() > R().getValue(); };
+ if (Op == ">=")
+ return [=] { return L().getValue() >= R().getValue(); };
+ if (Op == "<=")
+ return [=] { return L().getValue() <= R().getValue(); };
+ if (Op == "==")
+ return [=] { return L().getValue() == R().getValue(); };
+ if (Op == "!=")
+ return [=] { return L().getValue() != R().getValue(); };
+ if (Op == "||")
+ return [=] { return L().getValue() || R().getValue(); };
+ if (Op == "&&")
+ return [=] { return L().getValue() && R().getValue(); };
+ if (Op == "&")
+ return [=] { return bitAnd(L(), R()); };
+ if (Op == "|")
+ return [=] { return bitOr(L(), R()); };
+ llvm_unreachable("invalid operator"); // Every operator with a precedence() entry is handled above.
+// This is a part of the operator-precedence parser. This function
+// assumes that the remaining token stream starts with an operator.
+Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { // Lhs is the operand parsed so far; operators binding weaker than MinPrec are left for the caller.
+ while (!atEOF() && !errorCount()) {
+ // Read an operator and an expression.
+ if (consume("?"))
+ return readTernary(Lhs);
+ StringRef Op1 = peek();
+ if (precedence(Op1) < MinPrec) // precedence() returns -1 for tokens that are not binary operators.
+ break;
+ skip();
+ Expr Rhs = readPrimary();
+ // Evaluate the remaining part of the expression first if the
+ // next operator has greater precedence than the previous one.
+ // For example, if we have read "+" and "3", and if the next
+ // operator is "*", then we'll evaluate 3 * ... part first.
+ while (!atEOF()) {
+ StringRef Op2 = peek();
+ if (precedence(Op2) <= precedence(Op1))
+ break;
+ Rhs = readExpr1(Rhs, precedence(Op2));
+ }
+ Lhs = combine(Op1, Lhs, Rhs);
+ }
+ return Lhs;
+// Returns an expression yielding Target->PageSize. If no Target is set at
+// evaluation time, an error mentioning the current script location is
+// reported and 4096 is returned as a placeholder.
+Expr ScriptParser::getPageSize() {
+  std::string Location = getCurrentLocation();
+  return [=]() -> uint64_t {
+    if (!Target) {
+      error(Location + ": unable to calculate page size");
+      return 4096; // Return a dummy value.
+    }
+    return Target->PageSize;
+  };
+// Parses the "(name)" operand of CONSTANT. COMMONPAGESIZE evaluates to the
+// target page size (see getPageSize) and MAXPAGESIZE to
+// Config->MaxPageSize. Any other name is reported as an error and
+// evaluates to 0.
+Expr ScriptParser::readConstant() {
+  StringRef S = readParenLiteral();
+  // Without this guard every constant, including MAXPAGESIZE and unknown
+  // names, would silently evaluate to the page size.
+  if (S == "COMMONPAGESIZE")
+    return getPageSize();
+  if (S == "MAXPAGESIZE")
+    return [] { return Config->MaxPageSize; };
+  setError("unknown constant: " + S);
+  return [] { return 0; };
+// Parses Tok as an unsigned integer. Hexadecimal numbers are written with a
+// "0x" prefix or an "H" suffix (both case-insensitive). Other tokens are
+// decimal and may carry a "K" (x1024) or "M" (x1024x1024) suffix, also
+// case-insensitive. Returns None if the digits cannot be parsed.
+static Optional<uint64_t> parseInt(StringRef Tok) {
+  StringRef Digits = Tok;
+  unsigned Radix = 10;
+  uint64_t Scale = 1;
+  if (Tok.startswith_lower("0x")) {
+    Digits = Tok.substr(2);
+    Radix = 16;
+  } else if (Tok.endswith_lower("H")) {
+    Digits = Tok.drop_back();
+    Radix = 16;
+  } else if (Tok.endswith_lower("K")) {
+    Digits = Tok.drop_back();
+    Scale = 1024;
+  } else if (Tok.endswith_lower("M")) {
+    Digits = Tok.drop_back();
+    Scale = 1024 * 1024;
+  }
+  uint64_t Val;
+  if (!to_integer(Digits, Val, Radix))
+    return None;
+  return Val * Scale;
+// Parses BYTE/SHORT/LONG/QUAD "(<expr>)" where Tok is the keyword already
+// read. Returns nullptr, consuming nothing, if Tok is none of those
+// keywords.
+ByteCommand *ScriptParser::readByteCommand(StringRef Tok) {
+  int Size;
+  if (Tok == "BYTE")
+    Size = 1;
+  else if (Tok == "SHORT")
+    Size = 2;
+  else if (Tok == "LONG")
+    Size = 4;
+  else if (Tok == "QUAD")
+    Size = 8;
+  else
+    return nullptr;
+  // Remember where the operand starts so the command text can be rebuilt.
+  size_t Start = Pos;
+  Expr Value = readParenExpr();
+  std::string Text =
+      Tok.str() + " " +
+      llvm::join(Tokens.begin() + Start, Tokens.begin() + Pos, " ");
+  return make<ByteCommand>(Value, Size, Text);
+// Reads "(" <token> ")" and returns the token. The token itself is read
+// with InExpr cleared, i.e. with non-expression tokenization rules.
+StringRef ScriptParser::readParenLiteral() {
+  expect("(");
+  const bool SavedInExpr = InExpr;
+  InExpr = false;
+  StringRef Literal = next();
+  InExpr = SavedInExpr;
+  expect(")");
+  return Literal;
+static void checkIfExists(OutputSection *Cmd, StringRef Location) {
+ if (Cmd->Location.empty() && Script->ErrorOnMissingSection)
+ error(Location + ": undefined section " + Cmd->Name);
+Expr ScriptParser::readPrimary() { // Parses one operand: a parenthesized expression, a unary operator application, a built-in function, ".", a number, or a symbol name.
+ if (peek() == "(")
+ return readParenExpr();
+ if (consume("~")) {
+ Expr E = readPrimary();
+ return [=] { return ~E().getValue(); };
+ }
+ if (consume("!")) {
+ Expr E = readPrimary();
+ return [=] { return !E().getValue(); };
+ }
+ if (consume("-")) {
+ Expr E = readPrimary();
+ return [=] { return -E().getValue(); };
+ }
+ StringRef Tok = next();
+ std::string Location = getCurrentLocation();
+ // Built-in functions are parsed here.
+ // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
+ if (Tok == "ABSOLUTE") {
+ Expr Inner = readParenExpr();
+ return [=] {
+ ExprValue I = Inner();
+ I.ForceAbsolute = true;
+ return I;
+ };
+ }
+ if (Tok == "ADDR") {
+ StringRef Name = readParenLiteral();
+ OutputSection *Sec = Script->getOrCreateOutputSection(Name);
+ return [=]() -> ExprValue {
+ checkIfExists(Sec, Location);
+ return {Sec, false, 0, Location};
+ };
+ }
+ if (Tok == "ALIGN") { // ALIGN(align) aligns the dot; ALIGN(exp, align) attaches an alignment to exp.
+ expect("(");
+ Expr E = readExpr();
+ if (consume(")")) {
+ E = checkAlignment(E, Location);
+ return [=] { return alignTo(Script->getDot(), E().getValue()); };
+ }
+ expect(",");
+ Expr E2 = checkAlignment(readExpr(), Location);
+ expect(")");
+ return [=] {
+ ExprValue V = E();
+ V.Alignment = E2().getValue();
+ return V;
+ };
+ }
+ if (Tok == "ALIGNOF") {
+ StringRef Name = readParenLiteral();
+ OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
+ return [=] {
+ checkIfExists(Cmd, Location);
+ return Cmd->Alignment;
+ };
+ }
+ if (Tok == "ASSERT")
+ return readAssert();
+ if (Tok == "CONSTANT")
+ return readConstant();
+ if (Tok == "DATA_SEGMENT_ALIGN") {
+ expect("(");
+ Expr E = readExpr();
+ expect(",");
+ readExpr(); // The second argument is parsed and discarded.
+ expect(")");
+ return [=] {
+ return alignTo(Script->getDot(), std::max((uint64_t)1, E().getValue()));
+ };
+ }
+ if (Tok == "DATA_SEGMENT_END") {
+ expect("(");
+ expect(".");
+ expect(")");
+ return [] { return Script->getDot(); };
+ }
+ if (Tok == "DATA_SEGMENT_RELRO_END") {
+ // GNU linkers implements more complicated logic to handle
+ // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and
+ // just align to the next page boundary for simplicity.
+ expect("(");
+ readExpr();
+ expect(",");
+ readExpr();
+ expect(")");
+ Expr E = getPageSize();
+ return [=] { return alignTo(Script->getDot(), E().getValue()); };
+ }
+ if (Tok == "DEFINED") {
+ StringRef Name = readParenLiteral();
+ return [=] { return Symtab->find(Name) ? 1 : 0; };
+ }
+ if (Tok == "LENGTH") {
+ StringRef Name = readParenLiteral();
+ if (Script->MemoryRegions.count(Name) == 0) { // The region must already be defined when the expression is parsed.
+ setError("memory region not defined: " + Name);
+ return [] { return 0; };
+ }
+ return [=] { return Script->MemoryRegions[Name]->Length; };
+ }
+ if (Tok == "LOADADDR") {
+ StringRef Name = readParenLiteral();
+ OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
+ return [=] {
+ checkIfExists(Cmd, Location);
+ return Cmd->getLMA();
+ };
+ }
+ if (Tok == "MAX" || Tok == "MIN") {
+ expect("(");
+ Expr A = readExpr();
+ expect(",");
+ Expr B = readExpr();
+ expect(")");
+ if (Tok == "MIN")
+ return [=] { return std::min(A().getValue(), B().getValue()); };
+ return [=] { return std::max(A().getValue(), B().getValue()); };
+ }
+ if (Tok == "ORIGIN") {
+ StringRef Name = readParenLiteral();
+ if (Script->MemoryRegions.count(Name) == 0) { // The region must already be defined when the expression is parsed.
+ setError("memory region not defined: " + Name);
+ return [] { return 0; };
+ }
+ return [=] { return Script->MemoryRegions[Name]->Origin; };
+ }
+ if (Tok == "SEGMENT_START") {
+ expect("(");
+ skip(); // The segment name is ignored; only the default expression is used.
+ expect(",");
+ Expr E = readExpr();
+ expect(")");
+ return [=] { return E(); };
+ }
+ if (Tok == "SIZEOF") {
+ StringRef Name = readParenLiteral();
+ OutputSection *Cmd = Script->getOrCreateOutputSection(Name);
+ // Linker script does not create an output section if its content is empty.
+ // We want to allow SIZEOF(.foo) where .foo is a section which happened to
+ // be empty.
+ return [=] { return Cmd->Size; };
+ }
+ if (Tok == "SIZEOF_HEADERS")
+ return [=] { return elf::getHeaderSize(); };
+ // Tok is the dot.
+ if (Tok == ".")
+ return [=] { return Script->getSymbolValue(Tok, Location); };
+ // Tok is a literal number.
+ if (Optional<uint64_t> Val = parseInt(Tok))
+ return [=] { return *Val; };
+ // Tok is a symbol name.
+ if (!isValidCIdentifier(Tok))
+ setError("malformed number: " + Tok);
+ Script->ReferencedSymbols.push_back(Tok); // Recorded even when the error above was reported.
+ return [=] { return Script->getSymbolValue(Tok, Location); };
+// Parses "<expr> : <expr>" after a "?" has been consumed. The returned
+// expression evaluates Cond and then only the selected branch.
+Expr ScriptParser::readTernary(Expr Cond) {
+  Expr IfTrue = readExpr();
+  expect(":");
+  Expr IfFalse = readExpr();
+  return [=] {
+    if (Cond().getValue())
+      return IfTrue();
+    return IfFalse();
+  };
+// Parses "(" <expr> ")" and returns the inner expression.
+Expr ScriptParser::readParenExpr() {
+  expect("(");
+  Expr Inner = readExpr();
+  expect(")");
+  return Inner;
+// Reads zero or more ":phdr" annotations and returns the program header
+// names in the order they appear.
+std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() {
+  std::vector<StringRef> Names;
+  while (!errorCount() && peek().startswith(":")) {
+    StringRef Tok = next();
+    if (Tok.size() == 1)
+      Names.push_back(next()); // ":" and the name are separate tokens.
+    else
+      Names.push_back(Tok.substr(1)); // ":name" arrived as one token.
+  }
+  return Names;
+// Reads a program header type. The next token must either be a number
+// (e.g. "0x3", see parseInt) or one of the PT_* names listed below. Any
+// other token is reported as an error and PT_NULL is returned.
+unsigned ScriptParser::readPhdrType() {
+  StringRef Tok = next();
+  if (Optional<uint64_t> Val = parseInt(Tok))
+    return *Val;
+  // Besides the basic types, accept the remaining standard ELF types that
+  // the PHDRS command documents and the common GNU extensions, so that
+  // ordinary scripts using e.g. PT_DYNAMIC or PT_INTERP are not rejected.
+  unsigned Ret = StringSwitch<unsigned>(Tok)
+                     .Case("PT_NULL", PT_NULL)
+                     .Case("PT_LOAD", PT_LOAD)
+                     .Case("PT_DYNAMIC", PT_DYNAMIC)
+                     .Case("PT_INTERP", PT_INTERP)
+                     .Case("PT_NOTE", PT_NOTE)
+                     .Case("PT_SHLIB", PT_SHLIB)
+                     .Case("PT_PHDR", PT_PHDR)
+                     .Case("PT_TLS", PT_TLS)
+                     .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME)
+                     .Case("PT_GNU_STACK", PT_GNU_STACK)
+                     .Case("PT_GNU_RELRO", PT_GNU_RELRO)
+                     .Default(-1);
+  if (Ret == (unsigned)-1) {
+    setError("invalid program header type: " + Tok);
+    return PT_NULL;
+  }
+  return Ret;
+// Reads an anonymous version declaration and records its symbols directly
+// in Config.
+void ScriptParser::readAnonymousDeclaration() {
+  std::vector<SymbolVersion> Locals;
+  std::vector<SymbolVersion> Globals;
+  std::tie(Locals, Globals) = readSymbols();
+  // A local "*" sets Config->DefaultSymbolVersion to VER_NDX_LOCAL; every
+  // other local pattern is recorded individually.
+  for (const SymbolVersion &Local : Locals) {
+    if (Local.Name != "*") {
+      Config->VersionScriptLocals.push_back(Local);
+      continue;
+    }
+    Config->DefaultSymbolVersion = VER_NDX_LOCAL;
+  }
+  for (const SymbolVersion &Global : Globals)
+    Config->VersionScriptGlobals.push_back(Global);
+  expect(";");
+// Reads a named version definition,
+// e.g. "VerStr { global: foo; bar; local: *; };".
+void ScriptParser::readVersionDeclaration(StringRef VerStr) {
+  std::vector<SymbolVersion> Locals;
+  std::vector<SymbolVersion> Globals;
+  std::tie(Locals, Globals) = readSymbols();
+  // A local "*" sets Config->DefaultSymbolVersion to VER_NDX_LOCAL; every
+  // other local pattern is recorded individually.
+  for (const SymbolVersion &Local : Locals) {
+    if (Local.Name != "*") {
+      Config->VersionScriptLocals.push_back(Local);
+      continue;
+    }
+    Config->DefaultSymbolVersion = VER_NDX_LOCAL;
+  }
+  // Ids 0 and 1 are reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, so
+  // user-defined versions are numbered from 2.
+  VersionDefinition Def;
+  Def.Name = VerStr;
+  Def.Globals = Globals;
+  Def.Id = Config->VersionDefinitions.size() + 2;
+  Config->VersionDefinitions.push_back(Def);
+  // A version may name a parent, as in "Ver2 { global: foo; local: *; }
+  // Ver1;". That hierarchy is only a hint which the runtime does not use,
+  // so the parent name is skipped.
+  if (peek() != ";")
+    skip();
+  expect(";");
+// Returns true if S contains any of the glob metacharacters '?', '*', '['.
+static bool hasWildcard(StringRef S) {
+  size_t Pos = S.find_first_of("?*[");
+  return Pos != StringRef::npos;
+// Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };", up to
+// and including the closing "}". Returns {locals, globals}. Symbols listed
+// before any "local:"/"global:" label are treated as global.
+std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
+ScriptParser::readSymbols() {
+  std::vector<SymbolVersion> Locals;
+  std::vector<SymbolVersion> Globals;
+  std::vector<SymbolVersion> *Cur = &Globals;
+  while (!errorCount() && !consume("}")) {
+    if (consumeLabel("local")) {
+      Cur = &Locals;
+      continue;
+    }
+    if (consumeLabel("global")) {
+      Cur = &Globals;
+      continue;
+    }
+    if (consume("extern")) {
+      std::vector<SymbolVersion> Ext = readVersionExtern();
+      Cur->insert(Cur->end(), Ext.begin(), Ext.end());
+    } else {
+      StringRef Tok = next();
+      Cur->push_back({unquote(Tok), false, hasWildcard(Tok)});
+    }
+    expect(";");
+  }
+  return {Locals, Globals};
+// Reads the remainder of an extern directive after the "extern" keyword,
+// e.g. "extern "C++" { ns::*; "f(int, double)"; };"
+// The semicolon after the last entry is optional, so this is also fine:
+// "extern "C++" { ns::*; "f(int, double)" };"
+// Quoted entries are never treated as wildcards.
+std::vector<SymbolVersion> ScriptParser::readVersionExtern() {
+  StringRef Lang = next();
+  bool IsCXX = Lang == "\"C++\"";
+  if (!IsCXX && Lang != "\"C\"")
+    setError("Unknown language");
+  expect("{");
+  std::vector<SymbolVersion> Ret;
+  while (!errorCount() && peek() != "}") {
+    StringRef Name = next();
+    bool Quoted = Name.startswith("\"");
+    bool HasWildcard = !Quoted && hasWildcard(Name);
+    Ret.push_back({unquote(Name), IsCXX, HasWildcard});
+    if (consume("}"))
+      return Ret;
+    expect(";");
+  }
+  expect("}");
+  return Ret;
+// Reads "<key> = <expr>" where <key> is one of S1, S2 or S3, evaluates the
+// expression immediately and returns its value. Reports an error and
+// returns 0 if none of the keys is present.
+uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2,
+                                            StringRef S3) {
+  bool Matched = consume(S1) || consume(S2) || consume(S3);
+  if (!Matched) {
+    setError("expected one of: " + S1 + ", " + S2 + ", or " + S3);
+    return 0;
+  }
+  expect("=");
+  Expr Value = readExpr();
+  return Value().getValue();
+// Parses the body of the MEMORY command as specified in:
+// https://sourceware.org/binutils/docs/ld/MEMORY.html
+// MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... }
+// Each region is added to Script->MemoryRegions; defining the same name
+// twice is an error.
+void ScriptParser::readMemory() {
+  expect("{");
+  while (!errorCount() && !consume("}")) {
+    StringRef Name = next();
+    if (Name == "INCLUDE") {
+      readInclude();
+      continue;
+    }
+    uint32_t Flags = 0;
+    uint32_t NegFlags = 0;
+    if (consume("(")) {
+      std::tie(Flags, NegFlags) = readMemoryAttributes();
+      expect(")");
+    }
+    expect(":");
+    uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o");
+    expect(",");
+    uint64_t Length = readMemoryAssignment("LENGTH", "len", "l");
+    MemoryRegion *Region =
+        make<MemoryRegion>(Name, Origin, Length, Flags, NegFlags);
+    bool Inserted = Script->MemoryRegions.insert({Name, Region}).second;
+    if (!Inserted)
+      setError("region '" + Name + "' already defined");
+  }
+// This function parses the attributes used to match against section
+// flags when placing output sections in a memory region. These flags
+// are only used when an explicit memory region name is not used.
+//
+// The next token is read as a string of case-insensitive attribute
+// letters: 'w' (SHF_WRITE), 'x' (SHF_EXECINSTR), 'a' (SHF_ALLOC) and 'r',
+// which is accepted but contributes no flag. Each '!' toggles inversion
+// for the letters that follow it. Returns {Flags, NegFlags}, where
+// NegFlags collects the flags listed while inversion was active.
+std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() {
+  uint32_t Flags = 0;
+  uint32_t NegFlags = 0;
+  bool Invert = false;
+  for (char C : next().lower()) {
+    uint32_t Flag = 0;
+    if (C == '!')
+      Invert = !Invert;
+    else if (C == 'w')
+      Flag = SHF_WRITE;
+    else if (C == 'x')
+      Flag = SHF_EXECINSTR; // Executable sections.
+    else if (C == 'a')
+      Flag = SHF_ALLOC;
+    else if (C != 'r')
+      setError("invalid memory region attribute");
+    if (Invert)
+      NegFlags |= Flag;
+    else
+      Flags |= Flag;
+  }
+  return {Flags, NegFlags};
+// Runs a ScriptParser over MB in linker script mode.
+void elf::readLinkerScript(MemoryBufferRef MB) {
+  ScriptParser Parser(MB);
+  Parser.readLinkerScript();
+// Runs a ScriptParser over MB in version script mode.
+void elf::readVersionScript(MemoryBufferRef MB) {
+  ScriptParser Parser(MB);
+  Parser.readVersionScript();
+// Runs a ScriptParser over MB in dynamic list mode.
+void elf::readDynamicList(MemoryBufferRef MB) {
+  ScriptParser Parser(MB);
+  Parser.readDynamicList();
+// Runs a ScriptParser over MB to parse the defsym expression for Name.
+void elf::readDefsym(StringRef Name, MemoryBufferRef MB) {
+  ScriptParser Parser(MB);
+  Parser.readDefsym(Name);
diff --git a/contrib/llvm/tools/lld/ELF/ScriptParser.h b/contrib/llvm/tools/lld/ELF/ScriptParser.h
new file mode 100644
index 000000000000..d48d5aa2115e
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/ScriptParser.h
@@ -0,0 +1,34 @@
+//===- ScriptParser.h -------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/LLVM.h"
+#include "llvm/Support/MemoryBuffer.h"
+namespace lld {
+namespace elf {
+// Parses a linker script. Calling this function updates
+// Config and ScriptConfig.
+void readLinkerScript(MemoryBufferRef MB);
+// Parses a version script.
+void readVersionScript(MemoryBufferRef MB);
+void readDynamicList(MemoryBufferRef MB); // Parses a dynamic list with the script parser.
+// Parses the defsym expression.
+void readDefsym(StringRef Name, MemoryBufferRef MB); // Name is the symbol the expression in MB is parsed for.
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/SymbolTable.cpp b/contrib/llvm/tools/lld/ELF/SymbolTable.cpp
new file mode 100644
index 000000000000..7615e12199fa
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/SymbolTable.cpp
@@ -0,0 +1,817 @@
+//===- SymbolTable.cpp ----------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// Symbol table is a bag of all known symbols. We put all symbols of
+// all input files to the symbol table. The symbol table is basically
+// a hash table with the logic to resolve symbol name conflicts using
+// the symbol types.
+#include "SymbolTable.h"
+#include "Config.h"
+#include "LinkerScript.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+#include "lld/Common/Strings.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::ELF;
+using namespace lld;
+using namespace lld::elf;
+SymbolTable *elf::Symtab;
+// Returns the first input file to name in an incompatibility diagnostic:
+// the first regular object if any, otherwise the first shared object,
+// otherwise the first bitcode file.
+// NOTE(review): BitcodeFiles[0] is read without an emptiness check; this
+// presumably relies on at least one file having been added already - confirm.
+static InputFile *getFirstElf() {
+ if (!ObjectFiles.empty())
+ return ObjectFiles[0];
+ if (!SharedFiles.empty())
+ return SharedFiles[0];
+ return BitcodeFiles[0];
+// All input object files must be for the same architecture
+// (e.g. it does not make sense to link x86 object files with
+// MIPS object files.) This function checks for that error.
+//
+// Returns true if F may be linked, false (after reporting an error) otherwise.
+// Files that are neither ELF nor bitcode are always accepted.
+static bool isCompatible(InputFile *F) {
+ if (!F->isElf() && !isa<BitcodeFile>(F))
+ return true;
+ if (F->EKind == Config->EKind && F->EMachine == Config->EMachine) {
+ if (Config->EMachine != EM_MIPS)
+ return true;
+ // For MIPS the N32 ABI setting must match as well.
+ if (isMipsN32Abi(F) == Config->MipsN32Abi)
+ return true;
+ }
+ // Name the emulation if one was configured; otherwise name the first file.
+ if (!Config->Emulation.empty())
+ error(toString(F) + " is incompatible with " + Config->Emulation);
+ else
+ error(toString(F) + " is incompatible with " + toString(getFirstElf()));
+ return false;
+// Add symbols in File to the symbol table.
+//
+// Dispatches on the concrete kind of File (binary, archive, lazy object,
+// shared object, bitcode, regular object), records the file in the matching
+// global list where one is used, and parses it. Files rejected by
+// isCompatible() are ignored.
+template <class ELFT> void SymbolTable::addFile(InputFile *File) {
+ if (!isCompatible(File))
+ return;
+ // Binary file
+ if (auto *F = dyn_cast<BinaryFile>(File)) {
+ BinaryFiles.push_back(F);
+ F->parse();
+ return;
+ }
+ // .a file
+ if (auto *F = dyn_cast<ArchiveFile>(File)) {
+ F->parse<ELFT>();
+ return;
+ }
+ // Lazy object file
+ if (auto *F = dyn_cast<LazyObjFile>(File)) {
+ LazyObjFiles.push_back(F);
+ F->parse<ELFT>();
+ return;
+ }
+ // Only the file kinds handled below are reported when tracing is enabled.
+ if (Config->Trace)
+ message(toString(File));
+ // .so file
+ if (auto *F = dyn_cast<SharedFile<ELFT>>(File)) {
+ // DSOs are uniquified not by filename but by soname.
+ F->parseSoName();
+ if (errorCount())
+ return;
+ // If a DSO appears more than once on the command line with and without
+ // --as-needed, --no-as-needed takes precedence over --as-needed because a
+ // user can add an extra DSO with --no-as-needed to force it to be added to
+ // the dependency list.
+ DenseMap<StringRef, InputFile *>::iterator It;
+ bool WasInserted;
+ std::tie(It, WasInserted) = SoNames.try_emplace(F->SoName, F);
+ // It->second is the file registered for this soname (F itself if it was
+ // just inserted); merge F's IsNeeded flag into it.
+ cast<SharedFile<ELFT>>(It->second)->IsNeeded |= F->IsNeeded;
+ if (!WasInserted)
+ return;
+ SharedFiles.push_back(F);
+ F->parseRest();
+ return;
+ }
+ // LLVM bitcode file
+ if (auto *F = dyn_cast<BitcodeFile>(File)) {
+ BitcodeFiles.push_back(F);
+ F->parse<ELFT>(ComdatGroups);
+ return;
+ }
+ // Regular object file
+ ObjectFiles.push_back(File);
+ cast<ObjFile<ELFT>>(File)->parse(ComdatGroups);
+// This function is where all the optimizations of link-time
+// optimization happens. When LTO is in use, some input files are
+// not in native object file format but in the LLVM bitcode format.
+// This function compiles bitcode files into a few big native files
+// using LLVM functions and replaces bitcode symbols with the results.
+// Because all bitcode files that the program consists of are passed
+// to the compiler at once, it can do whole-program optimization.
+//
+// Does nothing if no bitcode files were added.
+template <class ELFT> void SymbolTable::addCombinedLTOObject() {
+ if (BitcodeFiles.empty())
+ return;
+ // Compile bitcode files and replace bitcode symbols.
+ LTO.reset(new BitcodeCompiler);
+ for (BitcodeFile *F : BitcodeFiles)
+ LTO->add(*F);
+ for (InputFile *File : LTO->compile()) {
+ // Each compiled object is parsed against a fresh, empty comdat-group set
+ // rather than the table's ComdatGroups.
+ DenseSet<CachedHashStringRef> DummyGroups;
+ auto *Obj = cast<ObjFile<ELFT>>(File);
+ Obj->parse(DummyGroups);
+ for (Symbol *Sym : Obj->getGlobalSymbols())
+ Sym->parseSymbolVersion();
+ ObjectFiles.push_back(File);
+ }
+// Set a flag for --trace-symbol so that we can print out a log message
+// if a new symbol with the same name is inserted into the symbol table.
+//
+// The flag is a SymMap entry for Name with index -1. insertName() treats -1
+// as "traced, not created yet", and find() reports it as not found.
+void SymbolTable::trace(StringRef Name) {
+ SymMap.insert({CachedHashStringRef(Name), -1});
+// Implements the symbol renaming for -wrap by rewiring SymMap indices:
+// after the call, Real's name maps to the slot Sym's name used to map to, and
+// Sym's name maps to the slot of Wrap's name.
+void SymbolTable::wrap(Symbol *Sym, Symbol *Real, Symbol *Wrap) {
+ // Swap symbols as instructed by -wrap.
+ // NOTE(review): Idx1..Idx3 are references obtained through operator[];
+ // this presumably relies on all three names already being present in SymMap
+ // so that no lookup inserts (and possibly rehashes) - confirm.
+ int &Idx1 = SymMap[CachedHashStringRef(Sym->getName())];
+ int &Idx2 = SymMap[CachedHashStringRef(Real->getName())];
+ int &Idx3 = SymMap[CachedHashStringRef(Wrap->getName())];
+ // Order matters: Idx2 must take Idx1's old value before Idx1 is overwritten.
+ Idx2 = Idx1;
+ Idx1 = Idx3;
+ // Now renaming is complete. No one refers to the Real symbol. We could leave
+ // Real as-is, but if Real is written to the symbol table, it may
+ // contain irrelevant values. So, we copy all values from Sym to Real.
+ // The name is saved and restored because the memcpy overwrites it too.
+ StringRef S = Real->getName();
+ memcpy(Real, Sym, sizeof(SymbolUnion));
+ Real->setName(S);
+// Combines two visibility values. STV_DEFAULT yields to the other operand;
+// when neither operand is STV_DEFAULT the numerically smaller one is returned.
+static uint8_t getMinVisibility(uint8_t VA, uint8_t VB) {
+ const bool FirstIsDefault = VA == STV_DEFAULT;
+ const bool SecondIsDefault = VB == STV_DEFAULT;
+ // If either side is the default, the other side decides (the first operand
+ // being default takes priority, which also covers "both default").
+ if (FirstIsDefault || SecondIsDefault)
+ return FirstIsDefault ? VB : VA;
+ return VB < VA ? VB : VA;
+// Find an existing symbol or create and insert a new one.
+//
+// Returns {symbol, true} if a new placeholder symbol was created for Name and
+// {existing symbol, false} otherwise. A name previously registered through
+// trace() (index -1 in SymMap) counts as new and gets its Traced bit set.
+std::pair<Symbol *, bool> SymbolTable::insertName(StringRef Name) {
+ // <name>@@<version> means the symbol is the default version. In that
+ // case <name>@@<version> will be used to resolve references to <name>.
+ //
+ // Since this is a hot path, the following string search code is
+ // optimized for speed. StringRef::find(char) is much faster than
+ // StringRef::find(StringRef).
+ size_t Pos = Name.find('@');
+ if (Pos != StringRef::npos && Pos + 1 < Name.size() && Name[Pos + 1] == '@')
+ Name = Name.take_front(Pos);
+ // Tentatively map Name to the slot a new symbol would occupy in SymVector.
+ auto P = SymMap.insert({CachedHashStringRef(Name), (int)SymVector.size()});
+ int &SymIndex = P.first->second;
+ bool IsNew = P.second;
+ bool Traced = false;
+ // -1 is the marker left by trace(): no symbol exists yet for this name.
+ if (SymIndex == -1) {
+ SymIndex = SymVector.size();
+ IsNew = true;
+ Traced = true;
+ }
+ if (!IsNew)
+ return {SymVector[SymIndex], false};
+ // Allocate storage sized for any symbol kind and initialize it as a
+ // placeholder with default attributes.
+ auto *Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
+ Sym->SymbolKind = Symbol::PlaceholderKind;
+ Sym->Visibility = STV_DEFAULT;
+ Sym->IsUsedInRegularObj = false;
+ Sym->ExportDynamic = false;
+ Sym->CanInline = true;
+ Sym->Traced = Traced;
+ Sym->VersionId = Config->DefaultSymbolVersion;
+ SymVector.push_back(Sym);
+ return {Sym, true};
+// Find an existing symbol or create and insert a new one, then apply the given
+// attributes.
+//
+// Returns the symbol and whether it was newly created. The attributes are
+// merged into the symbol in both cases.
+std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name,
+ uint8_t Visibility,
+ bool CanOmitFromDynSym,
+ InputFile *File) {
+ Symbol *S;
+ bool WasInserted;
+ std::tie(S, WasInserted) = insertName(Name);
+ // Merge in the new symbol's visibility.
+ S->Visibility = getMinVisibility(S->Visibility, Visibility);
+ // ExportDynamic is only ever switched on here, never cleared.
+ if (!CanOmitFromDynSym && (Config->Shared || Config->ExportDynamic))
+ S->ExportDynamic = true;
+ // A null File is treated the same way as a regular object file.
+ if (!File || File->kind() == InputFile::ObjKind)
+ S->IsUsedInRegularObj = true;
+ return {S, WasInserted};
+// Extracts the visibility from a symbol's st_other value (its low two bits).
+static uint8_t getVisibility(uint8_t StOther) { return StOther & 3; }
+// Adds an undefined reference to Name coming from File and returns the
+// resulting symbol. A new symbol (or a shared symbol referenced with
+// non-default visibility) becomes Undefined. A lazy symbol is fetched unless
+// the reference is weak. Other existing symbols are kept, possibly with an
+// updated binding.
+template <class ELFT>
+Symbol *SymbolTable::addUndefined(StringRef Name, uint8_t Binding,
+ uint8_t StOther, uint8_t Type,
+ bool CanOmitFromDynSym, InputFile *File) {
+ Symbol *S;
+ bool WasInserted;
+ uint8_t Visibility = getVisibility(StOther);
+ std::tie(S, WasInserted) = insert(Name, Visibility, CanOmitFromDynSym, File);
+ // An undefined symbol with non default visibility must be satisfied
+ // in the same DSO.
+ if (WasInserted || (isa<SharedSymbol>(S) && Visibility != STV_DEFAULT)) {
+ replaceSymbol<Undefined>(S, File, Name, Binding, StOther, Type);
+ return S;
+ }
+ // Shared and lazy symbols take the binding of this reference; an existing
+ // undefined symbol takes it only when the reference is not weak.
+ if (S->isShared() || S->isLazy() || (S->isUndefined() && Binding != STB_WEAK))
+ S->Binding = Binding;
+ if (S->isLazy()) {
+ // An undefined weak will not fetch archive members. See comment on Lazy in
+ // Symbols.h for the details.
+ if (Binding == STB_WEAK) {
+ S->Type = Type;
+ return S;
+ }
+ // Do extra check for --warn-backrefs.
+ //
+ // --warn-backrefs is an option to prevent an undefined reference from
+ // fetching an archive member written earlier in the command line. It can be
+ // used to keep compatibility with GNU linkers to some degree.
+ // I'll explain the feature and why you may find it useful in this comment.
+ //
+ // lld's symbol resolution semantics is more relaxed than traditional Unix
+ // linkers. For example,
+ //
+ // ld.lld foo.a bar.o
+ //
+ // succeeds even if bar.o contains an undefined symbol that has to be
+ // resolved by some object file in foo.a. Traditional Unix linkers don't
+ // allow this kind of backward reference, as they visit each file only once
+ // from left to right in the command line while resolving all undefined
+ // symbols at the moment of visiting.
+ //
+ // In the above case, since there's no undefined symbol when a linker visits
+ // foo.a, no files are pulled out from foo.a, and because the linker forgets
+ // about foo.a after visiting, it can't resolve undefined symbols in bar.o
+ // that could have been resolved otherwise.
+ //
+ // That lld accepts more relaxed form means that (besides it'd make more
+ // sense) you can accidentally write a command line or a build file that
+ // works only with lld, even if you have a plan to distribute it to wider
+ // users who may be using GNU linkers. With --warn-backrefs, you can detect
+ // a library order that doesn't work with other Unix linkers.
+ //
+ // The option is also useful to detect cyclic dependencies between static
+ // archives. Again, lld accepts
+ //
+ // ld.lld foo.a bar.a
+ //
+ // even if foo.a and bar.a depend on each other. With --warn-backrefs, it is
+ // handled as an error.
+ //
+ // Here is how the option works. We assign a group ID to each file. A file
+ // with a smaller group ID can pull out object files from an archive file
+ // with an equal or greater group ID. Otherwise, it is a reverse dependency
+ // and an error.
+ //
+ // A file outside --{start,end}-group gets a fresh ID when instantiated. All
+ // files within the same --{start,end}-group get the same group ID. E.g.
+ //
+ // ld.lld A B --start-group C D --end-group E
+ //
+ // A forms group 0. B forms group 1. C and D (including their member object
+ // files) form group 2. E forms group 3. I think that you can see how this
+ // group assignment rule simulates the traditional linker's semantics.
+ //
+ // Backref is computed before fetchLazy() because fetching replaces S.
+ bool Backref =
+ Config->WarnBackrefs && File && S->File->GroupId < File->GroupId;
+ fetchLazy<ELFT>(S);
+ // We don't report backward references to weak symbols as they can be
+ // overridden later.
+ if (Backref && S->Binding != STB_WEAK)
+ warn("backward reference detected: " + Name + " in " + toString(File) +
+ " refers to " + toString(S->File));
+ }
+ return S;
+// `.symver foo,foo@@VER` has the unfortunate side effect of producing two
+// symbols, foo and foo@@VER. Plain foo should effectively be ignored, so the
+// name carrying "@@" is given precedence.
+// FIXME: If users can transition to using
+// .symver foo,foo@@@VER
+// we can delete this hack.
+//
+// Returns 1 if only the new Name contains "@@", -1 if only the existing
+// symbol's name does, and 0 if both or neither do.
+static int compareVersion(Symbol *S, StringRef Name) {
+ const bool NewHasDefaultVer = Name.contains("@@");
+ const bool OldHasDefaultVer = S->getName().contains("@@");
+ if (NewHasDefaultVer == OldHasDefaultVer)
+ return 0;
+ return NewHasDefaultVer ? 1 : -1;
+// Decides whether a new defined symbol with the given binding beats the
+// existing symbol S. Returns 1 if the new symbol should win, -1 if it should
+// lose, or 0 if both are strong defined symbols.
+static int compareDefined(Symbol *S, bool WasInserted, uint8_t Binding,
+ StringRef Name) {
+ // A fresh slot, or a slot not holding a definition, is always taken over.
+ if (WasInserted || !S->isDefined())
+ return 1;
+ // A difference in "@@" default-version naming decides next.
+ const int VersionCmp = compareVersion(S, Name);
+ if (VersionCmp != 0)
+ return VersionCmp;
+ // A weak newcomer never replaces an existing definition.
+ if (Binding == STB_WEAK)
+ return -1;
+ // A strong newcomer replaces a weak definition and ties with a strong one.
+ return S->isWeak() ? 1 : 0;
+// We have a new non-common defined symbol with the specified binding. Return 1
+// if the new symbol should win, -1 if the new symbol should lose, or 0 if there
+// is a conflict. If the new symbol wins, also update the binding.
+//
+// IsAbsolute and Value describe the new symbol; they are only consulted when
+// compareDefined() reports a tie between two strong definitions.
+static int compareDefinedNonCommon(Symbol *S, bool WasInserted, uint8_t Binding,
+ bool IsAbsolute, uint64_t Value,
+ StringRef Name) {
+ if (int Cmp = compareDefined(S, WasInserted, Binding, Name))
+ return Cmp;
+ if (auto *R = dyn_cast<Defined>(S)) {
+ // The existing definition lives in a BssSection, i.e. it is a common
+ // symbol as created by addCommon().
+ if (R->Section && isa<BssSection>(R->Section)) {
+ // Non-common symbols take precedence over common symbols.
+ if (Config->WarnCommon)
+ warn("common " + S->getName() + " is overridden");
+ return 1;
+ }
+ // Two global absolute definitions with the same value do not conflict;
+ // the existing one is kept.
+ if (R->Section == nullptr && Binding == STB_GLOBAL && IsAbsolute &&
+ R->Value == Value)
+ return -1;
+ }
+ return 0;
+// Adds a common symbol N of the given Size and Alignment defined in File and
+// returns the resulting symbol. If the new symbol wins, it becomes a Defined
+// symbol placed in a fresh BssSection named "COMMON". If it ties with an
+// existing common symbol, the two are merged (maximum alignment, larger size).
+Symbol *SymbolTable::addCommon(StringRef N, uint64_t Size, uint32_t Alignment,
+ uint8_t Binding, uint8_t StOther, uint8_t Type,
+ InputFile &File) {
+ Symbol *S;
+ bool WasInserted;
+ std::tie(S, WasInserted) = insert(N, getVisibility(StOther),
+ /*CanOmitFromDynSym*/ false, &File);
+ int Cmp = compareDefined(S, WasInserted, Binding, N);
+ // The existing symbol wins; nothing to do.
+ if (Cmp < 0)
+ return S;
+ // The new symbol wins; back it with its own BssSection.
+ if (Cmp > 0) {
+ auto *Bss = make<BssSection>("COMMON", Size, Alignment);
+ Bss->File = &File;
+ Bss->Live = !Config->GcSections;
+ InputSections.push_back(Bss);
+ replaceSymbol<Defined>(S, &File, N, Binding, StOther, Type, 0, Size, Bss);
+ return S;
+ }
+ // Tie: compareDefined() returns 0 only when S is already defined.
+ auto *D = cast<Defined>(S);
+ auto *Bss = dyn_cast_or_null<BssSection>(D->Section);
+ if (!Bss) {
+ // Non-common symbols take precedence over common symbols.
+ if (Config->WarnCommon)
+ warn("common " + S->getName() + " is overridden");
+ return S;
+ }
+ if (Config->WarnCommon)
+ warn("multiple common of " + D->getName());
+ // Merge the two common definitions: largest alignment, and the file and
+ // size of whichever definition is bigger.
+ Bss->Alignment = std::max(Bss->Alignment, Alignment);
+ if (Size > Bss->Size) {
+ D->File = Bss->File = &File;
+ D->Size = Bss->Size = Size;
+ }
+ return S;
+// Reports a "duplicate symbol" error for Sym, which is already defined and is
+// defined again in NewFile. ErrSec/ErrOffset locate the new definition; when
+// either definition has no section, only the file names are printed.
+// Does nothing if multiple definitions are allowed by configuration.
+static void reportDuplicate(Symbol *Sym, InputFile *NewFile,
+ InputSectionBase *ErrSec, uint64_t ErrOffset) {
+ if (Config->AllowMultipleDefinition)
+ return;
+ Defined *D = cast<Defined>(Sym);
+ // Without sections on both sides there is no source location to show.
+ if (!D->Section || !ErrSec) {
+ error("duplicate symbol: " + toString(*Sym) + "\n>>> defined in " +
+ toString(Sym->File) + "\n>>> defined in " + toString(NewFile));
+ return;
+ }
+ // Construct and print an error message in the form of:
+ //
+ // ld.lld: error: duplicate symbol: foo
+ // >>> defined at bar.c:30
+ // >>> bar.o (/home/alice/src/bar.o)
+ // >>> defined at baz.c:563
+ // >>> baz.o in archive libbaz.a
+ auto *Sec1 = cast<InputSectionBase>(D->Section);
+ std::string Src1 = Sec1->getSrcMsg(*Sym, D->Value);
+ std::string Obj1 = Sec1->getObjMsg(D->Value);
+ std::string Src2 = ErrSec->getSrcMsg(*Sym, ErrOffset);
+ std::string Obj2 = ErrSec->getObjMsg(ErrOffset);
+ std::string Msg = "duplicate symbol: " + toString(*Sym) + "\n>>> defined at ";
+ // The source line is optional; the object description is always printed.
+ if (!Src1.empty())
+ Msg += Src1 + "\n>>> ";
+ Msg += Obj1 + "\n>>> defined at ";
+ if (!Src2.empty())
+ Msg += Src2 + "\n>>> ";
+ Msg += Obj2;
+ error(Msg);
+// Adds a regular (non-common) definition of Name. If the new definition wins
+// it replaces the existing symbol; if it conflicts with an existing strong
+// definition a duplicate-symbol error is reported. A null Section denotes an
+// absolute symbol. Returns the symbol as a Defined.
+Defined *SymbolTable::addDefined(StringRef Name, uint8_t StOther, uint8_t Type,
+ uint64_t Value, uint64_t Size, uint8_t Binding,
+ SectionBase *Section, InputFile *File) {
+ Symbol *S;
+ bool WasInserted;
+ std::tie(S, WasInserted) = insert(Name, getVisibility(StOther),
+ /*CanOmitFromDynSym*/ false, File);
+ int Cmp = compareDefinedNonCommon(S, WasInserted, Binding, Section == nullptr,
+ Value, Name);
+ if (Cmp > 0)
+ replaceSymbol<Defined>(S, File, Name, Binding, StOther, Type, Value, Size,
+ Section);
+ else if (Cmp == 0)
+ reportDuplicate(S, File, dyn_cast_or_null<InputSectionBase>(Section),
+ Value);
+ return cast<Defined>(S);
+// Adds the symbol Sym named Name from the shared object File. The symbol
+// becomes a SharedSymbol if it is new, or if the existing symbol is undefined
+// or lazy and has default visibility. Otherwise the existing symbol is kept.
+template <typename ELFT>
+void SymbolTable::addShared(StringRef Name, SharedFile<ELFT> &File,
+ const typename ELFT::Sym &Sym, uint32_t Alignment,
+ uint32_t VerdefIndex) {
+ // DSO symbols do not affect visibility in the output, so we pass STV_DEFAULT
+ // as the visibility, which will leave the visibility in the symbol table
+ // unchanged.
+ Symbol *S;
+ bool WasInserted;
+ std::tie(S, WasInserted) = insert(Name, STV_DEFAULT,
+ /*CanOmitFromDynSym*/ true, &File);
+ // Make sure we preempt DSO symbols with default visibility.
+ if (Sym.getVisibility() == STV_DEFAULT)
+ S->ExportDynamic = true;
+ // An undefined symbol with non default visibility must be satisfied
+ // in the same DSO.
+ auto Replace = [&](uint8_t Binding) {
+ replaceSymbol<SharedSymbol>(S, File, Name, Binding, Sym.st_other,
+ Sym.getType(), Sym.st_value, Sym.st_size,
+ Alignment, VerdefIndex);
+ };
+ // A new symbol takes the DSO symbol's binding; an existing undefined or
+ // lazy symbol keeps the binding it already had.
+ if (WasInserted)
+ Replace(Sym.getBinding());
+ else if (S->Visibility == STV_DEFAULT && (S->isUndefined() || S->isLazy()))
+ Replace(S->Binding);
+// Adds a definition of Name coming from the bitcode file F. The symbol is
+// recorded as a Defined with value 0, size 0 and no section. A conflict with
+// an existing strong definition is reported as a duplicate symbol.
+Symbol *SymbolTable::addBitcode(StringRef Name, uint8_t Binding,
+ uint8_t StOther, uint8_t Type,
+ bool CanOmitFromDynSym, BitcodeFile &F) {
+ Symbol *S;
+ bool WasInserted;
+ std::tie(S, WasInserted) =
+ insert(Name, getVisibility(StOther), CanOmitFromDynSym, &F);
+ int Cmp = compareDefinedNonCommon(S, WasInserted, Binding,
+ /*IsAbs*/ false, /*Value*/ 0, Name);
+ if (Cmp > 0)
+ replaceSymbol<Defined>(S, &F, Name, Binding, StOther, Type, 0, 0, nullptr);
+ else if (Cmp == 0)
+ reportDuplicate(S, &F, nullptr, 0);
+ return S;
+// Looks up Name and returns its symbol, or nullptr if the name is unknown or
+// is only registered with the -1 marker (no symbol created for it yet).
+Symbol *SymbolTable::find(StringRef Name) {
+ auto Entry = SymMap.find(CachedHashStringRef(Name));
+ const bool Missing = Entry == SymMap.end() || Entry->second == -1;
+ return Missing ? nullptr : SymVector[Entry->second];
+// Registers Name as lazily available from the archive File (archive symbol
+// Sym). A new name becomes a LazyArchive symbol. An existing undefined strong
+// symbol triggers fetching the member immediately. An existing undefined weak
+// symbol becomes a weak LazyArchive. Anything else is left untouched.
+template <class ELFT>
+void SymbolTable::addLazyArchive(StringRef Name, ArchiveFile &File,
+ const object::Archive::Symbol Sym) {
+ Symbol *S;
+ bool WasInserted;
+ std::tie(S, WasInserted) = insertName(Name);
+ if (WasInserted) {
+ replaceSymbol<LazyArchive>(S, File, STT_NOTYPE, Sym);
+ return;
+ }
+ if (!S->isUndefined())
+ return;
+ // An undefined weak will not fetch archive members. See comment on Lazy in
+ // Symbols.h for the details.
+ if (S->isWeak()) {
+ replaceSymbol<LazyArchive>(S, File, S->Type, Sym);
+ // Restore the weak binding after the symbol has been replaced.
+ S->Binding = STB_WEAK;
+ return;
+ }
+ // fetch() may return null, in which case there is nothing to add.
+ if (InputFile *F = File.fetch(Sym))
+ addFile<ELFT>(F);
+// Registers Name as lazily available from the lazy object file File. A new
+// name becomes a LazyObject symbol. An existing undefined strong symbol
+// triggers fetching the file immediately. An existing undefined weak symbol
+// becomes a weak LazyObject. Anything else is left untouched.
+template <class ELFT>
+void SymbolTable::addLazyObject(StringRef Name, LazyObjFile &File) {
+ Symbol *S;
+ bool WasInserted;
+ std::tie(S, WasInserted) = insertName(Name);
+ if (WasInserted) {
+ replaceSymbol<LazyObject>(S, File, STT_NOTYPE, Name);
+ return;
+ }
+ if (!S->isUndefined())
+ return;
+ // An undefined weak will not fetch archive members. See comment on Lazy in
+ // Symbols.h for the details.
+ if (S->isWeak()) {
+ replaceSymbol<LazyObject>(S, File, S->Type, Name);
+ // Restore the weak binding after the symbol has been replaced.
+ S->Binding = STB_WEAK;
+ return;
+ }
+ // fetch() may return null, in which case there is nothing to add.
+ if (InputFile *F = File.fetch())
+ addFile<ELFT>(F);
+// Loads the file behind the lazy symbol Sym and adds it to the symbol table.
+// Sym must be a LazyArchive or a LazyObject; nothing is added if the fetch
+// returns null.
+template <class ELFT> void SymbolTable::fetchLazy(Symbol *Sym) {
+ if (auto *S = dyn_cast<LazyArchive>(Sym)) {
+ if (InputFile *File = S->fetch())
+ addFile<ELFT>(File);
+ return;
+ }
+ // Not an archive symbol, so it has to be a lazy object symbol.
+ auto *S = cast<LazyObject>(Sym);
+ if (InputFile *File = cast<LazyObjFile>(S->File)->fetch())
+ addFile<ELFT>(File);
+// Initialize DemangledSyms with a map from demangled symbols to symbol
+// objects. Used to handle "extern C++" directive in version scripts.
+// The map will contain all demangled symbols. That can be very large,
+// and in LLD we generally want to avoid doing anything for each symbol.
+// Then, why are we doing this? Here's why.
+// Users can use "extern C++ {}" directive to match against demangled
+// C++ symbols. For example, you can write a pattern such as
+// "llvm::*::foo(int, ?)". Obviously, there's no way to handle this
+// other than trying to match a pattern against all demangled symbols.
+// So, if "extern C++" feature is used, we need to demangle all known
+// symbols.
+//
+// The map is built on the first call and reused afterwards. Only defined
+// symbols are included; names that cannot be demangled are stored as-is.
+StringMap<std::vector<Symbol *>> &SymbolTable::getDemangledSyms() {
+ if (!DemangledSyms) {
+ DemangledSyms.emplace();
+ for (Symbol *Sym : SymVector) {
+ if (!Sym->isDefined())
+ continue;
+ if (Optional<std::string> S = demangleItanium(Sym->getName()))
+ (*DemangledSyms)[*S].push_back(Sym);
+ else
+ (*DemangledSyms)[Sym->getName()].push_back(Sym);
+ }
+ }
+ return *DemangledSyms;
+// Returns the defined symbols whose name is exactly Ver.Name. For an
+// "extern C++" pattern the name is looked up among demangled names;
+// otherwise at most one symbol is returned.
+std::vector<Symbol *> SymbolTable::findByVersion(SymbolVersion Ver) {
+ if (Ver.IsExternCpp)
+ return getDemangledSyms().lookup(Ver.Name);
+ Symbol *Match = find(Ver.Name);
+ if (Match && Match->isDefined())
+ return {Match};
+ return {};
+// Returns all defined symbols whose name matches the pattern Ver.Name. For an
+// "extern C++" pattern the match is done against demangled names.
+std::vector<Symbol *> SymbolTable::findAllByVersion(SymbolVersion Ver) {
+ std::vector<Symbol *> Res;
+ StringMatcher M(Ver.Name);
+ if (Ver.IsExternCpp) {
+ // Each map entry holds every symbol sharing that demangled name.
+ for (auto &P : getDemangledSyms())
+ if (M.match(P.first()))
+ Res.insert(Res.end(), P.second.begin(), P.second.end());
+ return Res;
+ }
+ for (Symbol *Sym : SymVector)
+ if (Sym->isDefined() && M.match(Sym->getName()))
+ Res.push_back(Sym);
+ return Res;
+// If there's only one anonymous version definition in a version
+// script file, the script does not actually define any symbol version,
+// but just specifies symbol visibilities.
+//
+// Globals are processed before locals, and within each list exact names are
+// processed before wildcard patterns.
+void SymbolTable::handleAnonymousVersion() {
+ for (SymbolVersion &Ver : Config->VersionScriptGlobals)
+ assignExactVersion(Ver, VER_NDX_GLOBAL, "global");
+ for (SymbolVersion &Ver : Config->VersionScriptGlobals)
+ assignWildcardVersion(Ver, VER_NDX_GLOBAL);
+ for (SymbolVersion &Ver : Config->VersionScriptLocals)
+ assignExactVersion(Ver, VER_NDX_LOCAL, "local");
+ for (SymbolVersion &Ver : Config->VersionScriptLocals)
+ assignWildcardVersion(Ver, VER_NDX_LOCAL);
+// Handles -dynamic-list.
+//
+// For every symbol matched by an entry of Config->DynamicList: when not
+// building a shared object the symbol is marked ExportDynamic; when building
+// one, a symbol that is included in the dynamic symbol table is marked
+// preemptible.
+void SymbolTable::handleDynamicList() {
+ for (SymbolVersion &Ver : Config->DynamicList) {
+ std::vector<Symbol *> Syms;
+ if (Ver.HasWildcard)
+ Syms = findAllByVersion(Ver);
+ else
+ Syms = findByVersion(Ver);
+ for (Symbol *B : Syms) {
+ if (!Config->Shared)
+ B->ExportDynamic = true;
+ else if (B->includeInDynsym())
+ B->IsPreemptible = true;
+ }
+ }
+// Set symbol versions to symbols. This function handles patterns
+// containing no wildcard characters.
+//
+// VersionId is assigned to every symbol found for Ver. VersionName is used
+// only in the diagnostic printed when no symbol is found.
+void SymbolTable::assignExactVersion(SymbolVersion Ver, uint16_t VersionId,
+ StringRef VersionName) {
+ if (Ver.HasWildcard)
+ return;
+ // Get a list of symbols which we need to assign the version to.
+ std::vector<Symbol *> Syms = findByVersion(Ver);
+ if (Syms.empty()) {
+ // A missing symbol is an error only when Config->UndefinedVersion is off.
+ if (!Config->UndefinedVersion)
+ error("version script assignment of '" + VersionName + "' to symbol '" +
+ Ver.Name + "' failed: symbol not defined");
+ return;
+ }
+ // Assign the version.
+ for (Symbol *Sym : Syms) {
+ // Skip symbols containing version info because symbol versions
+ // specified by symbol names take precedence over version scripts.
+ // See parseSymbolVersion().
+ if (Sym->getName().contains('@'))
+ continue;
+ // The symbol already carries a different, non-default version.
+ if (Sym->VersionId != Config->DefaultSymbolVersion &&
+ Sym->VersionId != VersionId)
+ error("duplicate symbol '" + Ver.Name + "' in version script");
+ Sym->VersionId = VersionId;
+ }
+// Set symbol versions to symbols. This function handles patterns that
+// contain wildcard characters; patterns without them are ignored here.
+void SymbolTable::assignWildcardVersion(SymbolVersion Ver, uint16_t VersionId) {
+ if (!Ver.HasWildcard)
+ return;
+ // Exact matching takes precedence over fuzzy matching,
+ // so we set a version to a symbol only if no version has been assigned
+ // to the symbol. This behavior is compatible with GNU.
+ for (Symbol *B : findAllByVersion(Ver))
+ if (B->VersionId == Config->DefaultSymbolVersion)
+ B->VersionId = VersionId;
+// This function processes version scripts by updating VersionId
+// member of symbols.
+//
+// Order of processing: anonymous version, dynamic list, exact-name version
+// definitions, wildcard version definitions (in reverse order), and finally
+// the versions embedded in symbol names.
+void SymbolTable::scanVersionScript() {
+ // Handle edge cases first.
+ handleAnonymousVersion();
+ handleDynamicList();
+ // Now we have version definitions, so we need to set version ids to symbols.
+ // Each version definition has a glob pattern, and all symbols that match
+ // with the pattern get that version.
+ // First, we assign versions to exact matching symbols,
+ // i.e. version definitions not containing any glob meta-characters.
+ for (VersionDefinition &V : Config->VersionDefinitions)
+ for (SymbolVersion &Ver : V.Globals)
+ assignExactVersion(Ver, V.Id, V.Name);
+ // Next, we assign versions to fuzzy matching symbols,
+ // i.e. version definitions containing glob meta-characters.
+ // Note that because the last match takes precedence over previous matches,
+ // we iterate over the definitions in the reverse order.
+ for (VersionDefinition &V : llvm::reverse(Config->VersionDefinitions))
+ for (SymbolVersion &Ver : V.Globals)
+ assignWildcardVersion(Ver, V.Id);
+ // Symbol themselves might know their versions because symbols
+ // can contain versions in the form of <name>@<version>.
+ // Let them parse and update their names to exclude version suffix.
+ for (Symbol *Sym : SymVector)
+ Sym->parseSymbolVersion();
+// Explicit instantiations of the templated SymbolTable members for the four
+// ELF variants (32/64-bit, little/big-endian).
+template void SymbolTable::addFile<ELF32LE>(InputFile *);
+template void SymbolTable::addFile<ELF32BE>(InputFile *);
+template void SymbolTable::addFile<ELF64LE>(InputFile *);
+template void SymbolTable::addFile<ELF64BE>(InputFile *);
+template Symbol *SymbolTable::addUndefined<ELF32LE>(StringRef, uint8_t, uint8_t,
+ uint8_t, bool, InputFile *);
+template Symbol *SymbolTable::addUndefined<ELF32BE>(StringRef, uint8_t, uint8_t,
+ uint8_t, bool, InputFile *);
+template Symbol *SymbolTable::addUndefined<ELF64LE>(StringRef, uint8_t, uint8_t,
+ uint8_t, bool, InputFile *);
+template Symbol *SymbolTable::addUndefined<ELF64BE>(StringRef, uint8_t, uint8_t,
+ uint8_t, bool, InputFile *);
+template void SymbolTable::addCombinedLTOObject<ELF32LE>();
+template void SymbolTable::addCombinedLTOObject<ELF32BE>();
+template void SymbolTable::addCombinedLTOObject<ELF64LE>();
+template void SymbolTable::addCombinedLTOObject<ELF64BE>();
+template void
+SymbolTable::addLazyArchive<ELF32LE>(StringRef, ArchiveFile &,
+ const object::Archive::Symbol);
+template void
+SymbolTable::addLazyArchive<ELF32BE>(StringRef, ArchiveFile &,
+ const object::Archive::Symbol);
+template void
+SymbolTable::addLazyArchive<ELF64LE>(StringRef, ArchiveFile &,
+ const object::Archive::Symbol);
+template void
+SymbolTable::addLazyArchive<ELF64BE>(StringRef, ArchiveFile &,
+ const object::Archive::Symbol);
+template void SymbolTable::addLazyObject<ELF32LE>(StringRef, LazyObjFile &);
+template void SymbolTable::addLazyObject<ELF32BE>(StringRef, LazyObjFile &);
+template void SymbolTable::addLazyObject<ELF64LE>(StringRef, LazyObjFile &);
+template void SymbolTable::addLazyObject<ELF64BE>(StringRef, LazyObjFile &);
+template void SymbolTable::fetchLazy<ELF32LE>(Symbol *);
+template void SymbolTable::fetchLazy<ELF32BE>(Symbol *);
+template void SymbolTable::fetchLazy<ELF64LE>(Symbol *);
+template void SymbolTable::fetchLazy<ELF64BE>(Symbol *);
+template void SymbolTable::addShared<ELF32LE>(StringRef, SharedFile<ELF32LE> &,
+ const typename ELF32LE::Sym &,
+ uint32_t Alignment, uint32_t);
+template void SymbolTable::addShared<ELF32BE>(StringRef, SharedFile<ELF32BE> &,
+ const typename ELF32BE::Sym &,
+ uint32_t Alignment, uint32_t);
+template void SymbolTable::addShared<ELF64LE>(StringRef, SharedFile<ELF64LE> &,
+ const typename ELF64LE::Sym &,
+ uint32_t Alignment, uint32_t);
+template void SymbolTable::addShared<ELF64BE>(StringRef, SharedFile<ELF64BE> &,
+ const typename ELF64BE::Sym &,
+ uint32_t Alignment, uint32_t);
diff --git a/contrib/llvm/tools/lld/ELF/SymbolTable.h b/contrib/llvm/tools/lld/ELF/SymbolTable.h
new file mode 100644
index 000000000000..898185fc9612
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/SymbolTable.h
@@ -0,0 +1,127 @@
+//===- SymbolTable.h --------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "InputFiles.h"
+#include "LTO.h"
+#include "lld/Common/Strings.h"
+#include "llvm/ADT/CachedHashString.h"
+#include "llvm/ADT/DenseMap.h"
+namespace lld {
+namespace elf {
+class Defined;
+class SectionBase;
+// SymbolTable is a bucket of all known symbols, including defined,
+// undefined, or lazy symbols (the last one is symbols in archive
+// files whose archive members are not yet loaded).
+// We put all symbols of all files to a SymbolTable, and the
+// SymbolTable selects the "best" symbols if there are name
+// conflicts. For example, obviously, a defined symbol is better than
+// an undefined symbol. Or, if there's a conflict between a lazy and an
+// undefined, it'll read an archive member to read a real definition
+// to replace the lazy symbol. The logic is implemented in the
+// add*() functions, which are called by input files as they are parsed. There
+// is one add* function per symbol type.
+class SymbolTable {
+ // Adding input files.
+ template <class ELFT> void addFile(InputFile *File);
+ template <class ELFT> void addCombinedLTOObject();
+ // Symbol renaming for -wrap.
+ void wrap(Symbol *Sym, Symbol *Real, Symbol *Wrap);
+ // All symbols, in insertion order.
+ ArrayRef<Symbol *> getSymbols() const { return SymVector; }
+ // One add* function per symbol type.
+ template <class ELFT>
+ Symbol *addUndefined(StringRef Name, uint8_t Binding, uint8_t StOther,
+ uint8_t Type, bool CanOmitFromDynSym, InputFile *File);
+ Defined *addDefined(StringRef Name, uint8_t StOther, uint8_t Type,
+ uint64_t Value, uint64_t Size, uint8_t Binding,
+ SectionBase *Section, InputFile *File);
+ template <class ELFT>
+ void addShared(StringRef Name, SharedFile<ELFT> &F,
+ const typename ELFT::Sym &Sym, uint32_t Alignment,
+ uint32_t VerdefIndex);
+ template <class ELFT>
+ void addLazyArchive(StringRef Name, ArchiveFile &F,
+ const llvm::object::Archive::Symbol S);
+ template <class ELFT> void addLazyObject(StringRef Name, LazyObjFile &Obj);
+ Symbol *addBitcode(StringRef Name, uint8_t Binding, uint8_t StOther,
+ uint8_t Type, bool CanOmitFromDynSym, BitcodeFile &File);
+ Symbol *addCommon(StringRef Name, uint64_t Size, uint32_t Alignment,
+ uint8_t Binding, uint8_t StOther, uint8_t Type,
+ InputFile &File);
+ // Finds or creates the symbol for Name and merges the given attributes.
+ std::pair<Symbol *, bool> insert(StringRef Name, uint8_t Visibility,
+ bool CanOmitFromDynSym, InputFile *File);
+ template <class ELFT> void fetchLazy(Symbol *Sym);
+ void scanVersionScript();
+ // Returns the symbol for Name, or nullptr if there is none.
+ Symbol *find(StringRef Name);
+ // Marks Name for --trace-symbol.
+ void trace(StringRef Name);
+ void handleDynamicList();
+ // Finds or creates the symbol for Name without touching its attributes.
+ std::pair<Symbol *, bool> insertName(StringRef Name);
+ // Helpers for version script and dynamic list handling.
+ std::vector<Symbol *> findByVersion(SymbolVersion Ver);
+ std::vector<Symbol *> findAllByVersion(SymbolVersion Ver);
+ llvm::StringMap<std::vector<Symbol *>> &getDemangledSyms();
+ void handleAnonymousVersion();
+ void assignExactVersion(SymbolVersion Ver, uint16_t VersionId,
+ StringRef VersionName);
+ void assignWildcardVersion(SymbolVersion Ver, uint16_t VersionId);
+ // The order the global symbols are in is not defined. We can use an arbitrary
+ // order, but it has to be reproducible. That is true even when cross linking.
+ // The default hashing of StringRef produces different results on 32 and 64
+ // bit systems so we use a map to a vector. That is arbitrary, deterministic
+ // but a bit inefficient.
+ // FIXME: Experiment with passing in a custom hashing or sorting the symbols
+ // once symbol resolution is finished.
+ llvm::DenseMap<llvm::CachedHashStringRef, int> SymMap;
+ std::vector<Symbol *> SymVector;
+ // Comdat groups define "link once" sections. If two comdat groups have the
+ // same name, only one of them is linked, and the other is ignored. This set
+ // is used to uniquify them.
+ llvm::DenseSet<llvm::CachedHashStringRef> ComdatGroups;
+ // Set of .so files to not link the same shared object file more than once.
+ llvm::DenseMap<StringRef, InputFile *> SoNames;
+ // A map from demangled symbol names to their symbol objects.
+ // This mapping is 1:N because two symbols with different versions
+ // can have the same name. We use this map to handle "extern C++ {}"
+ // directive in version scripts.
+ llvm::Optional<llvm::StringMap<std::vector<Symbol *>>> DemangledSyms;
+ // For LTO.
+ std::unique_ptr<BitcodeCompiler> LTO;
+extern SymbolTable *Symtab;
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/Symbols.cpp b/contrib/llvm/tools/lld/ELF/Symbols.cpp
new file mode 100644
index 000000000000..a713ec539d82
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Symbols.cpp
@@ -0,0 +1,305 @@
+//===- Symbols.cpp --------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Symbols.h"
+#include "InputFiles.h"
+#include "InputSection.h"
+#include "OutputSections.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+#include "Writer.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Strings.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Path.h"
+#include <cstring>
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::ELF;
+using namespace lld;
+using namespace lld::elf;
+Defined *ElfSym::Bss;
+Defined *ElfSym::Etext1;
+Defined *ElfSym::Etext2;
+Defined *ElfSym::Edata1;
+Defined *ElfSym::Edata2;
+Defined *ElfSym::End1;
+Defined *ElfSym::End2;
+Defined *ElfSym::GlobalOffsetTable;
+Defined *ElfSym::MipsGp;
+Defined *ElfSym::MipsGpDisp;
+Defined *ElfSym::MipsLocalGp;
+Defined *ElfSym::RelaIpltStart;
+Defined *ElfSym::RelaIpltEnd;
+static uint64_t getSymVA(const Symbol &Sym, int64_t &Addend) {
+ switch (Sym.kind()) {
+ case Symbol::DefinedKind: {
+ auto &D = cast<Defined>(Sym);
+ SectionBase *IS = D.Section;
+ // According to the ELF spec, references to a local symbol from outside
+ // the group are not allowed. Unfortunately .eh_frame breaks that rule
+ // and must be treated specially. For now we just replace the symbol with
+ // 0.
+ if (IS == &InputSection::Discarded)
+ return 0;
+ // This is an absolute symbol.
+ if (!IS)
+ return D.Value;
+ IS = IS->Repl;
+ uint64_t Offset = D.Value;
+ // An object in an SHF_MERGE section might be referenced via a
+ // section symbol (as a hack for reducing the number of local
+ // symbols).
+ // Depending on the addend, the reference via a section symbol
+ // refers to a different object in the merge section.
+ // Since the objects in the merge section are not necessarily
+ // contiguous in the output, the addend can thus affect the final
+ // VA in a non-linear way.
+ // To make this work, we incorporate the addend into the section
+ // offset (and zero out the addend for later processing) so that
+ // we find the right object in the section.
+ if (D.isSection()) {
+ Offset += Addend;
+ Addend = 0; // Addend is an in/out parameter: the caller sees it consumed.
+ }
+ // In the typical case, this is actually very simple and boils
+ // down to adding together 3 numbers:
+ // 1. The address of the output section.
+ // 2. The offset of the input section within the output section.
+ // 3. The offset within the input section (this addition happens
+ // inside InputSection::getOffset).
+ //
+ // If you understand the data structures involved with this next
+ // line (and how they get built), then you have a pretty good
+ // understanding of the linker.
+ uint64_t VA = IS->getVA(Offset);
+ if (D.isTls() && !Config->Relocatable) {
+ // Use the address of the TLS segment's first section rather than the
+ // segment's address, because segment addresses aren't initialized until
+ // after sections are finalized. (e.g. Measuring the size of .rela.dyn
+ // for Android relocation packing requires knowing TLS symbol addresses
+ // during section finalization.)
+ if (!Out::TlsPhdr || !Out::TlsPhdr->FirstSec)
+ fatal(toString(D.File) +
+ " has an STT_TLS symbol but doesn't have an SHF_TLS section");
+ return VA - Out::TlsPhdr->FirstSec->Addr; // Offset from the first TLS section.
+ }
+ return VA;
+ }
+ case Symbol::SharedKind:
+ case Symbol::UndefinedKind:
+ return 0;
+ case Symbol::LazyArchiveKind:
+ case Symbol::LazyObjectKind:
+ assert(Sym.IsUsedInRegularObj && "lazy symbol reached writer");
+ return 0;
+ case Symbol::PlaceholderKind:
+ llvm_unreachable("placeholder symbol reached writer");
+ }
+ llvm_unreachable("invalid symbol kind");
+uint64_t Symbol::getVA(int64_t Addend) const {
+ uint64_t OutVA = getSymVA(*this, Addend);
+ return OutVA + Addend;
+uint64_t Symbol::getGotVA() const { return In.Got->getVA() + getGotOffset(); }
+uint64_t Symbol::getGotOffset() const {
+ return GotIndex * Target->GotEntrySize;
+uint64_t Symbol::getGotPltVA() const {
+ if (this->IsInIgot)
+ return In.IgotPlt->getVA() + getGotPltOffset();
+ return In.GotPlt->getVA() + getGotPltOffset();
+uint64_t Symbol::getGotPltOffset() const {
+ if (IsInIgot)
+ return PltIndex * Target->GotPltEntrySize;
+ return (PltIndex + Target->GotPltHeaderEntriesNum) * Target->GotPltEntrySize;
+uint64_t Symbol::getPPC64LongBranchOffset() const {
+ assert(PPC64BranchltIndex != 0xffff);
+ return PPC64BranchltIndex * Target->GotPltEntrySize;
+uint64_t Symbol::getPltVA() const {
+ PltSection *Plt = IsInIplt ? In.Iplt : In.Plt;
+ return Plt->getVA() + Plt->HeaderSize + PltIndex * Target->PltEntrySize;
+uint64_t Symbol::getPPC64LongBranchTableVA() const {
+ assert(PPC64BranchltIndex != 0xffff);
+ return In.PPC64LongBranchTarget->getVA() +
+ PPC64BranchltIndex * Target->GotPltEntrySize;
+uint64_t Symbol::getSize() const {
+ if (const auto *DR = dyn_cast<Defined>(this))
+ return DR->Size;
+ return cast<SharedSymbol>(this)->Size;
+OutputSection *Symbol::getOutputSection() const {
+ if (auto *S = dyn_cast<Defined>(this)) {
+ if (auto *Sec = S->Section)
+ return Sec->Repl->getOutputSection();
+ return nullptr;
+ }
+ return nullptr;
+// If a symbol name contains '@', the characters after it are
+// the symbol version name. This function parses that.
+void Symbol::parseSymbolVersion() {
+ StringRef S = getName();
+ size_t Pos = S.find('@');
+ if (Pos == 0 || Pos == StringRef::npos) // Leading '@' or no '@' at all.
+ return;
+ StringRef Verstr = S.substr(Pos + 1);
+ if (Verstr.empty()) // The name ends with '@'; there is no version string.
+ return;
+ // Truncate the symbol name so that it doesn't include the version string.
+ NameSize = Pos;
+ // If this is not in this DSO, it is not a definition.
+ if (!isDefined())
+ return;
+ // '@@' in a symbol name means the default version.
+ // It is usually the most recent one.
+ bool IsDefault = (Verstr[0] == '@');
+ if (IsDefault)
+ Verstr = Verstr.substr(1);
+ for (VersionDefinition &Ver : Config->VersionDefinitions) {
+ if (Ver.Name != Verstr)
+ continue;
+ if (IsDefault)
+ VersionId = Ver.Id;
+ else
+ VersionId = Ver.Id | VERSYM_HIDDEN;
+ return;
+ }
+ // It is an error if the specified version is not defined.
+ // Usually a version script is not provided when linking an executable,
+ // but we may still want to override a versioned symbol from a DSO,
+ // so we do not report an error in this case. We also do not error
+ // if the symbol has a local version as it won't be in the dynamic
+ // symbol table.
+ if (Config->Shared && VersionId != VER_NDX_LOCAL)
+ error(toString(File) + ": symbol " + S + " has undefined version " +
+ Verstr);
+InputFile *LazyArchive::fetch() { return cast<ArchiveFile>(File)->fetch(Sym); }
+MemoryBufferRef LazyArchive::getMemberBuffer() {
+ Archive::Child C = CHECK(
+ Sym.getMember(), "could not get the member for symbol " + Sym.getName());
+ return CHECK(C.getMemoryBufferRef(),
+ "could not get the buffer for the member defining symbol " +
+ Sym.getName());
+uint8_t Symbol::computeBinding() const {
+ if (Config->Relocatable)
+ return Binding;
+ if (Visibility != STV_DEFAULT && Visibility != STV_PROTECTED)
+ return STB_LOCAL;
+ if (VersionId == VER_NDX_LOCAL && isDefined() && !IsPreemptible)
+ return STB_LOCAL;
+ if (!Config->GnuUnique && Binding == STB_GNU_UNIQUE)
+ return STB_GLOBAL;
+ return Binding;
+bool Symbol::includeInDynsym() const {
+ if (!Config->HasDynSymTab)
+ return false;
+ if (computeBinding() == STB_LOCAL)
+ return false;
+ if (!isDefined())
+ return true;
+ return ExportDynamic;
+// Print out a log message for --trace-symbol.
+void elf::printTraceSymbol(Symbol *Sym) {
+ std::string S;
+ if (Sym->isUndefined())
+ S = ": reference to ";
+ else if (Sym->isLazy())
+ S = ": lazy definition of ";
+ else if (Sym->isShared())
+ S = ": shared definition of ";
+ else if (dyn_cast_or_null<BssSection>(cast<Defined>(Sym)->Section))
+ S = ": common definition of ";
+ else
+ S = ": definition of ";
+ message(toString(Sym->File) + S + Sym->getName());
+void elf::maybeWarnUnorderableSymbol(const Symbol *Sym) {
+ if (!Config->WarnSymbolOrdering)
+ return;
+ // If UnresolvedPolicy::Ignore is used, no "undefined symbol" error/warning
+ // is emitted. It makes sense to not warn on undefined symbols.
+ //
+ // Note, ld.bfd --symbol-ordering-file= does not warn on undefined symbols,
+ // but we don't have to be compatible here.
+ if (Sym->isUndefined() &&
+ Config->UnresolvedSymbols == UnresolvedPolicy::Ignore)
+ return;
+ const InputFile *File = Sym->File;
+ auto *D = dyn_cast<Defined>(Sym);
+ auto Warn = [&](StringRef S) { warn(toString(File) + S + Sym->getName()); };
+ if (Sym->isUndefined())
+ Warn(": unable to order undefined symbol: ");
+ else if (Sym->isShared())
+ Warn(": unable to order shared symbol: ");
+ else if (D && !D->Section)
+ Warn(": unable to order absolute symbol: ");
+ else if (D && isa<OutputSection>(D->Section))
+ Warn(": unable to order synthetic symbol: ");
+ else if (D && !D->Section->Repl->Live)
+ Warn(": unable to order discarded symbol: ");
+// Returns a symbol for an error message.
+std::string lld::toString(const Symbol &B) {
+ if (Config->Demangle)
+ if (Optional<std::string> S = demangleItanium(B.getName()))
+ return *S;
+ return B.getName();
diff --git a/contrib/llvm/tools/lld/ELF/Symbols.h b/contrib/llvm/tools/lld/ELF/Symbols.h
new file mode 100644
index 000000000000..4d55405d8936
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Symbols.h
@@ -0,0 +1,419 @@
+//===- Symbols.h ------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file defines various types of Symbols.
+#include "InputSection.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Common/Strings.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/ELF.h"
+namespace lld {
+namespace elf {
+class Symbol;
+class InputFile;
+} // namespace elf
+std::string toString(const elf::Symbol &);
+std::string toString(const elf::InputFile *);
+namespace elf {
+class ArchiveFile;
+class BitcodeFile;
+class BssSection;
+class InputFile;
+class LazyObjFile;
+template <class ELFT> class ObjFile;
+class OutputSection;
+template <class ELFT> class SharedFile;
+// This is a StringRef-like container that doesn't run strlen().
+// ELF string tables contain a lot of null-terminated strings. Most of them
+// are not necessary for the linker because they are names of local symbols,
+// and the linker doesn't use local symbol names for name resolution. So, we
+// use this class to represent strings read from string tables.
+struct StringRefZ {
+ StringRefZ(const char *S) : Data(S), Size(-1) {}
+ StringRefZ(StringRef S) : Data(S.data()), Size(S.size()) {}
+ const char *Data;
+ const uint32_t Size;
+// The base class for real symbol classes.
+class Symbol {
+ enum Kind {
+ PlaceholderKind,
+ DefinedKind,
+ SharedKind,
+ UndefinedKind,
+ LazyArchiveKind,
+ LazyObjectKind,
+ };
+ Kind kind() const { return static_cast<Kind>(SymbolKind); }
+ // The file from which this symbol was created.
+ InputFile *File;
+ const char *NameData;
+ mutable uint32_t NameSize;
+ uint32_t DynsymIndex = 0;
+ uint32_t GotIndex = -1;
+ uint32_t PltIndex = -1;
+ uint32_t GlobalDynIndex = -1;
+ // This field is an index to the symbol's version definition.
+ uint32_t VerdefIndex = -1;
+ // Version definition index.
+ uint16_t VersionId;
+ // An index into the .branch_lt section on PPC64.
+ uint16_t PPC64BranchltIndex = -1;
+ // Symbol binding. This is not overwritten by replaceSymbol to track
+ // changes during resolution. In particular:
+ // - An undefined weak is still weak when it resolves to a shared library.
+ // - An undefined weak will not fetch archive members, but we have to
+ // remember it is weak.
+ uint8_t Binding;
+ // The following fields have the same meaning as the ELF symbol attributes.
+ uint8_t Type; // symbol type
+ uint8_t StOther; // st_other field value
+ uint8_t SymbolKind;
+ // Symbol visibility. This is the computed minimum visibility of all
+ // observed non-DSO symbols.
+ unsigned Visibility : 2;
+ // True if the symbol was used for linking and thus need to be added to the
+ // output file's symbol table. This is true for all symbols except for
+ // unreferenced DSO symbols and bitcode symbols that are unreferenced except
+ // by other bitcode objects.
+ unsigned IsUsedInRegularObj : 1;
+ // If this flag is true and the symbol has protected or default visibility, it
+ // will appear in .dynsym. This flag is set by interposable DSO symbols in
+ // executables, by most symbols in DSOs and executables built with
+ // --export-dynamic, and by dynamic lists.
+ unsigned ExportDynamic : 1;
+ // False if LTO shouldn't inline whatever this symbol points to. If a symbol
+ // is overwritten after LTO, LTO shouldn't inline the symbol because it
+ // doesn't know the final contents of the symbol.
+ unsigned CanInline : 1;
+ // True if this symbol is specified by --trace-symbol option.
+ unsigned Traced : 1;
+ bool includeInDynsym() const;
+ uint8_t computeBinding() const;
+ bool isWeak() const { return Binding == llvm::ELF::STB_WEAK; }
+ bool isUndefined() const { return SymbolKind == UndefinedKind; }
+ bool isDefined() const { return SymbolKind == DefinedKind; }
+ bool isShared() const { return SymbolKind == SharedKind; }
+ bool isLocal() const { return Binding == llvm::ELF::STB_LOCAL; }
+ bool isLazy() const {
+ return SymbolKind == LazyArchiveKind || SymbolKind == LazyObjectKind;
+ }
+ // True if this is an undefined weak symbol. This only works once
+ // all input files have been added.
+ bool isUndefWeak() const {
+ // See comment on lazy symbols for details.
+ return isWeak() && (isUndefined() || isLazy());
+ }
+ StringRef getName() const {
+ if (NameSize == (uint32_t)-1)
+ NameSize = strlen(NameData);
+ return {NameData, NameSize};
+ }
+ void setName(StringRef S) {
+ NameData = S.data();
+ NameSize = S.size();
+ }
+ void parseSymbolVersion();
+ bool isInGot() const { return GotIndex != -1U; }
+ bool isInPlt() const { return PltIndex != -1U; }
+ bool isInPPC64Branchlt() const { return PPC64BranchltIndex != 0xffff; }
+ uint64_t getVA(int64_t Addend = 0) const;
+ uint64_t getGotOffset() const;
+ uint64_t getGotVA() const;
+ uint64_t getGotPltOffset() const;
+ uint64_t getGotPltVA() const;
+ uint64_t getPltVA() const;
+ uint64_t getPPC64LongBranchTableVA() const;
+ uint64_t getPPC64LongBranchOffset() const;
+ uint64_t getSize() const;
+ OutputSection *getOutputSection() const;
+ Symbol(Kind K, InputFile *File, StringRefZ Name, uint8_t Binding,
+ uint8_t StOther, uint8_t Type)
+ : File(File), NameData(Name.Data), NameSize(Name.Size), Binding(Binding),
+ Type(Type), StOther(StOther), SymbolKind(K), NeedsPltAddr(false),
+ IsInIplt(false), IsInIgot(false), IsPreemptible(false),
+ Used(!Config->GcSections), NeedsTocRestore(false),
+ ScriptDefined(false) {}
+ // True if the symbol should point to its PLT entry.
+ // For SharedSymbol only.
+ unsigned NeedsPltAddr : 1;
+ // True if this symbol is in the Iplt sub-section of the Plt.
+ unsigned IsInIplt : 1;
+ // True if this symbol is in the Igot sub-section of the .got.plt or .got.
+ unsigned IsInIgot : 1;
+ // True if this symbol is preemptible at load time.
+ unsigned IsPreemptible : 1;
+ // True if an undefined or shared symbol is used from a live section.
+ unsigned Used : 1;
+ // True if a call to this symbol needs to be followed by a restore of the
+ // PPC64 toc pointer.
+ unsigned NeedsTocRestore : 1;
+ // True if this symbol is defined by a linker script.
+ unsigned ScriptDefined : 1;
+ bool isSection() const { return Type == llvm::ELF::STT_SECTION; }
+ bool isTls() const { return Type == llvm::ELF::STT_TLS; }
+ bool isFunc() const { return Type == llvm::ELF::STT_FUNC; }
+ bool isGnuIFunc() const { return Type == llvm::ELF::STT_GNU_IFUNC; }
+ bool isObject() const { return Type == llvm::ELF::STT_OBJECT; }
+ bool isFile() const { return Type == llvm::ELF::STT_FILE; }
+// Represents a symbol that is defined in the current output file.
+class Defined : public Symbol {
+ Defined(InputFile *File, StringRefZ Name, uint8_t Binding, uint8_t StOther,
+ uint8_t Type, uint64_t Value, uint64_t Size, SectionBase *Section)
+ : Symbol(DefinedKind, File, Name, Binding, StOther, Type), Value(Value),
+ Size(Size), Section(Section) {}
+ static bool classof(const Symbol *S) { return S->isDefined(); }
+ uint64_t Value;
+ uint64_t Size;
+ SectionBase *Section;
+class Undefined : public Symbol {
+ Undefined(InputFile *File, StringRefZ Name, uint8_t Binding, uint8_t StOther,
+ uint8_t Type)
+ : Symbol(UndefinedKind, File, Name, Binding, StOther, Type) {}
+ static bool classof(const Symbol *S) { return S->kind() == UndefinedKind; }
+class SharedSymbol : public Symbol {
+ static bool classof(const Symbol *S) { return S->kind() == SharedKind; }
+ SharedSymbol(InputFile &File, StringRef Name, uint8_t Binding,
+ uint8_t StOther, uint8_t Type, uint64_t Value, uint64_t Size,
+ uint32_t Alignment, uint32_t VerdefIndex)
+ : Symbol(SharedKind, &File, Name, Binding, StOther, Type),
+ Alignment(Alignment), Value(Value), Size(Size) {
+ this->VerdefIndex = VerdefIndex;
+ // GNU ifunc is a mechanism to allow user-supplied functions to
+ // resolve PLT slot values at load-time. This is contrary to the
+ // regular symbol resolution scheme in which symbols are resolved just
+ // by name. Using this hook, you can program how symbols are resolved
+ // for your program. For example, you can make "memcpy" resolve
+ // to an SSE-enabled version of memcpy only when the machine running the
+ // program supports the SSE instruction set.
+ //
+ // Naturally, such symbols should always be called through their PLT
+ // slots. What GNU ifunc symbols point to are resolver functions, and
+ // calling them directly doesn't make sense (unless you are writing a
+ // loader).
+ //
+ // For DSO symbols, we always call them through PLT slots anyway.
+ // So there's no difference between GNU ifunc and regular function
+ // symbols if they are in DSOs. So we can handle GNU_IFUNC as FUNC.
+ if (this->Type == llvm::ELF::STT_GNU_IFUNC)
+ this->Type = llvm::ELF::STT_FUNC;
+ }
+ template <class ELFT> SharedFile<ELFT> &getFile() const {
+ return *cast<SharedFile<ELFT>>(File);
+ }
+ uint32_t Alignment;
+ uint64_t Value; // st_value
+ uint64_t Size; // st_size
+// LazyArchive and LazyObject represent a symbol that is not yet in the link,
+// but we know where to find it if needed. If the resolver finds both Undefined
+// and Lazy for the same name, it will ask the Lazy to load a file.
+// A special complication is the handling of weak undefined symbols. They should
+// not load a file, but we have to remember we have seen both the weak undefined
+// and the lazy. We represent that with a lazy symbol with a weak binding. This
+// means that code looking for undefined symbols normally also has to take lazy
+// symbols into consideration.
+// This class represents a symbol defined in an archive file. It is
+// created from an archive file header, and it knows how to load an
+// object file from an archive to replace itself with a defined
+// symbol.
+class LazyArchive : public Symbol {
+ LazyArchive(InputFile &File, uint8_t Type,
+ const llvm::object::Archive::Symbol S)
+ : Symbol(LazyArchiveKind, &File, S.getName(), llvm::ELF::STB_GLOBAL,
+ llvm::ELF::STV_DEFAULT, Type),
+ Sym(S) {}
+ static bool classof(const Symbol *S) { return S->kind() == LazyArchiveKind; }
+ InputFile *fetch();
+ MemoryBufferRef getMemberBuffer();
+ const llvm::object::Archive::Symbol Sym;
+// LazyObject symbols represent symbols in object files between
+// --start-lib and --end-lib options.
+class LazyObject : public Symbol {
+ LazyObject(InputFile &File, uint8_t Type, StringRef Name)
+ : Symbol(LazyObjectKind, &File, Name, llvm::ELF::STB_GLOBAL,
+ llvm::ELF::STV_DEFAULT, Type) {}
+ static bool classof(const Symbol *S) { return S->kind() == LazyObjectKind; }
+// Some linker-generated symbols need to be created as
+// Defined symbols.
+struct ElfSym {
+ // __bss_start
+ static Defined *Bss;
+ // etext and _etext
+ static Defined *Etext1;
+ static Defined *Etext2;
+ // edata and _edata
+ static Defined *Edata1;
+ static Defined *Edata2;
+ // end and _end
+ static Defined *End1;
+ static Defined *End2;
+ // The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention to
+ // be at some offset from the base of the .got section, usually 0 or
+ // the end of the .got.
+ static Defined *GlobalOffsetTable;
+ // _gp, _gp_disp and __gnu_local_gp symbols. Only for MIPS.
+ static Defined *MipsGp;
+ static Defined *MipsGpDisp;
+ static Defined *MipsLocalGp;
+ // __rel{,a}_iplt_{start,end} symbols.
+ static Defined *RelaIpltStart;
+ static Defined *RelaIpltEnd;
+// A buffer class that is large enough to hold any Symbol-derived
+// object. We allocate memory using this class and instantiate a symbol
+// using the placement new.
+union SymbolUnion {
+ alignas(Defined) char A[sizeof(Defined)];
+ alignas(Undefined) char C[sizeof(Undefined)];
+ alignas(SharedSymbol) char D[sizeof(SharedSymbol)];
+ alignas(LazyArchive) char E[sizeof(LazyArchive)];
+ alignas(LazyObject) char F[sizeof(LazyObject)];
+void printTraceSymbol(Symbol *Sym);
+template <typename T, typename... ArgT>
+void replaceSymbol(Symbol *S, ArgT &&... Arg) {
+ using llvm::ELF::STT_TLS;
+ static_assert(std::is_trivially_destructible<T>(),
+ "Symbol types must be trivially destructible");
+ static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
+ static_assert(alignof(T) <= alignof(SymbolUnion),
+ "SymbolUnion not aligned enough");
+ assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
+ "Not a Symbol");
+ Symbol Sym = *S; // Snapshot the old symbol; selected fields are copied back below.
+ new (S) T(std::forward<ArgT>(Arg)...);
+ S->VersionId = Sym.VersionId;
+ S->Visibility = Sym.Visibility;
+ S->IsUsedInRegularObj = Sym.IsUsedInRegularObj;
+ S->ExportDynamic = Sym.ExportDynamic;
+ S->CanInline = Sym.CanInline;
+ S->Traced = Sym.Traced;
+ S->ScriptDefined = Sym.ScriptDefined;
+ // Symbols representing thread-local variables must be referenced by
+ // TLS-aware relocations, and non-TLS symbols must be referenced by
+ // non-TLS relocations, so there's a clear distinction between TLS
+ // and non-TLS symbols. It is an error if the same symbol is defined
+ // as a TLS symbol in one file and as a non-TLS symbol in another file.
+ bool TlsMismatch = (Sym.Type == STT_TLS && S->Type != STT_TLS) ||
+ (Sym.Type != STT_TLS && S->Type == STT_TLS);
+ if (Sym.SymbolKind != Symbol::PlaceholderKind && TlsMismatch && !Sym.isLazy())
+ error("TLS attribute mismatch: " + toString(Sym) + "\n>>> defined in " +
+ toString(Sym.File) + "\n>>> defined in " + toString(S->File));
+ // Print out a log message if --trace-symbol was specified.
+ // This is for debugging.
+ if (S->Traced)
+ printTraceSymbol(S);
+void maybeWarnUnorderableSymbol(const Symbol *Sym);
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp b/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp
new file mode 100644
index 000000000000..b1a3f8bc70ae
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp
@@ -0,0 +1,3226 @@
+//===- SyntheticSections.cpp ----------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file contains linker-synthesized sections. Currently,
+// synthetic sections are created as either output sections or input sections,
+// but we are rewriting code so that all synthetic sections are created as
+// input sections.
+#include "SyntheticSections.h"
+#include "Bits.h"
+#include "Config.h"
+#include "InputFiles.h"
+#include "LinkerScript.h"
+#include "OutputSections.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "Target.h"
+#include "Writer.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+#include "lld/Common/Strings.h"
+#include "lld/Common/Threads.h"
+#include "lld/Common/Version.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/Compression.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/RandomNumberGenerator.h"
+#include "llvm/Support/SHA1.h"
+#include "llvm/Support/xxhash.h"
+#include <cstdlib>
+#include <thread>
+using namespace llvm;
+using namespace llvm::dwarf;
+using namespace llvm::ELF;
+using namespace llvm::object;
+using namespace llvm::support;
+using namespace lld;
+using namespace lld::elf;
+using llvm::support::endian::read32le;
+using llvm::support::endian::write32le;
+using llvm::support::endian::write64le;
+constexpr size_t MergeNoTailSection::NumShards;
+// Returns an LLD version string as bytes, including the terminating '\0'.
+static ArrayRef<uint8_t> getVersion() {
+ // Check LLD_VERSION first for ease of testing.
+ // You can get consistent output by using the environment variable.
+ // This is only for testing.
+ StringRef S = getenv("LLD_VERSION"); // NOTE(review): getenv() may return null; presumably StringRef treats that as empty - confirm.
+ if (S.empty())
+ S = Saver.save(Twine("Linker: ") + getLLDVersion());
+ // +1 to include the terminating '\0'.
+ return {(const uint8_t *)S.data(), S.size() + 1};
+// Creates a .comment section containing LLD version info.
+// With this feature, you can identify LLD-generated binaries easily
+// by "readelf --string-dump .comment <file>".
+// The returned object is a mergeable string section.
+MergeInputSection *elf::createCommentSection() {
+ return make<MergeInputSection>(SHF_MERGE | SHF_STRINGS, SHT_PROGBITS, 1,
+ getVersion(), ".comment");
+// .MIPS.abiflags section.
+template <class ELFT>
+MipsAbiFlagsSection<ELFT>::MipsAbiFlagsSection(Elf_Mips_ABIFlags Flags)
+ : SyntheticSection(SHF_ALLOC, SHT_MIPS_ABIFLAGS, 8, ".MIPS.abiflags"),
+ Flags(Flags) {
+ this->Entsize = sizeof(Elf_Mips_ABIFlags);
+template <class ELFT> void MipsAbiFlagsSection<ELFT>::writeTo(uint8_t *Buf) {
+ memcpy(Buf, &Flags, sizeof(Flags));
+template <class ELFT>
+MipsAbiFlagsSection<ELFT> *MipsAbiFlagsSection<ELFT>::create() {
+ Elf_Mips_ABIFlags Flags = {};
+ bool Create = false;
+ for (InputSectionBase *Sec : InputSections) {
+ if (Sec->Type != SHT_MIPS_ABIFLAGS)
+ continue;
+ Sec->Live = false;
+ Create = true;
+ std::string Filename = toString(Sec->File);
+ const size_t Size = Sec->data().size();
+ // Older versions of BFD (such as the default FreeBSD linker) concatenate
+ // .MIPS.abiflags instead of merging. To allow for this case (or potential
+ // zero padding) we ignore everything after the first Elf_Mips_ABIFlags.
+ if (Size < sizeof(Elf_Mips_ABIFlags)) {
+ error(Filename + ": invalid size of .MIPS.abiflags section: got " +
+ Twine(Size) + " instead of " + Twine(sizeof(Elf_Mips_ABIFlags)));
+ return nullptr;
+ }
+ auto *S = reinterpret_cast<const Elf_Mips_ABIFlags *>(Sec->data().data());
+ if (S->version != 0) {
+ error(Filename + ": unexpected .MIPS.abiflags version " +
+ Twine(S->version));
+ return nullptr;
+ }
+ // LLD checks ISA compatibility in calcMipsEFlags(). Here we just
+ // select the highest number of ISA/Rev/Ext.
+ Flags.isa_level = std::max(Flags.isa_level, S->isa_level);
+ Flags.isa_rev = std::max(Flags.isa_rev, S->isa_rev);
+ Flags.isa_ext = std::max(Flags.isa_ext, S->isa_ext);
+ Flags.gpr_size = std::max(Flags.gpr_size, S->gpr_size);
+ Flags.cpr1_size = std::max(Flags.cpr1_size, S->cpr1_size);
+ Flags.cpr2_size = std::max(Flags.cpr2_size, S->cpr2_size);
+ Flags.ases |= S->ases;
+ Flags.flags1 |= S->flags1;
+ Flags.flags2 |= S->flags2;
+ Flags.fp_abi = elf::getMipsFpAbiFlag(Flags.fp_abi, S->fp_abi, Filename);
+ };
+ if (Create)
+ return make<MipsAbiFlagsSection<ELFT>>(Flags);
+ return nullptr;
+// .MIPS.options section.
+template <class ELFT>
+MipsOptionsSection<ELFT>::MipsOptionsSection(Elf_Mips_RegInfo Reginfo)
+ : SyntheticSection(SHF_ALLOC, SHT_MIPS_OPTIONS, 8, ".MIPS.options"),
+ Reginfo(Reginfo) {
+ this->Entsize = sizeof(Elf_Mips_Options) + sizeof(Elf_Mips_RegInfo);
+template <class ELFT> void MipsOptionsSection<ELFT>::writeTo(uint8_t *Buf) {
+ auto *Options = reinterpret_cast<Elf_Mips_Options *>(Buf);
+ Options->kind = ODK_REGINFO;
+ Options->size = getSize();
+ if (!Config->Relocatable)
+ Reginfo.ri_gp_value = In.MipsGot->getGp();
+ memcpy(Buf + sizeof(Elf_Mips_Options), &Reginfo, sizeof(Reginfo));
+// Builds the synthetic .MIPS.options section from every SHT_MIPS_OPTIONS
+// input section: the ri_gprmask values of their ODK_REGINFO descriptors are
+// OR-ed together and each file's MipsGp0 is recorded. Returns nullptr for
+// non-64-bit targets or when there is no such input section.
+//
+// The descriptors come straight from input object files, so each one is
+// bounds-checked against the remaining section data before it is read or
+// skipped; a malformed section is reported with error() and abandoned.
+template <class ELFT>
+MipsOptionsSection<ELFT> *MipsOptionsSection<ELFT>::create() {
+  // N64 ABI only.
+  if (!ELFT::Is64Bits)
+    return nullptr;
+  std::vector<InputSectionBase *> Sections;
+  for (InputSectionBase *Sec : InputSections)
+    if (Sec->Type == SHT_MIPS_OPTIONS)
+      Sections.push_back(Sec);
+  if (Sections.empty())
+    return nullptr;
+  Elf_Mips_RegInfo Reginfo = {};
+  for (InputSectionBase *Sec : Sections) {
+    Sec->Live = false;
+    std::string Filename = toString(Sec->File);
+    ArrayRef<uint8_t> D = Sec->data();
+    while (!D.empty()) {
+      if (D.size() < sizeof(Elf_Mips_Options)) {
+        error(Filename + ": invalid size of .MIPS.options section");
+        break;
+      }
+      auto *Opt = reinterpret_cast<const Elf_Mips_Options *>(D.data());
+      if (Opt->kind == ODK_REGINFO) {
+        // The register info payload sits right after the descriptor header.
+        // Do not read it unless the section really is that long.
+        if (D.size() < sizeof(Elf_Mips_Options) + sizeof(Elf_Mips_RegInfo)) {
+          error(Filename + ": invalid size of .MIPS.options section");
+          break;
+        }
+        Reginfo.ri_gprmask |= Opt->getRegInfo().ri_gprmask;
+        Sec->getFile<ELFT>()->MipsGp0 = Opt->getRegInfo().ri_gp_value;
+        break;
+      }
+      if (!Opt->size)
+        fatal(Filename + ": zero option descriptor size");
+      // A descriptor that claims to be longer than what is left would make
+      // slice() step outside the section data.
+      if (Opt->size > D.size()) {
+        error(Filename + ": invalid size of .MIPS.options section");
+        break;
+      }
+      D = D.slice(Opt->size);
+    }
+  }
+  return make<MipsOptionsSection<ELFT>>(Reginfo);
+// MIPS .reginfo section.
+template <class ELFT>
+MipsReginfoSection<ELFT>::MipsReginfoSection(Elf_Mips_RegInfo Reginfo)
+ : SyntheticSection(SHF_ALLOC, SHT_MIPS_REGINFO, 4, ".reginfo"),
+ Reginfo(Reginfo) {
+ this->Entsize = sizeof(Elf_Mips_RegInfo);
+template <class ELFT> void MipsReginfoSection<ELFT>::writeTo(uint8_t *Buf) {
+ if (!Config->Relocatable)
+ Reginfo.ri_gp_value = In.MipsGot->getGp();
+ memcpy(Buf, &Reginfo, sizeof(Reginfo));
+template <class ELFT>
+MipsReginfoSection<ELFT> *MipsReginfoSection<ELFT>::create() {
+ // Section should be alive for O32 and N32 ABIs only.
+ if (ELFT::Is64Bits)
+ return nullptr;
+ std::vector<InputSectionBase *> Sections;
+ for (InputSectionBase *Sec : InputSections)
+ if (Sec->Type == SHT_MIPS_REGINFO)
+ Sections.push_back(Sec);
+ if (Sections.empty())
+ return nullptr;
+ Elf_Mips_RegInfo Reginfo = {};
+ for (InputSectionBase *Sec : Sections) {
+ Sec->Live = false;
+ if (Sec->data().size() != sizeof(Elf_Mips_RegInfo)) {
+ error(toString(Sec->File) + ": invalid size of .reginfo section");
+ return nullptr;
+ }
+ auto *R = reinterpret_cast<const Elf_Mips_RegInfo *>(Sec->data().data());
+ Reginfo.ri_gprmask |= R->ri_gprmask;
+ Sec->getFile<ELFT>()->MipsGp0 = R->ri_gp_value;
+ };
+ return make<MipsReginfoSection<ELFT>>(Reginfo);
+InputSection *elf::createInterpSection() {
+ // StringSaver guarantees that the returned string ends with '\0'.
+ StringRef S = Saver.save(Config->DynamicLinker);
+ ArrayRef<uint8_t> Contents = {(const uint8_t *)S.data(), S.size() + 1};
+ auto *Sec = make<InputSection>(nullptr, SHF_ALLOC, SHT_PROGBITS, 1, Contents,
+ ".interp");
+ Sec->Live = true;
+ return Sec;
+Defined *elf::addSyntheticLocal(StringRef Name, uint8_t Type, uint64_t Value,
+ uint64_t Size, InputSectionBase &Section) {
+ auto *S = make<Defined>(Section.File, Name, STB_LOCAL, STV_DEFAULT, Type,
+ Value, Size, &Section);
+ if (In.SymTab)
+ In.SymTab->addSymbol(S);
+ return S;
+static size_t getHashSize() {
+ switch (Config->BuildId) {
+ case BuildIdKind::Fast:
+ return 8;
+ case BuildIdKind::Md5:
+ case BuildIdKind::Uuid:
+ return 16;
+ case BuildIdKind::Sha1:
+ return 20;
+ case BuildIdKind::Hexstring:
+ return Config->BuildIdVector.size();
+ default:
+ llvm_unreachable("unknown BuildIdKind");
+ }
+ : SyntheticSection(SHF_ALLOC, SHT_NOTE, 4, ".note.gnu.build-id"),
+ HashSize(getHashSize()) {}
+// Writes the 16-byte note header and remembers where the payload starts
+// (HashBuf) so that writeBuildId() can fill it in later.
+void BuildIdSection::writeTo(uint8_t *Buf) {
+ write32(Buf, 4); // Name size
+ write32(Buf + 4, HashSize); // Content size
+ write32(Buf + 8, NT_GNU_BUILD_ID); // Type
+ memcpy(Buf + 12, "GNU", 4); // Name string
+ HashBuf = Buf + 16;
+// Splits one uint8 array into consecutive pieces of at most ChunkSize bytes.
+// The last piece holds the remainder; an empty input yields an empty vector.
+// The returned ArrayRefs refer to Arr's storage; nothing is copied.
+static std::vector<ArrayRef<uint8_t>> split(ArrayRef<uint8_t> Arr,
+ size_t ChunkSize) {
+ std::vector<ArrayRef<uint8_t>> Ret;
+ while (Arr.size() > ChunkSize) {
+ Ret.push_back(Arr.take_front(ChunkSize));
+ Arr = Arr.drop_front(ChunkSize);
+ }
+ if (!Arr.empty())
+ Ret.push_back(Arr);
+ return Ret;
+// Computes a hash value of Data using a given hash function.
+// In order to utilize multiple cores, we first split data into 1MB
+// chunks, compute a hash for each chunk, and then compute a hash value
+// of the hash values.
+// HashFn must write exactly HashSize bytes to Dest. The final digest is
+// written to HashBuf, which is set by writeTo().
+void BuildIdSection::computeHash(
+ llvm::ArrayRef<uint8_t> Data,
+ std::function<void(uint8_t *Dest, ArrayRef<uint8_t> Arr)> HashFn) {
+ std::vector<ArrayRef<uint8_t>> Chunks = split(Data, 1024 * 1024);
+ // One HashSize-byte slot per chunk; each parallel task writes its own slot.
+ std::vector<uint8_t> Hashes(Chunks.size() * HashSize);
+ // Compute hash values.
+ parallelForEachN(0, Chunks.size(), [&](size_t I) {
+ HashFn(Hashes.data() + I * HashSize, Chunks[I]);
+ });
+ // Write to the final output buffer.
+ HashFn(HashBuf, Hashes);
+// Constructs a writable, allocated SHT_NOBITS synthetic section with the
+// given name, size and alignment, and flags it as a BSS section.
+BssSection::BssSection(StringRef Name, uint64_t Size, uint32_t Alignment)
+ : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_NOBITS, Alignment, Name) {
+ this->Bss = true;
+ this->Size = Size;
+// Fills in the build-id payload at HashBuf according to Config->BuildId.
+// Buf is the data to hash for the hash-based kinds; the Uuid and Hexstring
+// kinds ignore it.
+void BuildIdSection::writeBuildId(ArrayRef<uint8_t> Buf) {
+ switch (Config->BuildId) {
+ case BuildIdKind::Fast:
+ computeHash(Buf, [](uint8_t *Dest, ArrayRef<uint8_t> Arr) {
+ write64le(Dest, xxHash64(Arr));
+ });
+ break;
+ case BuildIdKind::Md5:
+ computeHash(Buf, [](uint8_t *Dest, ArrayRef<uint8_t> Arr) {
+ memcpy(Dest, MD5::hash(Arr).data(), 16);
+ });
+ break;
+ case BuildIdKind::Sha1:
+ computeHash(Buf, [](uint8_t *Dest, ArrayRef<uint8_t> Arr) {
+ memcpy(Dest, SHA1::hash(Arr).data(), 20);
+ });
+ break;
+ case BuildIdKind::Uuid:
+ // Random bytes rather than a hash; a failure is reported as a link error.
+ if (auto EC = getRandomBytes(HashBuf, HashSize))
+ error("entropy source failure: " + EC.message());
+ break;
+ case BuildIdKind::Hexstring:
+ // Copy the user-supplied bytes verbatim.
+ memcpy(HashBuf, Config->BuildIdVector.data(), Config->BuildIdVector.size());
+ break;
+ default:
+ llvm_unreachable("unknown BuildIdKind");
+ }
+ : SyntheticSection(SHF_ALLOC, SHT_PROGBITS, 1, ".eh_frame") {}
+// Search for an existing CIE record or create a new one.
+// CIE records from input object files are uniquified by their contents
+// and where their relocations point to.
+// Rels are the relocations of the input section that contains Cie.
+// Returns the (possibly shared) CieRecord for this CIE.
+template <class ELFT, class RelTy>
+CieRecord *EhFrameSection::addCie(EhSectionPiece &Cie, ArrayRef<RelTy> Rels) {
+ Symbol *Personality = nullptr;
+ unsigned FirstRelI = Cie.FirstRelocation;
+ // (unsigned)-1 means the piece has no relocation.
+ if (FirstRelI != (unsigned)-1)
+ Personality =
+ &Cie.Sec->template getFile<ELFT>()->getRelocTargetSym(Rels[FirstRelI]);
+ // Search for an existing CIE by CIE contents/relocation target pair.
+ CieRecord *&Rec = CieMap[{Cie.data(), Personality}];
+ // If not found, create a new one.
+ if (!Rec) {
+ Rec = make<CieRecord>();
+ Rec->Cie = &Cie;
+ CieRecords.push_back(Rec);
+ }
+ return Rec;
+// There is one FDE per function. Returns true if a given FDE
+// points to a live function.
+// Returns false when the FDE has no relocation, or when its first relocation
+// does not target a Defined symbol that has a section.
+template <class ELFT, class RelTy>
+bool EhFrameSection::isFdeLive(EhSectionPiece &Fde, ArrayRef<RelTy> Rels) {
+ auto *Sec = cast<EhInputSection>(Fde.Sec);
+ unsigned FirstRelI = Fde.FirstRelocation;
+ // An FDE should point to some function because FDEs are to describe
+ // functions. That's however not always the case due to an issue of
+ // ld.gold with -r. ld.gold may discard only functions and leave their
+ // corresponding FDEs, which results in creating bad .eh_frame sections.
+ // To deal with that, we ignore such FDEs.
+ if (FirstRelI == (unsigned)-1)
+ return false;
+ const RelTy &Rel = Rels[FirstRelI];
+ Symbol &B = Sec->template getFile<ELFT>()->getRelocTargetSym(Rel);
+ // FDEs for garbage-collected or merged-by-ICF sections are dead.
+ // Note: the inner Sec below shadows the EhInputSection above; it is the
+ // section in which the target symbol is defined.
+ if (auto *D = dyn_cast<Defined>(&B))
+ if (SectionBase *Sec = D->Section)
+ return Sec->Live;
+ return false;
+// .eh_frame is a sequence of CIE or FDE records. In general, there
+// is one CIE record per input object file which is followed by
+// a list of FDEs. This function searches an existing CIE or create a new
+// one and associates FDEs to the CIE.
+// Dead FDEs (see isFdeLive) are dropped. An FDE that refers to an offset
+// at which no CIE was seen in this section is a fatal error.
+template <class ELFT, class RelTy>
+void EhFrameSection::addSectionAux(EhInputSection *Sec, ArrayRef<RelTy> Rels) {
+ // OffsetToCie is per input section; reset it before processing Sec.
+ OffsetToCie.clear();
+ for (EhSectionPiece &Piece : Sec->Pieces) {
+ // The empty record is the end marker.
+ if (Piece.Size == 4)
+ return;
+ size_t Offset = Piece.InputOff;
+ // The word at offset 4 of a record is zero for a CIE. For an FDE it is
+ // the distance from that word back to the CIE it belongs to.
+ uint32_t ID = read32(Piece.data().data() + 4);
+ if (ID == 0) {
+ OffsetToCie[Offset] = addCie<ELFT>(Piece, Rels);
+ continue;
+ }
+ uint32_t CieOffset = Offset + 4 - ID;
+ CieRecord *Rec = OffsetToCie[CieOffset];
+ if (!Rec)
+ fatal(toString(Sec) + ": invalid CIE reference");
+ if (!isFdeLive<ELFT>(Piece, Rels))
+ continue;
+ Rec->Fdes.push_back(&Piece);
+ NumFdes++;
+ }
+// Registers an input .eh_frame section with this output section: sets its
+// parent, raises our alignment if needed, inherits its dependent sections,
+// and then sorts its pieces into CIE/FDE records using the relocation
+// flavour (RELA or REL) that the section carries.
+template <class ELFT> void EhFrameSection::addSection(InputSectionBase *C) {
+ auto *Sec = cast<EhInputSection>(C);
+ Sec->Parent = this;
+ Alignment = std::max(Alignment, Sec->Alignment);
+ Sections.push_back(Sec);
+ for (auto *DS : Sec->DependentSections)
+ DependentSections.push_back(DS);
+ if (Sec->Pieces.empty())
+ return;
+ if (Sec->AreRelocsRela)
+ addSectionAux<ELFT>(Sec, Sec->template relas<ELFT>());
+ else
+ addSectionAux<ELFT>(Sec, Sec->template rels<ELFT>());
+// Copies one CIE or FDE record D to Buf, zero-pads it up to a multiple of
+// Config->Wordsize, and rewrites its leading length field to match the
+// padded size.
+static void writeCieFde(uint8_t *Buf, ArrayRef<uint8_t> D) {
+ memcpy(Buf, D.data(), D.size());
+ size_t Aligned = alignTo(D.size(), Config->Wordsize);
+ // Zero-clear trailing padding if it exists.
+ memset(Buf + D.size(), 0, Aligned - D.size());
+ // Fix the size field. -4 since size does not include the size field itself.
+ write32(Buf, Aligned - 4);
+// Assigns an output offset to every CIE and to each of its FDEs (each CIE
+// is immediately followed by its FDEs, every record padded to
+// Config->Wordsize) and computes the total section size.
+void EhFrameSection::finalizeContents() {
+ assert(!this->Size); // Not finalized.
+ size_t Off = 0;
+ for (CieRecord *Rec : CieRecords) {
+ Rec->Cie->OutputOff = Off;
+ Off += alignTo(Rec->Cie->Size, Config->Wordsize);
+ for (EhSectionPiece *Fde : Rec->Fdes) {
+ Fde->OutputOff = Off;
+ Off += alignTo(Fde->Size, Config->Wordsize);
+ }
+ }
+ // The LSB standard does not allow a .eh_frame section with zero
+ // Call Frame Information records. glibc unwind-dw2-fde.c
+ // classify_object_over_fdes expects there is a CIE record length 0 as a
+ // terminator. Thus we add one unconditionally.
+ Off += 4;
+ this->Size = Off;
+// Returns data for .eh_frame_hdr. .eh_frame_hdr is a binary search table
+// to get an FDE from an address to which FDE is applied. This function
+// returns a list of such pairs.
+// Both members of each pair are stored relative to the VA of .eh_frame_hdr.
+// A PC whose distance from that VA does not fit in 32 bits is a fatal error.
+std::vector<EhFrameSection::FdeData> EhFrameSection::getFdeData() const {
+ // Start of this section's bytes in the output buffer.
+ uint8_t *Buf = getParent()->Loc + OutSecOff;
+ std::vector<FdeData> Ret;
+ uint64_t VA = In.EhFrameHdr->getVA();
+ for (CieRecord *Rec : CieRecords) {
+ uint8_t Enc = getFdeEncoding(Rec->Cie);
+ for (EhSectionPiece *Fde : Rec->Fdes) {
+ uint64_t Pc = getFdePc(Buf, Fde->OutputOff, Enc);
+ uint64_t FdeVA = getParent()->Addr + Fde->OutputOff;
+ if (!isInt<32>(Pc - VA))
+ fatal(toString(Fde->Sec) + ": PC offset is too large: 0x" +
+ Twine::utohexstr(Pc - VA));
+ Ret.push_back({uint32_t(Pc - VA), uint32_t(FdeVA - VA)});
+ }
+ }
+ // Sort the FDE list by their PC and uniquify. Usually there is only
+ // one FDE for a PC (i.e. function), but if ICF merges two functions
+ // into one, there can be more than one FDEs pointing to the address.
+ // The sort is stable, so the first FDE seen for a PC is the one kept.
+ auto Less = [](const FdeData &A, const FdeData &B) {
+ return A.PcRel < B.PcRel;
+ };
+ std::stable_sort(Ret.begin(), Ret.end(), Less);
+ auto Eq = [](const FdeData &A, const FdeData &B) {
+ return A.PcRel == B.PcRel;
+ };
+ Ret.erase(std::unique(Ret.begin(), Ret.end(), Eq), Ret.end());
+ return Ret;
+// Reads an address from Buf. Despite its name, Size is a DW_EH_PE_* value
+// format code (the caller passes the low 4 bits of the FDE encoding), not a
+// byte count. Signed formats are sign-extended to 64 bits. An unknown
+// format is a fatal error.
+static uint64_t readFdeAddr(uint8_t *Buf, int Size) {
+ switch (Size) {
+ case DW_EH_PE_udata2:
+ return read16(Buf);
+ case DW_EH_PE_sdata2:
+ return (int16_t)read16(Buf);
+ case DW_EH_PE_udata4:
+ return read32(Buf);
+ case DW_EH_PE_sdata4:
+ return (int32_t)read32(Buf);
+ case DW_EH_PE_udata8:
+ case DW_EH_PE_sdata8:
+ return read64(Buf);
+ case DW_EH_PE_absptr:
+ return readUint(Buf);
+ }
+ fatal("unknown FDE size encoding");
+// Returns the VA to which a given FDE (on a mmap'ed buffer) is applied to.
+// We need it to create .eh_frame_hdr section.
+// Buf points to the start of this section's output bytes, FdeOff is the
+// FDE's offset from Buf, and Enc is the pointer encoding of the FDE's CIE.
+// Only absolute and pc-relative encodings are handled; anything else is a
+// fatal error.
+uint64_t EhFrameSection::getFdePc(uint8_t *Buf, size_t FdeOff,
+ uint8_t Enc) const {
+ // The starting address to which this FDE applies is
+ // stored at FDE + 8 byte.
+ size_t Off = FdeOff + 8;
+ // The low 4 bits of Enc select the value format.
+ uint64_t Addr = readFdeAddr(Buf + Off, Enc & 0xf);
+ // Bits 0x70 of Enc select what the value is relative to.
+ if ((Enc & 0x70) == DW_EH_PE_absptr)
+ return Addr;
+ if ((Enc & 0x70) == DW_EH_PE_pcrel)
+ return Addr + getParent()->Addr + Off;
+ fatal("unknown FDE size relative encoding");
+// Writes every CIE and its FDEs at the offsets assigned by
+// finalizeContents(), patches each FDE's CIE pointer, and then applies the
+// relocations of all input .eh_frame sections.
+void EhFrameSection::writeTo(uint8_t *Buf) {
+ // Write CIE and FDE records.
+ for (CieRecord *Rec : CieRecords) {
+ size_t CieOffset = Rec->Cie->OutputOff;
+ writeCieFde(Buf + CieOffset, Rec->Cie->data());
+ for (EhSectionPiece *Fde : Rec->Fdes) {
+ size_t Off = Fde->OutputOff;
+ writeCieFde(Buf + Off, Fde->data());
+ // FDE's second word should have the offset to an associated CIE.
+ // Write it.
+ write32(Buf + Off + 4, Off + 4 - CieOffset);
+ }
+ }
+ // Apply relocations. .eh_frame section contents are not contiguous
+ // in the output buffer, but relocateAlloc() still works because
+ // getOffset() takes care of discontiguous section pieces.
+ for (EhInputSection *S : Sections)
+ S->relocateAlloc(Buf, nullptr);
+ Target->GotEntrySize, ".got") {
+ // PPC64 saves the ElfSym::GlobalOffsetTable .TOC. as the first entry in the
+ // .got. If there are no references to .TOC. in the symbol table,
+ // ElfSym::GlobalOffsetTable will not be defined and we won't need to save
+ // .TOC. in the .got. When it is defined, we increase NumEntries by the number
+ // of entries used to emit ElfSym::GlobalOffsetTable.
+ if (ElfSym::GlobalOffsetTable && !Target->GotBaseSymInGotPlt)
+ NumEntries += Target->GotHeaderEntriesNum;
+// Reserves the next GOT slot for Sym and records its index in Sym.GotIndex.
+void GotSection::addEntry(Symbol &Sym) {
+ Sym.GotIndex = NumEntries;
+ ++NumEntries;
+// Reserves a pair of GOT slots for Sym's global-dynamic TLS entry.
+// Returns false if Sym already has one (GlobalDynIndex != -1U), true if the
+// slots were newly reserved.
+bool GotSection::addDynTlsEntry(Symbol &Sym) {
+ if (Sym.GlobalDynIndex != -1U)
+ return false;
+ Sym.GlobalDynIndex = NumEntries;
+ // Global Dynamic TLS entries take two GOT slots.
+ NumEntries += 2;
+ return true;
+// Reserves TLS entries for a TLS module ID and a TLS block offset.
+// In total it takes two GOT slots.
+// Returns false if the slots were already reserved (TlsIndexOff is set),
+// true if this call reserved them. TlsIndexOff is a byte offset.
+bool GotSection::addTlsIndex() {
+ if (TlsIndexOff != uint32_t(-1))
+ return false;
+ TlsIndexOff = NumEntries * Config->Wordsize;
+ NumEntries += 2;
+ return true;
+// Returns the virtual address of B's global-dynamic TLS entry.
+uint64_t GotSection::getGlobalDynAddr(const Symbol &B) const {
+ return this->getVA() + B.GlobalDynIndex * Config->Wordsize;
+// Returns the byte offset of B's global-dynamic TLS entry from the start of
+// this section.
+uint64_t GotSection::getGlobalDynOffset(const Symbol &B) const {
+ return B.GlobalDynIndex * Config->Wordsize;
+// Fixes the section size: one Config->Wordsize slot per reserved entry.
+void GotSection::finalizeContents() {
+ Size = NumEntries * Config->Wordsize;
+// Returns true only if the section has no entries and nothing else forces
+// it to be emitted.
+bool GotSection::empty() const {
+ // We need to emit a GOT even if it's empty if there's a relocation that is
+ // relative to GOT(such as GOTOFFREL) or there's a symbol that points to a GOT
+ // (i.e. _GLOBAL_OFFSET_TABLE_) that the target defines relative to the .got.
+ return NumEntries == 0 && !HasGotOffRel &&
+ !(ElfSym::GlobalOffsetTable && !Target->GotBaseSymInGotPlt);
+// Writes the target-specific GOT header and then applies this section's
+// relocations.
+void GotSection::writeTo(uint8_t *Buf) {
+ // Buf points to the start of this section's buffer,
+ // whereas InputSectionBase::relocateAlloc() expects its argument
+ // to point to the start of the output section.
+ Target->writeGotHeader(Buf);
+ relocateAlloc(Buf - OutSecOff, Buf - OutSecOff + Size);
+// Rounds Addr to the nearest multiple of 0x10000: adds 0x8000 and then
+// clears the low 16 bits.
+static uint64_t getMipsPageAddr(uint64_t Addr) {
+ return (Addr + 0x8000) & ~0xffff;
+// Returns the number of GOT page entries to reserve for an output section
+// of Size bytes. build() stores this as PageBlock::Count.
+// NOTE(review): presumably the extra "+ 1" covers a section that straddles a
+// page boundary - confirm against the MIPS ABI.
+static uint64_t getMipsPageCount(uint64_t Size) {
+ return (Size + 0xfffe) / 0xffff + 1;
+ ".got") {}
+// Records that File needs a GOT entry for Sym (+ Addend) and files the
+// request into one of the per-file sets depending on Expr and the symbol:
+// page entries, TLS, reloc-only, global, 32-bit-indexed local, or
+// 16-bit-indexed local. Indexes are assigned later in build(); the zeros
+// inserted here are placeholders.
+void MipsGotSection::addEntry(InputFile &File, Symbol &Sym, int64_t Addend,
+ RelExpr Expr) {
+ FileGot &G = getGot(File);
+ if (Expr == R_MIPS_GOT_LOCAL_PAGE) {
+ if (const OutputSection *OS = Sym.getOutputSection())
+ G.PagesMap.insert({OS, {}});
+ else
+ // No output section: store the page address itself as a local entry.
+ G.Local16.insert({{nullptr, getMipsPageAddr(Sym.getVA(Addend))}, 0});
+ } else if (Sym.isTls())
+ G.Tls.insert({&Sym, 0});
+ else if (Sym.IsPreemptible && Expr == R_ABS)
+ G.Relocs.insert({&Sym, 0});
+ else if (Sym.IsPreemptible)
+ G.Global.insert({&Sym, 0});
+ else if (Expr == R_MIPS_GOT_OFF32)
+ G.Local32.insert({{&Sym, Addend}, 0});
+ else
+ G.Local16.insert({{&Sym, Addend}, 0});
+// Records that File needs a dynamic TLS entry for Sym.
+void MipsGotSection::addDynTlsEntry(InputFile &File, Symbol &Sym) {
+ getGot(File).DynTlsSymbols.insert({&Sym, 0});
+// Records that File needs the TLS index entry. It is stored in
+// DynTlsSymbols under the nullptr key.
+void MipsGotSection::addTlsIndex(InputFile &File) {
+ getGot(File).DynTlsSymbols.insert({nullptr, 0});
+// Returns the total number of slots in this GOT. Each DynTlsSymbols entry
+// counts as two slots; Local32 is not counted here.
+size_t MipsGotSection::FileGot::getEntriesNum() const {
+ return getPageEntriesNum() + Local16.size() + Global.size() + Relocs.size() +
+ Tls.size() + DynTlsSymbols.size() * 2;
+// Returns the sum of PageBlock::Count over every output section in PagesMap.
+size_t MipsGotSection::FileGot::getPageEntriesNum() const {
+ size_t Num = 0;
+ for (const std::pair<const OutputSection *, FileGot::PageBlock> &P : PagesMap)
+ Num += P.second.Count;
+ return Num;
+// Returns the number of slots that have to be reachable through a 16-bit
+// index. tryMergeGots() uses it to check the GOT size limit.
+size_t MipsGotSection::FileGot::getIndexedEntriesNum() const {
+ size_t Count = getPageEntriesNum() + Local16.size() + Global.size();
+ // If there are relocation-only entries in the GOT, TLS entries
+ // are allocated after them. TLS entries should be addressable
+ // by 16-bit index so count both reloc-only and TLS entries.
+ if (!Tls.empty() || !DynTlsSymbols.empty())
+ Count += Relocs.size() + Tls.size() + DynTlsSymbols.size() * 2;
+ return Count;
+// Returns the FileGot that belongs to F. On first use for a file it appends
+// a new one to Gots and stores its index in F.MipsGotIndex.
+MipsGotSection::FileGot &MipsGotSection::getGot(InputFile &F) {
+ if (!F.MipsGotIndex.hasValue()) {
+ Gots.emplace_back();
+ Gots.back().File = &F;
+ F.MipsGotIndex = Gots.size() - 1;
+ }
+ return Gots[*F.MipsGotIndex];
+// Returns the byte offset of the page entry that covers Sym + Addend in the
+// GOT used by file F. F must already have a GOT (MipsGotIndex is
+// dereferenced without a check).
+uint64_t MipsGotSection::getPageEntryOffset(const InputFile *F,
+ const Symbol &Sym,
+ int64_t Addend) const {
+ const FileGot &G = Gots[*F->MipsGotIndex];
+ uint64_t Index = 0;
+ if (const OutputSection *OutSec = Sym.getOutputSection()) {
+ // Index of the symbol's page within its output section's block of page
+ // entries.
+ uint64_t SecAddr = getMipsPageAddr(OutSec->Addr);
+ uint64_t SymAddr = getMipsPageAddr(Sym.getVA(Addend));
+ Index = G.PagesMap.lookup(OutSec).FirstIndex + (SymAddr - SecAddr) / 0xffff;
+ } else {
+ // Without an output section, addEntry() stored the page address as a
+ // Local16 entry keyed by {nullptr, page address}.
+ Index = G.Local16.lookup({nullptr, getMipsPageAddr(Sym.getVA(Addend))});
+ }
+ return Index * Config->Wordsize;
+// Returns the byte offset of S's entry in the GOT used by file F. The set
+// that is searched (Tls, Global or Local16) is chosen from the symbol's
+// properties. Addend is only part of the key for Local16 lookups.
+uint64_t MipsGotSection::getSymEntryOffset(const InputFile *F, const Symbol &S,
+ int64_t Addend) const {
+ const FileGot &G = Gots[*F->MipsGotIndex];
+ // The containers are keyed by non-const Symbol *; the symbol is not
+ // modified here.
+ Symbol *Sym = const_cast<Symbol *>(&S);
+ if (Sym->isTls())
+ return G.Tls.lookup(Sym) * Config->Wordsize;
+ if (Sym->IsPreemptible)
+ return G.Global.lookup(Sym) * Config->Wordsize;
+ return G.Local16.lookup({Sym, Addend}) * Config->Wordsize;
+// Returns the byte offset of the TLS index entry (the nullptr key in
+// DynTlsSymbols) in the GOT used by file F.
+uint64_t MipsGotSection::getTlsIndexOffset(const InputFile *F) const {
+ const FileGot &G = Gots[*F->MipsGotIndex];
+ return G.DynTlsSymbols.lookup(nullptr) * Config->Wordsize;
+// Returns the byte offset of S's dynamic TLS entry in the GOT used by
+// file F.
+uint64_t MipsGotSection::getGlobalDynOffset(const InputFile *F,
+ const Symbol &S) const {
+ const FileGot &G = Gots[*F->MipsGotIndex];
+ Symbol *Sym = const_cast<Symbol *>(&S);
+ return G.DynTlsSymbols.lookup(Sym) * Config->Wordsize;
+// Returns the first symbol of the primary GOT's Global set or, if that set
+// is empty, of its Relocs set. Returns nullptr if there is no GOT or both
+// sets are empty.
+const Symbol *MipsGotSection::getFirstGlobalEntry() const {
+ if (Gots.empty())
+ return nullptr;
+ const FileGot &PrimGot = Gots.front();
+ if (!PrimGot.Global.empty())
+ return PrimGot.Global.front().first;
+ if (!PrimGot.Relocs.empty())
+ return PrimGot.Relocs.front().first;
+ return nullptr;
+// Returns the number of local slots in the primary GOT: the header entries
+// plus its page entries and Local16 entries.
+unsigned MipsGotSection::getLocalEntriesNum() const {
+ if (Gots.empty())
+ return HeaderEntriesNum;
+ return HeaderEntriesNum + Gots.front().getPageEntriesNum() +
+ Gots.front().Local16.size();
+// Tries to merge Src into Dst. The union is built in a copy, so Dst is left
+// untouched when the merged GOT would exceed Config->MipsGotSize bytes.
+// IsPrimary adds the header entries to the size check. Returns true and
+// updates Dst on success, false otherwise. Src is never modified.
+bool MipsGotSection::tryMergeGots(FileGot &Dst, FileGot &Src, bool IsPrimary) {
+ FileGot Tmp = Dst;
+ set_union(Tmp.PagesMap, Src.PagesMap);
+ set_union(Tmp.Local16, Src.Local16);
+ set_union(Tmp.Global, Src.Global);
+ set_union(Tmp.Relocs, Src.Relocs);
+ set_union(Tmp.Tls, Src.Tls);
+ set_union(Tmp.DynTlsSymbols, Src.DynTlsSymbols);
+ size_t Count = IsPrimary ? HeaderEntriesNum : 0;
+ Count += Tmp.getIndexedEntriesNum();
+ if (Count * Config->Wordsize > Config->MipsGotSize)
+ return false;
+ std::swap(Tmp, Dst);
+ return true;
+// Finalizing only recomputes the section size.
+void MipsGotSection::finalizeContents() { updateAllocSize(); }
+// Recomputes Size as the header plus every GOT's entries, in bytes.
+// Always returns false.
+bool MipsGotSection::updateAllocSize() {
+ Size = HeaderEntriesNum * Config->Wordsize;
+ for (const FileGot &G : Gots)
+ Size += G.getEntriesNum() * Config->Wordsize;
+ return false;
+// Turns the per-file GOT requests collected by the add*() methods into the
+// final GOT layout. The steps are order-dependent:
+// 1. normalize each per-file GOT (demote non-preemptible globals, drop
+// redundant reloc-only entries, fold Local32 into Local16);
+// 2. gather all global and reloc-only symbols into the future primary GOT;
+// 3. compute the page-entry count for each referenced output section;
+// 4. merge per-file GOTs into as few GOTs as the size limit allows;
+// 5. assign an index to every entry;
+// 6. create the dynamic relocations the entries need.
+// Does nothing when no file requested a GOT entry.
+template <class ELFT> void MipsGotSection::build() {
+ if (Gots.empty())
+ return;
+ // MergedGots[0] becomes the primary GOT.
+ std::vector<FileGot> MergedGots(1);
+ // For each GOT move non-preemptible symbols from the `Global`
+ // to `Local16` list. Preemptible symbol might become non-preemptible
+ // one if, for example, it gets a related copy relocation.
+ for (FileGot &Got : Gots) {
+ for (auto &P: Got.Global)
+ if (!P.first->IsPreemptible)
+ Got.Local16.insert({{P.first, 0}, 0});
+ Got.Global.remove_if([&](const std::pair<Symbol *, size_t> &P) {
+ return !P.first->IsPreemptible;
+ });
+ }
+ // For each GOT remove "reloc-only" entry if there is "global"
+ // entry for the same symbol. And add local entries which indexed
+ // using 32-bit value at the end of 16-bit entries.
+ for (FileGot &Got : Gots) {
+ Got.Relocs.remove_if([&](const std::pair<Symbol *, size_t> &P) {
+ return Got.Global.count(P.first);
+ });
+ set_union(Got.Local16, Got.Local32);
+ Got.Local32.clear();
+ }
+ // Evaluate number of "reloc-only" entries in the resulting GOT.
+ // To do that put all unique "reloc-only" and "global" entries
+ // from all GOTs to the future primary GOT.
+ FileGot *PrimGot = &MergedGots.front();
+ for (FileGot &Got : Gots) {
+ set_union(PrimGot->Relocs, Got.Global);
+ set_union(PrimGot->Relocs, Got.Relocs);
+ Got.Relocs.clear();
+ }
+ // Evaluate number of "page" entries in each GOT.
+ for (FileGot &Got : Gots) {
+ for (std::pair<const OutputSection *, FileGot::PageBlock> &P :
+ Got.PagesMap) {
+ const OutputSection *OS = P.first;
+ // Compute the section size from its input sections, honouring each
+ // one's alignment.
+ uint64_t SecSize = 0;
+ for (BaseCommand *Cmd : OS->SectionCommands) {
+ if (auto *ISD = dyn_cast<InputSectionDescription>(Cmd))
+ for (InputSection *IS : ISD->Sections) {
+ uint64_t Off = alignTo(SecSize, IS->Alignment);
+ SecSize = Off + IS->getSize();
+ }
+ }
+ P.second.Count = getMipsPageCount(SecSize);
+ }
+ }
+ // Merge GOTs. Try to join as much as possible GOTs but do not exceed
+ // maximum GOT size. At first, try to fill the primary GOT because
+ // the primary GOT can be accessed in the most effective way. If it
+ // is not possible, try to fill the last GOT in the list, and finally
+ // create a new GOT if both attempts failed.
+ for (FileGot &SrcGot : Gots) {
+ InputFile *File = SrcGot.File;
+ if (tryMergeGots(MergedGots.front(), SrcGot, true)) {
+ File->MipsGotIndex = 0;
+ } else {
+ // If this is the first time we failed to merge with the primary GOT,
+ // MergedGots.back() will also be the primary GOT. We must make sure not
+ // to try to merge again with IsPrimary=false, as otherwise, if the
+ // inputs are just right, we could allow the primary GOT to become 1 or 2
+ // words too big due to ignoring the header size.
+ if (MergedGots.size() == 1 ||
+ !tryMergeGots(MergedGots.back(), SrcGot, false)) {
+ MergedGots.emplace_back();
+ std::swap(MergedGots.back(), SrcGot);
+ }
+ File->MipsGotIndex = MergedGots.size() - 1;
+ }
+ }
+ std::swap(Gots, MergedGots);
+ // Reduce number of "reloc-only" entries in the primary GOT
+ // by subtracting "global" entries exist in the primary GOT.
+ // PrimGot must be re-pointed because Gots now owns the merged vector.
+ PrimGot = &Gots.front();
+ PrimGot->Relocs.remove_if([&](const std::pair<Symbol *, size_t> &P) {
+ return PrimGot->Global.count(P.first);
+ });
+ // Calculate indexes for each GOT entry.
+ size_t Index = HeaderEntriesNum;
+ for (FileGot &Got : Gots) {
+ Got.StartIndex = &Got == PrimGot ? 0 : Index;
+ for (std::pair<const OutputSection *, FileGot::PageBlock> &P :
+ Got.PagesMap) {
+ // For each output section referenced by GOT page relocations calculate
+ // and save into PagesMap an upper bound of MIPS GOT entries required
+ // to store page addresses of local symbols. We assume the worst case -
+ // each 64kb page of the output section has at least one GOT relocation
+ // against it. And take in account the case when the section intersects
+ // page boundaries.
+ P.second.FirstIndex = Index;
+ Index += P.second.Count;
+ }
+ for (auto &P: Got.Local16)
+ P.second = Index++;
+ for (auto &P: Got.Global)
+ P.second = Index++;
+ for (auto &P: Got.Relocs)
+ P.second = Index++;
+ for (auto &P: Got.Tls)
+ P.second = Index++;
+ // Each dynamic TLS entry takes two consecutive slots.
+ for (auto &P: Got.DynTlsSymbols) {
+ P.second = Index;
+ Index += 2;
+ }
+ }
+ // Update Symbol::GotIndex field to use this
+ // value later in the `sortMipsSymbols` function.
+ for (auto &P : PrimGot->Global)
+ P.first->GotIndex = P.second;
+ for (auto &P : PrimGot->Relocs)
+ P.first->GotIndex = P.second;
+ // Create dynamic relocations.
+ for (FileGot &Got : Gots) {
+ // Create dynamic relocations for TLS entries.
+ for (std::pair<Symbol *, size_t> &P : Got.Tls) {
+ Symbol *S = P.first;
+ uint64_t Offset = P.second * Config->Wordsize;
+ if (S->IsPreemptible)
+ In.RelaDyn->addReloc(Target->TlsGotRel, this, Offset, S);
+ }
+ for (std::pair<Symbol *, size_t> &P : Got.DynTlsSymbols) {
+ Symbol *S = P.first;
+ uint64_t Offset = P.second * Config->Wordsize;
+ // The nullptr key is the TLS index entry added by addTlsIndex().
+ if (S == nullptr) {
+ if (!Config->Pic)
+ continue;
+ In.RelaDyn->addReloc(Target->TlsModuleIndexRel, this, Offset, S);
+ } else {
+ // When building a shared library we still need a dynamic relocation
+ // for the module index. Therefore only checking for
+ // S->IsPreemptible is not sufficient (this happens e.g. for
+ // thread-locals that have been marked as local through a linker script)
+ if (!S->IsPreemptible && !Config->Pic)
+ continue;
+ In.RelaDyn->addReloc(Target->TlsModuleIndexRel, this, Offset, S);
+ // However, we can skip writing the TLS offset reloc for non-preemptible
+ // symbols since it is known even in shared libraries
+ if (!S->IsPreemptible)
+ continue;
+ // The TLS offset lives in the second slot of the pair.
+ Offset += Config->Wordsize;
+ In.RelaDyn->addReloc(Target->TlsOffsetRel, this, Offset, S);
+ }
+ }
+ // Do not create dynamic relocations for non-TLS
+ // entries in the primary GOT.
+ if (&Got == PrimGot)
+ continue;
+ // Dynamic relocations for "global" entries.
+ for (const std::pair<Symbol *, size_t> &P : Got.Global) {
+ uint64_t Offset = P.second * Config->Wordsize;
+ In.RelaDyn->addReloc(Target->RelativeRel, this, Offset, P.first);
+ }
+ if (!Config->Pic)
+ continue;
+ // Dynamic relocations for "local" entries in case of PIC.
+ for (const std::pair<const OutputSection *, FileGot::PageBlock> &L :
+ Got.PagesMap) {
+ size_t PageCount = L.second.Count;
+ for (size_t PI = 0; PI < PageCount; ++PI) {
+ uint64_t Offset = (L.second.FirstIndex + PI) * Config->Wordsize;
+ In.RelaDyn->addReloc({Target->RelativeRel, this, Offset, L.first,
+ int64_t(PI * 0x10000)});
+ }
+ }
+ for (const std::pair<GotEntry, size_t> &P : Got.Local16) {
+ uint64_t Offset = P.second * Config->Wordsize;
+ In.RelaDyn->addReloc({Target->RelativeRel, this, Offset, true,
+ P.first.first, P.first.second});
+ }
+ }
+// The section is reported as empty only for relocatable output.
+bool MipsGotSection::empty() const {
+ // We add the .got section to the result for dynamic MIPS target because
+ // its address and properties are mentioned in the .dynamic section.
+ return Config->Relocatable;
+// Returns the _gp value that code in file F should use. F may be null.
+uint64_t MipsGotSection::getGp(const InputFile *F) const {
+ // For files without related GOT or files refer a primary GOT
+ // returns "common" _gp value. For secondary GOTs calculate
+ // individual _gp values.
+ if (!F || !F->MipsGotIndex.hasValue() || *F->MipsGotIndex == 0)
+ return ElfSym::MipsGp->getVA(0);
+ // NOTE(review): 0x7ff0 is presumably the conventional MIPS _gp bias from
+ // the start of the GOT - confirm against the MIPS ABI.
+ return getVA() + Gots[*F->MipsGotIndex].StartIndex * Config->Wordsize +
+ 0x7ff0;
+// Writes the statically known GOT contents to Buf. Slots that are not
+// written here keep whatever the buffer already holds.
+void MipsGotSection::writeTo(uint8_t *Buf) {
+ // Set the MSB of the second GOT slot. This is not required by any
+ // MIPS ABI documentation, though.
+ //
+ // There is a comment in glibc saying that "The MSB of got[1] of a
+ // gnu object is set to identify gnu objects," and in GNU gold it
+ // says "the second entry will be used by some runtime loaders".
+ // But how this field is being used is unclear.
+ //
+ // We are not really willing to mimic other linkers behaviors
+ // without understanding why they do that, but because all files
+ // generated by GNU tools have this special GOT value, and because
+ // we've been doing this for years, it is probably a safe bet to
+ // keep doing this for now. We really need to revisit this to see
+ // if we had to do this.
+ writeUint(Buf + Config->Wordsize, (uint64_t)1 << (Config->Wordsize * 8 - 1));
+ for (const FileGot &G : Gots) {
+ // Writes slot I. With a symbol the value is S->getVA(A), with the low bit
+ // set for microMIPS symbols; without a symbol the value is A itself.
+ auto Write = [&](size_t I, const Symbol *S, int64_t A) {
+ uint64_t VA = A;
+ if (S) {
+ VA = S->getVA(A);
+ if (S->StOther & STO_MIPS_MICROMIPS)
+ VA |= 1;
+ }
+ writeUint(Buf + I * Config->Wordsize, VA);
+ };
+ // Write 'page address' entries to the local part of the GOT.
+ for (const std::pair<const OutputSection *, FileGot::PageBlock> &L :
+ G.PagesMap) {
+ size_t PageCount = L.second.Count;
+ uint64_t FirstPageAddr = getMipsPageAddr(L.first->Addr);
+ for (size_t PI = 0; PI < PageCount; ++PI)
+ Write(L.second.FirstIndex + PI, nullptr, FirstPageAddr + PI * 0x10000);
+ }
+ // Local, global, TLS, reloc-only entries.
+ // If TLS entry has a corresponding dynamic relocations, leave it
+ // initialized by zero. Write down adjusted TLS symbol's values otherwise.
+ // To calculate the adjustments use offsets for thread-local storage.
+ // https://www.linux-mips.org/wiki/NPTL
+ for (const std::pair<GotEntry, size_t> &P : G.Local16)
+ Write(P.second, P.first.first, P.first.second);
+ // Write VA to the primary GOT only. For secondary GOTs that
+ // will be done by REL32 dynamic relocations.
+ if (&G == &Gots.front())
+ for (const std::pair<const Symbol *, size_t> &P : G.Global)
+ Write(P.second, P.first, 0);
+ for (const std::pair<Symbol *, size_t> &P : G.Relocs)
+ Write(P.second, P.first, 0);
+ for (const std::pair<Symbol *, size_t> &P : G.Tls)
+ Write(P.second, P.first, P.first->IsPreemptible ? 0 : -0x7000);
+ for (const std::pair<Symbol *, size_t> &P : G.DynTlsSymbols) {
+ // The nullptr key is the TLS index entry; in non-PIC output it gets the
+ // value 1.
+ if (P.first == nullptr && !Config->Pic)
+ Write(P.second, nullptr, 1);
+ else if (P.first && !P.first->IsPreemptible) {
+ // If we are emitting PIC code with relocations we mustn't write
+ // anything to the GOT here. When using Elf_Rel relocations the value
+ // one will be treated as an addend and will cause crashes at runtime
+ if (!Config->Pic)
+ Write(P.second, nullptr, 1);
+ Write(P.second + 1, P.first, -0x8000);
+ }
+ }
+ }
+// On PowerPC the .plt section is used to hold the table of function addresses
+// instead of the .got.plt, and the type is SHT_NOBITS similar to a .bss
+// section. I don't know why we have a BSS style type for the section but it is
+// consistent across both 64-bit PowerPC ABIs as well as the 32-bit PowerPC ABI.
+ : SyntheticSection(SHF_ALLOC | SHF_WRITE,
+ Config->EMachine == EM_PPC64 ? SHT_NOBITS : SHT_PROGBITS,
+ Target->GotPltEntrySize,
+ Config->EMachine == EM_PPC64 ? ".plt" : ".got.plt") {}
+// Appends Sym to the section. The caller must already have set Sym.PltIndex
+// to the index this entry receives (checked by the assert).
+void GotPltSection::addEntry(Symbol &Sym) {
+ assert(Sym.PltIndex == Entries.size());
+ Entries.push_back(&Sym);
+// Returns the section size in bytes: header entries plus one entry per
+// symbol, each Target->GotPltEntrySize bytes.
+size_t GotPltSection::getSize() const {
+ return (Target->GotPltHeaderEntriesNum + Entries.size()) *
+ Target->GotPltEntrySize;
+// Writes the section contents: the target-specific header followed by one
+// entry per symbol, in the order the symbols were added.
+void GotPltSection::writeTo(uint8_t *Buf) {
+ Target->writeGotPltHeader(Buf);
+ Buf += Target->GotPltHeaderEntriesNum * Target->GotPltEntrySize;
+ for (const Symbol *B : Entries) {
+ Target->writeGotPlt(Buf, *B);
+ // Step by the entry size that getSize() and the header skip above use.
+ // Stepping by Config->Wordsize would make entries overlap on targets
+ // whose .got.plt entry is wider than the word size.
+ Buf += Target->GotPltEntrySize;
+ }
+// Returns true only if there are no entries and nothing else forces the
+// section to be emitted.
+bool GotPltSection::empty() const {
+ // We need to emit a GOT.PLT even if it's empty if there's a symbol that
+ // references the _GLOBAL_OFFSET_TABLE_ and the Target defines the symbol
+ // relative to the .got.plt section.
+ return Entries.empty() &&
+ !(ElfSym::GlobalOffsetTable && Target->GotBaseSymInGotPlt);
+// Returns the output section name for the IgotPltSection, which depends on
+// the target machine.
+static StringRef getIgotPltName() {
+ // On ARM the IgotPltSection is part of the GotSection.
+ if (Config->EMachine == EM_ARM)
+ return ".got";
+ // On PowerPC64 the GotPltSection is renamed to '.plt' so the IgotPltSection
+ // needs to be named the same.
+ if (Config->EMachine == EM_PPC64)
+ return ".plt";
+ return ".got.plt";
+// On PowerPC64 the GotPltSection type is SHT_NOBITS so we have to follow suit
+// with the IgotPltSection.
+ : SyntheticSection(SHF_ALLOC | SHF_WRITE,
+ Config->EMachine == EM_PPC64 ? SHT_NOBITS : SHT_PROGBITS,
+ Target->GotPltEntrySize, getIgotPltName()) {}
+// Appends Sym to the section and marks it as living in the IGOT. The caller
+// must already have set Sym.PltIndex to the index this entry receives.
+void IgotPltSection::addEntry(Symbol &Sym) {
+ Sym.IsInIgot = true;
+ assert(Sym.PltIndex == Entries.size());
+ Entries.push_back(&Sym);
+// Returns the section size in bytes: one Target->GotPltEntrySize entry per
+// symbol. There is no header.
+size_t IgotPltSection::getSize() const {
+ return Entries.size() * Target->GotPltEntrySize;
+// Writes one entry per symbol, in the order the symbols were added.
+void IgotPltSection::writeTo(uint8_t *Buf) {
+ for (const Symbol *B : Entries) {
+ Target->writeIgotPlt(Buf, *B);
+ // Step by the entry size that getSize() uses. Stepping by
+ // Config->Wordsize would make entries overlap on targets whose entry is
+ // wider than the word size.
+ Buf += Target->GotPltEntrySize;
+ }
+// Constructs a string table named Name. A dynamic table is allocated
+// (SHF_ALLOC); a non-dynamic one carries no flags.
+StringTableSection::StringTableSection(StringRef Name, bool Dynamic)
+ : SyntheticSection(Dynamic ? (uint64_t)SHF_ALLOC : 0, SHT_STRTAB, 1, Name),
+ Dynamic(Dynamic) {
+ // ELF string tables start with a NUL byte.
+ addString("");
+// Adds a string to the string table. If HashIt is true we hash and check for
+// duplicates. It is optional because the name of global symbols are already
+// uniqued and hashing them again has a big cost for a small value: uniquing
+// them with some other string that happens to be the same.
+// Returns the byte offset of the string within the table.
+unsigned StringTableSection::addString(StringRef S, bool HashIt) {
+ if (HashIt) {
+ // insert() fails if S is already present; reuse its recorded offset.
+ auto R = StringMap.insert(std::make_pair(S, this->Size));
+ if (!R.second)
+ return R.first->second;
+ }
+ unsigned Ret = this->Size;
+ // "+ 1" accounts for the terminating NUL that writeTo() emits.
+ this->Size = this->Size + S.size() + 1;
+ Strings.push_back(S);
+ return Ret;
+// Writes every string in insertion order, each followed by a NUL byte.
+void StringTableSection::writeTo(uint8_t *Buf) {
+ for (StringRef S : Strings) {
+ memcpy(Buf, S.data(), S.size());
+ Buf[S.size()] = '\0';
+ Buf += S.size() + 1;
+ }
+// Returns the number of version definition entries. Because the first entry
+// is for the version definition itself, it is the number of versioned symbols
+// plus one. Note that we don't support multiple versions yet.
+// The value is Config->VersionDefinitions.size() + 1.
+static unsigned getVerDefNum() { return Config->VersionDefinitions.size() + 1; }
+template <class ELFT>
+ : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_DYNAMIC, Config->Wordsize,
+ ".dynamic") {
+ this->Entsize = ELFT::Is64Bits ? 16 : 8;
+ // .dynamic section is not writable on MIPS and on Fuchsia OS
+ // which passes -z rodynamic.
+ // See "Special Section" in Chapter 4 in the following document:
+ // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
+ if (Config->EMachine == EM_MIPS || Config->ZRodynamic)
+ this->Flags = SHF_ALLOC;
+ // Add strings to .dynstr early so that .dynstr's size will be
+ // fixed early.
+ for (StringRef S : Config->FilterList)
+ addInt(DT_FILTER, In.DynStrTab->addString(S));
+ for (StringRef S : Config->AuxiliaryList)
+ addInt(DT_AUXILIARY, In.DynStrTab->addString(S));
+ if (!Config->Rpath.empty())
+ addInt(Config->EnableNewDtags ? DT_RUNPATH : DT_RPATH,
+ In.DynStrTab->addString(Config->Rpath));
+ for (InputFile *File : SharedFiles) {
+ SharedFile<ELFT> *F = cast<SharedFile<ELFT>>(File);
+ if (F->IsNeeded)
+ addInt(DT_NEEDED, In.DynStrTab->addString(F->SoName));
+ }
+ if (!Config->SoName.empty())
+ addInt(DT_SONAME, In.DynStrTab->addString(Config->SoName));
+// Appends an entry with the given tag. Fn is stored with the entry and
+// supplies its value.
+template <class ELFT>
+void DynamicSection<ELFT>::add(int32_t Tag, std::function<uint64_t()> Fn) {
+ Entries.push_back({Tag, Fn});
+// Appends an entry whose value is the constant Val.
+template <class ELFT>
+void DynamicSection<ELFT>::addInt(int32_t Tag, uint64_t Val) {
+ Entries.push_back({Tag, [=] { return Val; }});
+// Appends an entry whose value is Sec->getVA(0), read when the stored
+// function is called.
+template <class ELFT>
+void DynamicSection<ELFT>::addInSec(int32_t Tag, InputSection *Sec) {
+ Entries.push_back({Tag, [=] { return Sec->getVA(0); }});
+// Appends an entry whose value is Sec->getVA(0) minus
+// (getVA() + TagOffset), where TagOffset is the byte offset of the new
+// entry within Entries.
+template <class ELFT>
+void DynamicSection<ELFT>::addInSecRelative(int32_t Tag, InputSection *Sec) {
+ // Captured before push_back, so this is the offset of the new entry.
+ size_t TagOffset = Entries.size() * Entsize;
+ Entries.push_back(
+ {Tag, [=] { return Sec->getVA(0) - (getVA() + TagOffset); }});
+// Appends an entry whose value is Sec->Addr, read when the stored function
+// is called.
+template <class ELFT>
+void DynamicSection<ELFT>::addOutSec(int32_t Tag, OutputSection *Sec) {
+ Entries.push_back({Tag, [=] { return Sec->Addr; }});
+// Appends an entry whose value is Sec->Size, read when the stored function
+// is called.
+template <class ELFT>
+void DynamicSection<ELFT>::addSize(int32_t Tag, OutputSection *Sec) {
+ Entries.push_back({Tag, [=] { return Sec->Size; }});
+// Appends an entry whose value is Sym->getVA(), read when the stored
+// function is called.
+template <class ELFT>
+void DynamicSection<ELFT>::addSym(int32_t Tag, Symbol *Sym) {
+ Entries.push_back({Tag, [=] { return Sym->getVA(); }});
+// A linker script may assign the RELA relocation sections to the same
+// output section. When this occurs we cannot just use the OutputSection
+// size. Moreover, the range [DT_JMPREL, DT_JMPREL + DT_PLTRELSZ) is permitted
+// to overlap with the range [DT_RELA, DT_RELA + DT_RELASZ).
+//
+// Returns the size of RelaPlt, plus the size of RelaIplt when both share the
+// same parent output section and the same section name.
+static uint64_t addPltRelSz() {
+  size_t Total = In.RelaPlt->getSize();
+  const bool SameParent = In.RelaIplt->getParent() == In.RelaPlt->getParent();
+  if (SameParent && In.RelaIplt->Name == In.RelaPlt->Name)
+    Total += In.RelaIplt->getSize();
+  return Total;
+}
+// Add remaining entries to complete .dynamic contents.
+template <class ELFT> void DynamicSection<ELFT>::finalizeContents() {
+ // Set DT_FLAGS and DT_FLAGS_1.
+ uint32_t DtFlags = 0;
+ uint32_t DtFlags1 = 0;
+ if (Config->Bsymbolic)
+ DtFlags |= DF_SYMBOLIC;
+ if (Config->ZGlobal)
+ DtFlags1 |= DF_1_GLOBAL;
+ if (Config->ZInitfirst)
+ DtFlags1 |= DF_1_INITFIRST;
+ if (Config->ZInterpose)
+ DtFlags1 |= DF_1_INTERPOSE;
+ if (Config->ZNodefaultlib)
+ DtFlags1 |= DF_1_NODEFLIB;
+ if (Config->ZNodelete)
+ DtFlags1 |= DF_1_NODELETE;
+ if (Config->ZNodlopen)
+ DtFlags1 |= DF_1_NOOPEN;
+ if (Config->ZNow) {
+ DtFlags |= DF_BIND_NOW;
+ DtFlags1 |= DF_1_NOW;
+ }
+ if (Config->ZOrigin) {
+ DtFlags |= DF_ORIGIN;
+ DtFlags1 |= DF_1_ORIGIN;
+ }
+ if (!Config->ZText)
+ DtFlags |= DF_TEXTREL;
+ if (DtFlags)
+ addInt(DT_FLAGS, DtFlags);
+ if (DtFlags1)
+ addInt(DT_FLAGS_1, DtFlags1);
+ // DT_DEBUG is a pointer to debug information used by debuggers at runtime. We
+ // need it for each process, so we don't write it for DSOs. The loader writes
+ // the pointer into this entry.
+ //
+ // DT_DEBUG is the only .dynamic entry that needs to be written to. Some
+ // systems (currently only Fuchsia OS) provide other means to give the
+ // debugger this information. Such systems may choose to make .dynamic read-only.
+ // If the target is such a system (used -z rodynamic) don't write DT_DEBUG.
+ if (!Config->Shared && !Config->Relocatable && !Config->ZRodynamic)
+ addInt(DT_DEBUG, 0);
+ if (OutputSection *Sec = In.DynStrTab->getParent())
+ this->Link = Sec->SectionIndex;
+ if (!In.RelaDyn->empty()) {
+ addInSec(In.RelaDyn->DynamicTag, In.RelaDyn);
+ addSize(In.RelaDyn->SizeDynamicTag, In.RelaDyn->getParent());
+ bool IsRela = Config->IsRela;
+ addInt(IsRela ? DT_RELAENT : DT_RELENT,
+ IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel));
+ // MIPS dynamic loader does not support RELCOUNT tag.
+ // The problem is in the tight relation between dynamic
+ // relocations and GOT. So do not emit this tag on MIPS.
+ if (Config->EMachine != EM_MIPS) {
+ size_t NumRelativeRels = In.RelaDyn->getRelativeRelocCount();
+ if (Config->ZCombreloc && NumRelativeRels)
+ addInt(IsRela ? DT_RELACOUNT : DT_RELCOUNT, NumRelativeRels);
+ }
+ }
+ if (In.RelrDyn && !In.RelrDyn->Relocs.empty()) {
+ addInSec(Config->UseAndroidRelrTags ? DT_ANDROID_RELR : DT_RELR,
+ In.RelrDyn);
+ addSize(Config->UseAndroidRelrTags ? DT_ANDROID_RELRSZ : DT_RELRSZ,
+ In.RelrDyn->getParent());
+ addInt(Config->UseAndroidRelrTags ? DT_ANDROID_RELRENT : DT_RELRENT,
+ sizeof(Elf_Relr));
+ }
+ // The .rel[a].plt section usually consists of two parts, containing plt and
+ // iplt relocations. It is possible to have only iplt relocations in the
+ // output. In that case RelaPlt is empty and has zero offset, the same offset
+ // as RelaIplt has. We still want to emit proper dynamic tags for that
+ // case, so here we always use RelaPlt as the marker for the beginning of
+ // the .rel[a].plt section.
+ if (In.RelaPlt->getParent()->Live) {
+ addInSec(DT_JMPREL, In.RelaPlt);
+ Entries.push_back({DT_PLTRELSZ, addPltRelSz});
+ switch (Config->EMachine) {
+ case EM_MIPS:
+ addInSec(DT_MIPS_PLTGOT, In.GotPlt);
+ break;
+ case EM_SPARCV9:
+ addInSec(DT_PLTGOT, In.Plt);
+ break;
+ default:
+ addInSec(DT_PLTGOT, In.GotPlt);
+ break;
+ }
+ addInt(DT_PLTREL, Config->IsRela ? DT_RELA : DT_REL);
+ }
+ addInSec(DT_SYMTAB, In.DynSymTab);
+ addInt(DT_SYMENT, sizeof(Elf_Sym));
+ addInSec(DT_STRTAB, In.DynStrTab);
+ addInt(DT_STRSZ, In.DynStrTab->getSize());
+ if (!Config->ZText)
+ addInt(DT_TEXTREL, 0);
+ if (In.GnuHashTab)
+ addInSec(DT_GNU_HASH, In.GnuHashTab);
+ if (In.HashTab)
+ addInSec(DT_HASH, In.HashTab);
+ if (Out::PreinitArray) {
+ addOutSec(DT_PREINIT_ARRAY, Out::PreinitArray);
+ addSize(DT_PREINIT_ARRAYSZ, Out::PreinitArray);
+ }
+ if (Out::InitArray) {
+ addOutSec(DT_INIT_ARRAY, Out::InitArray);
+ addSize(DT_INIT_ARRAYSZ, Out::InitArray);
+ }
+ if (Out::FiniArray) {
+ addOutSec(DT_FINI_ARRAY, Out::FiniArray);
+ addSize(DT_FINI_ARRAYSZ, Out::FiniArray);
+ }
+ if (Symbol *B = Symtab->find(Config->Init))
+ if (B->isDefined())
+ addSym(DT_INIT, B);
+ if (Symbol *B = Symtab->find(Config->Fini))
+ if (B->isDefined())
+ addSym(DT_FINI, B);
+ bool HasVerNeed = InX<ELFT>::VerNeed->getNeedNum() != 0;
+ if (HasVerNeed || In.VerDef)
+ addInSec(DT_VERSYM, InX<ELFT>::VerSym);
+ if (In.VerDef) {
+ addInSec(DT_VERDEF, In.VerDef);
+ addInt(DT_VERDEFNUM, getVerDefNum());
+ }
+ if (HasVerNeed) {
+ addInSec(DT_VERNEED, InX<ELFT>::VerNeed);
+ addInt(DT_VERNEEDNUM, InX<ELFT>::VerNeed->getNeedNum());
+ }
+ if (Config->EMachine == EM_MIPS) {
+ addInt(DT_MIPS_BASE_ADDRESS, Target->getImageBase());
+ addInt(DT_MIPS_SYMTABNO, In.DynSymTab->getNumSymbols());
+ add(DT_MIPS_LOCAL_GOTNO, [] { return In.MipsGot->getLocalEntriesNum(); });
+ if (const Symbol *B = In.MipsGot->getFirstGlobalEntry())
+ addInt(DT_MIPS_GOTSYM, B->DynsymIndex);
+ else
+ addInt(DT_MIPS_GOTSYM, In.DynSymTab->getNumSymbols());
+ addInSec(DT_PLTGOT, In.MipsGot);
+ if (In.MipsRldMap) {
+ if (!Config->Pie)
+ addInSec(DT_MIPS_RLD_MAP, In.MipsRldMap);
+ // Store the offset to the .rld_map section
+ // relative to the address of the tag.
+ addInSecRelative(DT_MIPS_RLD_MAP_REL, In.MipsRldMap);
+ }
+ }
+ // Glink dynamic tag is required by the V2 abi if the plt section isn't empty.
+ if (Config->EMachine == EM_PPC64 && !In.Plt->empty()) {
+ // The Glink tag points to 32 bytes before the first lazy symbol resolution
+ // stub, which starts directly after the header.
+ Entries.push_back({DT_PPC64_GLINK, [=] {
+ unsigned Offset = Target->PltHeaderSize - 32;
+ return In.Plt->getVA(0) + Offset;
+ }});
+ }
+ addInt(DT_NULL, 0);
+ getParent()->Link = this->Link;
+ this->Size = Entries.size() * this->Entsize;
+// Emits every registered entry in order, evaluating each deferred value
+// callback at this point.
+template <class ELFT> void DynamicSection<ELFT>::writeTo(uint8_t *Buf) {
+  auto *Dyn = reinterpret_cast<Elf_Dyn *>(Buf);
+  for (const auto &Entry : Entries) {
+    Dyn->d_tag = Entry.first;
+    Dyn->d_un.d_val = Entry.second();
+    ++Dyn;
+  }
+}
+// Virtual address of the location this relocation applies to.
+uint64_t DynamicReloc::getOffset() const {
+  const uint64_t VA = InputSec->getVA(OffsetInSec);
+  return VA;
+}
+// Value to store as the addend: the symbol's address plus addend when
+// UseSymVA is set, otherwise the plain addend, optionally biased by the MIPS
+// page address of OutputSec when one is attached.
+int64_t DynamicReloc::computeAddend() const {
+  if (!UseSymVA) {
+    // See the comment in the DynamicReloc ctor.
+    if (OutputSec)
+      return getMipsPageAddr(OutputSec->Addr) + Addend;
+    return Addend;
+  }
+  return Sym->getVA(Addend);
+}
+// Dynamic symbol table index to record, or 0 when there is no symbol or the
+// symbol's address has already been folded into the addend.
+uint32_t DynamicReloc::getSymIndex() const {
+  if (!Sym || UseSymVA)
+    return 0;
+  return Sym->DynsymIndex;
+}
+// Common base of the dynamic relocation sections. Besides the section name
+// and type, it remembers the two .dynamic tags (address tag and size tag)
+// under which DynamicSection later advertises this section.
+RelocationBaseSection::RelocationBaseSection(StringRef Name, uint32_t Type,
+ int32_t DynamicTag,
+ int32_t SizeDynamicTag)
+ : SyntheticSection(SHF_ALLOC, Type, Config->Wordsize, Name),
+ DynamicTag(DynamicTag), SizeDynamicTag(SizeDynamicTag) {}
+// Adds a dynamic relocation against Sym with a zero addend.
+void RelocationBaseSection::addReloc(RelType DynType, InputSectionBase *IS,
+                                     uint64_t OffsetInSec, Symbol *Sym) {
+  const DynamicReloc Reloc = {DynType, IS, OffsetInSec, false, Sym, 0};
+  addReloc(Reloc);
+}
+// Adds a dynamic relocation and, when addends are written in place, also
+// queues the matching static relocation on the input section.
+void RelocationBaseSection::addReloc(RelType DynType,
+                                     InputSectionBase *InputSec,
+                                     uint64_t OffsetInSec, Symbol *Sym,
+                                     int64_t Addend, RelExpr Expr,
+                                     RelType Type) {
+  const bool UseSymVA = Expr != R_ADDEND;
+  // Write the addends to the relocated address if required. We skip
+  // it if the written value would be zero.
+  if (Config->WriteAddends && (UseSymVA || Addend != 0))
+    InputSec->Relocations.push_back({Expr, Type, OffsetInSec, Addend, Sym});
+  const DynamicReloc Reloc = {DynType, InputSec, OffsetInSec,
+                              UseSymVA, Sym,      Addend};
+  addReloc(Reloc);
+}
+// Appends Reloc and keeps the running count of relative relocations current.
+void RelocationBaseSection::addReloc(const DynamicReloc &Reloc) {
+  const bool IsRelative = Reloc.Type == Target->RelativeRel;
+  if (IsRelative)
+    ++NumRelativeRelocs;
+  Relocs.push_back(Reloc);
+}
+// Fills in the sh_link and sh_info fields of the parent output section.
+void RelocationBaseSection::finalizeContents() {
+  // When linking glibc statically, .rel{,a}.plt contains R_*_IRELATIVE
+  // relocations due to IFUNC (e.g. strcpy). sh_link will be set to 0 in that
+  // case.
+  InputSection *SymTab = In.DynSymTab;
+  if (Config->Relocatable)
+    SymTab = In.SymTab;
+  if (!SymTab || !SymTab->getParent())
+    getParent()->Link = 0;
+  else
+    getParent()->Link = SymTab->getParent()->SectionIndex;
+  // The PLT relocation sections additionally point at the section they patch.
+  if (this == In.RelaPlt)
+    getParent()->Info = In.GotPlt->getParent()->SectionIndex;
+  if (this == In.RelaIplt)
+    getParent()->Info = In.IgotPlt->getParent()->SectionIndex;
+}
+ : SyntheticSection(SHF_ALLOC,
+ Config->UseAndroidRelrTags ? SHT_ANDROID_RELR : SHT_RELR,
+ Config->Wordsize, ".relr.dyn") {}
+// Serializes Rel into *P. The addend field is only touched for RELA output,
+// because callers may pass a buffer that is only as large as a REL entry.
+template <class ELFT>
+static void encodeDynamicReloc(typename ELFT::Rela *P,
+                               const DynamicReloc &Rel) {
+  const bool IsRela = Config->IsRela;
+  if (IsRela)
+    P->r_addend = Rel.computeAddend();
+  const uint64_t Offset = Rel.getOffset();
+  P->r_offset = Offset;
+  const uint32_t SymIndex = Rel.getSymIndex();
+  P->setSymbolAndType(SymIndex, Rel.Type, Config->IsMips64EL);
+}
+// Creates a REL or RELA relocation section, depending on Config->IsRela: the
+// section type, both .dynamic tags and the entry size all follow that choice.
+// Sort is stored and consulted by writeTo() to decide whether the relocations
+// are ordered before being emitted.
+template <class ELFT>
+RelocationSection<ELFT>::RelocationSection(StringRef Name, bool Sort)
+ : RelocationBaseSection(Name, Config->IsRela ? SHT_RELA : SHT_REL,
+ Config->IsRela ? DT_RELA : DT_REL,
+ Config->IsRela ? DT_RELASZ : DT_RELSZ),
+ Sort(Sort) {
+ this->Entsize = Config->IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel);
+// Ordering used when sorting dynamic relocations: relative relocations come
+// first; within each class, relocations are ordered by dynamic symbol index.
+static bool compRelocations(const DynamicReloc &A, const DynamicReloc &B) {
+  const bool LhsRelative = A.Type == Target->RelativeRel;
+  const bool RhsRelative = B.Type == Target->RelativeRel;
+  if (LhsRelative == RhsRelative)
+    return A.getSymIndex() < B.getSymIndex();
+  return LhsRelative;
+}
+// Writes all relocations back to back, optionally sorting them first.
+template <class ELFT> void RelocationSection<ELFT>::writeTo(uint8_t *Buf) {
+  if (Sort)
+    std::stable_sort(Relocs.begin(), Relocs.end(), compRelocations);
+  // The stride depends on whether the output uses REL or RELA entries.
+  const size_t Stride = Config->IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel);
+  uint8_t *Cursor = Buf;
+  for (const DynamicReloc &Rel : Relocs) {
+    encodeDynamicReloc<ELFT>(reinterpret_cast<Elf_Rela *>(Cursor), Rel);
+    Cursor += Stride;
+  }
+}
+// Returns the number of bytes occupied by the relocations added so far
+// (entry size multiplied by the current relocation count).
+template <class ELFT> unsigned RelocationSection<ELFT>::getRelocOffset() {
+ return this->Entsize * Relocs.size();
+template <class ELFT>
+ StringRef Name)
+ : RelocationBaseSection(
+ this->Entsize = 1;
+template <class ELFT>
+bool AndroidPackedRelocationSection<ELFT>::updateAllocSize() {
+ // This function computes the contents of an Android-format packed relocation
+ // section.
+ //
+ // This format compresses relocations by using relocation groups to factor out
+ // fields that are common between relocations and storing deltas from previous
+ // relocations in SLEB128 format (which has a short representation for small
+ // numbers). A good example of a relocation type with common fields is
+ // R_*_RELATIVE, which is normally used to represent function pointers in
+ // vtables. In the REL format, each relative relocation has the same r_info
+ // field, and is only different from other relative relocations in terms of
+ // the r_offset field. By sorting relocations by offset, grouping them by
+ // r_info and representing each relocation with only the delta from the
+ // previous offset, each 8-byte relocation can be compressed to as little as 1
+ // byte (or less with run-length encoding). This relocation packer was able to
+ // reduce the size of the relocation section in an Android Chromium DSO from
+ // 2,911,184 bytes to 174,693 bytes, or 6% of the original size.
+ //
+ // A relocation section consists of a header containing the literal bytes
+ // 'APS2' followed by a sequence of SLEB128-encoded integers. The first two
+ // elements are the total number of relocations in the section and an initial
+ // r_offset value. The remaining elements define a sequence of relocation
+ // groups. Each relocation group starts with a header consisting of the
+ // following elements:
+ //
+ // - the number of relocations in the relocation group
+ // - flags for the relocation group
+ // - (if RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG is set) the r_offset delta
+ // for each relocation in the group.
+ // - (if RELOCATION_GROUPED_BY_INFO_FLAG is set) the value of the r_info
+ // field for each relocation in the group.
+ // - (if RELOCATION_GROUP_HAS_ADDEND_FLAG and
+ // RELOCATION_GROUPED_BY_ADDEND_FLAG are set) the r_addend delta for
+ // each relocation in the group.
+ //
+ // Following the relocation group header are descriptions of each of the
+ // relocations in the group. They consist of the following elements:
+ //
+ // - (if RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG is not set) the r_offset
+ // delta for this relocation.
+ // - (if RELOCATION_GROUPED_BY_INFO_FLAG is not set) the value of the r_info
+ // field for this relocation.
+ // - (if RELOCATION_GROUP_HAS_ADDEND_FLAG is set and
+ // RELOCATION_GROUPED_BY_ADDEND_FLAG is not set) the r_addend delta for
+ // this relocation.
+ size_t OldSize = RelocData.size();
+ RelocData = {'A', 'P', 'S', '2'};
+ raw_svector_ostream OS(RelocData);
+ auto Add = [&](int64_t V) { encodeSLEB128(V, OS); };
+ // The format header includes the number of relocations and the initial
+ // offset (we set this to zero because the first relocation group will
+ // perform the initial adjustment).
+ Add(Relocs.size());
+ Add(0);
+ std::vector<Elf_Rela> Relatives, NonRelatives;
+ for (const DynamicReloc &Rel : Relocs) {
+ Elf_Rela R;
+ encodeDynamicReloc<ELFT>(&R, Rel);
+ if (R.getType(Config->IsMips64EL) == Target->RelativeRel)
+ Relatives.push_back(R);
+ else
+ NonRelatives.push_back(R);
+ }
+ llvm::sort(Relatives, [](const Elf_Rel &A, const Elf_Rel &B) {
+ return A.r_offset < B.r_offset;
+ });
+ // Try to find groups of relative relocations which are spaced one word
+ // apart from one another. These generally correspond to vtable entries. The
+ // format allows these groups to be encoded using a sort of run-length
+ // encoding, but each group will cost 7 bytes in addition to the offset from
+ // the previous group, so it is only profitable to do this for groups of
+ // size 8 or larger.
+ std::vector<Elf_Rela> UngroupedRelatives;
+ std::vector<std::vector<Elf_Rela>> RelativeGroups;
+ for (auto I = Relatives.begin(), E = Relatives.end(); I != E;) {
+ std::vector<Elf_Rela> Group;
+ do {
+ Group.push_back(*I++);
+ } while (I != E && (I - 1)->r_offset + Config->Wordsize == I->r_offset);
+ if (Group.size() < 8)
+ UngroupedRelatives.insert(UngroupedRelatives.end(), Group.begin(),
+ Group.end());
+ else
+ RelativeGroups.emplace_back(std::move(Group));
+ }
+ unsigned HasAddendIfRela =
+ uint64_t Offset = 0;
+ uint64_t Addend = 0;
+ // Emit the run-length encoding for the groups of adjacent relative
+ // relocations. Each group is represented using two groups in the packed
+ // format. The first is used to set the current offset to the start of the
+ // group (and also encodes the first relocation), and the second encodes the
+ // remaining relocations.
+ for (std::vector<Elf_Rela> &G : RelativeGroups) {
+ // The first relocation in the group.
+ Add(1);
+ Add(G[0].r_offset - Offset);
+ Add(Target->RelativeRel);
+ if (Config->IsRela) {
+ Add(G[0].r_addend - Addend);
+ Addend = G[0].r_addend;
+ }
+ // The remaining relocations.
+ Add(G.size() - 1);
+ Add(Config->Wordsize);
+ Add(Target->RelativeRel);
+ if (Config->IsRela) {
+ for (auto I = G.begin() + 1, E = G.end(); I != E; ++I) {
+ Add(I->r_addend - Addend);
+ Addend = I->r_addend;
+ }
+ }
+ Offset = G.back().r_offset;
+ }
+ // Now the ungrouped relatives.
+ if (!UngroupedRelatives.empty()) {
+ Add(UngroupedRelatives.size());
+ Add(Target->RelativeRel);
+ for (Elf_Rela &R : UngroupedRelatives) {
+ Add(R.r_offset - Offset);
+ Offset = R.r_offset;
+ if (Config->IsRela) {
+ Add(R.r_addend - Addend);
+ Addend = R.r_addend;
+ }
+ }
+ }
+ // Finally the non-relative relocations.
+ llvm::sort(NonRelatives, [](const Elf_Rela &A, const Elf_Rela &B) {
+ return A.r_offset < B.r_offset;
+ });
+ if (!NonRelatives.empty()) {
+ Add(NonRelatives.size());
+ Add(HasAddendIfRela);
+ for (Elf_Rela &R : NonRelatives) {
+ Add(R.r_offset - Offset);
+ Offset = R.r_offset;
+ Add(R.r_info);
+ if (Config->IsRela) {
+ Add(R.r_addend - Addend);
+ Addend = R.r_addend;
+ }
+ }
+ }
+ // Don't allow the section to shrink; otherwise the size of the section can
+ // oscillate infinitely.
+ if (RelocData.size() < OldSize)
+ RelocData.append(OldSize - RelocData.size(), 0);
+ // Returns whether the section size changed. We need to keep recomputing both
+ // section layout and the contents of this section until the size converges
+ // because changing this section's size can affect section layout, which in
+ // turn can affect the sizes of the LEB-encoded integers stored in this
+ // section.
+ return RelocData.size() != OldSize;
+// Sets the entry size of the section to one machine word.
+template <class ELFT> RelrSection<ELFT>::RelrSection() {
+ this->Entsize = Config->Wordsize;
+// Recomputes the packed contents of the section from the current relocation
+// offsets. Returns true if the number of entries changed, in which case the
+// caller has to redo section layout and call this function again.
+template <class ELFT> bool RelrSection<ELFT>::updateAllocSize() {
+  // This function computes the contents of an SHT_RELR packed relocation
+  // section.
+  //
+  // Proposal for adding SHT_RELR sections to generic-abi is here:
+  //   https://groups.google.com/forum/#!topic/generic-abi/bX460iggiKg
+  //
+  // The encoded sequence of Elf64_Relr entries in a SHT_RELR section looks
+  // like [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
+  //
+  // i.e. start with an address, followed by any number of bitmaps. The address
+  // entry encodes 1 relocation. The subsequent bitmap entries encode up to 63
+  // relocations each, at subsequent offsets following the last address entry.
+  //
+  // The bitmap entries must have 1 in the least significant bit. The assumption
+  // here is that an address cannot have 1 in lsb. Odd addresses are not
+  // supported.
+  //
+  // Excluding the least significant bit in the bitmap, each non-zero bit in
+  // the bitmap represents a relocation to be applied to a corresponding machine
+  // word that follows the base address word. The second least significant bit
+  // represents the machine word immediately following the initial address, and
+  // each bit that follows represents the next word, in linear order. As such,
+  // a single bitmap can encode up to 31 relocations in a 32-bit object, and
+  // 63 relocations in a 64-bit object.
+  //
+  // This encoding has a couple of interesting properties:
+  // 1. Looking at any entry, it is clear whether it's an address or a bitmap:
+  //    even means address, odd means bitmap.
+  // 2. Just a simple list of addresses is a valid encoding.
+  size_t OldSize = RelrRelocs.size();
+  RelrRelocs.clear();
+  // Same as Config->Wordsize but faster because this is a compile-time
+  // constant.
+  const size_t Wordsize = sizeof(typename ELFT::uint);
+  // Number of bits to use for the relocation offsets bitmap.
+  // Must be either 63 or 31.
+  const size_t NBits = Wordsize * 8 - 1;
+  // Get offsets for all relative relocations and sort them.
+  std::vector<uint64_t> Offsets;
+  for (const RelativeReloc &Rel : Relocs)
+    Offsets.push_back(Rel.getOffset());
+  llvm::sort(Offsets.begin(), Offsets.end());
+  // For each leading relocation, find following ones that can be folded
+  // as a bitmap and fold them.
+  for (size_t I = 0, E = Offsets.size(); I < E;) {
+    // Add a leading relocation.
+    RelrRelocs.push_back(Elf_Relr(Offsets[I]));
+    uint64_t Base = Offsets[I] + Wordsize;
+    ++I;
+    // Find foldable relocations to construct bitmaps.
+    while (I < E) {
+      uint64_t Bitmap = 0;
+      while (I < E) {
+        uint64_t Delta = Offsets[I] - Base;
+        // If it is too far, it cannot be folded.
+        if (Delta >= NBits * Wordsize)
+          break;
+        // If it is not a multiple of wordsize away, it cannot be folded.
+        if (Delta % Wordsize)
+          break;
+        // Fold it.
+        Bitmap |= 1ULL << (Delta / Wordsize);
+        ++I;
+      }
+      if (!Bitmap)
+        break;
+      RelrRelocs.push_back(Elf_Relr((Bitmap << 1) | 1));
+      Base += NBits * Wordsize;
+    }
+  }
+  // Don't allow the section to shrink; otherwise the size of the section can
+  // oscillate infinitely, because a smaller section changes the layout, which
+  // changes the offsets, which can make the encoding grow again. Pad with
+  // bitmap words that have no relocation bits set (value 1): they are odd, so
+  // they are read as bitmaps, and they decode to zero relocations.
+  if (RelrRelocs.size() < OldSize)
+    RelrRelocs.resize(OldSize, Elf_Relr(1));
+  return RelrRelocs.size() != OldSize;
+}
+// The kind of symbol table is derived from the string table it is paired
+// with: a dynamic string table yields an allocated ".dynsym" section of type
+// SHT_DYNSYM, any other string table yields a non-allocated ".symtab" section
+// of type SHT_SYMTAB.
+SymbolTableBaseSection::SymbolTableBaseSection(StringTableSection &StrTabSec)
+ : SyntheticSection(StrTabSec.isDynamic() ? (uint64_t)SHF_ALLOC : 0,
+ StrTabSec.isDynamic() ? SHT_DYNSYM : SHT_SYMTAB,
+ Config->Wordsize,
+ StrTabSec.isDynamic() ? ".dynsym" : ".symtab"),
+ StrTabSec(StrTabSec) {}
+// Orders symbols according to their positions in the GOT,
+// in compliance with MIPS ABI rules.
+// See "Global Offset Table" in Chapter 5 in the following document
+// for detailed description:
+// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
+static bool sortMipsSymbols(const SymbolTableEntry &L,
+                            const SymbolTableEntry &R) {
+  const bool LInGot = L.Sym->isInGot();
+  const bool RInGot = R.Sym->isInGot();
+  // Entries that both have GOT slots are ordered by GOT index.
+  if (LInGot && RInGot)
+    return L.Sym->GotIndex < R.Sym->GotIndex;
+  // Otherwise entries without a GOT slot precede those with one, and two
+  // entries without a slot compare as equivalent.
+  return !LInGot && RInGot;
+}
+// Sets sh_link/sh_info on the parent section and puts the symbols into their
+// final order. For .dynsym it also assigns each symbol its DynsymIndex.
+void SymbolTableBaseSection::finalizeContents() {
+  if (OutputSection *Sec = StrTabSec.getParent())
+    getParent()->Link = Sec->SectionIndex;
+  const bool IsDynSym = this->Type == SHT_DYNSYM;
+  if (!IsDynSym) {
+    sortSymTabSymbols();
+    return;
+  }
+  // If it is a .dynsym, there should be no local symbols, but we need
+  // to do a few things for the dynamic linker.
+  // Section's Info field has the index of the first non-local symbol.
+  // Because the first symbol entry is a null entry, 1 is the first.
+  getParent()->Info = 1;
+  if (In.GnuHashTab) {
+    // NB: It also sorts Symbols to meet the GNU hash table requirements.
+    In.GnuHashTab->addSymbols(Symbols);
+  } else if (Config->EMachine == EM_MIPS) {
+    std::stable_sort(Symbols.begin(), Symbols.end(), sortMipsSymbols);
+  }
+  // Indices are 1-based because slot 0 is the null symbol.
+  for (size_t Idx = 0, N = Symbols.size(); Idx != N; ++Idx)
+    Symbols[Idx].Sym->DynsymIndex = Idx + 1;
+}
+// The ELF spec requires that all local symbols precede global symbols, so we
+// sort symbol entries in this function. (For .dynsym, we don't do that because
+// symbols for dynamic linking are inherently all globals.)
+// In addition, local symbols are grouped by the file they come from, each
+// group starting with that file's STT_FILE symbol, which makes it easy to
+// tell where a local symbol originated.
+void SymbolTableBaseSection::sortSymTabSymbols() {
+  // Move all local symbols before global symbols.
+  auto IsLocalEntry = [](const SymbolTableEntry &S) {
+    return S.Sym->isLocal() || S.Sym->computeBinding() == STB_LOCAL;
+  };
+  auto LocalsEnd =
+      std::stable_partition(Symbols.begin(), Symbols.end(), IsLocalEntry);
+  size_t NumLocals = LocalsEnd - Symbols.begin();
+  getParent()->Info = NumLocals + 1;
+  // We want to group the local symbols by file. For that we rebuild the local
+  // part of the symbols vector. We do not need to care about the STT_FILE
+  // symbols, they are already naturally placed first in each group. That
+  // happens because STT_FILE is always the first symbol in the object and hence
+  // precedes all other local symbols we add for a file.
+  MapVector<InputFile *, std::vector<SymbolTableEntry>> ByFile;
+  for (auto It = Symbols.begin(); It != LocalsEnd; ++It)
+    ByFile[It->Sym->File].push_back(*It);
+  auto Dst = Symbols.begin();
+  for (auto &FileAndSyms : ByFile)
+    Dst = std::copy(FileAndSyms.second.begin(), FileAndSyms.second.end(), Dst);
+}
+// Appends B to the table and interns its name in the paired string table.
+void SymbolTableBaseSection::addSymbol(Symbol *B) {
+  // Adding a local symbol to a .dynsym is a bug.
+  assert(this->Type != SHT_DYNSYM || !B->isLocal());
+  // Whether the symbol is local is forwarded as addString's second argument.
+  const bool IsLocal = B->isLocal();
+  auto NameOffset = StrTabSec.addString(B->getName(), IsLocal);
+  Symbols.push_back({B, NameOffset});
+}
+// Returns the 1-based index of Sym in this table, as recorded by the lookup
+// maps that are built on first use.
+size_t SymbolTableBaseSection::getSymbolIndex(Symbol *Sym) {
+  // Initializes symbol lookup tables lazily. This is used only
+  // for -r or -emit-relocs.
+  llvm::call_once(OnceFlag, [&] {
+    SymbolIndexMap.reserve(Symbols.size());
+    for (size_t Pos = 0, N = Symbols.size(); Pos != N; ++Pos) {
+      Symbol *S = Symbols[Pos].Sym;
+      const size_t Index = Pos + 1; // Slot 0 is the null symbol.
+      if (S->Type == STT_SECTION)
+        SectionIndexMap[S->getOutputSection()] = Index;
+      else
+        SymbolIndexMap[S] = Index;
+    }
+  });
+  // Section symbols are mapped based on their output sections
+  // to maintain their semantics.
+  if (Sym->Type != STT_SECTION)
+    return SymbolIndexMap.lookup(Sym);
+  return SectionIndexMap.lookup(Sym->getOutputSection());
+}
+// ELF-class-specific symbol table: the entry size is the size of this
+// ELFT's Elf_Sym record.
+template <class ELFT>
+SymbolTableSection<ELFT>::SymbolTableSection(StringTableSection &StrTabSec)
+ : SymbolTableBaseSection(StrTabSec) {
+ this->Entsize = sizeof(Elf_Sym);
+// Returns the BssSection a defined symbol lives in when common symbols are
+// not being defined (Config->DefineCommon is false); nullptr in every other
+// case.
+static BssSection *getCommonSec(Symbol *Sym) {
+  if (Config->DefineCommon)
+    return nullptr;
+  auto *D = dyn_cast<Defined>(Sym);
+  if (!D)
+    return nullptr;
+  return dyn_cast_or_null<BssSection>(D->Section);
+}
+// Computes the st_shndx value for Sym: SHN_COMMON for uninstantiated common
+// symbols, SHN_UNDEF for non-defined symbols and symbols that need a PLT
+// address, SHN_XINDEX when the real index does not fit below SHN_LORESERVE,
+// SHN_ABS when there is no output section, and the section index otherwise.
+static uint32_t getSymSectionIndex(Symbol *Sym) {
+  if (getCommonSec(Sym))
+    return SHN_COMMON;
+  if (Sym->NeedsPltAddr || !isa<Defined>(Sym))
+    return SHN_UNDEF;
+  const OutputSection *OS = Sym->getOutputSection();
+  if (!OS)
+    return SHN_ABS;
+  if (OS->SectionIndex >= SHN_LORESERVE)
+    return SHN_XINDEX;
+  return OS->SectionIndex;
+}
+// Write the internal symbol table contents to the output symbol table.
+// The table is emitted in two passes over Symbols: the first pass fills in
+// every Elf_Sym, the second pass (MIPS only) ORs target-specific flags into
+// the entries written by the first pass.
+template <class ELFT> void SymbolTableSection<ELFT>::writeTo(uint8_t *Buf) {
+ // The first entry is a null entry as per the ELF spec.
+ memset(Buf, 0, sizeof(Elf_Sym));
+ Buf += sizeof(Elf_Sym);
+ auto *ESym = reinterpret_cast<Elf_Sym *>(Buf);
+ for (SymbolTableEntry &Ent : Symbols) {
+ Symbol *Sym = Ent.Sym;
+ // Set st_info and st_other.
+ ESym->st_other = 0;
+ if (Sym->isLocal()) {
+ ESym->setBindingAndType(STB_LOCAL, Sym->Type);
+ } else {
+ ESym->setBindingAndType(Sym->computeBinding(), Sym->Type);
+ ESym->setVisibility(Sym->Visibility);
+ }
+ ESym->st_name = Ent.StrTabOffset;
+ ESym->st_shndx = getSymSectionIndex(Ent.Sym);
+ // Copy symbol size if it is a defined symbol. st_size is not significant
+ // for undefined symbols, so whether copying it or not is up to us if that's
+ // the case. We'll leave it as zero because by not setting a value, we can
+ // get the exact same outputs for two sets of input files that differ only
+ // in undefined symbol size in DSOs.
+ if (ESym->st_shndx == SHN_UNDEF)
+ ESym->st_size = 0;
+ else
+ ESym->st_size = Sym->getSize();
+ // st_value is usually an address of a symbol, but that has a
+ // special meaning for uninstantiated common symbols (this can
+ // occur if -r is given).
+ if (BssSection *CommonSec = getCommonSec(Ent.Sym))
+ ESym->st_value = CommonSec->Alignment;
+ else
+ ESym->st_value = Sym->getVA();
+ ++ESym;
+ }
+ // On MIPS we need to mark symbol which has a PLT entry and requires
+ // pointer equality by STO_MIPS_PLT flag. That is necessary to help
+ // dynamic linker distinguish such symbols and MIPS lazy-binding stubs.
+ // https://sourceware.org/ml/binutils/2008-07/txt00000.txt
+ if (Config->EMachine == EM_MIPS) {
+ // Buf already points past the null entry, so this walks the same
+ // entries as the loop above.
+ auto *ESym = reinterpret_cast<Elf_Sym *>(Buf);
+ for (SymbolTableEntry &Ent : Symbols) {
+ Symbol *Sym = Ent.Sym;
+ if (Sym->isInPlt() && Sym->NeedsPltAddr)
+ ESym->st_other |= STO_MIPS_PLT;
+ if (isMicroMips()) {
+ // Set STO_MIPS_MICROMIPS flag and less-significant bit for
+ // a defined microMIPS symbol and symbol should point to its
+ // PLT entry (in case of microMIPS, PLT entries always contain
+ // microMIPS code).
+ if (Sym->isDefined() &&
+ ((Sym->StOther & STO_MIPS_MICROMIPS) || Sym->NeedsPltAddr)) {
+ if (StrTabSec.isDynamic())
+ ESym->st_value |= 1;
+ ESym->st_other |= STO_MIPS_MICROMIPS;
+ }
+ }
+ if (Config->Relocatable)
+ if (auto *D = dyn_cast<Defined>(Sym))
+ if (isMipsPIC<ELFT>(D))
+ ESym->st_other |= STO_MIPS_PIC;
+ ++ESym;
+ }
+ }
+ : SyntheticSection(0, SHT_SYMTAB_SHNDX, 4, ".symtab_shndxr") {
+ this->Entsize = 4;
+// Fills in the extended section index table that accompanies .symtab.
+void SymtabShndxSection::writeTo(uint8_t *Buf) {
+  // We write an array of 32 bit values, where each value has 1:1 association
+  // with an entry in .symtab. If the corresponding entry contains SHN_XINDEX,
+  // we need to write actual index, otherwise, we must write SHN_UNDEF(0).
+  // Slots that are not written here are left untouched.
+  uint8_t *Slot = Buf + 4; // Ignore .symtab[0] entry.
+  for (const SymbolTableEntry &Entry : In.SymTab->getSymbols()) {
+    Symbol *Sym = Entry.Sym;
+    if (getSymSectionIndex(Sym) == SHN_XINDEX)
+      write32(Slot, Sym->getOutputSection()->SectionIndex);
+    Slot += 4;
+  }
+}
+// Reports whether the section can be omitted, i.e. whether the number of
+// output sections in the script stays below SHN_LORESERVE.
+bool SymtabShndxSection::empty() const {
+  // SHT_SYMTAB can hold symbols with section indices values up to
+  // SHN_LORESERVE. If we need more, we want to use extension SHT_SYMTAB_SHNDX
+  // section. Problem is that we reveal the final section indices a bit too
+  // late, and we do not know them here. For simplicity, we just always create
+  // a .symtab_shndxr section when the amount of output sections is huge.
+  const size_t NumOutputSections = std::count_if(
+      Script->SectionCommands.begin(), Script->SectionCommands.end(),
+      [](BaseCommand *Base) { return isa<OutputSection>(Base); });
+  return NumOutputSections < SHN_LORESERVE;
+}
+// Points sh_link of the parent section at the output section that holds
+// .symtab.
+void SymtabShndxSection::finalizeContents() {
+ getParent()->Link = In.SymTab->getParent()->SectionIndex;
+// Four bytes per symbol, using the count reported by .symtab's
+// getNumSymbols().
+size_t SymtabShndxSection::getSize() const {
+ return In.SymTab->getNumSymbols() * 4;
+// .hash and .gnu.hash sections contain on-disk hash tables that map
+// symbol names to their dynamic symbol table indices. Their purpose
+// is to help the dynamic linker resolve symbols quickly. If ELF files
+// don't have them, the dynamic linker has to do linear search on all
+// dynamic symbols, which makes programs slower. Therefore, a .hash
+// section is added to a DSO by default. A .gnu.hash is added if you
+// give the -hash-style=gnu or -hash-style=both option.
+// The Unix semantics of resolving dynamic symbols is somewhat expensive.
+// Each ELF file has a list of DSOs that the ELF file depends on and a
+// list of dynamic symbols that need to be resolved from any of the
+// DSOs. That means resolving all dynamic symbols takes O(m)*O(n)
+// where m is the number of DSOs and n is the number of dynamic
+// symbols. For modern large programs, both m and n are large. So
+// making each step faster by using hash tables substantially
+// improves time to load programs.
+// (Note that this is not the only way to design the shared library.
+// For instance, the Windows DLL takes a different approach. On
+// Windows, each dynamic symbol has a name of DLL from which the symbol
+// has to be resolved. That makes the cost of symbol resolution O(n).
+// This disables some hacky techniques you can use on Unix such as
+// LD_PRELOAD, but this is arguably better semantics than the Unix ones.)
+// Due to historical reasons, we have two different hash tables, .hash
+// and .gnu.hash. They are for the same purpose, and .gnu.hash is a new
+// and better version of .hash. .hash is just an on-disk hash table, but
+// .gnu.hash has a bloom filter in addition to a hash table to skip
+// DSOs very quickly. If you are sure that your dynamic linker knows
+// about .gnu.hash, you want to specify -hash-style=gnu. Otherwise, a
+// safe bet is to specify -hash-style=both for backward compatibility.
+ : SyntheticSection(SHF_ALLOC, SHT_GNU_HASH, Config->Wordsize, ".gnu.hash") {
+// Links the section to .dynsym and computes the bloom filter width and the
+// total section size.
+void GnuHashTableSection::finalizeContents() {
+  if (OutputSection *Sec = In.DynSymTab->getParent())
+    getParent()->Link = Sec->SectionIndex;
+  // Computes bloom filter size in word size. We want to allocate 12
+  // bits for each symbol. It must be a power of two.
+  MaskWords = 1;
+  if (!Symbols.empty()) {
+    const uint64_t NumBits = Symbols.size() * 12;
+    MaskWords = NextPowerOf2(NumBits / (Config->Wordsize * 8));
+  }
+  const size_t HeaderBytes = 16;
+  const size_t BloomBytes = Config->Wordsize * MaskWords;
+  const size_t BucketBytes = NBuckets * 4;
+  const size_t HashValueBytes = Symbols.size() * 4;
+  Size = HeaderBytes + BloomBytes + BucketBytes + HashValueBytes;
+}
+// Emits the section: a 16-byte header, then the bloom filter, then the hash
+// buckets and hash values.
+void GnuHashTableSection::writeTo(uint8_t *Buf) {
+  // The output buffer is not guaranteed to be zero-cleared because we pre-
+  // fill executable sections with trap instructions. This is a precaution
+  // for that case, which happens only when -no-rosegment is given.
+  memset(Buf, 0, Size);
+  // Header: bucket count, number of .dynsym entries not covered by this
+  // table, bloom filter word count, and the second bloom shift.
+  write32(Buf + 0, NBuckets);
+  write32(Buf + 4, In.DynSymTab->getNumSymbols() - Symbols.size());
+  write32(Buf + 8, MaskWords);
+  write32(Buf + 12, Shift2);
+  // Bloom filter, immediately followed by the hash table.
+  uint8_t *Bloom = Buf + 16;
+  writeBloomFilter(Bloom);
+  uint8_t *HashTable = Bloom + Config->Wordsize * MaskWords;
+  writeHashTable(HashTable);
+}
+// This function writes a 2-bit bloom filter. This bloom filter alone
+// usually filters out 80% or more of all symbol lookups [1].
+// The dynamic linker uses the hash table only when a symbol is not
+// filtered out by a bloom filter.
+// [1] Ulrich Drepper (2011), "How To Write Shared Libraries" (Ver. 4.1.2),
+// p.9, https://www.akkadia.org/drepper/dsohowto.pdf
+void GnuHashTableSection::writeBloomFilter(uint8_t *Buf) {
+  const unsigned BitsPerWord = Config->Is64 ? 64 : 32;
+  for (const Entry &Sym : Symbols) {
+    // When BitsPerWord = 64, we choose a word with bits [6:...] and set 1 to
+    // two bits in the word using bits [0:5] and [26:31].
+    const size_t WordIdx = (Sym.Hash / BitsPerWord) & (MaskWords - 1);
+    uint8_t *Word = Buf + WordIdx * Config->Wordsize;
+    const uint64_t FirstBit = uint64_t(1) << (Sym.Hash % BitsPerWord);
+    const uint64_t SecondBit = uint64_t(1)
+                               << ((Sym.Hash >> Shift2) % BitsPerWord);
+    uint64_t Val = readUint(Word);
+    Val |= FirstBit;
+    Val |= SecondBit;
+    writeUint(Word, Val);
+  }
+}
+void GnuHashTableSection::writeHashTable(uint8_t *Buf) { // Writes NBuckets bucket words followed by one hash word per symbol.
+ uint32_t *Buckets = reinterpret_cast<uint32_t *>(Buf);
+ uint32_t OldBucket = -1; // sentinel: no bucket written yet
+ uint32_t *Values = Buckets + NBuckets; // hash values start right after the bucket array
+ for (auto I = Symbols.begin(), E = Symbols.end(); I != E; ++I) {
+ // Write a hash value. It represents a sequence of chains that share the
+ // same hash modulo value. The last element of each chain is terminated by
+ // LSB 1.
+ uint32_t Hash = I->Hash;
+ bool IsLastInChain = (I + 1) == E || I->BucketIdx != (I + 1)->BucketIdx;
+ Hash = IsLastInChain ? Hash | 1 : Hash & ~1;
+ write32(Values++, Hash);
+ if (I->BucketIdx == OldBucket) // only the first symbol of a bucket fills the bucket slot
+ continue;
+ // Write a hash bucket. Hash buckets contain indices in the following hash
+ // value table.
+ write32(Buckets + I->BucketIdx, I->Sym->DynsymIndex);
+ OldBucket = I->BucketIdx;
+ }
+static uint32_t hashGnu(StringRef Name) {
+ // Seeded with 5381; every byte multiplies the running value by 33
+ // (modulo 2^32) and adds the byte.
+ uint32_t Hash = 5381;
+ for (uint8_t Byte : Name)
+ Hash = Hash * 33 + Byte;
+ return Hash;
+// Add symbols to this symbol hash table. Note that this function
+// destructively sorts the given vector -- which is needed because
+// the GNU-style hash table imposes some sorting requirements.
+void GnuHashTableSection::addSymbols(std::vector<SymbolTableEntry> &V) { // Hashes the defined symbols of V into Symbols and reorders V to match.
+ // We cannot use 'auto' for Mid because GCC 6.1 cannot deduce
+ // its type correctly.
+ std::vector<SymbolTableEntry>::iterator Mid =
+ std::stable_partition(V.begin(), V.end(), [](const SymbolTableEntry &S) {
+ return !S.Sym->isDefined(); // non-defined symbols go first; [Mid, end) holds the defined ones
+ });
+ // We chose load factor 4 for the on-disk hash table. For each hash
+ // collision, the dynamic linker will compare a uint32_t hash value.
+ // Since the integer comparison is quite fast, we believe we can
+ // make the load factor even larger. 4 is just a conservative choice.
+ //
+ // Note that we don't want to create a zero-sized hash table because
+ // Android loader as of 2018 doesn't like a .gnu.hash containing such
+ // table. If that's the case, we create a hash table with one unused
+ // dummy slot.
+ NBuckets = std::max<size_t>((V.end() - Mid) / 4, 1);
+ if (Mid == V.end()) // no defined symbols, so there is nothing to hash
+ return;
+ for (SymbolTableEntry &Ent : llvm::make_range(Mid, V.end())) {
+ Symbol *B = Ent.Sym;
+ uint32_t Hash = hashGnu(B->getName());
+ uint32_t BucketIdx = Hash % NBuckets;
+ Symbols.push_back({B, Ent.StrTabOffset, Hash, BucketIdx});
+ }
+ std::stable_sort( // group entries by bucket, keeping their relative order within a bucket
+ Symbols.begin(), Symbols.end(),
+ [](const Entry &L, const Entry &R) { return L.BucketIdx < R.BucketIdx; });
+ V.erase(Mid, V.end()); // drop the defined tail, then re-append it in bucket order
+ for (const Entry &Ent : Symbols)
+ V.push_back({Ent.Sym, Ent.StrTabOffset});
+ : SyntheticSection(SHF_ALLOC, SHT_HASH, 4, ".hash") {
+ this->Entsize = 4;
+void HashTableSection::finalizeContents() { // Sets sh_link from In.DynSymTab's parent and computes the table size.
+ if (OutputSection *Sec = In.DynSymTab->getParent())
+ getParent()->Link = Sec->SectionIndex;
+ unsigned NumEntries = 2; // nbucket and nchain.
+ NumEntries += In.DynSymTab->getNumSymbols(); // The chain entries.
+ // Create as many buckets as there are symbols.
+ NumEntries += In.DynSymTab->getNumSymbols();
+ this->Size = NumEntries * 4; // every entry is one 32-bit word
+void HashTableSection::writeTo(uint8_t *Buf) { // Emits nbucket, nchain, the bucket array and the chain array.
+ // See comment in GnuHashTableSection::writeTo.
+ memset(Buf, 0, Size);
+ unsigned NumSymbols = In.DynSymTab->getNumSymbols();
+ uint32_t *P = reinterpret_cast<uint32_t *>(Buf);
+ write32(P++, NumSymbols); // nbucket
+ write32(P++, NumSymbols); // nchain
+ uint32_t *Buckets = P;
+ uint32_t *Chains = P + NumSymbols; // chains follow the NumSymbols bucket words
+ for (const SymbolTableEntry &S : In.DynSymTab->getSymbols()) {
+ Symbol *Sym = S.Sym;
+ StringRef Name = Sym->getName();
+ unsigned I = Sym->DynsymIndex;
+ uint32_t Hash = hashSysV(Name) % NumSymbols;
+ Chains[I] = Buckets[Hash]; // link to the previous head of this bucket (raw copy of the already-encoded word)
+ write32(Buckets + Hash, I); // this symbol becomes the new head of the bucket
+ }
+// On PowerPC64 the lazy symbol resolvers go into the `global linkage table`
+// in the .glink section, rather than the typical .plt section.
+PltSection::PltSection(bool IsIplt)
+ Config->EMachine == EM_PPC64 ? ".glink" : ".plt"),
+ HeaderSize(!IsIplt || Config->ZRetpolineplt ? Target->PltHeaderSize : 0),
+ IsIplt(IsIplt) {
+ // The PLT needs to be writable on SPARC as the dynamic linker will
+ // modify the instructions in the PLT entries.
+ if (Config->EMachine == EM_SPARCV9)
+ this->Flags |= SHF_WRITE;
+void PltSection::writeTo(uint8_t *Buf) { // Writes the optional PLT header followed by one PLT entry per element of Entries.
+ // At beginning of PLT or retpoline IPLT, we have code to call the dynamic
+ // linker to resolve dynsyms at runtime. Write such code.
+ if (HeaderSize > 0)
+ Target->writePltHeader(Buf);
+ size_t Off = HeaderSize; // byte offset of the next entry within this section
+ // The IPlt is immediately after the Plt, account for this in RelOff
+ unsigned PltOff = getPltRelocOff();
+ for (auto &I : Entries) {
+ const Symbol *B = I.first;
+ unsigned RelOff = I.second + PltOff; // I.second is the offset recorded by addEntry
+ uint64_t Got = B->getGotPltVA();
+ uint64_t Plt = this->getVA() + Off;
+ Target->writePlt(Buf + Off, Got, Plt, B->PltIndex, RelOff);
+ Off += Target->PltEntrySize;
+ }
+template <class ELFT> void PltSection::addEntry(Symbol &Sym) { // Appends Sym to this PLT and records its relocation offset.
+ Sym.PltIndex = Entries.size(); // index of the entry about to be added
+ RelocationBaseSection *PltRelocSection = In.RelaPlt;
+ if (IsIplt) {
+ PltRelocSection = In.RelaIplt; // IPLT entries use In.RelaIplt instead of In.RelaPlt
+ Sym.IsInIplt = true;
+ }
+ unsigned RelOff =
+ static_cast<RelocationSection<ELFT> *>(PltRelocSection)->getRelocOffset();
+ Entries.push_back(std::make_pair(&Sym, RelOff)); // RelOff is read back in writeTo
+size_t PltSection::getSize() const { // Header bytes plus one fixed-size entry per element of Entries.
+ return HeaderSize + Entries.size() * Target->PltEntrySize;
+// Some architectures use additional symbols in the PLT section. For
+// example, ARM uses mapping symbols to aid disassembly.
+void PltSection::addSymbols() { // Lets the target add symbols for the PLT header and for each PLT entry.
+ // The PLT may have symbols defined for the Header, the IPLT has no header
+ if (!IsIplt)
+ Target->addPltHeaderSymbols(*this);
+ size_t Off = HeaderSize; // offset of the first entry, advanced by PltEntrySize per entry
+ for (size_t I = 0; I < Entries.size(); ++I) {
+ Target->addPltSymbols(*this, Off);
+ Off += Target->PltEntrySize;
+ }
+unsigned PltSection::getPltRelocOff() const { // For the IPLT returns the size of In.Plt, otherwise 0.
+ return IsIplt ? In.Plt->getSize() : 0;
+// The string hash function for .gdb_index.
+static uint32_t computeGdbHash(StringRef S) {
+ // Case-insensitive string hash: each step scales the running value by 67
+ // and adds the lower-cased byte minus 113 (all modulo 2^32).
+ uint32_t Hash = 0;
+ for (uint8_t Ch : S) {
+ Hash *= 67;
+ Hash += toLower(Ch) - 113;
+ }
+ return Hash;
+ : SyntheticSection(0, SHT_PROGBITS, 1, ".gdb_index") {}
+// Returns the desired size of an on-disk hash table for a .gdb_index section.
+// There's a tradeoff between size and collision rate. We aim for 75% utilization.
+size_t GdbIndexSection::computeSymtabSize() const { // Number of hash table slots: a power of two, never below 1024.
+ return std::max<size_t>(NextPowerOf2(Symbols.size() * 4 / 3), 1024);
+// Compute the output section size.
+void GdbIndexSection::initOutputSize() { // Sums header, symbol table, CU list, address areas and constant pool into Size.
+ Size = sizeof(GdbIndexHeader) + computeSymtabSize() * 8; // each symbol table slot is 8 bytes
+ for (GdbChunk &Chunk : Chunks)
+ Size += Chunk.CompilationUnits.size() * 16 + Chunk.AddressAreas.size() * 20; // 16 bytes per CU entry, 20 per address entry
+ // Add the constant pool size if exists.
+ if (!Symbols.empty()) {
+ GdbSymbol &Sym = Symbols.back(); // the last symbol's name ends the constant pool
+ Size += Sym.NameOff + Sym.Name.size() + 1; // +1 for the terminating NUL byte
+ }
+static std::vector<InputSection *> getDebugInfoSections() {
+ // Gather every regular InputSection named ".debug_info", in input order.
+ std::vector<InputSection *> DebugInfo;
+ for (InputSectionBase *Base : InputSections) {
+ auto *IS = dyn_cast<InputSection>(Base);
+ if (IS && IS->Name == ".debug_info")
+ DebugInfo.push_back(IS);
+ }
+ return DebugInfo;
+static std::vector<GdbIndexSection::CuEntry> readCuList(DWARFContext &Dwarf) { // One {offset, length} entry per compile unit in Dwarf.
+ std::vector<GdbIndexSection::CuEntry> Ret;
+ for (std::unique_ptr<DWARFUnit> &Cu : Dwarf.compile_units())
+ Ret.push_back({Cu->getOffset(), Cu->getLength() + 4}); // NOTE(review): +4 presumably covers the 4-byte unit length field - confirm
+ return Ret;
+// Collects, for every compile unit in Dwarf, the address ranges that fall in
+// live input sections of the file that owns Sec. Each entry records the
+// section, the range as offsets relative to the section's file offset, and
+// the index of the compile unit within this object. On a DWARF parse error
+// an error is reported and an empty vector is returned.
+static std::vector<GdbIndexSection::AddressEntry>
+readAddressAreas(DWARFContext &Dwarf, InputSection *Sec) {
+ std::vector<GdbIndexSection::AddressEntry> Ret;
+ // The section table belongs to the file, not to a compile unit, so it is
+ // fetched once rather than once per CU.
+ ArrayRef<InputSectionBase *> Sections = Sec->File->getSections();
+ uint32_t CuIdx = 0;
+ for (std::unique_ptr<DWARFUnit> &Cu : Dwarf.compile_units()) {
+ Expected<DWARFAddressRangesVector> Ranges = Cu->collectAddressRanges();
+ if (!Ranges) {
+ error(toString(Sec) + ": " + toString(Ranges.takeError()));
+ return {};
+ }
+ for (DWARFAddressRange &R : *Ranges) {
+ // A range that is not tied to any section carries a sentinel index
+ // (all ones in LLVM); using it to index Sections would read out of
+ // bounds, so skip anything outside the table.
+ if (R.SectionIndex >= Sections.size())
+ continue;
+ InputSectionBase *S = Sections[R.SectionIndex];
+ if (!S || S == &InputSection::Discarded || !S->Live)
+ continue;
+ // Range list with zero size has no effect.
+ if (R.LowPC == R.HighPC)
+ continue;
+ auto *IS = cast<InputSection>(S);
+ uint64_t Offset = IS->getOffsetInFile();
+ Ret.push_back({IS, R.LowPC - Offset, R.HighPC - Offset, CuIdx});
+ }
+ ++CuIdx;
+ }
+ return Ret;
+template <class ELFT>
+static std::vector<GdbIndexSection::NameAttrEntry>
+readPubNamesAndTypes(const LLDDwarfObj<ELFT> &Obj,
+ const std::vector<GdbIndexSection::CuEntry> &CUs) { // Returns one entry per name found in Obj's GNU pubnames and pubtypes sections.
+ const DWARFSection &PubNames = Obj.getGnuPubNamesSection();
+ const DWARFSection &PubTypes = Obj.getGnuPubTypesSection();
+ std::vector<GdbIndexSection::NameAttrEntry> Ret;
+ for (const DWARFSection *Pub : {&PubNames, &PubTypes}) {
+ DWARFDebugPubTable Table(Obj, *Pub, Config->IsLE, true);
+ for (const DWARFDebugPubTable::Set &Set : Table.getData()) {
+ // The value written into the constant pool is Kind << 24 | CuIndex. As we
+ // don't know how many compilation units precede this object to compute
+ // CuIndex, we compute (Kind << 24 | CuIndexInThisObject) instead, and add
+ // the number of preceding compilation units later.
+ uint32_t I =
+ lower_bound(CUs, Set.Offset,
+ [](GdbIndexSection::CuEntry CU, uint32_t Offset) {
+ return CU.CuOffset < Offset;
+ }) -
+ CUs.begin(); // NOTE(review): I equals CUs.size() when no CU offset is >= Set.Offset; that case is not checked here
+ for (const DWARFDebugPubTable::Entry &Ent : Set.Entries)
+ Ret.push_back({{Ent.Name, computeGdbHash(Ent.Name)},
+ (Ent.Descriptor.toBits() << 24) | I});
+ }
+ }
+ return Ret;
+// Create a list of symbols from a given list of symbol names and types
+// by uniquifying them by name.
+static std::vector<GdbIndexSection::GdbSymbol>
+createSymbols(ArrayRef<std::vector<GdbIndexSection::NameAttrEntry>> NameAttrs,
+ const std::vector<GdbIndexSection::GdbChunk> &Chunks) { // NameAttrs[I] is read together with Chunks[I]; returns the unique symbols with their offsets assigned.
+ typedef GdbIndexSection::GdbSymbol GdbSymbol;
+ typedef GdbIndexSection::NameAttrEntry NameAttrEntry;
+ // For each chunk, compute the number of compilation units preceding it.
+ uint32_t CuIdx = 0;
+ std::vector<uint32_t> CuIdxs(Chunks.size());
+ for (uint32_t I = 0, E = Chunks.size(); I != E; ++I) {
+ CuIdxs[I] = CuIdx;
+ CuIdx += Chunks[I].CompilationUnits.size();
+ }
+ // The number of symbols we will handle in this function is of the order
+ // of millions for very large executables, so we use multi-threading to
+ // speed it up.
+ size_t NumShards = 32;
+ size_t Concurrency = 1;
+ if (ThreadsEnabled)
+ Concurrency =
+ std::min<size_t>(PowerOf2Floor(hardware_concurrency()), NumShards);
+ // A sharded map to uniquify symbols by name.
+ std::vector<DenseMap<CachedHashStringRef, size_t>> Map(NumShards);
+ size_t Shift = 32 - countTrailingZeros(NumShards); // the top log2(NumShards) bits of the 32-bit hash select the shard
+ // Instantiate GdbSymbols while uniquifying them by name.
+ std::vector<std::vector<GdbSymbol>> Symbols(NumShards);
+ parallelForEachN(0, Concurrency, [&](size_t ThreadId) {
+ uint32_t I = 0; // index of the current object, used to look up CuIdxs
+ for (ArrayRef<NameAttrEntry> Entries : NameAttrs) {
+ for (const NameAttrEntry &Ent : Entries) {
+ size_t ShardId = Ent.Name.hash() >> Shift;
+ if ((ShardId & (Concurrency - 1)) != ThreadId) // each shard is handled by exactly one thread
+ continue;
+ uint32_t V = Ent.CuIndexAndAttrs + CuIdxs[I]; // turn the per-object CU index into a global one
+ size_t &Idx = Map[ShardId][Ent.Name]; // 0 means not seen yet; otherwise position + 1 in Symbols[ShardId]
+ if (Idx) {
+ Symbols[ShardId][Idx - 1].CuVector.push_back(V);
+ continue;
+ }
+ Idx = Symbols[ShardId].size() + 1;
+ Symbols[ShardId].push_back({Ent.Name, {V}, 0, 0}); // the two offsets are filled in at the end of this function
+ }
+ ++I;
+ }
+ });
+ size_t NumSymbols = 0;
+ for (ArrayRef<GdbSymbol> V : Symbols)
+ NumSymbols += V.size();
+ // The return type is a flattened vector, so we'll copy each vector
+ // contents to Ret.
+ std::vector<GdbSymbol> Ret;
+ Ret.reserve(NumSymbols);
+ for (std::vector<GdbSymbol> &Vec : Symbols)
+ for (GdbSymbol &Sym : Vec)
+ Ret.push_back(std::move(Sym));
+ // CU vectors and symbol names are adjacent in the output file.
+ // We can compute their offsets in the output file now.
+ size_t Off = 0;
+ for (GdbSymbol &Sym : Ret) {
+ Sym.CuVectorOff = Off;
+ Off += (Sym.CuVector.size() + 1) * 4; // one 32-bit count plus one 32-bit word per element
+ }
+ for (GdbSymbol &Sym : Ret) {
+ Sym.NameOff = Off; // names start after all CU vectors
+ Off += Sym.Name.size() + 1; // +1 for the terminating NUL byte
+ }
+ return Ret;
+// Returns a newly-created .gdb_index section.
+template <class ELFT> GdbIndexSection *GdbIndexSection::create() { // Builds the section from every .debug_info input section.
+ std::vector<InputSection *> Sections = getDebugInfoSections();
+ // .debug_gnu_pub{names,types} are useless in executables.
+ // They are present in input object files solely for creating
+ // a .gdb_index. So we can remove them from the output.
+ for (InputSectionBase *S : InputSections)
+ if (S->Name == ".debug_gnu_pubnames" || S->Name == ".debug_gnu_pubtypes")
+ S->Live = false;
+ std::vector<GdbChunk> Chunks(Sections.size());
+ std::vector<std::vector<NameAttrEntry>> NameAttrs(Sections.size());
+ parallelForEachN(0, Sections.size(), [&](size_t I) { // each iteration writes only Chunks[I] and NameAttrs[I]
+ ObjFile<ELFT> *File = Sections[I]->getFile<ELFT>();
+ DWARFContext Dwarf(make_unique<LLDDwarfObj<ELFT>>(File));
+ Chunks[I].Sec = Sections[I];
+ Chunks[I].CompilationUnits = readCuList(Dwarf);
+ Chunks[I].AddressAreas = readAddressAreas(Dwarf, Sections[I]);
+ NameAttrs[I] = readPubNamesAndTypes<ELFT>(
+ static_cast<const LLDDwarfObj<ELFT> &>(Dwarf.getDWARFObj()),
+ Chunks[I].CompilationUnits);
+ });
+ auto *Ret = make<GdbIndexSection>();
+ Ret->Chunks = std::move(Chunks);
+ Ret->Symbols = createSymbols(NameAttrs, Ret->Chunks);
+ Ret->initOutputSize(); // needs Chunks and Symbols to be set first
+ return Ret;
+void GdbIndexSection::writeTo(uint8_t *Buf) { // Emits header, CU list, address area, symbol hash table and constant pool, in that order.
+ // Write the header.
+ auto *Hdr = reinterpret_cast<GdbIndexHeader *>(Buf);
+ uint8_t *Start = Buf; // all header offsets are relative to the start of the section
+ Hdr->Version = 7;
+ Buf += sizeof(*Hdr);
+ // Write the CU list.
+ Hdr->CuListOff = Buf - Start;
+ for (GdbChunk &Chunk : Chunks) {
+ for (CuEntry &Cu : Chunk.CompilationUnits) {
+ write64le(Buf, Chunk.Sec->OutSecOff + Cu.CuOffset);
+ write64le(Buf + 8, Cu.CuLength);
+ Buf += 16;
+ }
+ }
+ // Write the address area.
+ Hdr->CuTypesOff = Buf - Start; // same offset as the address area: nothing is written for CU types here
+ Hdr->AddressAreaOff = Buf - Start;
+ uint32_t CuOff = 0; // number of CUs in the chunks already written
+ for (GdbChunk &Chunk : Chunks) {
+ for (AddressEntry &E : Chunk.AddressAreas) {
+ uint64_t BaseAddr = E.Section->getVA(0);
+ write64le(Buf, BaseAddr + E.LowAddress);
+ write64le(Buf + 8, BaseAddr + E.HighAddress);
+ write32le(Buf + 16, E.CuIndex + CuOff);
+ Buf += 20;
+ }
+ CuOff += Chunk.CompilationUnits.size();
+ }
+ // Write the on-disk open-addressing hash table containing symbols.
+ Hdr->SymtabOff = Buf - Start;
+ size_t SymtabSize = computeSymtabSize();
+ uint32_t Mask = SymtabSize - 1; // valid as a mask because SymtabSize is a power of two
+ for (GdbSymbol &Sym : Symbols) {
+ uint32_t H = Sym.Name.hash();
+ uint32_t I = H & Mask;
+ uint32_t Step = ((H * 17) & Mask) | 1; // odd probe step
+ while (read32le(Buf + I * 8)) // nonzero first word means the slot is taken; assumes free slots read as zero - TODO confirm
+ I = (I + Step) & Mask;
+ write32le(Buf + I * 8, Sym.NameOff);
+ write32le(Buf + I * 8 + 4, Sym.CuVectorOff);
+ }
+ Buf += SymtabSize * 8;
+ // Write the string pool.
+ Hdr->ConstantPoolOff = Buf - Start;
+ parallelForEach(Symbols, [&](GdbSymbol &Sym) {
+ memcpy(Buf + Sym.NameOff, Sym.Name.data(), Sym.Name.size()); // no NUL is written; presumably the buffer is pre-zeroed - confirm
+ });
+ // Write the CU vectors.
+ for (GdbSymbol &Sym : Symbols) {
+ write32le(Buf, Sym.CuVector.size()); // element count first, then the elements
+ Buf += 4;
+ for (uint32_t Val : Sym.CuVector) {
+ write32le(Buf, Val);
+ Buf += 4;
+ }
+ }
+bool GdbIndexSection::empty() const { return Chunks.empty(); } // empty when no chunk was collected
+ : SyntheticSection(SHF_ALLOC, SHT_PROGBITS, 4, ".eh_frame_hdr") {}
+// .eh_frame_hdr contains a binary search table of pointers to FDEs.
+// Each entry of the search table consists of two values,
+// the starting PC that an FDE covers, and the FDE's address.
+// It is sorted by PC.
+void EhFrameHeader::writeTo(uint8_t *Buf) { // Writes the 12-byte header followed by one 8-byte entry per FDE.
+ typedef EhFrameSection::FdeData FdeData;
+ std::vector<FdeData> Fdes = In.EhFrame->getFdeData();
+ Buf[0] = 1; // presumably the format version - confirm against the .eh_frame_hdr spec
+ Buf[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; // encoding of the value stored at Buf + 4
+ Buf[2] = DW_EH_PE_udata4; // encoding of the count stored at Buf + 8
+ Buf[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; // encoding of the table entries
+ write32(Buf + 4, In.EhFrame->getParent()->Addr - this->getVA() - 4); // relative to the address of this field (VA + 4)
+ write32(Buf + 8, Fdes.size());
+ Buf += 12;
+ for (FdeData &Fde : Fdes) {
+ write32(Buf, Fde.PcRel);
+ write32(Buf + 4, Fde.FdeVARel);
+ Buf += 8;
+ }
+size_t EhFrameHeader::getSize() const { // Size in bytes, derived from In.EhFrame->NumFdes.
+ // .eh_frame_hdr has a 12 bytes header followed by an array of FDEs.
+ return 12 + In.EhFrame->NumFdes * 8;
+bool EhFrameHeader::empty() const { return In.EhFrame->empty(); } // empty exactly when In.EhFrame is empty
+ : SyntheticSection(SHF_ALLOC, SHT_GNU_verdef, sizeof(uint32_t),
+ ".gnu.version_d") {}
+static StringRef getFileDefName() {
+ // Prefer the configured soname; fall back to the output file name when no
+ // soname was given.
+ if (Config->SoName.empty())
+ return Config->OutputFile;
+ return Config->SoName;
+void VersionDefinitionSection::finalizeContents() { // Adds the version names to In.DynStrTab and sets sh_link and sh_info.
+ FileDefNameOff = In.DynStrTab->addString(getFileDefName());
+ for (VersionDefinition &V : Config->VersionDefinitions)
+ V.NameOff = In.DynStrTab->addString(V.Name);
+ if (OutputSection *Sec = In.DynStrTab->getParent())
+ getParent()->Link = Sec->SectionIndex;
+ // sh_info should be set to the number of definitions. This fact is missed in
+ // documentation, but confirmed by binutils community:
+ // https://sourceware.org/ml/binutils/2014-11/msg00355.html
+ getParent()->Info = getVerDefNum();
+void VersionDefinitionSection::writeOne(uint8_t *Buf, uint32_t Index,
+ StringRef Name, size_t NameOff) { // Writes one 28-byte record at Buf: a verdef (20 bytes) followed by one veraux (8 bytes).
+ uint16_t Flags = Index == 1 ? VER_FLG_BASE : 0; // only index 1 gets the base flag
+ // Write a verdef.
+ write16(Buf, 1); // vd_version
+ write16(Buf + 2, Flags); // vd_flags
+ write16(Buf + 4, Index); // vd_ndx
+ write16(Buf + 6, 1); // vd_cnt
+ write32(Buf + 8, hashSysV(Name)); // vd_hash
+ write32(Buf + 12, 20); // vd_aux: the veraux is written at offset 20 of this record
+ write32(Buf + 16, 28); // vd_next: total bytes written per record
+ // Write a veraux.
+ write32(Buf + 20, NameOff); // vda_name
+ write32(Buf + 24, 0); // vda_next
+void VersionDefinitionSection::writeTo(uint8_t *Buf) { // Writes the file's own definition first, then one record per configured version.
+ writeOne(Buf, 1, getFileDefName(), FileDefNameOff);
+ for (VersionDefinition &V : Config->VersionDefinitions) {
+ Buf += EntrySize; // advance first: slot 0 already holds the file definition
+ writeOne(Buf, V.Id, V.Name, V.NameOff);
+ }
+ // Need to terminate the last version definition.
+ write32(Buf + 16, 0); // vd_next
+size_t VersionDefinitionSection::getSize() const { // One EntrySize record per version definition.
+ return EntrySize * getVerDefNum();
+// .gnu.version is a table where each entry is 2 bytes long.
+template <class ELFT>
+ : SyntheticSection(SHF_ALLOC, SHT_GNU_versym, sizeof(uint16_t),
+ ".gnu.version") {
+ this->Entsize = 2;
+template <class ELFT> void VersionTableSection<ELFT>::finalizeContents() { // Sets sh_link to the section index of In.DynSymTab's parent.
+ // As of June 2016 the GNU docs do not mention that the sh_link field
+ // should be set, but the Sun docs do. Also, readelf relies on this field.
+ getParent()->Link = In.DynSymTab->getParent()->SectionIndex;
+template <class ELFT> size_t VersionTableSection<ELFT>::getSize() const { // Two bytes per dynamic symbol plus one extra leading entry.
+ return (In.DynSymTab->getSymbols().size() + 1) * 2;
+template <class ELFT> void VersionTableSection<ELFT>::writeTo(uint8_t *Buf) { // Writes each dynamic symbol's VersionId as a 16-bit value.
+ Buf += 2; // skip the first 2-byte entry; nothing is written to it here
+ for (const SymbolTableEntry &S : In.DynSymTab->getSymbols()) {
+ write16(Buf, S.Sym->VersionId);
+ Buf += 2;
+ }
+template <class ELFT> bool VersionTableSection<ELFT>::empty() const { // Empty when there is no In.VerDef and VerNeed is empty.
+ return !In.VerDef && InX<ELFT>::VerNeed->empty();
+template <class ELFT>
+ : SyntheticSection(SHF_ALLOC, SHT_GNU_verneed, sizeof(uint32_t),
+ ".gnu.version_r") {
+ // Identifiers in the verneed section start at 2 because 0 and 1 are reserved.
+ // The first identifiers are reserved by the verdef section if it exists.
+ NextIndex = getVerDefNum() + 1;
+template <class ELFT> void VersionNeedSection<ELFT>::addSymbol(Symbol *SS) { // Assigns SS->VersionId, registering the DSO and version on first use.
+ auto &File = cast<SharedFile<ELFT>>(*SS->File);
+ if (SS->VerdefIndex == VER_NDX_GLOBAL) { // no specific version is referenced: nothing to register
+ SS->VersionId = VER_NDX_GLOBAL;
+ return;
+ }
+ // If we don't already know that we need an Elf_Verneed for this DSO, prepare
+ // to create one by adding it to our needed list and creating a dynstr entry
+ // for the soname.
+ if (File.VerdefMap.empty())
+ Needed.push_back({&File, In.DynStrTab->addString(File.SoName)});
+ const typename ELFT::Verdef *Ver = File.Verdefs[SS->VerdefIndex];
+ typename SharedFile<ELFT>::NeededVer &NV = File.VerdefMap[Ver]; // operator[] inserts a fresh entry on first use
+ // If we don't already know that we need an Elf_Vernaux for this Elf_Verdef,
+ // prepare to create one by allocating a version identifier and creating a
+ // dynstr entry for the version name.
+ if (NV.Index == 0) {
+ NV.StrTab = In.DynStrTab->addString(File.getStringTable().data() +
+ Ver->getAux()->vda_name);
+ NV.Index = NextIndex++;
+ }
+ SS->VersionId = NV.Index;
+template <class ELFT> void VersionNeedSection<ELFT>::writeTo(uint8_t *Buf) { // Writes one Elf_Verneed per needed DSO, then all Elf_Vernaux records.
+ // The Elf_Verneeds need to appear first, followed by the Elf_Vernauxs.
+ auto *Verneed = reinterpret_cast<Elf_Verneed *>(Buf);
+ auto *Vernaux = reinterpret_cast<Elf_Vernaux *>(Verneed + Needed.size());
+ for (std::pair<SharedFile<ELFT> *, size_t> &P : Needed) {
+ // Create an Elf_Verneed for this DSO.
+ Verneed->vn_version = 1;
+ Verneed->vn_cnt = P.first->VerdefMap.size();
+ Verneed->vn_file = P.second; // dynstr offset recorded by addSymbol
+ Verneed->vn_aux =
+ reinterpret_cast<char *>(Vernaux) - reinterpret_cast<char *>(Verneed); // byte distance from this Verneed to its first Vernaux
+ Verneed->vn_next = sizeof(Elf_Verneed);
+ ++Verneed;
+ // Create the Elf_Vernauxs for this Elf_Verneed. The loop iterates over
+ // VerdefMap, which will only contain references to needed version
+ // definitions. Each Elf_Vernaux is based on the information contained in
+ // the Elf_Verdef in the source DSO. This loop iterates over a std::map of
+ // pointers, but is deterministic because the pointers refer to Elf_Verdef
+ // data structures within a single input file.
+ for (auto &NV : P.first->VerdefMap) {
+ Vernaux->vna_hash = NV.first->vd_hash;
+ Vernaux->vna_flags = 0;
+ Vernaux->vna_other = NV.second.Index;
+ Vernaux->vna_name = NV.second.StrTab;
+ Vernaux->vna_next = sizeof(Elf_Vernaux);
+ ++Vernaux;
+ }
+ Vernaux[-1].vna_next = 0; // terminate this DSO's Vernaux list
+ }
+ Verneed[-1].vn_next = 0; // terminate the Verneed list; NOTE(review): assumes Needed is non-empty - confirm callers
+template <class ELFT> void VersionNeedSection<ELFT>::finalizeContents() { // Sets sh_link from In.DynStrTab's parent and sh_info to the number of needed DSOs.
+ if (OutputSection *Sec = In.DynStrTab->getParent())
+ getParent()->Link = Sec->SectionIndex;
+ getParent()->Info = Needed.size();
+template <class ELFT> size_t VersionNeedSection<ELFT>::getSize() const { // Bytes for all Elf_Verneed records plus all Elf_Vernaux records.
+ unsigned Size = Needed.size() * sizeof(Elf_Verneed);
+ for (const std::pair<SharedFile<ELFT> *, size_t> &P : Needed)
+ Size += P.first->VerdefMap.size() * sizeof(Elf_Vernaux);
+ return Size;
+template <class ELFT> bool VersionNeedSection<ELFT>::empty() const { // Empty when getNeedNum() reports zero.
+ return getNeedNum() == 0;
+void MergeSyntheticSection::addSection(MergeInputSection *MS) { // Adopts MS: sets its Parent to this section and records it in Sections.
+ MS->Parent = this;
+ Sections.push_back(MS);
+MergeTailSection::MergeTailSection(StringRef Name, uint32_t Type,
+ uint64_t Flags, uint32_t Alignment)
+ : MergeSyntheticSection(Name, Type, Flags, Alignment),
+ Builder(StringTableBuilder::RAW, Alignment) {} // the string table builder uses the same alignment as the section
+size_t MergeTailSection::getSize() const { return Builder.getSize(); } // size is whatever the string table builder reports
+void MergeTailSection::writeTo(uint8_t *Buf) { Builder.write(Buf); } // the builder writes the section contents into Buf
+void MergeTailSection::finalizeContents() { // Builds the string table from all live pieces and stores each piece's output offset.
+ // Add all string pieces to the string table builder to create section
+ // contents.
+ for (MergeInputSection *Sec : Sections)
+ for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I)
+ if (Sec->Pieces[I].Live) // pieces that are not live are left out of the output
+ Builder.add(Sec->getData(I));
+ // Fix the string table content. After this, the contents will never change.
+ Builder.finalize();
+ // finalize() fixed tail-optimized strings, so we can now get
+ // offsets of strings. Get an offset for each string and save it
+ // to a corresponding StringPiece for easy access.
+ for (MergeInputSection *Sec : Sections)
+ for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I)
+ if (Sec->Pieces[I].Live)
+ Sec->Pieces[I].OutputOff = Builder.getOffset(Sec->getData(I));
+void MergeNoTailSection::writeTo(uint8_t *Buf) { // Writes every shard at its ShardOffsets position within Buf.
+ for (size_t I = 0; I < NumShards; ++I)
+ Shards[I].write(Buf + ShardOffsets[I]);
+// This function is very hot (i.e. it can take several seconds to finish)
+// because sometimes the number of inputs is in an order of magnitude of
+// millions. So, we use multi-threading.
+// For any strings S and T, we know S is not mergeable with T if S's hash
+// value is different from T's. If that's the case, we can safely put S and
+// T into different string builders without worrying about merge misses.
+// We do it in parallel.
+void MergeNoTailSection::finalizeContents() { // Distributes live pieces over the shards, lays the shards out and fixes up piece offsets.
+ // Initializes string table builders.
+ for (size_t I = 0; I < NumShards; ++I)
+ Shards.emplace_back(StringTableBuilder::RAW, Alignment);
+ // Concurrency level. Must be a power of 2 to avoid expensive modulo
+ // operations in the following tight loop.
+ size_t Concurrency = 1;
+ if (ThreadsEnabled)
+ Concurrency =
+ std::min<size_t>(PowerOf2Floor(hardware_concurrency()), NumShards);
+ // Add section pieces to the builders.
+ parallelForEachN(0, Concurrency, [&](size_t ThreadId) {
+ for (MergeInputSection *Sec : Sections) {
+ for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) {
+ size_t ShardId = getShardId(Sec->Pieces[I].Hash);
+ if ((ShardId & (Concurrency - 1)) == ThreadId && Sec->Pieces[I].Live) // each shard is touched by exactly one thread
+ Sec->Pieces[I].OutputOff = Shards[ShardId].add(Sec->getData(I)); // offset within the shard for now
+ }
+ }
+ });
+ // Compute an in-section offset for each shard.
+ size_t Off = 0;
+ for (size_t I = 0; I < NumShards; ++I) {
+ Shards[I].finalizeInOrder();
+ if (Shards[I].getSize() > 0) // only non-empty shards trigger alignment padding
+ Off = alignTo(Off, Alignment);
+ ShardOffsets[I] = Off;
+ Off += Shards[I].getSize();
+ }
+ Size = Off;
+ // So far, section pieces have offsets from beginning of shards, but
+ // we want offsets from beginning of the whole section. Fix them.
+ parallelForEach(Sections, [&](MergeInputSection *Sec) {
+ for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I)
+ if (Sec->Pieces[I].Live)
+ Sec->Pieces[I].OutputOff +=
+ ShardOffsets[getShardId(Sec->Pieces[I].Hash)];
+ });
+static MergeSyntheticSection *createMergeSynthetic(StringRef Name,
+ uint32_t Type,
+ uint64_t Flags,
+ uint32_t Alignment) {
+ // Tail merging is used only for SHF_STRINGS sections and only when
+ // Config->Optimize is at least 2; everything else gets the sharded,
+ // non-tail-merging implementation.
+ if (!(Flags & SHF_STRINGS) || Config->Optimize < 2)
+ return make<MergeNoTailSection>(Name, Type, Flags, Alignment);
+ return make<MergeTailSection>(Name, Type, Flags, Alignment);
+template <class ELFT> void elf::splitSections() { // Splits every mergeable and EH input section into pieces, in parallel.
+ // splitIntoPieces needs to be called on each MergeInputSection
+ // before calling finalizeContents().
+ parallelForEach(InputSections, [](InputSectionBase *Sec) {
+ if (auto *S = dyn_cast<MergeInputSection>(Sec))
+ S->splitIntoPieces();
+ else if (auto *Eh = dyn_cast<EhInputSection>(Sec))
+ Eh->split<ELFT>();
+ });
+// This function scans over the input sections to create mergeable
+// synthetic sections.
+// It removes MergeInputSections from the input section array and adds
+// new synthetic sections at the location of the first input section
+// that it replaces. It then finalizes each synthetic section in order
+// to compute an output offset for each piece of each input section.
+void elf::mergeSections() { // Replaces MergeInputSections in InputSections with synthetic merge sections.
+ std::vector<MergeSyntheticSection *> MergeSections;
+ for (InputSectionBase *&S : InputSections) { // S is a reference so the slot can be overwritten in place
+ MergeInputSection *MS = dyn_cast<MergeInputSection>(S);
+ if (!MS)
+ continue;
+ // We do not want to handle sections that are not alive, so just remove
+ // them instead of trying to merge.
+ if (!MS->Live) {
+ S = nullptr; // null slots are erased at the end of this function
+ continue;
+ }
+ StringRef OutsecName = getOutputSectionName(MS);
+ uint32_t Alignment = std::max<uint32_t>(MS->Alignment, MS->Entsize);
+ auto I = llvm::find_if(MergeSections, [=](MergeSyntheticSection *Sec) {
+ // While we could create a single synthetic section for two different
+ // values of Entsize, it is better to take Entsize into consideration.
+ //
+ // With a single synthetic section no two pieces with different Entsize
+ // could be equal, so we may as well have two sections.
+ //
+ // Using Entsize in here also allows us to propagate it to the synthetic
+ // section.
+ return Sec->Name == OutsecName && Sec->Flags == MS->Flags &&
+ Sec->Entsize == MS->Entsize && Sec->Alignment == Alignment;
+ });
+ if (I == MergeSections.end()) {
+ MergeSyntheticSection *Syn =
+ createMergeSynthetic(OutsecName, MS->Type, MS->Flags, Alignment);
+ MergeSections.push_back(Syn);
+ I = std::prev(MergeSections.end());
+ S = Syn; // the new synthetic section takes the slot of the first section it replaces
+ Syn->Entsize = MS->Entsize;
+ } else {
+ S = nullptr; // already represented by an earlier synthetic section
+ }
+ (*I)->addSection(MS);
+ }
+ for (auto *MS : MergeSections)
+ MS->finalizeContents();
+ std::vector<InputSectionBase *> &V = InputSections;
+ V.erase(std::remove(V.begin(), V.end(), nullptr), V.end()); // drop the slots cleared above
+ : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, Config->Wordsize,
+ ".rld_map") {}
+ Config->Wordsize, ".ARM.exidx") {}
+// Write a terminating sentinel entry to the end of the .ARM.exidx table.
+// This section will have been sorted last in the .ARM.exidx table.
+// This table entry will have the form:
+// | PREL31 upper bound of code that has exception tables | EXIDX_CANTUNWIND |
+// The sentinel must have the PREL31 value of an address higher than any
+// address described by any other table entry.
+void ARMExidxSentinelSection::writeTo(uint8_t *Buf) { // Writes the 8-byte sentinel entry described in the comment above.
+ assert(Highest);
+ uint64_t S = Highest->getVA(Highest->getSize()); // address just past the end of Highest
+ uint64_t P = getVA();
+ Target->relocateOne(Buf, R_ARM_PREL31, S - P);
+ write32le(Buf + 4, 1); // second word; 1 is EXIDX_CANTUNWIND per the comment above this function
+// The sentinel has to be removed if there are no other .ARM.exidx entries.
+bool ARMExidxSentinelSection::empty() const {
+ // The sentinel is only worth keeping when the parent output section holds
+ // at least one input section that is not itself a sentinel.
+ for (InputSection *IS : getInputSections(getParent())) {
+ if (isa<ARMExidxSentinelSection>(IS))
+ continue;
+ return false;
+ }
+ return true;
+bool ARMExidxSentinelSection::classof(const SectionBase *D) { // True for synthetic sections whose type is SHT_ARM_EXIDX.
+ return D->kind() == InputSectionBase::Synthetic && D->Type == SHT_ARM_EXIDX;
+ThunkSection::ThunkSection(OutputSection *OS, uint64_t Off)
+ Config->Wordsize, ".text.thunk") {
+ this->Parent = OS;
+ this->OutSecOff = Off;
+void ThunkSection::addThunk(Thunk *T) { // Records T and lets it add its symbols to this section.
+ Thunks.push_back(T);
+ T->addSymbols(*this);
+void ThunkSection::writeTo(uint8_t *Buf) { // Writes each thunk at its own Offset within Buf.
+ for (Thunk *T : Thunks)
+ T->writeTo(Buf + T->Offset);
+InputSection *ThunkSection::getTargetInputSection() const { // Target input section of the first thunk, or nullptr if there are no thunks.
+ if (Thunks.empty())
+ return nullptr;
+ const Thunk *T = Thunks.front(); // only the first thunk is consulted
+ return T->getTargetInputSection();
+bool ThunkSection::assignOffsets() {
+ // Lay the thunks out back to back, honouring each thunk's alignment, and
+ // report whether the total size of the section changed as a result.
+ uint64_t End = 0;
+ for (Thunk *T : Thunks) {
+ End = alignTo(End, T->Alignment);
+ T->setOffset(End);
+ // Named ThunkSize so that it does not hide the section's own Size member.
+ uint32_t ThunkSize = T->size();
+ T->getThunkTargetSym()->Size = ThunkSize;
+ End += ThunkSize;
+ }
+ bool Changed = End != Size;
+ Size = End;
+ return Changed;
+// If linking position-dependent code then the table will store the addresses
+// directly in the binary so the section has type SHT_PROGBITS. If linking
+// position-independent code the section has type SHT_NOBITS since it will be
+// allocated and filled in by the dynamic linker.
+ : SyntheticSection(SHF_ALLOC | SHF_WRITE,
+ Config->Pic ? SHT_NOBITS : SHT_PROGBITS, 8,
+ ".branch_lt") {}
+void PPC64LongBranchTargetSection::addEntry(Symbol &Sym) { // Appends Sym to the table and stores its index in the symbol.
+ assert(Sym.PPC64BranchltIndex == 0xffff); // 0xffff is expected for a symbol that has no entry yet
+ Sym.PPC64BranchltIndex = Entries.size();
+ Entries.push_back(&Sym);
+size_t PPC64LongBranchTargetSection::getSize() const { // Eight bytes per table entry.
+ return Entries.size() * 8;
+void PPC64LongBranchTargetSection::writeTo(uint8_t *Buf) { // For non-PIC links, writes one 64-bit target address per entry.
+ assert(Target->GotPltEntrySize == 8);
+ // If linking non-pic we have the final addresses of the targets and they get
+ // written to the table directly. For pic the dynamic linker will allocate
+ // the section and fill it in.
+ if (Config->Pic)
+ return;
+ for (const Symbol *Sym : Entries) {
+ assert(Sym->getVA());
+ // Need calls to branch to the local entry-point since a long-branch
+ // must be a local-call.
+ write64(Buf,
+ Sym->getVA() + getPPC64GlobalEntryToLocalEntryOffset(Sym->StOther));
+ Buf += Target->GotPltEntrySize;
+ }
+bool PPC64LongBranchTargetSection::empty() const { // Reports empty only once Finalized is set and no entry was added.
+ // `removeUnusedSyntheticSections()` is called before thunk allocation which
+ // is too early to determine if this section will be empty or not. We need
+ // Finalized to keep the section alive until after thunk creation. Finalized
+ // only gets set to true once `finalizeSections()` is called after thunk
+ // creation. Because of this, if we don't create any long-branch thunks we end
+ // up with an empty .branch_lt section in the binary.
+ return Finalized && Entries.empty();
+InStruct elf::In; // definition of the In object whose members (In.DynSymTab, In.Plt, ...) are used throughout this file
+template GdbIndexSection *GdbIndexSection::create<ELF32LE>(); // explicit instantiations follow, one per ELFT variant (ELF32LE, ELF32BE, ELF64LE, ELF64BE)
+template GdbIndexSection *GdbIndexSection::create<ELF32BE>();
+template GdbIndexSection *GdbIndexSection::create<ELF64LE>();
+template GdbIndexSection *GdbIndexSection::create<ELF64BE>();
+template void elf::splitSections<ELF32LE>();
+template void elf::splitSections<ELF32BE>();
+template void elf::splitSections<ELF64LE>();
+template void elf::splitSections<ELF64BE>();
+template void EhFrameSection::addSection<ELF32LE>(InputSectionBase *);
+template void EhFrameSection::addSection<ELF32BE>(InputSectionBase *);
+template void EhFrameSection::addSection<ELF64LE>(InputSectionBase *);
+template void EhFrameSection::addSection<ELF64BE>(InputSectionBase *);
+template void PltSection::addEntry<ELF32LE>(Symbol &Sym);
+template void PltSection::addEntry<ELF32BE>(Symbol &Sym);
+template void PltSection::addEntry<ELF64LE>(Symbol &Sym);
+template void PltSection::addEntry<ELF64BE>(Symbol &Sym);
+template void MipsGotSection::build<ELF32LE>();
+template void MipsGotSection::build<ELF32BE>();
+template void MipsGotSection::build<ELF64LE>();
+template void MipsGotSection::build<ELF64BE>();
+template class elf::MipsAbiFlagsSection<ELF32LE>; // whole-class instantiations from here on
+template class elf::MipsAbiFlagsSection<ELF32BE>;
+template class elf::MipsAbiFlagsSection<ELF64LE>;
+template class elf::MipsAbiFlagsSection<ELF64BE>;
+template class elf::MipsOptionsSection<ELF32LE>;
+template class elf::MipsOptionsSection<ELF32BE>;
+template class elf::MipsOptionsSection<ELF64LE>;
+template class elf::MipsOptionsSection<ELF64BE>;
+template class elf::MipsReginfoSection<ELF32LE>;
+template class elf::MipsReginfoSection<ELF32BE>;
+template class elf::MipsReginfoSection<ELF64LE>;
+template class elf::MipsReginfoSection<ELF64BE>;
+template class elf::DynamicSection<ELF32LE>;
+template class elf::DynamicSection<ELF32BE>;
+template class elf::DynamicSection<ELF64LE>;
+template class elf::DynamicSection<ELF64BE>;
+template class elf::RelocationSection<ELF32LE>;
+template class elf::RelocationSection<ELF32BE>;
+template class elf::RelocationSection<ELF64LE>;
+template class elf::RelocationSection<ELF64BE>;
+template class elf::AndroidPackedRelocationSection<ELF32LE>;
+template class elf::AndroidPackedRelocationSection<ELF32BE>;
+template class elf::AndroidPackedRelocationSection<ELF64LE>;
+template class elf::AndroidPackedRelocationSection<ELF64BE>;
+template class elf::RelrSection<ELF32LE>;
+template class elf::RelrSection<ELF32BE>;
+template class elf::RelrSection<ELF64LE>;
+template class elf::RelrSection<ELF64BE>;
+template class elf::SymbolTableSection<ELF32LE>;
+template class elf::SymbolTableSection<ELF32BE>;
+template class elf::SymbolTableSection<ELF64LE>;
+template class elf::SymbolTableSection<ELF64BE>;
+template class elf::VersionTableSection<ELF32LE>;
+template class elf::VersionTableSection<ELF32BE>;
+template class elf::VersionTableSection<ELF64LE>;
+template class elf::VersionTableSection<ELF64BE>;
+template class elf::VersionNeedSection<ELF32LE>;
+template class elf::VersionNeedSection<ELF32BE>;
+template class elf::VersionNeedSection<ELF64LE>;
+template class elf::VersionNeedSection<ELF64BE>;
diff --git a/contrib/llvm/tools/lld/ELF/SyntheticSections.h b/contrib/llvm/tools/lld/ELF/SyntheticSections.h
new file mode 100644
index 000000000000..6fc40d355d5e
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/SyntheticSections.h
@@ -0,0 +1,1043 @@
+//===- SyntheticSection.h ---------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// Synthetic sections represent chunks of linker-created data. If you
+// need to create a chunk of data that is to be included in some section
+// in the result, you probably want to create that as a synthetic section.
+// Synthetic sections are designed as input sections as opposed to
+// output sections because we want to allow them to be manipulated
+// using linker scripts just like other input sections from regular
+// files.
+#include "DWARF.h"
+#include "EhFrame.h"
+#include "InputSection.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/Endian.h"
+#include <functional>
+namespace lld {
+namespace elf {
+class Defined;
+class SharedSymbol;
+class SyntheticSection : public InputSection {
+ SyntheticSection(uint64_t Flags, uint32_t Type, uint32_t Alignment,
+ StringRef Name)
+ : InputSection(nullptr, Flags, Type, Alignment, {}, Name,
+ InputSectionBase::Synthetic) {
+ this->Live = true;
+ }
+ virtual ~SyntheticSection() = default;
+ virtual void writeTo(uint8_t *Buf) = 0;
+ virtual size_t getSize() const = 0;
+ virtual void finalizeContents() {}
+ // If the section has the SHF_ALLOC flag and the size may be changed if
+ // thunks are added, update the section size.
+ virtual bool updateAllocSize() { return false; }
+ virtual bool empty() const { return false; }
+ static bool classof(const SectionBase *D) {
+ return D->kind() == InputSectionBase::Synthetic;
+ }
+struct CieRecord {
+ EhSectionPiece *Cie = nullptr;
+ std::vector<EhSectionPiece *> Fdes;
+// Section for .eh_frame.
+class EhFrameSection final : public SyntheticSection {
+ EhFrameSection();
+ void writeTo(uint8_t *Buf) override;
+ void finalizeContents() override;
+ bool empty() const override { return Sections.empty(); }
+ size_t getSize() const override { return Size; }
+ template <class ELFT> void addSection(InputSectionBase *S);
+ std::vector<EhInputSection *> Sections;
+ size_t NumFdes = 0;
+ struct FdeData {
+ uint32_t PcRel;
+ uint32_t FdeVARel;
+ };
+ std::vector<FdeData> getFdeData() const;
+ ArrayRef<CieRecord *> getCieRecords() const { return CieRecords; }
+ // This is used only when parsing EhInputSection. We keep it here to avoid
+ // allocating one for each EhInputSection.
+ llvm::DenseMap<size_t, CieRecord *> OffsetToCie;
+ uint64_t Size = 0;
+ template <class ELFT, class RelTy>
+ void addSectionAux(EhInputSection *S, llvm::ArrayRef<RelTy> Rels);
+ template <class ELFT, class RelTy>
+ CieRecord *addCie(EhSectionPiece &Piece, ArrayRef<RelTy> Rels);
+ template <class ELFT, class RelTy>
+ bool isFdeLive(EhSectionPiece &Piece, ArrayRef<RelTy> Rels);
+ uint64_t getFdePc(uint8_t *Buf, size_t Off, uint8_t Enc) const;
+ std::vector<CieRecord *> CieRecords;
+ // CIE records are uniquified by their contents and personality functions.
+ llvm::DenseMap<std::pair<ArrayRef<uint8_t>, Symbol *>, CieRecord *> CieMap;
+class GotSection : public SyntheticSection {
+ GotSection();
+ size_t getSize() const override { return Size; }
+ void finalizeContents() override;
+ bool empty() const override;
+ void writeTo(uint8_t *Buf) override;
+ void addEntry(Symbol &Sym);
+ bool addDynTlsEntry(Symbol &Sym);
+ bool addTlsIndex();
+ uint64_t getGlobalDynAddr(const Symbol &B) const;
+ uint64_t getGlobalDynOffset(const Symbol &B) const;
+ uint64_t getTlsIndexVA() { return this->getVA() + TlsIndexOff; }
+ uint32_t getTlsIndexOff() const { return TlsIndexOff; }
+ // Flag to force GOT to be in output if we have relocations
+ // that rely on its address.
+ bool HasGotOffRel = false;
+ size_t NumEntries = 0;
+ uint32_t TlsIndexOff = -1;
+ uint64_t Size = 0;
+// .note.GNU-stack section.
+class GnuStackSection : public SyntheticSection {
+ GnuStackSection()
+ : SyntheticSection(0, llvm::ELF::SHT_PROGBITS, 1, ".note.GNU-stack") {}
+ void writeTo(uint8_t *Buf) override {}
+ size_t getSize() const override { return 0; }
+// .note.gnu.build-id section.
+class BuildIdSection : public SyntheticSection {
+ // First 16 bytes are a header.
+ static const unsigned HeaderSize = 16;
+ BuildIdSection();
+ void writeTo(uint8_t *Buf) override;
+ size_t getSize() const override { return HeaderSize + HashSize; }
+ void writeBuildId(llvm::ArrayRef<uint8_t> Buf);
+ void computeHash(llvm::ArrayRef<uint8_t> Buf,
+ std::function<void(uint8_t *, ArrayRef<uint8_t>)> Hash);
+ size_t HashSize;
+ uint8_t *HashBuf;
+// BssSection is used to reserve space for copy relocations and common symbols.
+// We create three instances of this class for .bss, .bss.rel.ro and "COMMON",
+// that are used for writable symbols, read-only symbols and common symbols,
+// respectively.
+class BssSection final : public SyntheticSection {
+ BssSection(StringRef Name, uint64_t Size, uint32_t Alignment);
+ void writeTo(uint8_t *) override {
+ llvm_unreachable("unexpected writeTo() call for SHT_NOBITS section");
+ }
+ bool empty() const override { return getSize() == 0; }
+ size_t getSize() const override { return Size; }
+ static bool classof(const SectionBase *S) { return S->Bss; }
+ uint64_t Size;
+class MipsGotSection final : public SyntheticSection {
+ MipsGotSection();
+ void writeTo(uint8_t *Buf) override;
+ size_t getSize() const override { return Size; }
+ bool updateAllocSize() override;
+ void finalizeContents() override;
+ bool empty() const override;
+ // Join separate GOTs built for each input file to generate
+ // primary and optional multiple secondary GOTs.
+ template <class ELFT> void build();
+ void addEntry(InputFile &File, Symbol &Sym, int64_t Addend, RelExpr Expr);
+ void addDynTlsEntry(InputFile &File, Symbol &Sym);
+ void addTlsIndex(InputFile &File);
+ uint64_t getPageEntryOffset(const InputFile *F, const Symbol &S,
+ int64_t Addend) const;
+ uint64_t getSymEntryOffset(const InputFile *F, const Symbol &S,
+ int64_t Addend) const;
+ uint64_t getGlobalDynOffset(const InputFile *F, const Symbol &S) const;
+ uint64_t getTlsIndexOffset(const InputFile *F) const;
+ // Returns the symbol which corresponds to the first entry of the global part
+ // of GOT on MIPS platform. It is required to fill up MIPS-specific dynamic
+ // table properties.
+ // Returns nullptr if the global part is empty.
+ const Symbol *getFirstGlobalEntry() const;
+ // Returns the number of entries in the local part of GOT including
+ // the number of reserved entries.
+ unsigned getLocalEntriesNum() const;
+ // Return _gp value for primary GOT (nullptr) or particular input file.
+ uint64_t getGp(const InputFile *F = nullptr) const;
+ // MIPS GOT consists of three parts: local, global and tls. Each part
+ // contains different types of entries. Here is a layout of GOT:
+ // - Header entries |
+ // - Page entries | Local part
+ // - Local entries (16-bit access) |
+ // - Local entries (32-bit access) |
+ // - Normal global entries || Global part
+ // - Reloc-only global entries ||
+ // - TLS entries ||| TLS part
+ //
+ // Header:
+ // Two entries hold predefined value 0x0 and 0x80000000.
+ // Page entries:
+ // These entries created by R_MIPS_GOT_PAGE relocation and R_MIPS_GOT16
+ // relocation against local symbols. They are initialized by higher 16-bit
+ // of the corresponding symbol's value. So each 64kb of address space
+ // requires a single GOT entry.
+ // Local entries (16-bit access):
+ // These entries created by GOT relocations against global non-preemptible
+ // symbols so dynamic linker is not necessary to resolve the symbol's
+ // values. "16-bit access" means that corresponding relocations address
+ // GOT using 16-bit index. Each unique Symbol-Addend pair has its own
+ // GOT entry.
+ // Local entries (32-bit access):
+ // These entries are the same as above but created by relocations which
+ // address GOT using 32-bit index (R_MIPS_GOT_HI16/LO16 etc).
+ // Normal global entries:
+ // These entries created by GOT relocations against preemptible global
+ // symbols. They need to be initialized by dynamic linker and they ordered
+ // exactly as the corresponding entries in the dynamic symbols table.
+ // Reloc-only global entries:
+ // These entries created for symbols that are referenced by dynamic
+ // relocations R_MIPS_REL32. These entries are not accessed with gp-relative
+ // addressing, but MIPS ABI requires that these entries be present in GOT.
+ // TLS entries:
+ // Entries created by TLS relocations.
+ //
+ // If the sum of local, global and tls entries is less than 64K only single
+ // got is enough. Otherwise, multi-got is created. Series of primary and
+ // multiple secondary GOTs have the following layout:
+ // - Primary GOT
+ // Header
+ // Local entries
+ // Global entries
+ // Relocation only entries
+ // TLS entries
+ //
+ // - Secondary GOT
+ // Local entries
+ // Global entries
+ // TLS entries
+ // ...
+ //
+ // All GOT entries required by relocations from a single input file entirely
+ // belong to either primary or one of secondary GOTs. To reference GOT entries
+ // each GOT has its own _gp value that points to the "middle" of the GOT.
+ // In the code this value is loaded into the register used for GOT access.
+ //
+ // MIPS 32 function's prologue:
+ // lui v0,0x0
+ // 0: R_MIPS_HI16 _gp_disp
+ // addiu v0,v0,0
+ // 4: R_MIPS_LO16 _gp_disp
+ //
+ // MIPS 64:
+ // lui at,0x0
+ // 14: R_MIPS_GPREL16 main
+ //
+ // Dynamic linker does not know anything about secondary GOTs and cannot
+ // use a regular MIPS mechanism for GOT entries initialization. So we have
+ // to use an approach accepted by other architectures and create dynamic
+ // relocations R_MIPS_REL32 to initialize global entries (and local in case
+ // of PIC code) in secondary GOTs. But ironically MIPS dynamic linker
+ // requires GOT entries and correspondingly ordered dynamic symbol table
+ // entries to deal with dynamic relocations. To handle this problem
+ // relocation-only section in the primary GOT contains entries for all
+ // symbols referenced in global parts of secondary GOTs. Although the sum
+ // of local and normal global entries of the primary got should be less
+ // than 64K, the size of the primary got (including relocation-only entries)
+ // can be greater than 64K, because parts of the primary got that overflow
+ // the 64K limit are used only by the dynamic linker at dynamic link-time
+ // and not by 16-bit gp-relative addressing at run-time.
+ //
+ // For complete multi-GOT description see the following link
+ // https://dmz-portal.mips.com/wiki/MIPS_Multi_GOT
+ // Number of "Header" entries.
+ static const unsigned HeaderEntriesNum = 2;
+ uint64_t Size = 0;
+ // Symbol and addend.
+ typedef std::pair<Symbol *, int64_t> GotEntry;
+ struct FileGot {
+ InputFile *File = nullptr;
+ size_t StartIndex = 0;
+ struct PageBlock {
+ size_t FirstIndex = 0;
+ size_t Count = 0;
+ };
+ // Map output sections referenced by MIPS GOT relocations
+ // to the description (index/count) "page" entries allocated
+ // for this section.
+ llvm::SmallMapVector<const OutputSection *, PageBlock, 16> PagesMap;
+ // Maps from Symbol+Addend pair or just Symbol to the GOT entry index.
+ llvm::MapVector<GotEntry, size_t> Local16;
+ llvm::MapVector<GotEntry, size_t> Local32;
+ llvm::MapVector<Symbol *, size_t> Global;
+ llvm::MapVector<Symbol *, size_t> Relocs;
+ llvm::MapVector<Symbol *, size_t> Tls;
+ // Set of symbols referenced by dynamic TLS relocations.
+ llvm::MapVector<Symbol *, size_t> DynTlsSymbols;
+ // Total number of all entries.
+ size_t getEntriesNum() const;
+ // Number of "page" entries.
+ size_t getPageEntriesNum() const;
+ // Number of entries that require a 16-bit index to access.
+ size_t getIndexedEntriesNum() const;
+ };
+ // Container of GOT created for each input file.
+ // After building a final series of GOTs this container
+ // holds primary and secondary GOT's.
+ std::vector<FileGot> Gots;
+ // Return (and create if necessary) `FileGot`.
+ FileGot &getGot(InputFile &F);
+ // Try to merge two GOTs. In case of success the `Dst` contains
+ // result of merging and the function returns true. In case of
+ // overflow the `Dst` is unchanged and the function returns false.
+ bool tryMergeGots(FileGot & Dst, FileGot & Src, bool IsPrimary);
+class GotPltSection final : public SyntheticSection {
+ GotPltSection();
+ void addEntry(Symbol &Sym);
+ size_t getSize() const override;
+ void writeTo(uint8_t *Buf) override;
+ bool empty() const override;
+ std::vector<const Symbol *> Entries;
+// The IgotPltSection is a Got associated with the PltSection for GNU Ifunc
+// Symbols that will be relocated by Target->IRelativeRel.
+// On most Targets the IgotPltSection will immediately follow the GotPltSection
+// on ARM the IgotPltSection will immediately follow the GotSection.
+class IgotPltSection final : public SyntheticSection {
+ IgotPltSection();
+ void addEntry(Symbol &Sym);
+ size_t getSize() const override;
+ void writeTo(uint8_t *Buf) override;
+ bool empty() const override { return Entries.empty(); }
+ std::vector<const Symbol *> Entries;
+class StringTableSection final : public SyntheticSection {
+ StringTableSection(StringRef Name, bool Dynamic);
+ unsigned addString(StringRef S, bool HashIt = true);
+ void writeTo(uint8_t *Buf) override;
+ size_t getSize() const override { return Size; }
+ bool isDynamic() const { return Dynamic; }
+ const bool Dynamic;
+ uint64_t Size = 0;
+ llvm::DenseMap<StringRef, unsigned> StringMap;
+ std::vector<StringRef> Strings;
+class DynamicReloc {
+ DynamicReloc(RelType Type, const InputSectionBase *InputSec,
+ uint64_t OffsetInSec, bool UseSymVA, Symbol *Sym, int64_t Addend)
+ : Type(Type), Sym(Sym), InputSec(InputSec), OffsetInSec(OffsetInSec),
+ UseSymVA(UseSymVA), Addend(Addend), OutputSec(nullptr) {}
+ // This constructor records dynamic relocation settings used by MIPS
+ // multi-GOT implementation. It is used to relocate addresses of 64kb pages
+ // that lie inside the output section.
+ DynamicReloc(RelType Type, const InputSectionBase *InputSec,
+ uint64_t OffsetInSec, const OutputSection *OutputSec,
+ int64_t Addend)
+ : Type(Type), Sym(nullptr), InputSec(InputSec), OffsetInSec(OffsetInSec),
+ UseSymVA(false), Addend(Addend), OutputSec(OutputSec) {}
+ uint64_t getOffset() const;
+ uint32_t getSymIndex() const;
+ const InputSectionBase *getInputSec() const { return InputSec; }
+ // Computes the addend of the dynamic relocation. Note that this is not the
+ // same as the Addend member variable as it also includes the symbol address
+ // if UseSymVA is true.
+ int64_t computeAddend() const;
+ RelType Type;
+ Symbol *Sym;
+ const InputSectionBase *InputSec = nullptr;
+ uint64_t OffsetInSec;
+ // If this member is true, the dynamic relocation will not be against the
+ // symbol but will instead be a relative relocation that simply adds the
+ // load address. This means we need to write the symbol virtual address
+ // plus the original addend as the final relocation addend.
+ bool UseSymVA;
+ int64_t Addend;
+ const OutputSection *OutputSec;
+template <class ELFT> class DynamicSection final : public SyntheticSection {
+ typedef typename ELFT::Dyn Elf_Dyn;
+ typedef typename ELFT::Rel Elf_Rel;
+ typedef typename ELFT::Rela Elf_Rela;
+ typedef typename ELFT::Relr Elf_Relr;
+ typedef typename ELFT::Shdr Elf_Shdr;
+ typedef typename ELFT::Sym Elf_Sym;
+ // finalizeContents() fills this vector with the section contents.
+ std::vector<std::pair<int32_t, std::function<uint64_t()>>> Entries;
+ DynamicSection();
+ void finalizeContents() override;
+ void writeTo(uint8_t *Buf) override;
+ size_t getSize() const override { return Size; }
+ void add(int32_t Tag, std::function<uint64_t()> Fn);
+ void addInt(int32_t Tag, uint64_t Val);
+ void addInSec(int32_t Tag, InputSection *Sec);
+ void addInSecRelative(int32_t Tag, InputSection *Sec);
+ void addOutSec(int32_t Tag, OutputSection *Sec);
+ void addSize(int32_t Tag, OutputSection *Sec);
+ void addSym(int32_t Tag, Symbol *Sym);
+ uint64_t Size = 0;
+class RelocationBaseSection : public SyntheticSection {
+ RelocationBaseSection(StringRef Name, uint32_t Type, int32_t DynamicTag,
+ int32_t SizeDynamicTag);
+ void addReloc(RelType DynType, InputSectionBase *IS, uint64_t OffsetInSec,
+ Symbol *Sym);
+ // Add a dynamic relocation that might need an addend. This takes care of
+ // writing the addend to the output section if needed.
+ void addReloc(RelType DynType, InputSectionBase *InputSec,
+ uint64_t OffsetInSec, Symbol *Sym, int64_t Addend, RelExpr Expr,
+ RelType Type);
+ void addReloc(const DynamicReloc &Reloc);
+ bool empty() const override { return Relocs.empty(); }
+ size_t getSize() const override { return Relocs.size() * this->Entsize; }
+ size_t getRelativeRelocCount() const { return NumRelativeRelocs; }
+ void finalizeContents() override;
+ int32_t DynamicTag, SizeDynamicTag;
+ std::vector<DynamicReloc> Relocs;
+ size_t NumRelativeRelocs = 0;
+template <class ELFT>
+class RelocationSection final : public RelocationBaseSection {
+ typedef typename ELFT::Rel Elf_Rel;
+ typedef typename ELFT::Rela Elf_Rela;
+ RelocationSection(StringRef Name, bool Sort);
+ unsigned getRelocOffset();
+ void writeTo(uint8_t *Buf) override;
+ bool Sort;
+template <class ELFT>
+class AndroidPackedRelocationSection final : public RelocationBaseSection {
+ typedef typename ELFT::Rel Elf_Rel;
+ typedef typename ELFT::Rela Elf_Rela;
+ AndroidPackedRelocationSection(StringRef Name);
+ bool updateAllocSize() override;
+ size_t getSize() const override { return RelocData.size(); }
+ void writeTo(uint8_t *Buf) override {
+ memcpy(Buf, RelocData.data(), RelocData.size());
+ }
+ SmallVector<char, 0> RelocData;
+struct RelativeReloc {
+ uint64_t getOffset() const { return InputSec->getVA(OffsetInSec); }
+ const InputSectionBase *InputSec;
+ uint64_t OffsetInSec;
+class RelrBaseSection : public SyntheticSection {
+ RelrBaseSection();
+ bool empty() const override { return Relocs.empty(); }
+ std::vector<RelativeReloc> Relocs;
+// RelrSection is used to encode offsets for relative relocations.
+// Proposal for adding SHT_RELR sections to generic-abi is here:
+// https://groups.google.com/forum/#!topic/generic-abi/bX460iggiKg
+// For more details, see the comment in RelrSection::updateAllocSize().
+template <class ELFT> class RelrSection final : public RelrBaseSection {
+ typedef typename ELFT::Relr Elf_Relr;
+ RelrSection();
+ bool updateAllocSize() override;
+ size_t getSize() const override { return RelrRelocs.size() * this->Entsize; }
+ void writeTo(uint8_t *Buf) override {
+ memcpy(Buf, RelrRelocs.data(), getSize());
+ }
+ std::vector<Elf_Relr> RelrRelocs;
+struct SymbolTableEntry {
+ Symbol *Sym;
+ size_t StrTabOffset;
+class SymbolTableBaseSection : public SyntheticSection {
+ SymbolTableBaseSection(StringTableSection &StrTabSec);
+ void finalizeContents() override;
+ size_t getSize() const override { return getNumSymbols() * Entsize; }
+ void addSymbol(Symbol *Sym);
+ unsigned getNumSymbols() const { return Symbols.size() + 1; }
+ size_t getSymbolIndex(Symbol *Sym);
+ ArrayRef<SymbolTableEntry> getSymbols() const { return Symbols; }
+ void sortSymTabSymbols();
+ // A vector of symbols and their string table offsets.
+ std::vector<SymbolTableEntry> Symbols;
+ StringTableSection &StrTabSec;
+ llvm::once_flag OnceFlag;
+ llvm::DenseMap<Symbol *, size_t> SymbolIndexMap;
+ llvm::DenseMap<OutputSection *, size_t> SectionIndexMap;
+template <class ELFT>
+class SymbolTableSection final : public SymbolTableBaseSection {
+ typedef typename ELFT::Sym Elf_Sym;
+ SymbolTableSection(StringTableSection &StrTabSec);
+ void writeTo(uint8_t *Buf) override;
+class SymtabShndxSection final : public SyntheticSection {
+ SymtabShndxSection();
+ void writeTo(uint8_t *Buf) override;
+ size_t getSize() const override;
+ bool empty() const override;
+ void finalizeContents() override;
+// Outputs GNU Hash section. For detailed explanation see:
+// https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections
+class GnuHashTableSection final : public SyntheticSection {
+ GnuHashTableSection();
+ void finalizeContents() override;
+ void writeTo(uint8_t *Buf) override;
+ size_t getSize() const override { return Size; }
+ // Adds symbols to the hash table.
+ // Sorts the input to satisfy GNU hash section requirements.
+ void addSymbols(std::vector<SymbolTableEntry> &Symbols);
+ // See the comment in writeBloomFilter.
+ enum { Shift2 = 26 };
+ void writeBloomFilter(uint8_t *Buf);
+ void writeHashTable(uint8_t *Buf);
+ struct Entry {
+ Symbol *Sym;
+ size_t StrTabOffset;
+ uint32_t Hash;
+ uint32_t BucketIdx;
+ };
+ std::vector<Entry> Symbols;
+ size_t MaskWords;
+ size_t NBuckets = 0;
+ size_t Size = 0;
+class HashTableSection final : public SyntheticSection {
+ HashTableSection();
+ void finalizeContents() override;
+ void writeTo(uint8_t *Buf) override;
+ size_t getSize() const override { return Size; }
+ size_t Size = 0;
+// The PltSection is used for both the Plt and Iplt. The former usually has a
+// header as its first entry that is used at run-time to resolve lazy binding.
+// The latter is used for GNU Ifunc symbols, that will be subject to a
+// Target->IRelativeRel.
+class PltSection : public SyntheticSection {
+ PltSection(bool IsIplt);
+ void writeTo(uint8_t *Buf) override;
+ size_t getSize() const override;
+ bool empty() const override { return Entries.empty(); }
+ void addSymbols();
+ template <class ELFT> void addEntry(Symbol &Sym);
+ size_t HeaderSize;
+ unsigned getPltRelocOff() const;
+ std::vector<std::pair<const Symbol *, unsigned>> Entries;
+ bool IsIplt;
+class GdbIndexSection final : public SyntheticSection {
+ struct AddressEntry {
+ InputSection *Section;
+ uint64_t LowAddress;
+ uint64_t HighAddress;
+ uint32_t CuIndex;
+ };
+ struct CuEntry {
+ uint64_t CuOffset;
+ uint64_t CuLength;
+ };
+ struct NameAttrEntry {
+ llvm::CachedHashStringRef Name;
+ uint32_t CuIndexAndAttrs;
+ };
+ struct GdbChunk {
+ InputSection *Sec;
+ std::vector<AddressEntry> AddressAreas;
+ std::vector<CuEntry> CompilationUnits;
+ };
+ struct GdbSymbol {
+ llvm::CachedHashStringRef Name;
+ std::vector<uint32_t> CuVector;
+ uint32_t NameOff;
+ uint32_t CuVectorOff;
+ };
+ GdbIndexSection();
+ template <typename ELFT> static GdbIndexSection *create();
+ void writeTo(uint8_t *Buf) override;
+ size_t getSize() const override { return Size; }
+ bool empty() const override;
+ struct GdbIndexHeader {
+ llvm::support::ulittle32_t Version;
+ llvm::support::ulittle32_t CuListOff;
+ llvm::support::ulittle32_t CuTypesOff;
+ llvm::support::ulittle32_t AddressAreaOff;
+ llvm::support::ulittle32_t SymtabOff;
+ llvm::support::ulittle32_t ConstantPoolOff;
+ };
+ void initOutputSize();
+ size_t computeSymtabSize() const;
+ // Each chunk contains information gathered from debug sections of a
+ // single object file.
+ std::vector<GdbChunk> Chunks;
+ // A symbol table for this .gdb_index section.
+ std::vector<GdbSymbol> Symbols;
+ size_t Size;
+// --eh-frame-hdr option tells linker to construct a header for all the
+// .eh_frame sections. This header is placed to a section named .eh_frame_hdr
+// and also to a PT_GNU_EH_FRAME segment.
+// At runtime the unwinder then can find all the PT_GNU_EH_FRAME segments by
+// calling dl_iterate_phdr.
+// This section contains a lookup table for quick binary search of FDEs.
+// Detailed info about internals can be found in Ian Lance Taylor's blog:
+// http://www.airs.com/blog/archives/460 (".eh_frame")
+// http://www.airs.com/blog/archives/462 (".eh_frame_hdr")
+class EhFrameHeader final : public SyntheticSection {
+ EhFrameHeader();
+ void writeTo(uint8_t *Buf) override;
+ size_t getSize() const override;
+ bool empty() const override;
+// For more information about .gnu.version and .gnu.version_r see:
+// https://www.akkadia.org/drepper/symbol-versioning
+// The .gnu.version_d section which has a section type of SHT_GNU_verdef shall
+// contain symbol version definitions. The number of entries in this section
+// shall be contained in the DT_VERDEFNUM entry of the .dynamic section.
+// The section shall contain an array of Elf_Verdef structures, optionally
+// followed by an array of Elf_Verdaux structures.
+class VersionDefinitionSection final : public SyntheticSection {
+ VersionDefinitionSection();
+ void finalizeContents() override;
+ size_t getSize() const override;
+ void writeTo(uint8_t *Buf) override;
+ enum { EntrySize = 28 };
+ void writeOne(uint8_t *Buf, uint32_t Index, StringRef Name, size_t NameOff);
+ unsigned FileDefNameOff;
+// The .gnu.version section specifies the required version of each symbol in the
+// dynamic symbol table. It contains one Elf_Versym for each dynamic symbol
+// table entry. An Elf_Versym is just a 16-bit integer that refers to a version
+// identifier defined in the either .gnu.version_r or .gnu.version_d section.
+// The values 0 and 1 are reserved. All other values are used for versions in
+// the own object or in any of the dependencies.
+template <class ELFT>
+class VersionTableSection final : public SyntheticSection {
+ VersionTableSection();
+ void finalizeContents() override;
+ size_t getSize() const override;
+ void writeTo(uint8_t *Buf) override;
+ bool empty() const override;
+// The .gnu.version_r section defines the version identifiers used by
+// .gnu.version. It contains a linked list of Elf_Verneed data structures. Each
+// Elf_Verneed specifies the version requirements for a single DSO, and contains
+// a reference to a linked list of Elf_Vernaux data structures which define the
+// mapping from version identifiers to version names.
+template <class ELFT> class VersionNeedSection final : public SyntheticSection {
+ typedef typename ELFT::Verneed Elf_Verneed;
+ typedef typename ELFT::Vernaux Elf_Vernaux;
+ // A vector of shared files that need Elf_Verneed data structures and the
+ // string table offsets of their sonames.
+ std::vector<std::pair<SharedFile<ELFT> *, size_t>> Needed;
+ // The next available version identifier.
+ unsigned NextIndex;
+ VersionNeedSection();
+ void addSymbol(Symbol *Sym);
+ void finalizeContents() override;
+ void writeTo(uint8_t *Buf) override;
+ size_t getSize() const override;
+ size_t getNeedNum() const { return Needed.size(); }
+ bool empty() const override;
+// MergeSyntheticSection is a class that allows us to put mergeable sections
+// with different attributes in a single output sections. To do that
+// we put them into MergeSyntheticSection synthetic input sections which are
+// attached to regular output sections.
+class MergeSyntheticSection : public SyntheticSection {
+ void addSection(MergeInputSection *MS);
+ std::vector<MergeInputSection *> Sections;
+ MergeSyntheticSection(StringRef Name, uint32_t Type, uint64_t Flags,
+ uint32_t Alignment)
+ : SyntheticSection(Flags, Type, Alignment, Name) {}
+class MergeTailSection final : public MergeSyntheticSection {
+ MergeTailSection(StringRef Name, uint32_t Type, uint64_t Flags,
+ uint32_t Alignment);
+ size_t getSize() const override;
+ void writeTo(uint8_t *Buf) override;
+ void finalizeContents() override;
+ llvm::StringTableBuilder Builder;
+class MergeNoTailSection final : public MergeSyntheticSection {
+ MergeNoTailSection(StringRef Name, uint32_t Type, uint64_t Flags,
+ uint32_t Alignment)
+ : MergeSyntheticSection(Name, Type, Flags, Alignment) {}
+ size_t getSize() const override { return Size; }
+ void writeTo(uint8_t *Buf) override;
+ void finalizeContents() override;
+ // We use the most significant bits of a hash as a shard ID.
+ // The reason why we don't want to use the least significant bits is
+ // because DenseMap also uses lower bits to determine a bucket ID.
+ // If we use lower bits, it significantly increases the probability of
+ // hash collisions.
+ size_t getShardId(uint32_t Hash) {
+ return Hash >> (32 - llvm::countTrailingZeros(NumShards));
+ }
+ // Section size
+ size_t Size;
+ // String table contents
+ constexpr static size_t NumShards = 32;
+ std::vector<llvm::StringTableBuilder> Shards;
+ size_t ShardOffsets[NumShards];
+// .MIPS.abiflags section.
+template <class ELFT>
+class MipsAbiFlagsSection final : public SyntheticSection {
+ typedef llvm::object::Elf_Mips_ABIFlags<ELFT> Elf_Mips_ABIFlags;
+ static MipsAbiFlagsSection *create();
+ MipsAbiFlagsSection(Elf_Mips_ABIFlags Flags);
+ size_t getSize() const override { return sizeof(Elf_Mips_ABIFlags); }
+ void writeTo(uint8_t *Buf) override;
+ Elf_Mips_ABIFlags Flags;
+// .MIPS.options section.
+template <class ELFT> class MipsOptionsSection final : public SyntheticSection {
+ typedef llvm::object::Elf_Mips_Options<ELFT> Elf_Mips_Options;
+ typedef llvm::object::Elf_Mips_RegInfo<ELFT> Elf_Mips_RegInfo;
+ static MipsOptionsSection *create();
+ MipsOptionsSection(Elf_Mips_RegInfo Reginfo);
+ void writeTo(uint8_t *Buf) override;
+ size_t getSize() const override {
+ return sizeof(Elf_Mips_Options) + sizeof(Elf_Mips_RegInfo);
+ }
+ Elf_Mips_RegInfo Reginfo;
+// MIPS .reginfo section.
+template <class ELFT> class MipsReginfoSection final : public SyntheticSection {
+ typedef llvm::object::Elf_Mips_RegInfo<ELFT> Elf_Mips_RegInfo;
+ static MipsReginfoSection *create();
+ MipsReginfoSection(Elf_Mips_RegInfo Reginfo);
+ size_t getSize() const override { return sizeof(Elf_Mips_RegInfo); }
+ void writeTo(uint8_t *Buf) override;
+ Elf_Mips_RegInfo Reginfo;
+// This is a MIPS specific section to hold a space within the data segment
+// of executable file which is pointed to by the DT_MIPS_RLD_MAP entry.
+// See "Dynamic section" in Chapter 5 in the following document:
+// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
+class MipsRldMapSection : public SyntheticSection {
+ MipsRldMapSection();
+ size_t getSize() const override { return Config->Wordsize; }
+ void writeTo(uint8_t *Buf) override {}
+class ARMExidxSentinelSection : public SyntheticSection {
+ ARMExidxSentinelSection();
+ size_t getSize() const override { return 8; }
+ void writeTo(uint8_t *Buf) override;
+ bool empty() const override;
+ static bool classof(const SectionBase *D);
+ // The last section referenced by a regular .ARM.exidx section.
+ // It is found and filled in Writer<ELFT>::resolveShfLinkOrder().
+ // The sentinel points at the end of that section.
+ InputSection *Highest = nullptr;
+// A container for one or more linker generated thunks. Instances of these
+// thunks include ARM interworking and Mips LA25 PI to non-PI thunks.
+class ThunkSection : public SyntheticSection {
+ // ThunkSection in OS, with desired OutSecOff of Off
+ ThunkSection(OutputSection *OS, uint64_t Off);
+ // Add a newly created Thunk to this container:
+ // Thunk is given offset from start of this InputSection
+ // Thunk defines a symbol in this InputSection that can be used as target
+ // of a relocation
+ void addThunk(Thunk *T);
+ size_t getSize() const override { return Size; }
+ void writeTo(uint8_t *Buf) override;
+ InputSection *getTargetInputSection() const;
+ bool assignOffsets();
+ std::vector<Thunk *> Thunks;
+ size_t Size = 0;
+// This section is used to store the addresses of functions that are called
+// in range-extending thunks on PowerPC64. When producing position-dependent
+// code the addresses are link-time constants and the table is written out to
+// the binary. When producing position-independent code the table is allocated
+// and filled in by the dynamic linker.
+class PPC64LongBranchTargetSection final : public SyntheticSection {
+ PPC64LongBranchTargetSection();
+ void addEntry(Symbol &Sym);
+ size_t getSize() const override;
+ void writeTo(uint8_t *Buf) override;
+ bool empty() const override;
+ void finalizeContents() override { Finalized = true; }
+ std::vector<const Symbol *> Entries;
+ bool Finalized = false;
+InputSection *createInterpSection();
+MergeInputSection *createCommentSection();
+template <class ELFT> void splitSections();
+void mergeSections();
+Defined *addSyntheticLocal(StringRef Name, uint8_t Type, uint64_t Value,
+ uint64_t Size, InputSectionBase &Section);
+// Linker generated sections which can be used as inputs.
+struct InStruct {
+ InputSection *ARMAttributes;
+ BssSection *Bss;
+ BssSection *BssRelRo;
+ BuildIdSection *BuildId;
+ EhFrameHeader *EhFrameHdr;
+ EhFrameSection *EhFrame;
+ SyntheticSection *Dynamic;
+ StringTableSection *DynStrTab;
+ SymbolTableBaseSection *DynSymTab;
+ GnuHashTableSection *GnuHashTab;
+ HashTableSection *HashTab;
+ InputSection *Interp;
+ GdbIndexSection *GdbIndex;
+ GotSection *Got;
+ GotPltSection *GotPlt;
+ IgotPltSection *IgotPlt;
+ PPC64LongBranchTargetSection *PPC64LongBranchTarget;
+ MipsGotSection *MipsGot;
+ MipsRldMapSection *MipsRldMap;
+ PltSection *Plt;
+ PltSection *Iplt;
+ RelocationBaseSection *RelaDyn;
+ RelrBaseSection *RelrDyn;
+ RelocationBaseSection *RelaPlt;
+ RelocationBaseSection *RelaIplt;
+ StringTableSection *ShStrTab;
+ StringTableSection *StrTab;
+ SymbolTableBaseSection *SymTab;
+ SymtabShndxSection *SymTabShndx;
+ VersionDefinitionSection *VerDef;
+extern InStruct In;
+template <class ELFT> struct InX {
+ static VersionTableSection<ELFT> *VerSym;
+ static VersionNeedSection<ELFT> *VerNeed;
+template <class ELFT> VersionTableSection<ELFT> *InX<ELFT>::VerSym;
+template <class ELFT> VersionNeedSection<ELFT> *InX<ELFT>::VerNeed;
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/Target.cpp b/contrib/llvm/tools/lld/ELF/Target.cpp
new file mode 100644
index 000000000000..01073a62cfd6
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Target.cpp
@@ -0,0 +1,184 @@
+//===- Target.cpp ---------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// Machine-specific things, such as applying relocations, creation of
+// GOT or PLT entries, etc., are handled in this file.
+// Refer to the ELF spec for the single letter variables, S, A or P, used
+// in this file.
+// Some functions defined in this file have "relaxTls" as part of their names.
+// They do peephole optimization for TLS variables by rewriting instructions.
+// They are not part of the ABI but optional optimization, so you can skip
+// them if you are not interested in how TLS variables are optimized.
+// See the following paper for the details.
+// Ulrich Drepper, ELF Handling For Thread-Local Storage
+// http://www.akkadia.org/drepper/tls.pdf
+#include "Target.h"
+#include "InputFiles.h"
+#include "OutputSections.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/Object/ELF.h"
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::ELF;
+using namespace lld;
+using namespace lld::elf;
+TargetInfo *elf::Target;
+std::string lld::toString(RelType Type) { // Printable name of relocation Type
+ StringRef S = getELFRelocationTypeName(elf::Config->EMachine, Type); // name is looked up for the configured machine
+ if (S == "Unknown")
+ return ("Unknown (" + Twine(Type) + ")").str(); // include the numeric value so the type is still identifiable
+ return S;
+TargetInfo *elf::getTarget() { // Select the TargetInfo matching Config->EMachine
+ switch (Config->EMachine) {
+ case EM_386:
+ case EM_IAMCU: // falls into the same x86 TargetInfo as EM_386
+ return getX86TargetInfo();
+ case EM_AARCH64:
+ return getAArch64TargetInfo();
+ case EM_AMDGPU:
+ return getAMDGPUTargetInfo();
+ case EM_ARM:
+ return getARMTargetInfo();
+ case EM_AVR:
+ return getAVRTargetInfo();
+ case EM_HEXAGON:
+ return getHexagonTargetInfo();
+ case EM_MIPS:
+ switch (Config->EKind) { // the MIPS TargetInfo is a template, so pick the ELFT from the ELF kind
+ case ELF32LEKind:
+ return getMipsTargetInfo<ELF32LE>();
+ case ELF32BEKind:
+ return getMipsTargetInfo<ELF32BE>();
+ case ELF64LEKind:
+ return getMipsTargetInfo<ELF64LE>();
+ case ELF64BEKind:
+ return getMipsTargetInfo<ELF64BE>();
+ default:
+ llvm_unreachable("unsupported MIPS target");
+ }
+ case EM_MSP430:
+ return getMSP430TargetInfo();
+ case EM_PPC:
+ return getPPCTargetInfo();
+ case EM_PPC64:
+ return getPPC64TargetInfo();
+ case EM_RISCV:
+ return getRISCVTargetInfo();
+ case EM_SPARCV9:
+ return getSPARCV9TargetInfo();
+ case EM_X86_64:
+ if (Config->EKind == ELF32LEKind) // 32-bit little-endian ELF on an x86-64 machine selects the X32 target
+ return getX32TargetInfo();
+ return getX86_64TargetInfo();
+ }
+ llvm_unreachable("unknown target machine"); // reached only when no case above matched
+template <class ELFT> static ErrorPlace getErrPlace(const uint8_t *Loc) { // Find the input section whose byte range contains Loc
+ for (InputSectionBase *D : InputSections) {
+ auto *IS = cast<InputSection>(D); // NOTE(review): cast<> assumes every element is an InputSection — confirm
+ if (!IS->getParent()) // sections without a parent have no location to compare against
+ continue;
+ uint8_t *ISLoc = IS->getParent()->Loc + IS->OutSecOff; // start of this section: parent's Loc plus the section's offset
+ if (ISLoc <= Loc && Loc < ISLoc + IS->getSize())
+ return {IS, IS->template getLocation<ELFT>(Loc - ISLoc) + ": "}; // location string gets a ": " suffix for message prefixes
+ }
+ return {}; // no section covers Loc: value-initialized ErrorPlace
+ErrorPlace elf::getErrorPlace(const uint8_t *Loc) { // Dispatch to getErrPlace<ELFT> for the configured ELF kind
+ switch (Config->EKind) {
+ case ELF32LEKind:
+ return getErrPlace<ELF32LE>(Loc);
+ case ELF32BEKind:
+ return getErrPlace<ELF32BE>(Loc);
+ case ELF64LEKind:
+ return getErrPlace<ELF64LE>(Loc);
+ case ELF64BEKind:
+ return getErrPlace<ELF64BE>(Loc);
+ default:
+ llvm_unreachable("unknown ELF type"); // any other EKind is treated as impossible here
+ }
+TargetInfo::~TargetInfo() {}
+int64_t TargetInfo::getImplicitAddend(const uint8_t *Buf, RelType Type) const {
+ return 0;
+bool TargetInfo::usesOnlyLowPageBits(RelType Type) const { return false; }
+bool TargetInfo::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
+ uint64_t BranchAddr, const Symbol &S) const {
+ return false;
+bool TargetInfo::adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
+ uint8_t StOther) const {
+ llvm_unreachable("Target doesn't support split stacks.");
+bool TargetInfo::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
+ return true;
+void TargetInfo::writeIgotPlt(uint8_t *Buf, const Symbol &S) const {
+ writeGotPlt(Buf, S);
+RelExpr TargetInfo::adjustRelaxExpr(RelType Type, const uint8_t *Data,
+ RelExpr Expr) const {
+ return Expr;
+void TargetInfo::relaxGot(uint8_t *Loc, uint64_t Val) const {
+ llvm_unreachable("Should not have claimed to be relaxable");
+void TargetInfo::relaxTlsGdToLe(uint8_t *Loc, RelType Type,
+ uint64_t Val) const {
+ llvm_unreachable("Should not have claimed to be relaxable");
+void TargetInfo::relaxTlsGdToIe(uint8_t *Loc, RelType Type,
+ uint64_t Val) const {
+ llvm_unreachable("Should not have claimed to be relaxable");
+void TargetInfo::relaxTlsIeToLe(uint8_t *Loc, RelType Type,
+ uint64_t Val) const {
+ llvm_unreachable("Should not have claimed to be relaxable");
+void TargetInfo::relaxTlsLdToLe(uint8_t *Loc, RelType Type,
+ uint64_t Val) const {
+ llvm_unreachable("Should not have claimed to be relaxable");
+uint64_t TargetInfo::getImageBase() { // Image base address: explicit setting first, otherwise a default
+ // Use -image-base if set. Fall back to the target default if not.
+ if (Config->ImageBase)
+ return *Config->ImageBase;
+ return Config->Pic ? 0 : DefaultImageBase; // when Config->Pic is set the base is 0 rather than DefaultImageBase
diff --git a/contrib/llvm/tools/lld/ELF/Target.h b/contrib/llvm/tools/lld/ELF/Target.h
new file mode 100644
index 000000000000..685ad05ecd66
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Target.h
@@ -0,0 +1,260 @@
+//===- Target.h -------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "InputSection.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Support/MathExtras.h"
+#include <array>
+namespace lld {
+std::string toString(elf::RelType Type);
+namespace elf {
+class Defined;
+class InputFile;
+class Symbol;
+class TargetInfo {
+ virtual uint32_t calcEFlags() const { return 0; }
+ virtual RelType getDynRel(RelType Type) const { return Type; }
+ virtual void writeGotPltHeader(uint8_t *Buf) const {}
+ virtual void writeGotHeader(uint8_t *Buf) const {}
+ virtual void writeGotPlt(uint8_t *Buf, const Symbol &S) const {} // default implementation writes nothing
+ virtual void writeIgotPlt(uint8_t *Buf, const Symbol &S) const;
+ virtual int64_t getImplicitAddend(const uint8_t *Buf, RelType Type) const;
+ // If lazy binding is supported, the first entry of the PLT has code
+ // to call the dynamic linker to resolve PLT entries the first time
+ // they are called. This function writes that code.
+ virtual void writePltHeader(uint8_t *Buf) const {}
+ virtual void writePlt(uint8_t *Buf, uint64_t GotEntryAddr,
+ uint64_t PltEntryAddr, int32_t Index,
+ unsigned RelOff) const {}
+ virtual void addPltHeaderSymbols(InputSection &IS) const {}
+ virtual void addPltSymbols(InputSection &IS, uint64_t Off) const {}
+ // Returns true if a relocation only uses the low bits of a value such that
+ // all those bits are in the same page. For example, if the relocation
+ // only uses the low 12 bits in a system with 4k pages. If this is true, the
+ // bits will always have the same value at runtime and we don't have to emit
+ // a dynamic relocation.
+ virtual bool usesOnlyLowPageBits(RelType Type) const;
+ // Decide whether a Thunk is needed for the relocation from File
+ // targeting S.
+ virtual bool needsThunk(RelExpr Expr, RelType RelocType,
+ const InputFile *File, uint64_t BranchAddr,
+ const Symbol &S) const;
+ // On systems with range extensions we place collections of Thunks at
+ // regular spacings that enable the majority of branches to reach the Thunks.
+ // A value of 0 means range extension thunks are not supported.
+ virtual uint32_t getThunkSectionSpacing() const { return 0; }
+ // The function with a prologue starting at Loc was compiled with
+ // -fsplit-stack and it calls a function compiled without. Adjust the prologue
+ // to do the right thing. See https://gcc.gnu.org/wiki/SplitStacks.
+ // The symbol's st_other flags are needed on PowerPC64 for determining the
+ // offset to the split-stack prologue.
+ virtual bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
+ uint8_t StOther) const;
+ // Return true if we can reach Dst from Src with a relocation of type Type.
+ virtual bool inBranchRange(RelType Type, uint64_t Src,
+ uint64_t Dst) const;
+ virtual RelExpr getRelExpr(RelType Type, const Symbol &S,
+ const uint8_t *Loc) const = 0;
+ virtual void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const = 0;
+ virtual ~TargetInfo();
+ unsigned TlsGdRelaxSkip = 1;
+ unsigned PageSize = 4096;
+ unsigned DefaultMaxPageSize = 4096;
+ uint64_t getImageBase();
+ // Offset of _GLOBAL_OFFSET_TABLE_ from base of .got or .got.plt section.
+ uint64_t GotBaseSymOff = 0;
+ // True if _GLOBAL_OFFSET_TABLE_ is relative to .got.plt, false if .got.
+ bool GotBaseSymInGotPlt = true;
+ RelType CopyRel;
+ RelType GotRel;
+ RelType NoneRel;
+ RelType PltRel;
+ RelType RelativeRel;
+ RelType IRelativeRel;
+ RelType TlsDescRel;
+ RelType TlsGotRel;
+ RelType TlsModuleIndexRel;
+ RelType TlsOffsetRel;
+ unsigned GotEntrySize = 0;
+ unsigned GotPltEntrySize = 0;
+ unsigned PltEntrySize;
+ unsigned PltHeaderSize;
+ // At least on x86_64 positions 1 and 2 are used by the first plt entry
+ // to support lazy loading.
+ unsigned GotPltHeaderEntriesNum = 3;
+ // On PPC ELF V2 abi, the first entry in the .got is the .TOC.
+ unsigned GotHeaderEntriesNum = 0;
+ bool NeedsThunks = false;
+ // A 4-byte field corresponding to one or more trap instructions, used to pad
+ // executable OutputSections.
+ std::array<uint8_t, 4> TrapInstr;
+ // If a target needs to rewrite calls to __morestack to instead call
+ // __morestack_non_split when a split-stack enabled caller calls a
+ // non-split-stack callee this will return true. Otherwise returns false.
+ bool NeedsMoreStackNonSplit = true;
+ virtual RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
+ RelExpr Expr) const;
+ virtual void relaxGot(uint8_t *Loc, uint64_t Val) const;
+ virtual void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const;
+ virtual void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const;
+ virtual void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const;
+ virtual void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const;
+ // On FreeBSD x86_64 the first page cannot be mmaped.
+ // On Linux that is controlled by vm.mmap_min_addr. At least on some x86_64
+ // installs that is 65536, so the first 15 pages cannot be used.
+ // Given that, the smallest value that can be used in here is 0x10000.
+ uint64_t DefaultImageBase = 0x10000;
+TargetInfo *getAArch64TargetInfo();
+TargetInfo *getAMDGPUTargetInfo();
+TargetInfo *getARMTargetInfo();
+TargetInfo *getAVRTargetInfo();
+TargetInfo *getHexagonTargetInfo();
+TargetInfo *getMSP430TargetInfo();
+TargetInfo *getPPC64TargetInfo();
+TargetInfo *getPPCTargetInfo();
+TargetInfo *getRISCVTargetInfo();
+TargetInfo *getSPARCV9TargetInfo();
+TargetInfo *getX32TargetInfo();
+TargetInfo *getX86TargetInfo();
+TargetInfo *getX86_64TargetInfo();
+template <class ELFT> TargetInfo *getMipsTargetInfo();
+struct ErrorPlace {
+ InputSectionBase *IS;
+ std::string Loc;
+// Returns input section and corresponding source string for the given location.
+ErrorPlace getErrorPlace(const uint8_t *Loc);
+static inline std::string getErrorLocation(const uint8_t *Loc) {
+ return getErrorPlace(Loc).Loc;
+// In the PowerPC64 Elf V2 abi a function can have 2 entry points. The first is
+// a global entry point (GEP) which typically is used to initialize the TOC
+// pointer in general purpose register 2. The second is a local entry
+// point (LEP) which bypasses the TOC pointer initialization code. The
+// offset between GEP and LEP is encoded in a function's st_other flags.
+// This function will return the offset (in bytes) from the global entry-point
+// to the local entry-point.
+unsigned getPPC64GlobalEntryToLocalEntryOffset(uint8_t StOther);
+uint64_t getPPC64TocBase();
+uint64_t getAArch64Page(uint64_t Expr);
+extern TargetInfo *Target;
+TargetInfo *getTarget();
+template <class ELFT> bool isMipsPIC(const Defined *Sym);
+static inline void reportRangeError(uint8_t *Loc, RelType Type, const Twine &V, // Report that value V for relocation Type at Loc lies outside [Min, Max]
+ int64_t Min, uint64_t Max) {
+ ErrorPlace ErrPlace = getErrorPlace(Loc);
+ StringRef Hint; // stays empty unless the location is inside a ".debug"-prefixed section
+ if (ErrPlace.IS && ErrPlace.IS->Name.startswith(".debug"))
+ Hint = "; consider recompiling with -fdebug-types-section to reduce size "
+ "of debug sections";
+ errorOrWarn(ErrPlace.Loc + "relocation " + lld::toString(Type) +
+ " out of range: " + V.str() + " is not in [" + Twine(Min).str() +
+ ", " + Twine(Max).str() + "]" + Hint);
+inline unsigned getPltEntryOffset(unsigned Idx) { // Offset of PLT entry Idx: header size plus Idx fixed-size entries
+ return Target->PltHeaderSize + Target->PltEntrySize * Idx;
+// Make sure that V can be represented as an N bit signed integer.
+inline void checkInt(uint8_t *Loc, int64_t V, int N, RelType Type) {
+ if (V != llvm::SignExtend64(V, N))
+ reportRangeError(Loc, Type, Twine(V), llvm::minIntN(N), llvm::maxIntN(N));
+// Make sure that V can be represented as an N bit unsigned integer.
+inline void checkUInt(uint8_t *Loc, uint64_t V, int N, RelType Type) {
+ if ((V >> N) != 0)
+ reportRangeError(Loc, Type, Twine(V), 0, llvm::maxUIntN(N));
+// Make sure that V can be represented as an N bit signed or unsigned integer.
+inline void checkIntUInt(uint8_t *Loc, uint64_t V, int N, RelType Type) {
+ // For the error message we should cast V to a signed integer so that error
+ // messages show a small negative value rather than an extremely large one.
+ if (V != (uint64_t)llvm::SignExtend64(V, N) && (V >> N) != 0) // error only when V fits neither as signed nor as unsigned N-bit
+ reportRangeError(Loc, Type, Twine((int64_t)V), llvm::minIntN(N),
+ llvm::maxIntN(N)); // NOTE(review): the reported bounds are the signed range only
+inline void checkAlignment(uint8_t *Loc, uint64_t V, int N, RelType Type) { // Report an error if V is not a multiple of N
+ if ((V & (N - 1)) != 0) // mask test: only a valid alignment check when N is a power of two
+ error(getErrorLocation(Loc) + "improper alignment for relocation " +
+ lld::toString(Type) + ": 0x" + llvm::utohexstr(V) +
+ " is not aligned to " + Twine(N) + " bytes");
+// Endianness-aware read/write: each helper passes Config->Endianness to llvm::support::endian.
+inline uint16_t read16(const void *P) { // read a 16-bit value from P
+ return llvm::support::endian::read16(P, Config->Endianness);
+inline uint32_t read32(const void *P) { // read a 32-bit value from P
+ return llvm::support::endian::read32(P, Config->Endianness);
+inline uint64_t read64(const void *P) { // read a 64-bit value from P
+ return llvm::support::endian::read64(P, Config->Endianness);
+inline void write16(void *P, uint16_t V) { // store the 16-bit value V at P
+ llvm::support::endian::write16(P, V, Config->Endianness);
+inline void write32(void *P, uint32_t V) { // store the 32-bit value V at P
+ llvm::support::endian::write32(P, V, Config->Endianness);
+inline void write64(void *P, uint64_t V) { // store the 64-bit value V at P
+ llvm::support::endian::write64(P, V, Config->Endianness);
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/Thunks.cpp b/contrib/llvm/tools/lld/ELF/Thunks.cpp
new file mode 100644
index 000000000000..7a31d36b0e90
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Thunks.cpp
@@ -0,0 +1,846 @@
+//===- Thunks.cpp --------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file contains Thunk subclasses.
+// A thunk is a small piece of code written after an input section
+// which is used to jump between "incompatible" functions
+// such as MIPS PIC and non-PIC or ARM non-Thumb and Thumb functions.
+// If a jump target is too far and its address doesn't fit in a
+// short jump instruction, we need to create a thunk too; the long-range
+// thunks below handle this for AArch64, ARM and PPC64.
+// i386 and x86-64 don't need thunks.
+#include "Thunks.h"
+#include "Config.h"
+#include "InputSection.h"
+#include "OutputSections.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <cstdint>
+#include <cstring>
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::ELF;
+namespace lld {
+namespace elf {
+namespace {
+// AArch64 long range Thunks
+class AArch64ABSLongThunk final : public Thunk {
+ AArch64ABSLongThunk(Symbol &Dest) : Thunk(Dest) {}
+ uint32_t size() override { return 16; }
+ void writeTo(uint8_t *Buf) override;
+ void addSymbols(ThunkSection &IS) override;
+class AArch64ADRPThunk final : public Thunk {
+ AArch64ADRPThunk(Symbol &Dest) : Thunk(Dest) {}
+ uint32_t size() override { return 12; }
+ void writeTo(uint8_t *Buf) override;
+ void addSymbols(ThunkSection &IS) override;
+// Base class for ARM thunks.
+// An ARM thunk may be either short or long. A short thunk is simply a branch
+// (B) instruction, and it may be used to call ARM functions when the distance
+// from the thunk to the target is less than 32MB. Long thunks can branch to any
+// virtual address and can switch between ARM and Thumb, and they are
+// implemented in the derived classes. This class tries to create a short thunk
+// if the target is in range, otherwise it creates a long thunk.
+class ARMThunk : public Thunk {
+ ARMThunk(Symbol &Dest) : Thunk(Dest) {}
+ bool mayUseShortThunk();
+ uint32_t size() override { return mayUseShortThunk() ? 4 : sizeLong(); }
+ void writeTo(uint8_t *Buf) override;
+ bool isCompatibleWith(RelType Type) const override;
+ // Returns the size of a long thunk.
+ virtual uint32_t sizeLong() = 0;
+ // Writes a long thunk to Buf.
+ virtual void writeLong(uint8_t *Buf) = 0;
+ // This field tracks whether all previously considered layouts would allow
+ // this thunk to be short. If we have ever needed a long thunk, we always
+ // create a long thunk, even if the thunk may be short given the current
+ // distance to the target. We do this because transitioning from long to short
+ // can create layout oscillations in certain corner cases which would prevent
+ // the layout from converging.
+ bool MayUseShortThunk = true;
+// Base class for Thumb-2 thunks.
+// This class is similar to ARMThunk, but it uses the Thumb-2 B.W instruction
+// which has a range of 16MB.
+class ThumbThunk : public Thunk {
+ ThumbThunk(Symbol &Dest) : Thunk(Dest) { Alignment = 2; }
+ bool mayUseShortThunk();
+ uint32_t size() override { return mayUseShortThunk() ? 4 : sizeLong(); }
+ void writeTo(uint8_t *Buf) override;
+ bool isCompatibleWith(RelType Type) const override;
+ // Returns the size of a long thunk.
+ virtual uint32_t sizeLong() = 0;
+ // Writes a long thunk to Buf.
+ virtual void writeLong(uint8_t *Buf) = 0;
+ // See comment in ARMThunk above.
+ bool MayUseShortThunk = true;
+// Specific ARM Thunk implementations. The naming convention is:
+// Source State, TargetState, Target Requirement, ABS or PI, Range
+class ARMV7ABSLongThunk final : public ARMThunk {
+ ARMV7ABSLongThunk(Symbol &Dest) : ARMThunk(Dest) {}
+ uint32_t sizeLong() override { return 12; }
+ void writeLong(uint8_t *Buf) override;
+ void addSymbols(ThunkSection &IS) override;
+class ARMV7PILongThunk final : public ARMThunk {
+ ARMV7PILongThunk(Symbol &Dest) : ARMThunk(Dest) {}
+ uint32_t sizeLong() override { return 16; }
+ void writeLong(uint8_t *Buf) override;
+ void addSymbols(ThunkSection &IS) override;
+class ThumbV7ABSLongThunk final : public ThumbThunk {
+ ThumbV7ABSLongThunk(Symbol &Dest) : ThumbThunk(Dest) {}
+ uint32_t sizeLong() override { return 10; }
+ void writeLong(uint8_t *Buf) override;
+ void addSymbols(ThunkSection &IS) override;
+class ThumbV7PILongThunk final : public ThumbThunk {
+ ThumbV7PILongThunk(Symbol &Dest) : ThumbThunk(Dest) {}
+ uint32_t sizeLong() override { return 12; }
+ void writeLong(uint8_t *Buf) override;
+ void addSymbols(ThunkSection &IS) override;
+// Implementations of Thunks for older Arm architectures that do not support
+// the movt/movw instructions. These thunks require at least Architecture v5
+// as used on processors such as the Arm926ej-s. There are no Thumb entry
+// points as there is no Thumb branch instruction on these architectures that
+// can result in a thunk.
+class ARMV5ABSLongThunk final : public ARMThunk {
+ ARMV5ABSLongThunk(Symbol &Dest) : ARMThunk(Dest) {}
+ uint32_t sizeLong() override { return 8; }
+ void writeLong(uint8_t *Buf) override;
+ void addSymbols(ThunkSection &IS) override;
+ bool isCompatibleWith(uint32_t RelocType) const override;
+class ARMV5PILongThunk final : public ARMThunk {
+ ARMV5PILongThunk(Symbol &Dest) : ARMThunk(Dest) {}
+ uint32_t sizeLong() override { return 16; }
+ void writeLong(uint8_t *Buf) override;
+ void addSymbols(ThunkSection &IS) override;
+ bool isCompatibleWith(uint32_t RelocType) const override;
+// Implementations of Thunks for Arm v6-M. Only Thumb instructions are permitted
+class ThumbV6MABSLongThunk final : public ThumbThunk {
+ ThumbV6MABSLongThunk(Symbol &Dest) : ThumbThunk(Dest) {}
+ uint32_t sizeLong() override { return 12; }
+ void writeLong(uint8_t *Buf) override;
+ void addSymbols(ThunkSection &IS) override;
+class ThumbV6MPILongThunk final : public ThumbThunk {
+ ThumbV6MPILongThunk(Symbol &Dest) : ThumbThunk(Dest) {}
+ uint32_t sizeLong() override { return 16; }
+ void writeLong(uint8_t *Buf) override;
+ void addSymbols(ThunkSection &IS) override;
+// MIPS LA25 thunk
+class MipsThunk final : public Thunk {
+ MipsThunk(Symbol &Dest) : Thunk(Dest) {}
+ uint32_t size() override { return 16; }
+ void writeTo(uint8_t *Buf) override;
+ void addSymbols(ThunkSection &IS) override;
+ InputSection *getTargetInputSection() const override;
+// microMIPS R2-R5 LA25 thunk
+class MicroMipsThunk final : public Thunk {
+ MicroMipsThunk(Symbol &Dest) : Thunk(Dest) {}
+ uint32_t size() override { return 14; }
+ void writeTo(uint8_t *Buf) override;
+ void addSymbols(ThunkSection &IS) override;
+ InputSection *getTargetInputSection() const override;
+// microMIPS R6 LA25 thunk
+class MicroMipsR6Thunk final : public Thunk {
+ MicroMipsR6Thunk(Symbol &Dest) : Thunk(Dest) {}
+ uint32_t size() override { return 12; }
+ void writeTo(uint8_t *Buf) override;
+ void addSymbols(ThunkSection &IS) override;
+ InputSection *getTargetInputSection() const override;
+// PPC64 Plt call stubs.
+// Any call site that needs to call through a plt entry needs a call stub in
+// the .text section. The call stub is responsible for:
+// 1) Saving the toc-pointer to the stack.
+// 2) Loading the target function's address from the procedure linkage table into
+// r12 for use by the target function's global entry point, and into the count
+// register.
+// 3) Transferring control to the target function through an indirect branch.
+class PPC64PltCallStub final : public Thunk {
+ PPC64PltCallStub(Symbol &Dest) : Thunk(Dest) {}
+ uint32_t size() override { return 20; }
+ void writeTo(uint8_t *Buf) override;
+ void addSymbols(ThunkSection &IS) override;
+// A bl instruction uses a signed 24 bit offset, with an implicit 4 byte
+// alignment. This gives a possible 26 bits of 'reach'. If the call offset is
+// larger than that we need to emit a long-branch thunk. The target address
+// of the callee is stored in a table to be accessed TOC-relative. Since the
+// call must be local (a non-local call will have a PltCallStub instead) the
+// table stores the address of the callee's local entry point. For
+// position-independent code a corresponding relative dynamic relocation is
+// used.
+class PPC64LongBranchThunk : public Thunk {
+ uint32_t size() override { return 16; }
+ void writeTo(uint8_t *Buf) override;
+ void addSymbols(ThunkSection &IS) override;
+ PPC64LongBranchThunk(Symbol &Dest) : Thunk(Dest) {}
+class PPC64PILongBranchThunk final : public PPC64LongBranchThunk { // Long-branch thunk variant that also registers a dynamic relocation
+ PPC64PILongBranchThunk(Symbol &Dest) : PPC64LongBranchThunk(Dest) {
+ assert(!Dest.IsPreemptible);
+ if (Dest.isInPPC64Branchlt()) // Dest already has a long-branch table entry: nothing more to add
+ return;
+ In.PPC64LongBranchTarget->addEntry(Dest);
+ In.RelaDyn->addReloc({Target->RelativeRel, In.PPC64LongBranchTarget, // relative dynamic relocation against the table section
+ Dest.getPPC64LongBranchOffset(), true, &Dest,
+ getPPC64GlobalEntryToLocalEntryOffset(Dest.StOther)}); // presumably the addend: global-to-local entry point offset
+ }
+class PPC64PDLongBranchThunk final : public PPC64LongBranchThunk { // Long-branch thunk variant that adds only a table entry, no dynamic relocation
+ PPC64PDLongBranchThunk(Symbol &Dest) : PPC64LongBranchThunk(Dest) {
+ if (!Dest.isInPPC64Branchlt()) // add a table entry only if Dest does not have one yet
+ In.PPC64LongBranchTarget->addEntry(Dest);
+ }
+} // end anonymous namespace
+Defined *Thunk::addSymbol(StringRef Name, uint8_t Type, uint64_t Value, // Create a zero-size synthetic local symbol in Section and record it
+ InputSectionBase &Section) {
+ Defined *D = addSyntheticLocal(Name, Type, Value, /*Size=*/0, Section);
+ Syms.push_back(D); // kept in Syms so setOffset() can adjust its Value later
+ return D;
+void Thunk::setOffset(uint64_t NewOffset) { // Move the thunk to NewOffset and shift its recorded symbols by the same delta
+ for (Defined *D : Syms)
+ D->Value = D->Value - Offset + NewOffset; // remove the old offset, apply the new one
+ Offset = NewOffset;
+// AArch64 long range Thunks
+static uint64_t getAArch64ThunkDestVA(const Symbol &S) {
+ uint64_t V = S.isInPlt() ? S.getPltVA() : S.getVA();
+ return V;
+void AArch64ABSLongThunk::writeTo(uint8_t *Buf) { // Write the 16-byte thunk: load an absolute 64-bit address and branch to it
+ const uint8_t Data[] = {
+ 0x50, 0x00, 0x00, 0x58, // ldr x16, L0
+ 0x00, 0x02, 0x1f, 0xd6, // br x16
+ 0x00, 0x00, 0x00, 0x00, // L0: .xword S
+ 0x00, 0x00, 0x00, 0x00,
+ };
+ uint64_t S = getAArch64ThunkDestVA(Destination);
+ memcpy(Buf, Data, sizeof(Data));
+ Target->relocateOne(Buf + 8, R_AARCH64_ABS64, S); // fill the 8-byte literal at L0 (offset 8) with the destination address
+void AArch64ABSLongThunk::addSymbols(ThunkSection &IS) {
+ addSymbol(Saver.save("__AArch64AbsLongThunk_" + Destination.getName()),
+ STT_FUNC, 0, IS);
+ addSymbol("$x", STT_NOTYPE, 0, IS);
+ addSymbol("$d", STT_NOTYPE, 8, IS);
+// This Thunk has a maximum range of 4GB, which is sufficient for all programs
+// using the small code model, including pc-relative ones. At time of writing
+// clang and gcc do not support the large code model for position independent
+// code so it is safe to use this for position independent thunks without
+// worrying about the destination being more than 4GB away.
+void AArch64ADRPThunk::writeTo(uint8_t *Buf) { // Write the 12-byte adrp/add/br sequence that reaches the destination
+ const uint8_t Data[] = {
+ 0x10, 0x00, 0x00, 0x90, // adrp x16, Dest R_AARCH64_ADR_PREL_PG_HI21(Dest)
+ 0x10, 0x02, 0x00, 0x91, // add x16, x16, R_AARCH64_ADD_ABS_LO12_NC(Dest)
+ 0x00, 0x02, 0x1f, 0xd6, // br x16
+ };
+ uint64_t S = getAArch64ThunkDestVA(Destination);
+ uint64_t P = getThunkTargetSym()->getVA(); // presumably the address of this thunk — confirm getThunkTargetSym()
+ memcpy(Buf, Data, sizeof(Data));
+ Target->relocateOne(Buf, R_AARCH64_ADR_PREL_PG_HI21,
+ getAArch64Page(S) - getAArch64Page(P)); // page difference between destination and P, applied to the adrp
+ Target->relocateOne(Buf + 4, R_AARCH64_ADD_ABS_LO12_NC, S); // second instruction (the add) is relocated with S itself
+void AArch64ADRPThunk::addSymbols(ThunkSection &IS) {
+ addSymbol(Saver.save("__AArch64ADRPThunk_" + Destination.getName()), STT_FUNC,
+ 0, IS);
+ addSymbol("$x", STT_NOTYPE, 0, IS);
+// ARM Target Thunks
+static uint64_t getARMThunkDestVA(const Symbol &S) {
+ uint64_t V = S.isInPlt() ? S.getPltVA() : S.getVA();
+ return SignExtend64<32>(V);
+// This function returns true if the target is not Thumb and is within 2^26, and
+// it has not previously returned false (see comment for MayUseShortThunk).
+bool ARMThunk::mayUseShortThunk() {
+ if (!MayUseShortThunk) // sticky: once a long thunk was needed, never go back to short
+ return false;
+ uint64_t S = getARMThunkDestVA(Destination);
+ if (S & 1) { // bit 0 set marks a Thumb destination, which rules out the short form
+ MayUseShortThunk = false;
+ return false;
+ }
+ uint64_t P = getThunkTargetSym()->getVA();
+ int64_t Offset = S - P - 8; // same displacement ARMThunk::writeTo() encodes; the 8 is presumably the ARM PC bias
+ MayUseShortThunk = llvm::isInt<26>(Offset); // remember the result so a failed range check also becomes sticky
+ return MayUseShortThunk;
+void ARMThunk::writeTo(uint8_t *Buf) {
+ if (!mayUseShortThunk()) {
+ writeLong(Buf);
+ return;
+ }
+ uint64_t S = getARMThunkDestVA(Destination);
+ uint64_t P = getThunkTargetSym()->getVA();
+ int64_t Offset = S - P - 8;
+ const uint8_t Data[] = {
+ 0x00, 0x00, 0x00, 0xea, // b S
+ };
+ memcpy(Buf, Data, sizeof(Data));
+ Target->relocateOne(Buf, R_ARM_JUMP24, Offset);
+bool ARMThunk::isCompatibleWith(RelType Type) const {
+ // Thumb branch relocations can't use BLX
+ return Type != R_ARM_THM_JUMP19 && Type != R_ARM_THM_JUMP24;
+// This function returns true if the target is Thumb and is within 2^25, and
+// it has not previously returned false (see comment for MayUseShortThunk).
+bool ThumbThunk::mayUseShortThunk() {
+ if (!MayUseShortThunk) // sticky: once a long thunk was needed, never go back to short
+ return false;
+ uint64_t S = getARMThunkDestVA(Destination);
+ if ((S & 1) == 0) { // bit 0 clear marks a non-Thumb destination, which rules out the short form
+ MayUseShortThunk = false;
+ return false;
+ }
+ uint64_t P = getThunkTargetSym()->getVA() & ~1; // clear bit 0 of the thunk symbol's address before measuring distance
+ int64_t Offset = S - P - 4; // the 4 is presumably the Thumb PC bias — confirm
+ MayUseShortThunk = llvm::isInt<25>(Offset); // remember the result so a failed range check also becomes sticky
+ return MayUseShortThunk;
+void ThumbThunk::writeTo(uint8_t *Buf) {
+ if (!mayUseShortThunk()) {
+ writeLong(Buf);
+ return;
+ }
+ uint64_t S = getARMThunkDestVA(Destination);
+ uint64_t P = getThunkTargetSym()->getVA();
+ int64_t Offset = S - P - 4;
+ const uint8_t Data[] = {
+ 0x00, 0xf0, 0x00, 0xb0, // b.w S
+ };
+ memcpy(Buf, Data, sizeof(Data));
+ Target->relocateOne(Buf, R_ARM_THM_JUMP24, Offset);
+bool ThumbThunk::isCompatibleWith(RelType Type) const {
+ // ARM branch relocations can't use BLX
+ return Type != R_ARM_JUMP24 && Type != R_ARM_PC24 && Type != R_ARM_PLT32;
+void ARMV7ABSLongThunk::writeLong(uint8_t *Buf) {
+ const uint8_t Data[] = {
+ 0x00, 0xc0, 0x00, 0xe3, // movw ip,:lower16:S
+ 0x00, 0xc0, 0x40, 0xe3, // movt ip,:upper16:S
+ 0x1c, 0xff, 0x2f, 0xe1, // bx ip
+ };
+ uint64_t S = getARMThunkDestVA(Destination);
+ memcpy(Buf, Data, sizeof(Data));
+ Target->relocateOne(Buf, R_ARM_MOVW_ABS_NC, S);
+ Target->relocateOne(Buf + 4, R_ARM_MOVT_ABS, S);
+void ARMV7ABSLongThunk::addSymbols(ThunkSection &IS) {
+ addSymbol(Saver.save("__ARMv7ABSLongThunk_" + Destination.getName()),
+ STT_FUNC, 0, IS);
+ addSymbol("$a", STT_NOTYPE, 0, IS);
+void ThumbV7ABSLongThunk::writeLong(uint8_t *Buf) {
+ const uint8_t Data[] = {
+ 0x40, 0xf2, 0x00, 0x0c, // movw ip, :lower16:S
+ 0xc0, 0xf2, 0x00, 0x0c, // movt ip, :upper16:S
+ 0x60, 0x47, // bx ip
+ };
+ uint64_t S = getARMThunkDestVA(Destination);
+ memcpy(Buf, Data, sizeof(Data));
+ Target->relocateOne(Buf, R_ARM_THM_MOVW_ABS_NC, S);
+ Target->relocateOne(Buf + 4, R_ARM_THM_MOVT_ABS, S);
+void ThumbV7ABSLongThunk::addSymbols(ThunkSection &IS) {
+ addSymbol(Saver.save("__Thumbv7ABSLongThunk_" + Destination.getName()),
+ STT_FUNC, 1, IS);
+ addSymbol("$t", STT_NOTYPE, 0, IS);
+void ARMV7PILongThunk::writeLong(uint8_t *Buf) {
+ const uint8_t Data[] = {
+ 0xf0, 0xcf, 0x0f, 0xe3, // P: movw ip,:lower16:S - (P + (L1-P) + 8)
+ 0x00, 0xc0, 0x40, 0xe3, // movt ip,:upper16:S - (P + (L1-P) + 8)
+ 0x0f, 0xc0, 0x8c, 0xe0, // L1: add ip, ip, pc
+ 0x1c, 0xff, 0x2f, 0xe1, // bx ip
+ };
+ uint64_t S = getARMThunkDestVA(Destination);
+ uint64_t P = getThunkTargetSym()->getVA();
+ int64_t Offset = S - P - 16;
+ memcpy(Buf, Data, sizeof(Data));
+ Target->relocateOne(Buf, R_ARM_MOVW_PREL_NC, Offset);
+ Target->relocateOne(Buf + 4, R_ARM_MOVT_PREL, Offset);
+void ARMV7PILongThunk::addSymbols(ThunkSection &IS) {
+ addSymbol(Saver.save("__ARMV7PILongThunk_" + Destination.getName()), STT_FUNC,
+ 0, IS);
+ addSymbol("$a", STT_NOTYPE, 0, IS);
+void ThumbV7PILongThunk::writeLong(uint8_t *Buf) {
+ const uint8_t Data[] = {
+ 0x4f, 0xf6, 0xf4, 0x7c, // P: movw ip,:lower16:S - (P + (L1-P) + 4)
+ 0xc0, 0xf2, 0x00, 0x0c, // movt ip,:upper16:S - (P + (L1-P) + 4)
+ 0xfc, 0x44, // L1: add ip, pc
+ 0x60, 0x47, // bx ip
+ };
+ uint64_t S = getARMThunkDestVA(Destination);
+ uint64_t P = getThunkTargetSym()->getVA() & ~0x1;
+ int64_t Offset = S - P - 12;
+ memcpy(Buf, Data, sizeof(Data));
+ Target->relocateOne(Buf, R_ARM_THM_MOVW_PREL_NC, Offset);
+ Target->relocateOne(Buf + 4, R_ARM_THM_MOVT_PREL, Offset);
+void ThumbV7PILongThunk::addSymbols(ThunkSection &IS) {
+ addSymbol(Saver.save("__ThumbV7PILongThunk_" + Destination.getName()),
+ STT_FUNC, 1, IS);
+ addSymbol("$t", STT_NOTYPE, 0, IS);
+void ARMV5ABSLongThunk::writeLong(uint8_t *Buf) {
+ const uint8_t Data[] = {
+ 0x04, 0xf0, 0x1f, 0xe5, // ldr pc, [pc,#-4] ; L1
+ 0x00, 0x00, 0x00, 0x00, // L1: .word S
+ };
+ memcpy(Buf, Data, sizeof(Data));
+ Target->relocateOne(Buf + 4, R_ARM_ABS32, getARMThunkDestVA(Destination));
+void ARMV5ABSLongThunk::addSymbols(ThunkSection &IS) {
+ addSymbol(Saver.save("__ARMv5ABSLongThunk_" + Destination.getName()),
+ STT_FUNC, 0, IS);
+ addSymbol("$a", STT_NOTYPE, 0, IS);
+ addSymbol("$d", STT_NOTYPE, 4, IS);
+bool ARMV5ABSLongThunk::isCompatibleWith(uint32_t RelocType) const {
+ // Thumb branch relocations can't use BLX
+ return RelocType != R_ARM_THM_JUMP19 && RelocType != R_ARM_THM_JUMP24;
+void ARMV5PILongThunk::writeLong(uint8_t *Buf) { // Copies a 16-byte position-independent ARM template into Buf and fills its pc-relative literal.
+ const uint8_t Data[] = {
+ 0x04, 0xc0, 0x9f, 0xe5, // P: ldr ip, [pc,#4] ; L2
+ 0x0c, 0xc0, 0x8f, 0xe0, // L1: add ip, pc, ip
+ 0x1c, 0xff, 0x2f, 0xe1, // bx ip
+ 0x00, 0x00, 0x00, 0x00, // L2: .word S - (P + (L1 - P) + 8)
+ };
+ uint64_t S = getARMThunkDestVA(Destination); // Destination address.
+ uint64_t P = getThunkTargetSym()->getVA() & ~0x1; // Thunk symbol address with bit 0 cleared.
+ memcpy(Buf, Data, sizeof(Data));
+ Target->relocateOne(Buf + 12, R_ARM_REL32, S - P - 12); // L1 is 4 bytes past P and the template adds a pc bias of 8, hence 12.
+void ARMV5PILongThunk::addSymbols(ThunkSection &IS) { // Registers this thunk's symbols in IS.
+ addSymbol(Saver.save("__ARMV5PILongThunk_" + Destination.getName()), STT_FUNC,
+ 0, IS); // Named STT_FUNC symbol at offset 0.
+ addSymbol("$a", STT_NOTYPE, 0, IS); // "$a" at offset 0: start of the three instructions (ARM-state mapping symbol).
+ addSymbol("$d", STT_NOTYPE, 12, IS); // "$d" at offset 12: the literal word L2 (data mapping symbol).
+// Reports whether a relocation of type RelocType may be redirected to this
+// thunk. Every type is accepted except the two Thumb branch relocations.
+bool ARMV5PILongThunk::isCompatibleWith(uint32_t RelocType) const {
+  // Thumb branch relocations can't use BLX
+  const bool IsThumbBranch =
+      RelocType == R_ARM_THM_JUMP19 || RelocType == R_ARM_THM_JUMP24;
+  return !IsThumbBranch;
+void ThumbV6MABSLongThunk::writeLong(uint8_t *Buf) { // Copies a 12-byte absolute Thumb template into Buf and fills its literal word.
+ // Most Thumb instructions cannot access the high registers r8 - r15. As the
+ // only register we can corrupt is r12 we must instead spill a low register
+ // to the stack to use as a scratch register. We push r1 even though we
+ // don't need it, to get some space to use for the return address.
+ const uint8_t Data[] = {
+ 0x03, 0xb4, // push {r0, r1} ; Obtain scratch registers
+ 0x01, 0x48, // ldr r0, [pc, #4] ; L1
+ 0x01, 0x90, // str r0, [sp, #4] ; SP + 4 = S
+ 0x01, 0xbd, // pop {r0, pc} ; restore r0 and branch to dest
+ 0x00, 0x00, 0x00, 0x00 // L1: .word S
+ };
+ uint64_t S = getARMThunkDestVA(Destination); // Destination address.
+ memcpy(Buf, Data, sizeof(Data));
+ Target->relocateOne(Buf + 8, R_ARM_ABS32, S); // Fill the literal at L1 (offset 8) with S.
+void ThumbV6MABSLongThunk::addSymbols(ThunkSection &IS) { // Registers this thunk's symbols in IS.
+ addSymbol(Saver.save("__Thumbv6MABSLongThunk_" + Destination.getName()),
+ STT_FUNC, 1, IS); // Named STT_FUNC symbol with value 1 (bit 0 set).
+ addSymbol("$t", STT_NOTYPE, 0, IS); // "$t" at offset 0: the Thumb instructions (Thumb-state mapping symbol).
+ addSymbol("$d", STT_NOTYPE, 8, IS); // "$d" at offset 8: the literal word L1 (data mapping symbol).
+void ThumbV6MPILongThunk::writeLong(uint8_t *Buf) { // Copies a 16-byte position-independent Thumb template into Buf and fills its pc-relative literal.
+ // Most Thumb instructions cannot access the high registers r8 - r15. As the
+ // only register we can corrupt is ip (r12) we must instead spill a low
+ // register to the stack to use as a scratch register.
+ const uint8_t Data[] = {
+ 0x01, 0xb4, // P: push {r0} ; Obtain scratch register
+ 0x02, 0x48, // ldr r0, [pc, #8] ; L2
+ 0x84, 0x46, // mov ip, r0 ; copy low register into high register
+ 0x01, 0xbc, // pop {r0} ; restore scratch register
+ 0xe7, 0x44, // L1: add pc, ip ; transfer control
+ 0xc0, 0x46, // nop ; pad to 4-byte boundary
+ 0x00, 0x00, 0x00, 0x00, // L2: .word S - (P + (L1 - P) + 4)
+ };
+ uint64_t S = getARMThunkDestVA(Destination); // Destination address.
+ uint64_t P = getThunkTargetSym()->getVA() & ~0x1; // Thunk symbol address with bit 0 cleared (addSymbols defines it with value 1).
+ memcpy(Buf, Data, sizeof(Data));
+ Target->relocateOne(Buf + 12, R_ARM_REL32, S - P - 12); // L1 is 8 bytes past P and the template adds a pc bias of 4, hence 12.
+void ThumbV6MPILongThunk::addSymbols(ThunkSection &IS) { // Registers this thunk's symbols in IS.
+ addSymbol(Saver.save("__Thumbv6MPILongThunk_" + Destination.getName()),
+ STT_FUNC, 1, IS); // Named STT_FUNC symbol with value 1 (bit 0 set).
+ addSymbol("$t", STT_NOTYPE, 0, IS); // "$t" at offset 0: the Thumb instructions (Thumb-state mapping symbol).
+ addSymbol("$d", STT_NOTYPE, 12, IS); // "$d" at offset 12: the literal word L2 (data mapping symbol).
+// Write MIPS LA25 thunk code to call PIC function from the non-PIC one.
+void MipsThunk::writeTo(uint8_t *Buf) { // Emits four 32-bit words (16 bytes) into Buf.
+ uint64_t S = Destination.getVA(); // Destination address.
+ write32(Buf, 0x3c190000); // lui $25, %hi(func)
+ write32(Buf + 4, 0x08000000 | (S >> 2)); // j func -- NOTE(review): S >> 2 is OR-ed in unmasked; confirm callers guarantee it fits the jump's target field.
+ write32(Buf + 8, 0x27390000); // addiu $25, $25, %lo(func)
+ write32(Buf + 12, 0x00000000); // nop
+ Target->relocateOne(Buf, R_MIPS_HI16, S); // Patch the lui immediate.
+ Target->relocateOne(Buf + 8, R_MIPS_LO16, S); // Patch the addiu immediate.
+void MipsThunk::addSymbols(ThunkSection &IS) { // Registers this thunk's single symbol in IS.
+ addSymbol(Saver.save("__LA25Thunk_" + Destination.getName()), STT_FUNC, 0,
+ IS); // Named STT_FUNC symbol at offset 0; the name embeds the destination's name.
+InputSection *MipsThunk::getTargetInputSection() const { // Returns the InputSection that defines Destination, or null.
+ auto &DR = cast<Defined>(Destination); // cast<> asserts that Destination is a Defined symbol.
+ return dyn_cast<InputSection>(DR.Section); // Null when the section is not an InputSection.
+// Write microMIPS R2-R5 LA25 thunk code
+// to call PIC function from the non-PIC one.
+void MicroMipsThunk::writeTo(uint8_t *Buf) { // Writes 16-bit opcode halves at offsets 0, 4, 8 and 12; relocations fill in the operands.
+ uint64_t S = Destination.getVA() | 1; // Destination address with bit 0 set.
+ write16(Buf, 0x41b9); // lui $25, %hi(func)
+ write16(Buf + 4, 0xd400); // j func
+ write16(Buf + 8, 0x3339); // addiu $25, $25, %lo(func)
+ write16(Buf + 12, 0x0c00); // nop
+ Target->relocateOne(Buf, R_MICROMIPS_HI16, S); // Patch the lui operand.
+ Target->relocateOne(Buf + 4, R_MICROMIPS_26_S1, S); // Patch the jump target.
+ Target->relocateOne(Buf + 8, R_MICROMIPS_LO16, S); // Patch the addiu operand.
+// Defines the thunk's named STT_FUNC symbol at offset 0 in IS and flags it as
+// microMIPS code.
+void MicroMipsThunk::addSymbols(ThunkSection &IS) {
+  Defined *D = addSymbol(
+      Saver.save("__microLA25Thunk_" + Destination.getName()), STT_FUNC, 0, IS);
+  // The thunk body is microMIPS code, so tag its symbol the same way the
+  // destination is tagged. Previously D was assigned and then never used, so
+  // the flag was never set.
+  D->StOther |= STO_MIPS_MICROMIPS;
+InputSection *MicroMipsThunk::getTargetInputSection() const { // Returns the InputSection that defines Destination, or null.
+ auto &DR = cast<Defined>(Destination); // cast<> asserts that Destination is a Defined symbol.
+ return dyn_cast<InputSection>(DR.Section); // Null when the section is not an InputSection.
+// Write microMIPS R6 LA25 thunk code
+// to call PIC function from the non-PIC one.
+void MicroMipsR6Thunk::writeTo(uint8_t *Buf) { // Writes 16-bit opcode halves at offsets 0, 4 and 8; relocations fill in the operands.
+ uint64_t S = Destination.getVA() | 1; // Destination address with bit 0 set.
+ uint64_t P = getThunkTargetSym()->getVA(); // Address of the thunk itself.
+ write16(Buf, 0x1320); // lui $25, %hi(func)
+ write16(Buf + 4, 0x3339); // addiu $25, $25, %lo(func)
+ write16(Buf + 8, 0x9400); // bc func
+ Target->relocateOne(Buf, R_MICROMIPS_HI16, S); // Patch the lui operand.
+ Target->relocateOne(Buf + 4, R_MICROMIPS_LO16, S); // Patch the addiu operand.
+ Target->relocateOne(Buf + 8, R_MICROMIPS_PC26_S1, S - P - 12); // pc-relative branch: displacement is measured from P + 12.
+// Defines the thunk's named STT_FUNC symbol at offset 0 in IS and flags it as
+// microMIPS code.
+void MicroMipsR6Thunk::addSymbols(ThunkSection &IS) {
+  Defined *D = addSymbol(
+      Saver.save("__microLA25Thunk_" + Destination.getName()), STT_FUNC, 0, IS);
+  // The thunk body is microMIPS code, so tag its symbol the same way the
+  // destination is tagged. Previously D was assigned and then never used, so
+  // the flag was never set.
+  D->StOther |= STO_MIPS_MICROMIPS;
+InputSection *MicroMipsR6Thunk::getTargetInputSection() const { // Returns the InputSection that defines Destination, or null.
+ auto &DR = cast<Defined>(Destination); // cast<> asserts that Destination is a Defined symbol.
+ return dyn_cast<InputSection>(DR.Section); // Null when the section is not an InputSection.
+static void writePPCLoadAndBranch(uint8_t *Buf, int64_t Offset) { // Emits 16 bytes: load r12 from r2 + Offset, then branch through ctr.
+ uint16_t OffHa = (Offset + 0x8000) >> 16; // High half, pre-adjusted by 0x8000 because the ld displacement below is sign-extended.
+ uint16_t OffLo = Offset & 0xffff; // Low 16 bits of Offset.
+ write32(Buf + 0, 0x3d820000 | OffHa); // addis r12, r2, OffHa
+ write32(Buf + 4, 0xe98c0000 | OffLo); // ld r12, OffLo(r12)
+ write32(Buf + 8, 0x7d8903a6); // mtctr r12
+ write32(Buf + 12, 0x4e800420); // bctr
+void PPC64PltCallStub::writeTo(uint8_t *Buf) { // Emits 20 bytes: a TOC save followed by the shared load-and-branch sequence.
+ int64_t Offset = Destination.getGotPltVA() - getPPC64TocBase(); // Destination's .got.plt slot address relative to the TOC base.
+ // Save the TOC pointer to the save-slot reserved in the call frame.
+ write32(Buf + 0, 0xf8410018); // std r2,24(r1)
+ writePPCLoadAndBranch(Buf + 4, Offset); // Load the slot's contents into r12 and branch to it.
+void PPC64PltCallStub::addSymbols(ThunkSection &IS) { // Registers this stub's single symbol in IS.
+ Defined *S = addSymbol(Saver.save("__plt_" + Destination.getName()), STT_FUNC,
+ 0, IS); // Named STT_FUNC symbol at offset 0.
+ S->NeedsTocRestore = true; // writeTo saves r2 before branching -- presumably consumers of this flag restore it after the call; verify.
+void PPC64LongBranchThunk::writeTo(uint8_t *Buf) { // Emits the 16-byte load-and-branch sequence into Buf.
+ int64_t Offset = Destination.getPPC64LongBranchTableVA() - getPPC64TocBase(); // Destination's long-branch table entry address relative to the TOC base.
+ writePPCLoadAndBranch(Buf, Offset); // Load the entry's contents into r12 and branch to it.
+void PPC64LongBranchThunk::addSymbols(ThunkSection &IS) { // Registers this thunk's single symbol in IS.
+ addSymbol(Saver.save("__long_branch_" + Destination.getName()), STT_FUNC, 0,
+ IS); // Named STT_FUNC symbol at offset 0; the name embeds the destination's name.
+Thunk::Thunk(Symbol &D) : Destination(D), Offset(0) {} // Binds the thunk to destination symbol D and starts at offset 0.
+Thunk::~Thunk() = default; // Out-of-line defaulted virtual destructor.
+// Creates an AArch64 thunk for a branch to S. Only R_AARCH64_CALL26 and
+// R_AARCH64_JUMP26 are accepted; any other relocation type is fatal. The
+// ADRP-based thunk is chosen when Config->PicThunk is set, otherwise the
+// absolute long thunk.
+static Thunk *addThunkAArch64(RelType Type, Symbol &S) {
+  const bool IsBranch = Type == R_AARCH64_CALL26 || Type == R_AARCH64_JUMP26;
+  if (!IsBranch)
+    fatal("unrecognized relocation type");
+  if (!Config->PicThunk)
+    return make<AArch64ABSLongThunk>(S);
+  return make<AArch64ADRPThunk>(S);
+// Creates a thunk for Thumb-ARM interworking.
+// Arm Architectures v5 and v6 do not support Thumb2 technology. This means
+// - MOVT and MOVW instructions cannot be used
+// - Only Thumb relocation that can generate a Thunk is a BL, this can always
+// be transformed into a BLX
+static Thunk *addThunkPreArmv7(RelType Reloc, Symbol &S) { // Returns an ARMv5 long thunk for S, or reports a fatal error for unsupported relocations.
+ switch (Reloc) {
+ case R_ARM_PC24:
+ case R_ARM_PLT32:
+ case R_ARM_JUMP24:
+ case R_ARM_CALL:
+ case R_ARM_THM_CALL: // All five relocation types share the same ARM-state thunks.
+ if (Config->PicThunk)
+ return make<ARMV5PILongThunk>(S); // Position-independent variant.
+ return make<ARMV5ABSLongThunk>(S); // Absolute-address variant.
+ }
+ fatal("relocation " + toString(Reloc) + " to " + toString(S) +
+ " not supported for Armv5 or Armv6 targets"); // Reached for every relocation type not listed above.
+// Create a thunk for Thumb long branch on V6-M.
+// Arm Architecture v6-M only supports Thumb instructions. This means
+// - MOVT and MOVW instructions cannot be used.
+// - Only a limited number of instructions can access registers r8 and above
+// - No interworking support is needed (all Thumb).
+static Thunk *addThunkV6M(RelType Reloc, Symbol &S) { // Returns a Thumb v6-M long thunk for S, or reports a fatal error for unsupported relocations.
+ switch (Reloc) {
+ case R_ARM_THM_JUMP19:
+ case R_ARM_THM_JUMP24:
+ case R_ARM_THM_CALL:
+ if (Config->Pic) // NOTE(review): the sibling factories test Config->PicThunk here; confirm Config->Pic is intended.
+ return make<ThumbV6MPILongThunk>(S); // Position-independent variant.
+ return make<ThumbV6MABSLongThunk>(S); // Absolute-address variant.
+ }
+ fatal("relocation " + toString(Reloc) + " to " + toString(S) +
+ " not supported for Armv6-M targets"); // Reached for every relocation type not listed above.
+// Creates a thunk for Thumb-ARM interworking or branch range extension.
+static Thunk *addThunkArm(RelType Reloc, Symbol &S) { // Picks the thunk class from the target's capabilities, the relocation type and Config->PicThunk.
+ // Decide which Thunk is needed based on:
+ // Available instruction set
+ // - An Arm Thunk can only be used if Arm state is available.
+ // - A Thumb Thunk can only be used if Thumb state is available.
+ // - Can only use a Thunk if it uses instructions that the Target supports.
+ // Relocation is branch or branch and link
+ // - Branch instructions cannot change state, can only select Thunk that
+ // starts in the same state as the caller.
+ // - Branch and link relocations can change state, can select Thunks from
+ // either Arm or Thumb.
+ // Position independent Thunks if we require position independent code.
+ // Handle architectures that have restrictions on the instructions that they
+ // can use in Thunks. The flags below are set by reading the BuildAttributes
+ // of the input objects. InputFiles.cpp contains the mapping from ARM
+ // architecture to flag.
+ if (!Config->ARMHasMovtMovw) { // Without MOVT/MOVW the v7 thunks below cannot be used.
+ if (!Config->ARMJ1J2BranchEncoding)
+ return addThunkPreArmv7(Reloc, S); // Neither MOVT/MOVW nor the J1J2 branch encoding.
+ return addThunkV6M(Reloc, S); // J1J2 branch encoding but no MOVT/MOVW.
+ }
+ switch (Reloc) {
+ case R_ARM_PC24:
+ case R_ARM_PLT32:
+ case R_ARM_JUMP24:
+ case R_ARM_CALL: // ARM relocation types get an ARM thunk.
+ if (Config->PicThunk)
+ return make<ARMV7PILongThunk>(S);
+ return make<ARMV7ABSLongThunk>(S);
+ case R_ARM_THM_JUMP19:
+ case R_ARM_THM_JUMP24:
+ case R_ARM_THM_CALL: // Thumb relocation types get a Thumb thunk.
+ if (Config->PicThunk)
+ return make<ThumbV7PILongThunk>(S);
+ return make<ThumbV7ABSLongThunk>(S);
+ }
+ fatal("unrecognized relocation type"); // Reached for every relocation type not listed above.
+// Creates the LA25 thunk that matches the encoding of destination S:
+// microMIPS R6, microMIPS R2-R5, or regular MIPS. Type is not consulted.
+static Thunk *addThunkMips(RelType Type, Symbol &S) {
+  if ((S.StOther & STO_MIPS_MICROMIPS) && isMipsR6())
+    return make<MicroMipsR6Thunk>(S);
+  // Only microMIPS destinations get a microMIPS thunk. Without this guard the
+  // return was unconditional, so regular MIPS destinations also received a
+  // microMIPS thunk and the MipsThunk return below was unreachable.
+  if (S.StOther & STO_MIPS_MICROMIPS)
+    return make<MicroMipsThunk>(S);
+  return make<MipsThunk>(S);
+static Thunk *addThunkPPC64(RelType Type, Symbol &S) { // Creates the PPC64 thunk for a branch to S.
+ assert(Type == R_PPC64_REL24 && "unexpected relocation type for thunk"); // Checked only in builds with assertions enabled.
+ if (S.isInPlt())
+ return make<PPC64PltCallStub>(S); // Destination is reached through the PLT.
+ if (Config->PicThunk)
+ return make<PPC64PILongBranchThunk>(S); // Position-independent long branch.
+ return make<PPC64PDLongBranchThunk>(S); // Long branch used when PicThunk is off.
+// Creates a thunk for a relocation of type Type against symbol S by handing
+// off to the factory for the output machine (Config->EMachine). Any machine
+// other than AArch64, ARM, MIPS or PPC64 is a programming error.
+Thunk *addThunk(RelType Type, Symbol &S) {
+  switch (Config->EMachine) {
+  case EM_AARCH64:
+    return addThunkAArch64(Type, S);
+  case EM_ARM:
+    return addThunkArm(Type, S);
+  case EM_MIPS:
+    return addThunkMips(Type, S);
+  case EM_PPC64:
+    return addThunkPPC64(Type, S);
+  default:
+    break;
+  }
+  llvm_unreachable("add Thunk only supported for ARM, Mips and PowerPC");
+} // end namespace elf
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/Thunks.h b/contrib/llvm/tools/lld/ELF/Thunks.h
new file mode 100644
index 000000000000..ed82b4d946ac
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Thunks.h
@@ -0,0 +1,71 @@
+//===- Thunks.h --------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Relocations.h"
+namespace lld {
+namespace elf {
+class Defined;
+class Symbol;
+class ThunkSection;
+// Class to describe an instance of a Thunk.
+// A Thunk is a code-sequence inserted by the linker in between a caller and
+// the callee. The relocation to the callee is redirected to the Thunk, which
+// after executing transfers control to the callee. Typical uses of Thunks
+// include transferring control from non-pi to pi and changing state on
+// targets like ARM.
+// Thunks can be created for Defined, Shared and Undefined Symbols.
+// Thunks are assigned to synthetic ThunkSections
+class Thunk {
+ Thunk(Symbol &Destination); // Destination is the symbol the thunk transfers control to.
+ virtual ~Thunk();
+ virtual uint32_t size() = 0; // Size of the thunk -- presumably in bytes; TODO confirm against implementations.
+ virtual void writeTo(uint8_t *Buf) = 0; // Writes the thunk's contents into Buf.
+ // All Thunks must define at least one symbol, known as the thunk target
+ // symbol, so that we can redirect relocations to it. The thunk may define
+ // additional symbols, but these are never targets for relocations.
+ virtual void addSymbols(ThunkSection &IS) = 0;
+ void setOffset(uint64_t Offset); // Presumably stores the thunk's offset in the Offset member; definition not shown here.
+ Defined *addSymbol(StringRef Name, uint8_t Type, uint64_t Value,
+ InputSectionBase &Section); // Defines a symbol for this thunk and returns it.
+ // Some Thunks must be placed immediately before their Target as they elide
+ // a branch and fall through to the first Symbol in the Target.
+ virtual InputSection *getTargetInputSection() const { return nullptr; }
+ // To reuse a Thunk the caller as identified by the Type must be
+ // compatible with it.
+ virtual bool isCompatibleWith(RelType Type) const { return true; }
+ Defined *getThunkTargetSym() const { return Syms[0]; } // The thunk target symbol is the first entry of Syms.
+ // Data members. Alignment (last member below) is the alignment requirement
+ // for this Thunk; it defaults to the typical code section alignment.
+ Symbol &Destination; // The symbol this thunk transfers control to.
+ llvm::SmallVector<Defined *, 3> Syms; // Symbols belonging to this thunk; Syms[0] is the thunk target symbol.
+ uint64_t Offset = 0; // Presumably the thunk's offset within its ThunkSection; verify against setOffset.
+ uint32_t Alignment = 4;
+// For a Relocation to symbol S create a Thunk to be added to a synthetic
+// ThunkSection. Implementations exist for AArch64, ARM, MIPS and PPC64.
+Thunk *addThunk(RelType Type, Symbol &S);
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/ELF/Writer.cpp b/contrib/llvm/tools/lld/ELF/Writer.cpp
new file mode 100644
index 000000000000..5c987ca5a813
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Writer.cpp
@@ -0,0 +1,2530 @@
+//===- Writer.cpp ---------------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Writer.h"
+#include "AArch64ErrataFix.h"
+#include "CallGraphSort.h"
+#include "Config.h"
+#include "Filesystem.h"
+#include "LinkerScript.h"
+#include "MapFile.h"
+#include "OutputSections.h"
+#include "Relocations.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+#include "lld/Common/Memory.h"
+#include "lld/Common/Strings.h"
+#include "lld/Common/Threads.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSwitch.h"
+#include <climits>
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::object;
+using namespace llvm::support;
+using namespace llvm::support::endian;
+using namespace lld;
+using namespace lld::elf;
+namespace {
+// The writer writes a SymbolTable result to a file.
+template <class ELFT> class Writer {
+ Writer() : Buffer(errorHandler().OutputBuffer) {} // Buffer aliases the error handler's OutputBuffer.
+ typedef typename ELFT::Shdr Elf_Shdr;
+ typedef typename ELFT::Ehdr Elf_Ehdr;
+ typedef typename ELFT::Phdr Elf_Phdr;
+ void run(); // Entry point: drives the whole write pipeline.
+ void copyLocalSymbols();
+ void addSectionSymbols();
+ void forEachRelSec(llvm::function_ref<void(InputSectionBase &)> Fn);
+ void sortSections();
+ void resolveShfLinkOrder();
+ void maybeAddThunks();
+ void sortInputSections();
+ void finalizeSections();
+ void checkExecuteOnly();
+ void setReservedSymbolSections();
+ std::vector<PhdrEntry *> createPhdrs();
+ void removeEmptyPTLoad();
+ void addPtArmExid(std::vector<PhdrEntry *> &Phdrs);
+ void assignFileOffsets();
+ void assignFileOffsetsBinary();
+ void setPhdrs();
+ void checkSections();
+ void fixSectionAlignments();
+ void openFile();
+ void writeTrapInstr();
+ void writeHeader();
+ void writeSections();
+ void writeSectionsBinary();
+ void writeBuildId();
+ std::unique_ptr<FileOutputBuffer> &Buffer; // Reference, not owner: bound in the constructor.
+ void addRelIpltSymbols();
+ void addStartEndSymbols();
+ void addStartStopSymbols(OutputSection *Sec);
+ std::vector<PhdrEntry *> Phdrs; // Program headers of the output file.
+ uint64_t FileSize;
+ uint64_t SectionHeaderOff;
+} // anonymous namespace
+// Returns true if Name begins with Prefix, or if Name equals Prefix with its
+// final character removed (so ".text." matches both ".text.foo" and ".text").
+static bool isSectionPrefix(StringRef Prefix, StringRef Name) {
+  if (Name.startswith(Prefix))
+    return true;
+  return Prefix.drop_back() == Name;
+StringRef elf::getOutputSectionName(const InputSectionBase *S) { // Returns the name of the output section that input section S is placed in by default.
+ if (Config->Relocatable)
+ return S->Name; // Relocatable output keeps input section names unchanged.
+ // This is for --emit-relocs. If .text.foo is emitted as .text.bar, we want
+ // to emit .rela.text.foo as .rela.text.bar for consistency (this is not
+ // technically required, but not doing it is odd). This code guarantees that.
+ if (auto *IS = dyn_cast<InputSection>(S)) {
+ if (InputSectionBase *Rel = IS->getRelocatedSection()) {
+ OutputSection *Out = Rel->getOutputSection();
+ if (S->Type == SHT_RELA)
+ return Saver.save(".rela" + Out->Name);
+ return Saver.save(".rel" + Out->Name);
+ }
+ }
+ // This check is for -z keep-text-section-prefix. This option separates text
+ // sections with prefix ".text.hot", ".text.unlikely", ".text.startup" or
+ // ".text.exit".
+ // When enabled, this allows identifying the hot code region (.text.hot) in
+ // the final binary which can be selectively mapped to huge pages or mlocked,
+ // for instance.
+ if (Config->ZKeepTextSectionPrefix)
+ for (StringRef V :
+ {".text.hot.", ".text.unlikely.", ".text.startup.", ".text.exit."})
+ if (isSectionPrefix(V, S->Name))
+ return V.drop_back(); // Drop the trailing '.' to get the output name.
+ for (StringRef V :
+ {".text.", ".rodata.", ".data.rel.ro.", ".data.", ".bss.rel.ro.",
+ ".bss.", ".init_array.", ".fini_array.", ".ctors.", ".dtors.", ".tbss.",
+ ".gcc_except_table.", ".tdata.", ".ARM.exidx.", ".ARM.extab."}) // First match wins, so longer prefixes precede the shorter ones they contain.
+ if (isSectionPrefix(V, S->Name))
+ return V.drop_back(); // Drop the trailing '.' to get the output name.
+ // CommonSection is identified as "COMMON" in linker scripts.
+ // By default, it should go to .bss section.
+ if (S->Name == "COMMON")
+ return ".bss";
+ return S->Name; // No rule matched: keep the input name.
+// Returns true only when at least one shared file is present, a dynamic
+// linker path is configured, and the linker script reports that an interp
+// section is needed. The checks run in that order.
+static bool needsInterpSection() {
+  if (SharedFiles.empty())
+    return false;
+  if (Config->DynamicLinker.empty())
+    return false;
+  return Script->needsInterpSection();
+template <class ELFT> void elf::writeResult() { Writer<ELFT>().run(); } // Constructs a temporary Writer for ELFT and runs it.
+template <class ELFT> void Writer<ELFT>::removeEmptyPTLoad() { // Erases PT_LOAD entries from Phdrs that have no sections or span zero bytes.
+ llvm::erase_if(Phdrs, [&](const PhdrEntry *P) {
+ if (P->p_type != PT_LOAD)
+ return false; // Only PT_LOAD entries are candidates for removal.
+ if (!P->FirstSec)
+ return true; // No section was assigned to this segment.
+ uint64_t Size = P->LastSec->Addr + P->LastSec->Size - P->FirstSec->Addr; // Address span from the first section's start to the last section's end.
+ return Size == 0;
+ });
+template <class ELFT> static void combineEhFrameSections() { // Moves every live EhInputSection from InputSections into In.EhFrame.
+ for (InputSectionBase *&S : InputSections) { // By reference so that slots can be nulled in place.
+ EhInputSection *ES = dyn_cast<EhInputSection>(S);
+ if (!ES || !ES->Live)
+ continue; // Skip non-EH sections and dead EH sections.
+ In.EhFrame->addSection<ELFT>(ES);
+ S = nullptr; // Mark the slot for removal below.
+ }
+ std::vector<InputSectionBase *> &V = InputSections;
+ V.erase(std::remove(V.begin(), V.end(), nullptr), V.end()); // Erase-remove idiom: drop the nulled slots.
+static Defined *addOptionalRegular(StringRef Name, SectionBase *Sec,
+ uint64_t Val, uint8_t StOther = STV_HIDDEN,
+ uint8_t Binding = STB_GLOBAL) { // Defines Name only if the symbol table already has it and it is not yet defined.
+ Symbol *S = Symtab->find(Name);
+ if (!S || S->isDefined())
+ return nullptr; // Name is absent from the table or already defined: nothing is added.
+ return Symtab->addDefined(Name, StOther, STT_NOTYPE, Val,
+ /*Size=*/0, Binding, Sec,
+ /*File=*/nullptr);
+static Defined *addAbsolute(StringRef Name) { // Defines Name as a hidden, global STT_NOTYPE symbol with value 0, size 0, and no section or file.
+ return Symtab->addDefined(Name, STV_HIDDEN, STT_NOTYPE, 0, 0, STB_GLOBAL,
+ nullptr, nullptr);
+// The linker is expected to define some symbols depending on
+// the linking result. This function defines such symbols.
+void elf::addReservedSymbols() {
+ if (Config->EMachine == EM_MIPS) {
+ // Define _gp for MIPS. st_value of _gp symbol will be updated by Writer
+ // so that it points to an absolute address which by default is relative
+ // to GOT. Default offset is 0x7ff0.
+ // See "Global Data Symbols" in Chapter 6 in the following document:
+ // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
+ ElfSym::MipsGp = addAbsolute("_gp");
+ // On MIPS O32 ABI, _gp_disp is a magic symbol that designates the offset
+ // between the start of a function and the 'gp' pointer into the GOT.
+ if (Symtab->find("_gp_disp"))
+ ElfSym::MipsGpDisp = addAbsolute("_gp_disp");
+ // The __gnu_local_gp is a magic symbol equal to the current value of 'gp'
+ // pointer. This symbol is used in the code generated by .cpload pseudo-op
+ // in case of using -mno-shared option.
+ // https://sourceware.org/ml/binutils/2004-12/msg00094.html
+ if (Symtab->find("__gnu_local_gp"))
+ ElfSym::MipsLocalGp = addAbsolute("__gnu_local_gp");
+ }
+ // The Power Architecture 64-bit v2 ABI defines a TableOfContents (TOC) which
+ // combines the typical ELF GOT with the small data sections. It commonly
+ // includes .got .toc .sdata .sbss. The .TOC. symbol replaces both
+ // _GLOBAL_OFFSET_TABLE_ and _SDA_BASE_ from the 32-bit ABI. It is used to
+ // represent the TOC base which is offset by 0x8000 bytes from the start of
+ // the .got section.
+ // We do not allow _GLOBAL_OFFSET_TABLE_ to be defined by input objects as the
+ // correctness of some relocations depends on its value.
+ StringRef GotTableSymName =
+ (Config->EMachine == EM_PPC64) ? ".TOC." : "_GLOBAL_OFFSET_TABLE_";
+ if (Symbol *S = Symtab->find(GotTableSymName)) { // Only act when the symbol is already in the table.
+ if (S->isDefined())
+ error(toString(S->File) + " cannot redefine linker defined symbol '" +
+ GotTableSymName + "'");
+ else
+ ElfSym::GlobalOffsetTable = Symtab->addDefined(
+ GotTableSymName, STV_HIDDEN, STT_NOTYPE, Target->GotBaseSymOff,
+ /*Size=*/0, STB_GLOBAL, Out::ElfHeader,
+ /*File=*/nullptr);
+ }
+ // __ehdr_start is the location of ELF file headers. Note that we define
+ // this symbol unconditionally even when using a linker script, which
+ // differs from the behavior implemented by the GNU linker, which only
+ // defines this symbol if ELF headers are in the memory mapped segment.
+ addOptionalRegular("__ehdr_start", Out::ElfHeader, 0, STV_HIDDEN);
+ // __executable_start is not documented, but the expectation of at
+ // least the Android libc is that it points to the ELF header.
+ addOptionalRegular("__executable_start", Out::ElfHeader, 0, STV_HIDDEN);
+ // __dso_handle symbol is passed to cxa_finalize as a marker to identify
+ // each DSO. The address of the symbol doesn't matter as long as they are
+ // different in different DSOs, so we chose the start address of the DSO.
+ addOptionalRegular("__dso_handle", Out::ElfHeader, 0, STV_HIDDEN);
+ // If the linker script does the layout we do not need to create any standard symbols.
+ if (Script->HasSectionsCommand)
+ return;
+ auto Add = [](StringRef S, int64_t Pos) { // Helper: define S against the ELF header with default visibility, if it is wanted.
+ return addOptionalRegular(S, Out::ElfHeader, Pos, STV_DEFAULT);
+ };
+ ElfSym::Bss = Add("__bss_start", 0);
+ ElfSym::End1 = Add("end", -1); // The -1 values are placeholders -- presumably fixed up later by the Writer; verify.
+ ElfSym::End2 = Add("_end", -1);
+ ElfSym::Etext1 = Add("etext", -1);
+ ElfSym::Etext2 = Add("_etext", -1);
+ ElfSym::Edata1 = Add("edata", -1);
+ ElfSym::Edata2 = Add("_edata", -1);
+// Scans the linker script's section commands and returns the first
+// OutputSection whose name equals Name, or nullptr if none matches.
+static OutputSection *findSection(StringRef Name) {
+  for (BaseCommand *Cmd : Script->SectionCommands) {
+    auto *OS = dyn_cast<OutputSection>(Cmd);
+    if (OS && OS->Name == Name)
+      return OS;
+  }
+  return nullptr;
+// Initialize Out members.
+template <class ELFT> static void createSyntheticSections() { // Creates the linker-generated sections and appends most of them to InputSections.
+ // Initialize all pointers with NULL. This is needed because
+ // you can call lld::elf::main more than once as a library.
+ memset(&Out::First, 0, sizeof(Out));
+ auto Add = [](InputSectionBase *Sec) { InputSections.push_back(Sec); }; // Helper: append Sec to the global input section list.
+ In.DynStrTab = make<StringTableSection>(".dynstr", true);
+ In.Dynamic = make<DynamicSection<ELFT>>();
+ if (Config->AndroidPackDynRelocs) {
+ In.RelaDyn = make<AndroidPackedRelocationSection<ELFT>>(
+ Config->IsRela ? ".rela.dyn" : ".rel.dyn");
+ } else {
+ In.RelaDyn = make<RelocationSection<ELFT>>(
+ Config->IsRela ? ".rela.dyn" : ".rel.dyn", Config->ZCombreloc);
+ }
+ In.ShStrTab = make<StringTableSection>(".shstrtab", false);
+ Out::ProgramHeaders = make<OutputSection>("", 0, SHF_ALLOC);
+ Out::ProgramHeaders->Alignment = Config->Wordsize;
+ if (needsInterpSection()) {
+ In.Interp = createInterpSection();
+ Add(In.Interp);
+ }
+ if (Config->Strip != StripPolicy::All) { // These three are created here but only added to InputSections near the end.
+ In.StrTab = make<StringTableSection>(".strtab", false);
+ In.SymTab = make<SymbolTableSection<ELFT>>(*In.StrTab);
+ In.SymTabShndx = make<SymtabShndxSection>();
+ }
+ if (Config->BuildId != BuildIdKind::None) {
+ In.BuildId = make<BuildIdSection>();
+ Add(In.BuildId);
+ }
+ In.Bss = make<BssSection>(".bss", 0, 1);
+ Add(In.Bss);
+ // If there is a SECTIONS command and a .data.rel.ro section name use name
+ // .data.rel.ro.bss so that we match in the .data.rel.ro output section.
+ // This makes sure our relro is contiguous.
+ bool HasDataRelRo = Script->HasSectionsCommand && findSection(".data.rel.ro");
+ In.BssRelRo =
+ make<BssSection>(HasDataRelRo ? ".data.rel.ro.bss" : ".bss.rel.ro", 0, 1);
+ Add(In.BssRelRo);
+ // Add MIPS-specific sections.
+ if (Config->EMachine == EM_MIPS) {
+ if (!Config->Shared && Config->HasDynSymTab) {
+ In.MipsRldMap = make<MipsRldMapSection>();
+ Add(In.MipsRldMap);
+ }
+ if (auto *Sec = MipsAbiFlagsSection<ELFT>::create()) // create() may return null, in which case nothing is added.
+ Add(Sec);
+ if (auto *Sec = MipsOptionsSection<ELFT>::create())
+ Add(Sec);
+ if (auto *Sec = MipsReginfoSection<ELFT>::create())
+ Add(Sec);
+ }
+ if (Config->HasDynSymTab) { // Dynamic-linking sections are only added when a dynamic symbol table exists.
+ In.DynSymTab = make<SymbolTableSection<ELFT>>(*In.DynStrTab);
+ Add(In.DynSymTab);
+ InX<ELFT>::VerSym = make<VersionTableSection<ELFT>>();
+ Add(InX<ELFT>::VerSym);
+ if (!Config->VersionDefinitions.empty()) {
+ In.VerDef = make<VersionDefinitionSection>();
+ Add(In.VerDef);
+ }
+ InX<ELFT>::VerNeed = make<VersionNeedSection<ELFT>>();
+ Add(InX<ELFT>::VerNeed);
+ if (Config->GnuHash) {
+ In.GnuHashTab = make<GnuHashTableSection>();
+ Add(In.GnuHashTab);
+ }
+ if (Config->SysvHash) {
+ In.HashTab = make<HashTableSection>();
+ Add(In.HashTab);
+ }
+ Add(In.Dynamic);
+ Add(In.DynStrTab);
+ Add(In.RelaDyn);
+ }
+ if (Config->RelrPackDynRelocs) {
+ In.RelrDyn = make<RelrSection<ELFT>>();
+ Add(In.RelrDyn);
+ }
+ // Add .got. MIPS' .got is so different from the other archs,
+ // it has its own class.
+ if (Config->EMachine == EM_MIPS) {
+ In.MipsGot = make<MipsGotSection>();
+ Add(In.MipsGot);
+ } else {
+ In.Got = make<GotSection>();
+ Add(In.Got);
+ }
+ if (Config->EMachine == EM_PPC64) {
+ In.PPC64LongBranchTarget = make<PPC64LongBranchTargetSection>();
+ Add(In.PPC64LongBranchTarget);
+ }
+ In.GotPlt = make<GotPltSection>();
+ Add(In.GotPlt);
+ In.IgotPlt = make<IgotPltSection>();
+ Add(In.IgotPlt);
+ if (Config->GdbIndex) {
+ In.GdbIndex = GdbIndexSection::create<ELFT>();
+ Add(In.GdbIndex);
+ }
+ // We always need to add rel[a].plt to output if it has entries.
+ // Even for static linking it can contain R_[*]_IRELATIVE relocations.
+ In.RelaPlt = make<RelocationSection<ELFT>>(
+ Config->IsRela ? ".rela.plt" : ".rel.plt", false /*Sort*/);
+ Add(In.RelaPlt);
+ // The RelaIplt immediately follows .rel.plt (.rel.dyn for ARM) to ensure
+ // that the IRelative relocations are processed last by the dynamic loader.
+ // We cannot place the iplt section in .rel.dyn when Android relocation
+ // packing is enabled because that would cause a section type mismatch.
+ // However, because the Android dynamic loader reads .rel.plt after .rel.dyn,
+ // we can get the desired behaviour by placing the iplt section in .rel.plt.
+ In.RelaIplt = make<RelocationSection<ELFT>>(
+ (Config->EMachine == EM_ARM && !Config->AndroidPackDynRelocs)
+ ? ".rel.dyn"
+ : In.RelaPlt->Name,
+ false /*Sort*/);
+ Add(In.RelaIplt);
+ In.Plt = make<PltSection>(false);
+ Add(In.Plt);
+ In.Iplt = make<PltSection>(true);
+ Add(In.Iplt);
+ // .note.GNU-stack is always added when we are creating a re-linkable
+ // object file. Other linkers are using the presence of this marker
+ // section to control the executable-ness of the stack area, but that
+ // is irrelevant these days. Stack area should always be non-executable
+ // by default. So we emit this section unconditionally.
+ if (Config->Relocatable)
+ Add(make<GnuStackSection>());
+ if (!Config->Relocatable) { // EH frame sections are only synthesized for non-relocatable output.
+ if (Config->EhFrameHdr) {
+ In.EhFrameHdr = make<EhFrameHeader>();
+ Add(In.EhFrameHdr);
+ }
+ In.EhFrame = make<EhFrameSection>();
+ Add(In.EhFrame);
+ }
+ if (In.SymTab) // Null when everything is stripped (see the StripPolicy check above).
+ Add(In.SymTab);
+ if (In.SymTabShndx)
+ Add(In.SymTabShndx);
+ Add(In.ShStrTab);
+ if (In.StrTab)
+ Add(In.StrTab);
+ if (Config->EMachine == EM_ARM && !Config->Relocatable)
+ // Add a sentinel to terminate .ARM.exidx. It helps an unwinder
+ // to find the exact address range of the last entry.
+ Add(make<ARMExidxSentinelSection>());
+// The main function of the writer.
+template <class ELFT> void Writer<ELFT>::run() { // Each phase below depends on the previous ones; the function returns early once errors have been reported.
+ // Create linker-synthesized sections such as .got or .plt.
+ // Such sections are of type input section.
+ createSyntheticSections<ELFT>();
+ if (!Config->Relocatable)
+ combineEhFrameSections<ELFT>();
+ // We want to process linker script commands. When SECTIONS command
+ // is given we let it create sections.
+ Script->processSectionCommands();
+ // Linker scripts control how input sections are assigned to output sections.
+ // Input sections that were not handled by scripts are called "orphans", and
+ // they are assigned to output sections by the default rule. Process that.
+ Script->addOrphanSections();
+ if (Config->Discard != DiscardPolicy::All)
+ copyLocalSymbols();
+ if (Config->CopyRelocs)
+ addSectionSymbols();
+ // Now we have a complete set of output sections. finalizeSections
+ // completes section contents. For example, we need to add strings
+ // to the string table, and add entries to .got and .plt.
+ // finalizeSections does that.
+ finalizeSections();
+ checkExecuteOnly();
+ if (errorCount())
+ return;
+ Script->assignAddresses();
+ // If -compressed-debug-sections is specified, we need to compress
+ // .debug_* sections. Do it right now because it changes the size of
+ // output sections.
+ for (OutputSection *Sec : OutputSections)
+ Sec->maybeCompress<ELFT>();
+ Script->allocateHeaders(Phdrs);
+ // Remove empty PT_LOAD to avoid causing the dynamic linker to try to mmap a
+ // 0 sized region. This has to be done late since only after assignAddresses
+ // we know the size of the sections.
+ removeEmptyPTLoad();
+ if (!Config->OFormatBinary)
+ assignFileOffsets();
+ else
+ assignFileOffsetsBinary();
+ setPhdrs();
+ if (Config->Relocatable)
+ for (OutputSection *Sec : OutputSections)
+ Sec->Addr = 0; // Relocatable output: every output section address is reset to 0.
+ if (Config->CheckSections)
+ checkSections();
+ // It does not make sense to try to open the file if we already have an error.
+ if (errorCount())
+ return;
+ // Write the result down to a file.
+ openFile();
+ if (errorCount())
+ return;
+ if (!Config->OFormatBinary) {
+ writeTrapInstr();
+ writeHeader();
+ writeSections();
+ } else {
+ writeSectionsBinary();
+ }
+ // Backfill .note.gnu.build-id section content. This is done last
+ // because the content is usually a hash value of the entire output file.
+ writeBuildId();
+ if (errorCount())
+ return;
+ // Handle -Map and -cref options.
+ writeMapFile();
+ writeCrossReferenceTable();
+ if (errorCount())
+ return;
+ if (auto E = Buffer->commit()) // commit() yields an error value on failure.
+ error("failed to write to the output file: " + toString(std::move(E)));
+// Decides whether local symbol B, named SymName and defined in Sec (which may
+// be null), is kept in the output symbol table under the current discard
+// policy.
+static bool shouldKeepInSymtab(SectionBase *Sec, StringRef SymName,
+                               const Symbol &B) {
+  // Section symbols are never kept.
+  if (B.isSection())
+    return false;
+  // With no discard policy every remaining symbol is kept.
+  if (Config->Discard == DiscardPolicy::None)
+    return true;
+  // In ELF assembly .L symbols are normally discarded by the assembler.
+  // If the assembler fails to do so, the linker discards them if
+  // * --discard-locals is used.
+  // * The symbol is in a SHF_MERGE section, which is normally the reason for
+  //   the assembler keeping the .L symbol.
+  // Unnamed symbols are handled the same way as .L symbols.
+  const bool IsAssemblerLocal = SymName.empty() || SymName.startswith(".L");
+  if (!IsAssemblerLocal)
+    return true;
+  if (Config->Discard == DiscardPolicy::Locals)
+    return false;
+  if (!Sec)
+    return true;
+  return (Sec->Flags & SHF_MERGE) == 0;
+static bool includeInSymtab(const Symbol &B) { // Returns whether symbol B should appear in the output symbol table.
+ if (!B.isLocal() && !B.IsUsedInRegularObj)
+ return false; // Non-local symbols not used by any regular object are dropped.
+ if (auto *D = dyn_cast<Defined>(&B)) {
+ // Always include absolute symbols.
+ SectionBase *Sec = D->Section;
+ if (!Sec)
+ return true;
+ Sec = Sec->Repl; // Continue with the section's Repl entry.
+ // Exclude symbols pointing to garbage-collected sections.
+ if (isa<InputSectionBase>(Sec) && !Sec->Live)
+ return false;
+ if (auto *S = dyn_cast<MergeInputSection>(Sec))
+ if (!S->getSectionPiece(D->Value)->Live)
+ return false; // The piece of the merge section holding this symbol is dead.
+ return true;
+ }
+ return B.Used; // Symbols that are not Defined are included only if marked Used.
+// Local symbols are not in the linker's symbol table. This function scans
+// each object file's symbol table to copy local symbols to the output.
+template <class ELFT> void Writer<ELFT>::copyLocalSymbols() {
+ if (!In.SymTab)
+ return; // No output symbol table, so there is nothing to copy into.
+ for (InputFile *File : ObjectFiles) {
+ ObjFile<ELFT> *F = cast<ObjFile<ELFT>>(File);
+ for (Symbol *B : F->getLocalSymbols()) {
+ if (!B->isLocal())
+ fatal(toString(F) +
+ ": broken object: getLocalSymbols returns a non-local symbol");
+ auto *DR = dyn_cast<Defined>(B);
+ // No reason to keep a local undefined symbol in the symtab.
+ if (!DR)
+ continue;
+ if (!includeInSymtab(*B))
+ continue;
+ SectionBase *Sec = DR->Section;
+ if (!shouldKeepInSymtab(Sec, B->getName(), *B))
+ continue; // Rejected by the discard policy.
+ In.SymTab->addSymbol(B);
+ }
+ }
+// Adds one STT_SECTION symbol per output section so that relocations that
+// point to a section can be represented. Sections that no relocation can
+// refer to (synthetic, non-mergeable ones) and REL[A] sections get no symbol.
+template <class ELFT> void Writer<ELFT>::addSectionSymbols() {
+  for (BaseCommand *Cmd : Script->SectionCommands) {
+    auto *OutSec = dyn_cast<OutputSection>(Cmd);
+    if (!OutSec)
+      continue;
+    // Locate the first input section description that actually has sections.
+    InputSectionDescription *FirstDesc = nullptr;
+    for (BaseCommand *Sub : OutSec->SectionCommands) {
+      auto *ISD = dyn_cast<InputSectionDescription>(Sub);
+      if (ISD && !ISD->Sections.empty()) {
+        FirstDesc = ISD;
+        break;
+      }
+    }
+    if (!FirstDesc)
+      continue;
+    InputSection *IS = FirstDesc->Sections[0];
+    // Relocations are not using REL[A] section symbols.
+    bool IsRelocSection = IS->Type == SHT_REL || IS->Type == SHT_RELA;
+    if (IsRelocSection)
+      continue;
+    // Unlike other synthetic sections, mergeable output sections contain data
+    // copied from input sections, and there may be a relocation pointing to
+    // its contents if -r or -emit-reloc are given.
+    bool IsMergeable = IS->Flags & SHF_MERGE;
+    if (isa<SyntheticSection>(IS) && !IsMergeable)
+      continue;
+    In.SymTab->addSymbol(
+        make<Defined>(IS->File, "", STB_LOCAL, /*StOther=*/0, STT_SECTION,
+                      /*Value=*/0, /*Size=*/0, IS));
+  }
+// Today's loaders have a feature to make segments read-only after
+// processing dynamic relocations to enhance security. PT_GNU_RELRO
+// is defined for that.
+//
+// This function returns true if the output section Sec needs to be put into
+// a PT_GNU_RELRO segment. It always returns false when Config->ZRelro is off.
+static bool isRelroSection(const OutputSection *Sec) {
+  if (!Config->ZRelro)
+    return false;
+  uint64_t Flags = Sec->Flags;
+  // Non-allocatable or non-writable sections don't need RELRO because
+  // they are not writable or not even mapped to memory in the first place.
+  // RELRO is for sections that are essentially read-only but need to
+  // be writable only at process startup to allow dynamic linker to
+  // apply relocations.
+  if (!(Flags & SHF_ALLOC) || !(Flags & SHF_WRITE))
+    return false;
+  // Once initialized, TLS data segments are used as data templates
+  // for a thread-local storage. For each new thread, the runtime
+  // allocates memory for a TLS and copies the templates there. No threads
+  // are supposed to use templates directly. Thus, it can be in RELRO.
+  if (Flags & SHF_TLS)
+    return true;
+  // .init_array, .preinit_array and .fini_array contain pointers to
+  // functions that are executed on process startup or exit. These
+  // pointers are set by the static linker, and they are not expected
+  // to change at runtime. But if you are an attacker, you could do
+  // interesting things by manipulating pointers in .fini_array, for
+  // example. So they are put into RELRO.
+  uint32_t Type = Sec->Type;
+  if (Type == SHT_INIT_ARRAY || Type == SHT_FINI_ARRAY ||
+      Type == SHT_PREINIT_ARRAY)
+    return true;
+  // .got contains pointers to external symbols. They are resolved by
+  // the dynamic linker when a module is loaded into memory, and after
+  // that they are not expected to change. So, it can be in RELRO.
+  if (In.Got && Sec == In.Got->getParent())
+    return true;
+  // .toc is a GOT-ish section for PowerPC64. Their contents are accessed
+  // through r2 register, which is reserved for that purpose. Since r2 is used
+  // for accessing .got as well, .got and .toc need to be close enough in the
+  // virtual address space. Usually, .toc comes just after .got. Since we place
+  // .got into RELRO, .toc needs to be placed into RELRO too.
+  if (Sec->Name.equals(".toc"))
+    return true;
+  // .got.plt contains pointers to external function symbols. They are
+  // by default resolved lazily, so we usually cannot put it into RELRO.
+  // However, if "-z now" is given, the lazy symbol resolution is
+  // disabled, which enables us to put it into RELRO.
+  if (Sec == In.GotPlt->getParent())
+    return Config->ZNow;
+  // .dynamic section contains data for the dynamic linker, and
+  // there's no need to write to it at runtime, so it's better to put
+  // it into RELRO.
+  if (Sec == In.Dynamic->getParent())
+    return true;
+  // Sections with some special names are put into RELRO. This is a
+  // bit unfortunate because section names shouldn't be significant in
+  // ELF in spirit. But in reality many linker features depend on
+  // magic section names.
+  StringRef S = Sec->Name;
+  return S == ".data.rel.ro" || S == ".bss.rel.ro" || S == ".ctors" ||
+         S == ".dtors" || S == ".jcr" || S == ".eh_frame" ||
+         S == ".openbsd.randomdata";
+// We compute a rank for each section. The rank indicates where the
+// section should be placed in the file. Instead of using simple
+// numbers (0,1,2...), we use a series of flags. One for each decision
+// point when placing the section.
+// Using flags has two key properties:
+// * It is easy to check if a given branch was taken.
+// * It is easy to see how similar two ranks are (see getRankProximity).
+enum RankFlags { // Higher bits correspond to earlier decisions in getSectionRank.
+ RF_NOT_ADDR_SET = 1 << 18, // Section name is not in Config->SectionStartMap.
+ RF_NOT_ALLOC = 1 << 17, // Section lacks SHF_ALLOC.
+ RF_NOT_INTERP = 1 << 16, // Section is not ".interp".
+ RF_NOT_NOTE = 1 << 15, // Section type is not SHT_NOTE.
+ RF_WRITE = 1 << 14, // Writable, not executable.
+ RF_EXEC_WRITE = 1 << 13, // Writable and executable.
+ RF_EXEC = 1 << 12, // Executable, not writable.
+ RF_RODATA = 1 << 11, // Neither writable nor executable SHT_PROGBITS.
+ RF_NON_TLS_BSS = 1 << 10, // SHT_NOBITS without SHF_TLS.
+ RF_NON_TLS_BSS_RO = 1 << 9, // Set when exactly one of "non-TLS nobits" and "RELRO" holds.
+ RF_NOT_TLS = 1 << 8, // Section lacks SHF_TLS.
+ RF_BSS = 1 << 7, // SHT_NOBITS (TLS or not).
+ RF_PPC_NOT_TOCBSS = 1 << 6, // NOTE(review): presumably PPC64 sections other than .tocbss; confirm.
+ RF_PPC_TOCL = 1 << 5, // PPC64 ".toc1".
+ RF_PPC_TOC = 1 << 4, // PPC64 ".toc".
+ RF_PPC_GOT = 1 << 3, // PPC64 ".got".
+ RF_PPC_BRANCH_LT = 1 << 2, // NOTE(review): presumably PPC64 ".branch_lt"; confirm.
+ RF_MIPS_GPREL = 1 << 1, // MIPS section with SHF_MIPS_GPREL.
+ RF_MIPS_NOT_GOT = 1 << 0 // MIPS section not named ".got".
+// Computes the sort rank of an output section as a bitwise OR of RankFlags.
+// Sections are ordered by ascending rank (see compareSections), so each flag
+// set here pushes the section after those that did not take the same branch.
+// The early returns stop at the first decision that fully places the section.
+static unsigned getSectionRank(const OutputSection *Sec) {
+  unsigned Rank = 0;
+  // We want to put section specified by -T option first, so we
+  // can start assigning VA starting from them later.
+  if (Config->SectionStartMap.count(Sec->Name))
+    return Rank;
+  Rank |= RF_NOT_ADDR_SET;
+  // Allocatable sections go first to reduce the total PT_LOAD size and
+  // so debug info doesn't change addresses in actual code.
+  if (!(Sec->Flags & SHF_ALLOC))
+    return Rank | RF_NOT_ALLOC;
+  // Put .interp first because some loaders want to see that section
+  // on the first page of the executable file when loaded into memory.
+  if (Sec->Name == ".interp")
+    return Rank;
+  Rank |= RF_NOT_INTERP;
+  // Put .note sections (which make up one PT_NOTE) at the beginning so that
+  // they are likely to be included in a core file even if core file size is
+  // limited. In particular, we want a .note.gnu.build-id and a .note.tag to be
+  // included in a core to match core files with executables.
+  if (Sec->Type == SHT_NOTE)
+    return Rank;
+  Rank |= RF_NOT_NOTE;
+  // Sort sections based on their access permission in the following
+  // order: R, RX, RWX, RW. This order is based on the following
+  // considerations:
+  // * Read-only sections come first such that they go in the
+  //   PT_LOAD covering the program headers at the start of the file.
+  // * Read-only, executable sections come next.
+  // * Writable, executable sections follow such that .plt on
+  //   architectures where it needs to be writable will be placed
+  //   between .text and .data.
+  // * Writable sections come last, such that .bss lands at the very
+  //   end of the last PT_LOAD.
+  bool IsExec = Sec->Flags & SHF_EXECINSTR;
+  bool IsWrite = Sec->Flags & SHF_WRITE;
+  if (IsExec) {
+    if (IsWrite)
+      Rank |= RF_EXEC_WRITE;
+    else
+      Rank |= RF_EXEC;
+  } else if (IsWrite) {
+    Rank |= RF_WRITE;
+  } else if (Sec->Type == SHT_PROGBITS) {
+    // Make non-executable and non-writable PROGBITS sections (e.g .rodata
+    // .eh_frame) closer to .text. They likely contain PC or GOT relative
+    // relocations and there could be relocation overflow if other huge sections
+    // (.dynstr .dynsym) were placed in between.
+    Rank |= RF_RODATA;
+  }
+  // If we got here we know that both A and B are in the same PT_LOAD.
+  bool IsTls = Sec->Flags & SHF_TLS;
+  bool IsNoBits = Sec->Type == SHT_NOBITS;
+  // The first requirement we have is to put (non-TLS) nobits sections last. The
+  // reason is that the only thing the dynamic linker will see about them is a
+  // p_memsz that is larger than p_filesz. Seeing that it zeros the end of the
+  // PT_LOAD, so that has to correspond to the nobits sections.
+  bool IsNonTlsNoBits = IsNoBits && !IsTls;
+  if (IsNonTlsNoBits)
+    Rank |= RF_NON_TLS_BSS;
+  // We place nobits RelRo sections before plain r/w ones, and non-nobits RelRo
+  // sections after r/w ones, so that the RelRo sections are contiguous.
+  bool IsRelRo = isRelroSection(Sec);
+  if (IsNonTlsNoBits && !IsRelRo)
+    Rank |= RF_NON_TLS_BSS_RO;
+  if (!IsNonTlsNoBits && IsRelRo)
+    Rank |= RF_NON_TLS_BSS_RO;
+  // The TLS initialization block needs to be a single contiguous block in a R/W
+  // PT_LOAD, so stick TLS sections directly before the other RelRo R/W
+  // sections. The TLS NOBITS sections are placed here as they don't take up
+  // virtual address space in the PT_LOAD.
+  if (!IsTls)
+    Rank |= RF_NOT_TLS;
+  // Within the TLS initialization block, the non-nobits sections need to appear
+  // first.
+  if (IsNoBits)
+    Rank |= RF_BSS;
+  // Some architectures have additional ordering restrictions for sections
+  // within the same PT_LOAD.
+  if (Config->EMachine == EM_PPC64) {
+    // PPC64 has a number of special SHT_PROGBITS+SHF_ALLOC+SHF_WRITE sections
+    // that we would like to make sure appear in a specific order to maximize
+    // their coverage by a single signed 16-bit offset from the TOC base
+    // pointer. Conversely, the special .tocbss section should be first among
+    // all SHT_NOBITS sections. This will put it next to the loaded special
+    // PPC64 sections (and, thus, within reach of the TOC base pointer).
+    StringRef Name = Sec->Name;
+    // Every section except .tocbss gets this flag, so .tocbss sorts first
+    // among otherwise equally ranked sections.
+    if (Name != ".tocbss")
+      Rank |= RF_PPC_NOT_TOCBSS;
+    if (Name == ".toc1")
+      Rank |= RF_PPC_TOCL;
+    if (Name == ".toc")
+      Rank |= RF_PPC_TOC;
+    if (Name == ".got")
+      Rank |= RF_PPC_GOT;
+    if (Name == ".branch_lt")
+      Rank |= RF_PPC_BRANCH_LT;
+  }
+  if (Config->EMachine == EM_MIPS) {
+    // All sections with SHF_MIPS_GPREL flag should be grouped together
+    // because data in these sections is addressable with a gp relative address.
+    if (Sec->Flags & SHF_MIPS_GPREL)
+      Rank |= RF_MIPS_GPREL;
+    if (Sec->Name != ".got")
+      Rank |= RF_MIPS_NOT_GOT;
+  }
+  return Rank;
+// Strict "less than" used to sort output sections: primarily by SortRank and,
+// for sections that were given an explicit start address, by that address.
+// Both commands must be OutputSections.
+static bool compareSections(const BaseCommand *ACmd, const BaseCommand *BCmd) {
+  const auto *Lhs = cast<OutputSection>(ACmd);
+  const auto *Rhs = cast<OutputSection>(BCmd);
+  if (Lhs->SortRank != Rhs->SortRank)
+    return Lhs->SortRank < Rhs->SortRank;
+  // Equal ranks: only sections with an entry in SectionStartMap (those that
+  // never got RF_NOT_ADDR_SET) are ordered further; all others are equivalent.
+  if (Lhs->SortRank & RF_NOT_ADDR_SET)
+    return false;
+  return Config->SectionStartMap.lookup(Lhs->Name) <
+         Config->SectionStartMap.lookup(Rhs->Name);
+// Appends Sec to this program header: updates the first/last section
+// markers, raises p_align to the section's alignment if that is larger, and
+// records this entry as the section's PT_LOAD when applicable.
+void PhdrEntry::add(OutputSection *Sec) {
+  if (!FirstSec)
+    FirstSec = Sec;
+  LastSec = Sec;
+  if (p_align < Sec->Alignment)
+    p_align = Sec->Alignment;
+  if (p_type == PT_LOAD)
+    Sec->PtLoad = this;
+// In a statically linked executable the start and the end of the .rel[a].plt
+// section are marked with __rel[a]_iplt_{start,end}; the runtime uses them to
+// resolve all IRELATIVE relocs on startup. Dynamic executables resolve
+// IRELATIVE relocs through GOT and PLT and do not need the symbols.
+// For details, see http://www.airs.com/blog/archives/403.
+template <class ELFT> void Writer<ELFT>::addRelIpltSymbols() {
+  if (Config->Relocatable || needsInterpSection())
+    return;
+  StringRef StartName =
+      Config->IsRela ? "__rela_iplt_start" : "__rel_iplt_start";
+  StringRef EndName = Config->IsRela ? "__rela_iplt_end" : "__rel_iplt_end";
+  // By default, __rela_iplt_{start,end} belong to a dummy section 0
+  // because .rela.plt might be empty and thus removed from output.
+  // We'll override Out::ElfHeader with In.RelaIplt later when we are
+  // sure that .rela.plt exists in output.
+  ElfSym::RelaIpltStart =
+      addOptionalRegular(StartName, Out::ElfHeader, 0, STV_HIDDEN, STB_WEAK);
+  ElfSym::RelaIpltEnd =
+      addOptionalRegular(EndName, Out::ElfHeader, 0, STV_HIDDEN, STB_WEAK);
+// Invokes Fn on every input section whose relocations must be scanned: each
+// live, allocatable regular InputSection, followed by every .eh_frame input
+// section.
+template <class ELFT>
+void Writer<ELFT>::forEachRelSec(
+    llvm::function_ref<void(InputSectionBase &)> Fn) {
+  // Scan all relocations. Each relocation goes through a series
+  // of tests to determine if it needs special treatment, such as
+  // creating GOT, PLT, copy relocations, etc.
+  // Note that relocations for non-alloc sections are directly
+  // processed by InputSection::relocateNonAlloc.
+  for (InputSectionBase *IS : InputSections) {
+    bool NeedsScan =
+        IS->Live && isa<InputSection>(IS) && (IS->Flags & SHF_ALLOC);
+    if (NeedsScan)
+      Fn(*IS);
+  }
+  for (EhInputSection *EhSec : In.EhFrame->Sections)
+    Fn(*EhSec);
+// Binds the linker-reserved symbols held in ElfSym (the GOT base symbol, the
+// __rel[a]_iplt_{start,end} pair, _etext, _edata, _end, the .bss marker and
+// the MIPS _gp) to the sections that determine their values. Most symbols are
+// optional and are skipped when their ElfSym pointer is null. The
+// segment-relative ones are derived from the PT_LOAD entries in Phdrs.
+template <class ELFT> void Writer<ELFT>::setReservedSymbolSections() {
+ if (ElfSym::GlobalOffsetTable) {
+ // The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention usually
+ // to the start of the .got or .got.plt section.
+ InputSection *GotSection = In.GotPlt;
+ if (!Target->GotBaseSymInGotPlt)
+ GotSection = In.MipsGot ? cast<InputSection>(In.MipsGot) // MIPS GOT wins when it exists
+ : cast<InputSection>(In.Got);
+ ElfSym::GlobalOffsetTable->Section = GotSection;
+ }
+ // .rela_iplt_{start,end} mark the start and the end of .rela.plt section.
+ if (ElfSym::RelaIpltStart && !In.RelaIplt->empty()) {
+ ElfSym::RelaIpltStart->Section = In.RelaIplt;
+ ElfSym::RelaIpltEnd->Section = In.RelaIplt;
+ ElfSym::RelaIpltEnd->Value = In.RelaIplt->getSize(); // end marker = one past the section
+ }
+ PhdrEntry *Last = nullptr; // last PT_LOAD seen in Phdrs
+ PhdrEntry *LastRO = nullptr; // last PT_LOAD without PF_W
+ for (PhdrEntry *P : Phdrs) {
+ if (P->p_type != PT_LOAD)
+ continue;
+ Last = P;
+ if (!(P->p_flags & PF_W))
+ LastRO = P;
+ }
+ if (LastRO) {
+ // _etext is the first location after the last read-only loadable segment.
+ if (ElfSym::Etext1)
+ ElfSym::Etext1->Section = LastRO->LastSec;
+ if (ElfSym::Etext2)
+ ElfSym::Etext2->Section = LastRO->LastSec;
+ }
+ if (Last) {
+ // _edata points to the end of the last mapped initialized section.
+ OutputSection *Edata = nullptr;
+ for (OutputSection *OS : OutputSections) { // walk up to and including Last->LastSec
+ if (OS->Type != SHT_NOBITS)
+ Edata = OS; // remember the latest non-NOBITS section
+ if (OS == Last->LastSec)
+ break;
+ }
+ if (ElfSym::Edata1)
+ ElfSym::Edata1->Section = Edata;
+ if (ElfSym::Edata2)
+ ElfSym::Edata2->Section = Edata;
+ // _end is the first location after the uninitialized data region.
+ if (ElfSym::End1)
+ ElfSym::End1->Section = Last->LastSec;
+ if (ElfSym::End2)
+ ElfSym::End2->Section = Last->LastSec;
+ }
+ if (ElfSym::Bss)
+ ElfSym::Bss->Section = findSection(".bss"); // whatever findSection returns is stored as-is
+ // Setup MIPS _gp_disp/__gnu_local_gp symbols which should
+ // be equal to the _gp symbol's value.
+ if (ElfSym::MipsGp) {
+ // Find GP-relative section with the lowest address
+ // and use this address to calculate default _gp value.
+ for (OutputSection *OS : OutputSections) {
+ if (OS->Flags & SHF_MIPS_GPREL) {
+ ElfSym::MipsGp->Section = OS;
+ ElfSym::MipsGp->Value = 0x7ff0; // fixed offset from the start of that section
+ break; // only the first SHF_MIPS_GPREL section in OutputSections is used
+ }
+ }
+ }
+// Measures how similar the ranks of two output sections are. Every branch in
+// getSectionRank maps to one bit flag, so the number of leading bits on which
+// both ranks agree (countLeadingZeros of their XOR) tells how many of the
+// most significant decisions matched; a larger result means more similar.
+static int getRankProximityAux(OutputSection *A, OutputSection *B) {
+  auto Differing = A->SortRank ^ B->SortRank;
+  return countLeadingZeros(Differing);
+// Rank proximity between section A and an arbitrary script command B.
+// Commands that are not output sections have no rank and yield -1.
+static int getRankProximity(OutputSection *A, BaseCommand *B) {
+  auto *Other = dyn_cast<OutputSection>(B);
+  return Other ? getRankProximityAux(A, Other) : -1;
+// Tells whether an orphan section may be placed after Cmd.
+//
+// Orphans should go after symbol assignments, so that an orphan after
+//   begin_foo = .;
+//   foo : { *(foo) }
+//   end_foo = .;
+// doesn't break the intended meaning of the begin/end symbols.
+// Sections are never skipped, since findOrphanPos is the one in charge of
+// deciding the order of the sections.
+// Changes to '.' are never skipped either, since doing so in
+//   rx_sec : { *(rx_sec) }
+//   . = ALIGN(0x1000);
+//   /* The RW PT_LOAD starts here*/
+//   rw_sec : { *(rw_sec) }
+// would mean that the RW PT_LOAD would become unaligned.
+static bool shouldSkip(BaseCommand *Cmd) {
+  auto *Assign = dyn_cast<SymbolAssignment>(Cmd);
+  return Assign && Assign->Name != ".";
+// Returns the position in [B, E) before which the orphan output section *E
+// should be inserted, or E when no better position exists. Orphans are placed so
+// that they share as many characteristics (e.g. rw, tls) with neighbors as possible.
+template <typename ELFT>
+static std::vector<BaseCommand *>::iterator
+findOrphanPos(std::vector<BaseCommand *>::iterator B,
+ std::vector<BaseCommand *>::iterator E) {
+ OutputSection *Sec = cast<OutputSection>(*E); // E must point at the orphan itself
+ // Find the first element that has as close a rank as possible.
+ auto I = std::max_element(B, E, [=](BaseCommand *A, BaseCommand *B) {
+ return getRankProximity(Sec, A) < getRankProximity(Sec, B);
+ });
+ if (I == E) // empty range: nothing to place the orphan next to
+ return E;
+ // Consider all existing sections with the same proximity.
+ int Proximity = getRankProximity(Sec, *I);
+ for (; I != E; ++I) {
+ auto *CurSec = dyn_cast<OutputSection>(*I);
+ if (!CurSec) // non-section commands do not end the run
+ continue;
+ if (getRankProximity(Sec, CurSec) != Proximity ||
+ Sec->SortRank < CurSec->SortRank) // stop at a less similar or higher-ranked section
+ break;
+ }
+ auto IsOutputSec = [](BaseCommand *Cmd) { return isa<OutputSection>(Cmd); };
+ auto J = std::find_if(llvm::make_reverse_iterator(I),
+ llvm::make_reverse_iterator(B), IsOutputSec); // last output section before I
+ I = J.base(); // forward iterator just past that section
+ // As a special case, if the orphan section is the last section, put
+ // it at the very end, past any other commands.
+ // This matches bfd's behavior and is convenient when the linker script fully
+ // specifies the start of the file, but doesn't care about the end (the non
+ // alloc sections for example).
+ auto NextSec = std::find_if(I, E, IsOutputSec);
+ if (NextSec == E)
+ return E;
+ while (I != E && shouldSkip(*I)) // step over symbol assignments other than '.'
+ ++I;
+ return I;
+// Builds the input-section priority map for --symbol-ordering-file (lower value = placed earlier).
+static DenseMap<const InputSectionBase *, int> buildSectionOrder() {
+ DenseMap<const InputSectionBase *, int> SectionOrder;
+ // Use the rarely used option -call-graph-ordering-file to sort sections.
+ if (!Config->CallGraphProfile.empty())
+ return computeCallGraphProfileOrder(); // takes precedence over the symbol ordering file
+ if (Config->SymbolOrderingFile.empty())
+ return SectionOrder; // no ordering requested: empty map
+ struct SymbolOrderEntry {
+ int Priority; // negative; smaller means earlier in the ordering file
+ bool Present; // set once a symbol with this name has been seen
+ };
+ // Build a map from symbols to their priorities. Symbols that didn't
+ // appear in the symbol ordering file have the lowest priority 0.
+ // All explicitly mentioned symbols have negative (higher) priorities.
+ DenseMap<StringRef, SymbolOrderEntry> SymbolOrder;
+ int Priority = -Config->SymbolOrderingFile.size();
+ for (StringRef S : Config->SymbolOrderingFile)
+ SymbolOrder.insert({S, {Priority++, false}}); // insert() keeps the first entry for a repeated name
+ // Build a map from sections to their priorities.
+ auto AddSym = [&](Symbol &Sym) {
+ auto It = SymbolOrder.find(Sym.getName());
+ if (It == SymbolOrder.end())
+ return;
+ SymbolOrderEntry &Ent = It->second;
+ Ent.Present = true;
+ maybeWarnUnorderableSymbol(&Sym);
+ if (auto *D = dyn_cast<Defined>(&Sym)) {
+ if (auto *Sec = dyn_cast_or_null<InputSectionBase>(D->Section)) {
+ int &Priority = SectionOrder[cast<InputSectionBase>(Sec->Repl)]; // shadows the outer Priority; new entries start at 0
+ Priority = std::min(Priority, Ent.Priority); // a section takes the best priority of its symbols
+ }
+ }
+ };
+ // We want both global and local symbols. We get the global ones from the
+ // symbol table and iterate the object files for the local ones.
+ for (Symbol *Sym : Symtab->getSymbols())
+ if (!Sym->isLazy())
+ AddSym(*Sym);
+ for (InputFile *File : ObjectFiles)
+ for (Symbol *Sym : File->getSymbols())
+ if (Sym->isLocal())
+ AddSym(*Sym);
+ if (Config->WarnSymbolOrdering)
+ for (auto OrderEntry : SymbolOrder)
+ if (!OrderEntry.second.Present) // listed in the ordering file but never seen
+ warn("symbol ordering file: no such symbol: " + OrderEntry.first);
+ return SectionOrder;
+// Sorts the sections in ISD according to the provided section order; sections absent from Order keep their relative order.
+static void
+sortISDBySectionOrder(InputSectionDescription *ISD,
+ const DenseMap<const InputSectionBase *, int> &Order) {
+ std::vector<InputSection *> UnorderedSections;
+ std::vector<std::pair<InputSection *, int>> OrderedSections;
+ uint64_t UnorderedSize = 0; // total getSize() of the sections not in Order
+ for (InputSection *IS : ISD->Sections) {
+ auto I = Order.find(IS);
+ if (I == Order.end()) {
+ UnorderedSections.push_back(IS);
+ UnorderedSize += IS->getSize();
+ continue;
+ }
+ OrderedSections.push_back({IS, I->second});
+ }
+ llvm::sort(OrderedSections, [&](std::pair<InputSection *, int> A,
+ std::pair<InputSection *, int> B) {
+ return A.second < B.second; // ascending priority value
+ });
+ // Find an insertion point for the ordered section list in the unordered
+ // section list. On targets with limited-range branches, this is the mid-point
+ // of the unordered section list. This decreases the likelihood that a range
+ // extension thunk will be needed to enter or exit the ordered region. If the
+ // ordered section list is a list of hot functions, we can generally expect
+ // the ordered functions to be called more often than the unordered functions,
+ // making it more likely that any particular call will be within range, and
+ // therefore reducing the number of thunks required.
+ //
+ // For example, imagine that you have 8MB of hot code and 32MB of cold code.
+ // If the layout is:
+ //
+ // 8MB hot
+ // 32MB cold
+ //
+ // only the first 8-16MB of the cold code (depending on which hot function it
+ // is actually calling) can call the hot code without a range extension thunk.
+ // However, if we use this layout:
+ //
+ // 16MB cold
+ // 8MB hot
+ // 16MB cold
+ //
+ // both the last 8-16MB of the first block of cold code and the first 8-16MB
+ // of the second block of cold code can call the hot code without a thunk. So
+ // we effectively double the amount of code that could potentially call into
+ // the hot code without a thunk.
+ size_t InsPt = 0; // stays 0 (ordered sections first) unless the target uses thunk sections
+ if (Target->getThunkSectionSpacing() && !OrderedSections.empty()) {
+ uint64_t UnorderedPos = 0;
+ for (; InsPt != UnorderedSections.size(); ++InsPt) {
+ UnorderedPos += UnorderedSections[InsPt]->getSize();
+ if (UnorderedPos > UnorderedSize / 2) // stop at the section that crosses the half-way mark
+ break;
+ }
+ }
+ ISD->Sections.clear(); // rebuild as: unordered[0, InsPt) + ordered + unordered[InsPt, end)
+ for (InputSection *IS : makeArrayRef(UnorderedSections).slice(0, InsPt))
+ ISD->Sections.push_back(IS);
+ for (std::pair<InputSection *, int> P : OrderedSections)
+ ISD->Sections.push_back(P.first);
+ for (InputSection *IS : makeArrayRef(UnorderedSections).slice(InsPt))
+ ISD->Sections.push_back(IS);
+// Applies the default input-section ordering to one output section.
+// .init_array/.fini_array and .ctors/.dtors use their dedicated sort (only
+// when no SECTIONS command was given), .init/.fini are left untouched, and
+// every other section is sorted by the priorities in Order, if there are any.
+static void sortSection(OutputSection *Sec,
+                        const DenseMap<const InputSectionBase *, int> &Order) {
+  StringRef Name = Sec->Name;
+  bool HasScript = Script->HasSectionsCommand;
+  // Sort input sections by section name suffixes for
+  // __attribute__((init_priority(N))).
+  bool IsInitFiniArray = Name == ".init_array" || Name == ".fini_array";
+  if (IsInitFiniArray) {
+    if (!HasScript)
+      Sec->sortInitFini();
+    return;
+  }
+  // Sort input sections by the special rule for .ctors and .dtors.
+  bool IsCtorsDtors = Name == ".ctors" || Name == ".dtors";
+  if (IsCtorsDtors) {
+    if (!HasScript)
+      Sec->sortCtorsDtors();
+    return;
+  }
+  // Never sort these.
+  if (Name == ".init" || Name == ".fini")
+    return;
+  // Sort input sections by priority using the list provided
+  // by --symbol-ordering-file.
+  if (Order.empty())
+    return;
+  for (BaseCommand *Cmd : Sec->SectionCommands) {
+    auto *ISD = dyn_cast<InputSectionDescription>(Cmd);
+    if (ISD)
+      sortISDBySectionOrder(ISD, Order);
+  }
+// Applies the default sorting of special input sections, used when the linker
+// script provides no layout, to every output section. This also handles
+// --symbol-ordering-file.
+template <class ELFT> void Writer<ELFT>::sortInputSections() {
+  // The order map is expensive to build, so it is computed once and shared.
+  DenseMap<const InputSectionBase *, int> Order = buildSectionOrder();
+  for (BaseCommand *Cmd : Script->SectionCommands) {
+    auto *OutSec = dyn_cast<OutputSection>(Cmd);
+    if (!OutSec)
+      continue;
+    sortSection(OutSec, Order);
+  }
+template <class ELFT> void Writer<ELFT>::sortSections() { // Orders output sections; with a SECTIONS script only orphans are moved.
+ Script->adjustSectionsBeforeSorting();
+ // Don't sort if using -r. It is not necessary and we want to preserve the
+ // relative order for SHF_LINK_ORDER sections.
+ if (Config->Relocatable)
+ return;
+ sortInputSections();
+ for (BaseCommand *Base : Script->SectionCommands) {
+ auto *OS = dyn_cast<OutputSection>(Base);
+ if (!OS)
+ continue;
+ OS->SortRank = getSectionRank(OS);
+ // We want to assign rude approximation values to OutSecOff fields
+ // to know the relative order of the input sections. We use it for
+ // sorting SHF_LINK_ORDER sections. See resolveShfLinkOrder().
+ uint64_t I = 0;
+ for (InputSection *Sec : getInputSections(OS))
+ Sec->OutSecOff = I++; // sequence number, not a real offset
+ }
+ if (!Script->HasSectionsCommand) {
+ // We know that all the OutputSections are contiguous in this case.
+ auto IsSection = [](BaseCommand *Base) { return isa<OutputSection>(Base); };
+ std::stable_sort(
+ llvm::find_if(Script->SectionCommands, IsSection),
+ llvm::find_if(llvm::reverse(Script->SectionCommands), IsSection).base(),
+ compareSections); // sorts the range from the first to the last output section
+ return;
+ }
+ // Orphan sections are sections present in the input files which are
+ // not explicitly placed into the output file by the linker script.
+ //
+ // The sections in the linker script are already in the correct
+ // order. We have to figure out where to insert the orphan
+ // sections.
+ //
+ // The order of the sections in the script is arbitrary and may not agree with
+ // compareSections. This means that we cannot easily define a strict weak
+ // ordering. To see why, consider a comparison of a section in the script and
+ // one not in the script. We have two simple options:
+ // * Make them equivalent (a is not less than b, and b is not less than a).
+ // The problem is then that equivalence has to be transitive and we can
+ // have sections a, b and c with only b in a script and a less than c
+ // which breaks this property.
+ // * Use compareSectionsNonScript. Given that the script order doesn't have
+ // to match, we can end up with sections a, b, c, d where b and c are in the
+ // script and c is compareSectionsNonScript less than b. In which case d
+ // can be equivalent to c, a to b and d < a. As a concrete example:
+ // .a (rx) # not in script
+ // .b (rx) # in script
+ // .c (ro) # in script
+ // .d (ro) # not in script
+ //
+ // The way we define an order then is:
+ // * Sort only the orphan sections. They are in the end right now.
+ // * Move each orphan section to its preferred position. We try
+ // to put each section in the last position where it can share
+ // a PT_LOAD.
+ //
+ // There is some ambiguity as to where exactly a new entry should be
+ // inserted, because Commands contains not only output section
+ // commands but also other types of commands such as symbol assignment
+ // expressions. There's no correct answer here due to the lack of the
+ // formal specification of the linker script. We use heuristics to
+ // determine whether a new output command should be added before or
+ // after other commands. For the details, look at shouldSkip
+ // function.
+ auto I = Script->SectionCommands.begin();
+ auto E = Script->SectionCommands.end();
+ auto NonScriptI = std::find_if(I, E, [](BaseCommand *Base) {
+ if (auto *Sec = dyn_cast<OutputSection>(Base))
+ return Sec->SectionIndex == UINT32_MAX; // this index value identifies an orphan here
+ return false;
+ });
+ // Sort the orphan sections.
+ std::stable_sort(NonScriptI, E, compareSections);
+ // As a horrible special case, skip the first . assignment if it is before any
+ // section. We do this because it is common to set a load address by starting
+ // the script with ". = 0xabcd" and the expectation is that every section is
+ // after that.
+ auto FirstSectionOrDotAssignment =
+ std::find_if(I, E, [](BaseCommand *Cmd) { return !shouldSkip(Cmd); });
+ if (FirstSectionOrDotAssignment != E &&
+ isa<SymbolAssignment>(**FirstSectionOrDotAssignment))
+ ++FirstSectionOrDotAssignment;
+ I = FirstSectionOrDotAssignment; // orphans are never placed before this point
+ while (NonScriptI != E) {
+ auto Pos = findOrphanPos<ELFT>(I, NonScriptI);
+ OutputSection *Orphan = cast<OutputSection>(*NonScriptI);
+ // As an optimization, find all sections with the same sort rank
+ // and insert them with one rotate.
+ unsigned Rank = Orphan->SortRank;
+ auto End = std::find_if(NonScriptI + 1, E, [=](BaseCommand *Cmd) {
+ return cast<OutputSection>(Cmd)->SortRank != Rank;
+ });
+ std::rotate(Pos, NonScriptI, End); // moves [NonScriptI, End) in front of Pos
+ NonScriptI = End;
+ }
+ Script->adjustSectionsAfterSorting();
+// Orders SHF_LINK_ORDER input sections by the output position of the section
+// each one depends on (its link-order dependency): first by the output
+// section index of the dependency, then by its OutSecOff within that section.
+static bool compareByFilePosition(InputSection *A, InputSection *B) {
+  // Synthetic, i. e. a sentinel section, should go last.
+  bool ASynthetic = A->kind() == InputSectionBase::Synthetic;
+  bool BSynthetic = B->kind() == InputSectionBase::Synthetic;
+  if (ASynthetic || BSynthetic)
+    return !ASynthetic;
+  InputSection *DepA = A->getLinkOrderDep();
+  InputSection *DepB = B->getLinkOrderDep();
+  OutputSection *OutA = DepA->getParent();
+  OutputSection *OutB = DepB->getParent();
+  if (OutA == OutB)
+    return DepA->OutSecOff < DepB->OutSecOff;
+  return OutA->SectionIndex < OutB->SectionIndex;
+// This function is used by the --merge-exidx-entries to detect duplicate
+// .ARM.exidx sections. It is Arm only.
+// The .ARM.exidx section is of the form:
+// | PREL31 offset to function | Unwind instructions for function |
+// where the unwind instructions are either a small number of unwind
+// instructions inlined into the table entry, the special CANT_UNWIND value of
+// 0x1 or a PREL31 offset into a .ARM.extab Section that contains unwind
+// instructions.
+// We return true if all the unwind instructions in the .ARM.exidx entries of
+// Cur can be merged into the last entry of Prev.
+static bool isDuplicateArmExidxSec(InputSection *Prev, InputSection *Cur) {
+ // References to .ARM.Extab Sections have bit 31 clear and are not the
+ // special EXIDX_CANTUNWIND bit-pattern.
+ auto IsExtabRef = [](uint32_t Unwind) {
+ return (Unwind & 0x80000000) == 0 && Unwind != 0x1;
+ };
+ struct ExidxEntry { // one table entry: two little-endian 32-bit words
+ ulittle32_t Fn; // first word of the entry (the function offset per the layout above)
+ ulittle32_t Unwind; // second word: inline unwind data, 0x1, or an extab reference
+ };
+ // Get the last table Entry from the previous .ARM.exidx section.
+ const ExidxEntry &PrevEntry = Prev->getDataAs<ExidxEntry>().back();
+ if (IsExtabRef(PrevEntry.Unwind)) // extab references are never merged
+ return false;
+ // We consider the unwind instructions of an .ARM.exidx table entry
+ // a duplicate of the previous unwind instructions if:
+ // - Both are the special EXIDX_CANTUNWIND.
+ // - Both are the same inline unwind instructions.
+ // We do not attempt to follow and check links into .ARM.extab tables as
+ // consecutive identical entries are rare and the effort to check that they
+ // are identical is high.
+ for (const ExidxEntry Entry : Cur->getDataAs<ExidxEntry>())
+ if (IsExtabRef(Entry.Unwind) || Entry.Unwind != PrevEntry.Unwind)
+ return false;
+ // All table entries in this .ARM.exidx Section can be merged into the
+ // previous Section.
+ return true;
+template <class ELFT> void Writer<ELFT>::resolveShfLinkOrder() { // Reorders the input sections of every SHF_LINK_ORDER output section.
+ for (OutputSection *Sec : OutputSections) {
+ if (!(Sec->Flags & SHF_LINK_ORDER))
+ continue;
+ // Link order may be distributed across several InputSectionDescriptions
+ // but sort must consider them all at once.
+ std::vector<InputSection **> ScriptSections; // addresses of the slots inside each ISD
+ std::vector<InputSection *> Sections; // flat copy of the sections, sorted below
+ for (BaseCommand *Base : Sec->SectionCommands) {
+ if (auto *ISD = dyn_cast<InputSectionDescription>(Base)) {
+ for (InputSection *&IS : ISD->Sections) {
+ ScriptSections.push_back(&IS);
+ Sections.push_back(IS);
+ }
+ }
+ }
+ std::stable_sort(Sections.begin(), Sections.end(), compareByFilePosition);
+ if (!Config->Relocatable && Config->EMachine == EM_ARM &&
+ Sec->Type == SHT_ARM_EXIDX) {
+ if (auto *Sentinel = dyn_cast<ARMExidxSentinelSection>(Sections.back())) { // NOTE(review): back() assumes Sections is non-empty; confirm
+ assert(Sections.size() >= 2 &&
+ "We should create a sentinel section only if there are "
+ "alive regular exidx sections.");
+ // The last executable section is required to fill the sentinel.
+ // Remember it here so that we don't have to find it again.
+ Sentinel->Highest = Sections[Sections.size() - 2]->getLinkOrderDep();
+ }
+ // The EHABI for the Arm Architecture permits consecutive identical
+ // table entries to be merged. We use a simple implementation that
+ // removes a .ARM.exidx Input Section if it can be merged into the
+ // previous one. This does not require any rewriting of InputSection
+ // contents but misses opportunities for fine grained deduplication
+ // where only a subset of the InputSection contents can be merged.
+ if (Config->MergeArmExidx) {
+ size_t Prev = 0; // index of the most recent section that was kept
+ // The last one is a sentinel entry which should not be removed.
+ for (size_t I = 1; I < Sections.size() - 1; ++I) {
+ if (isDuplicateArmExidxSec(Sections[Prev], Sections[I]))
+ Sections[I] = nullptr; // mark as duplicate; erased further down
+ else
+ Prev = I;
+ }
+ }
+ }
+ for (int I = 0, N = Sections.size(); I < N; ++I)
+ *ScriptSections[I] = Sections[I]; // write the sorted order back into the ISD slots
+ // Remove the Sections we marked as duplicate earlier.
+ for (BaseCommand *Base : Sec->SectionCommands)
+ if (auto *ISD = dyn_cast<InputSectionDescription>(Base))
+ llvm::erase_if(ISD->Sections, [](InputSection *IS) { return !IS; });
+ }
+// For most RISC ISAs, we need to generate content that depends on the address
+// of InputSections. For example, some architectures such as AArch64 use small
+// displacements for jump instructions, so the linker is responsible for
+// creating range extension thunks. As generating that content may also
+// alter InputSection addresses, we iterate until nothing changes anymore.
+template <class ELFT> void Writer<ELFT>::maybeAddThunks() {
+ if (!Target->NeedsThunks && !Config->AndroidPackDynRelocs &&
+ !Config->RelrPackDynRelocs)
+ return;
+ ThunkCreator TC;
+ AArch64Err843419Patcher A64P;
+ for (;;) {
+ bool Changed = false;
+ Script->assignAddresses();
+ if (Target->NeedsThunks)
+ Changed |= TC.createThunks(OutputSections);
+ if (Config->FixCortexA53Errata843419) {
+ if (Changed)
+ Script->assignAddresses();
+ Changed |= A64P.createFixes();
+ }
+ if (In.MipsGot)
+ In.MipsGot->updateAllocSize();
+ Changed |= In.RelaDyn->updateAllocSize();
+ if (In.RelrDyn)
+ Changed |= In.RelrDyn->updateAllocSize();
+ if (!Changed)
+ return;
+ }
+// Calls finalizeContents() on Sec, but only if Sec exists, is not empty and
+// has a parent.
+static void finalizeSynthetic(SyntheticSection *Sec) {
+  if (!Sec || Sec->empty() || !Sec->getParent())
+    return;
+  Sec->finalizeContents();
+// In order to allow users to manipulate linker-synthesized sections,
+// we had to add synthetic sections to the input section list early,
+// even before we make decisions whether they are needed. This allows
+// users to write scripts like this: ".mygot : { .got }".
+// Doing so has an unintended side effect. If it turns out that we
+// don't need a .got (for example) at all because there's no
+// relocation that needs a .got, we don't want to emit .got.
+// To deal with the above problem, this function is called after
+// scanRelocations is called to remove synthetic sections that turn
+// out to be empty.
+static void removeUnusedSyntheticSections() {
+ // All input synthetic sections that can be empty are placed after
+ // all regular ones. We iterate over them in reverse and stop at the
+ // first non-synthetic one.
+ for (InputSectionBase *S : llvm::reverse(InputSections)) {
+ SyntheticSection *SS = dyn_cast<SyntheticSection>(S);
+ if (!SS)
+ return;
+ OutputSection *OS = SS->getParent();
+ if (!OS || !SS->empty())
+ continue;
+ // If we reach here, then SS is an unused synthetic section and we want to
+ // remove it from every input section description of its output section.
+ for (BaseCommand *B : OS->SectionCommands)
+ if (auto *ISD = dyn_cast<InputSectionDescription>(B))
+ llvm::erase_if(ISD->Sections,
+ [=](InputSection *IS) { return IS == SS; });
+ }
+// Decides whether B can be replaced at load-time by a symbol with the same
+// name that is defined in another ELF executable or DSO.
+static bool computeIsPreemptible(const Symbol &B) {
+  assert(!B.isLocal());
+  // Only symbols that appear in dynsym and have default visibility can be
+  // preempted.
+  if (!B.includeInDynsym() || B.Visibility != STV_DEFAULT)
+    return false;
+  // At this point copy relocations have not been created yet, so any
+  // symbol that is not defined locally is preemptible.
+  if (!B.isDefined())
+    return true;
+  // If we have a dynamic list it specifies which local symbols are
+  // preemptible. Without Config->Shared a definition is not preemptible
+  // either.
+  if (Config->HasDynamicList || !Config->Shared)
+    return false;
+  // -Bsymbolic means that definitions are not preempted.
+  bool BoundLocally =
+      Config->Bsymbolic || (Config->BsymbolicFunctions && B.isFunc());
+  return !BoundLocally;
+// Create output section objects and add them to OutputSections.
+template <class ELFT> void Writer<ELFT>::finalizeSections() {
+ Out::PreinitArray = findSection(".preinit_array");
+ Out::InitArray = findSection(".init_array");
+ Out::FiniArray = findSection(".fini_array");
+ // The linker needs to define SECNAME_start, SECNAME_end and SECNAME_stop
+ // symbols for sections, so that the runtime can get the start and end
+ // addresses of each section by section name. Add such symbols.
+ if (!Config->Relocatable) {
+ addStartEndSymbols();
+ for (BaseCommand *Base : Script->SectionCommands)
+ if (auto *Sec = dyn_cast<OutputSection>(Base))
+ addStartStopSymbols(Sec);
+ }
+ // Add _DYNAMIC symbol. Unlike GNU gold, our _DYNAMIC symbol has no type.
+ // It should be okay as no one seems to care about the type.
+ // Even the author of gold doesn't remember why gold behaves that way.
+ // https://sourceware.org/ml/binutils/2002-03/msg00360.html
+ if (In.Dynamic->Parent)
+ Symtab->addDefined("_DYNAMIC", STV_HIDDEN, STT_NOTYPE, 0 /*Value*/,
+ /*Size=*/0, STB_WEAK, In.Dynamic,
+ /*File=*/nullptr);
+ // Define __rel[a]_iplt_{start,end} symbols if needed.
+ addRelIpltSymbols();
+ // RISC-V's gp can address +/- 2 KiB, set it to .sdata + 0x800 if not defined.
+ if (Config->EMachine == EM_RISCV)
+ if (!dyn_cast_or_null<Defined>(Symtab->find("__global_pointer$")))
+ addOptionalRegular("__global_pointer$", findSection(".sdata"), 0x800);
+ // This is responsible for splitting up the .eh_frame section into
+ // pieces. The relocation scan uses those pieces, so this has to be
+ // earlier.
+ finalizeSynthetic(In.EhFrame);
+ for (Symbol *S : Symtab->getSymbols()) {
+ if (!S->IsPreemptible)
+ S->IsPreemptible = computeIsPreemptible(*S);
+ if (S->isGnuIFunc() && Config->ZIfuncnoplt)
+ S->ExportDynamic = true;
+ }
+ // Scan relocations. This must be done after every symbol is declared so that
+ // we can correctly decide if a dynamic relocation is needed.
+ if (!Config->Relocatable)
+ forEachRelSec(scanRelocations<ELFT>);
+ if (In.Plt && !In.Plt->empty())
+ In.Plt->addSymbols();
+ if (In.Iplt && !In.Iplt->empty())
+ In.Iplt->addSymbols();
+ // Now that we have defined all possible global symbols, including linker-
+ // synthesized ones, visit all symbols to give the finishing touches.
+ for (Symbol *Sym : Symtab->getSymbols()) {
+ if (!includeInSymtab(*Sym))
+ continue;
+ if (In.SymTab)
+ In.SymTab->addSymbol(Sym);
+ if (Sym->includeInDynsym()) {
+ In.DynSymTab->addSymbol(Sym);
+ if (auto *File = dyn_cast_or_null<SharedFile<ELFT>>(Sym->File))
+ if (File->IsNeeded && !Sym->isUndefined())
+ InX<ELFT>::VerNeed->addSymbol(Sym);
+ }
+ }
+ // Do not proceed if any error (e.g. an undefined symbol) has been reported.
+ if (errorCount())
+ return;
+ if (In.MipsGot)
+ In.MipsGot->build<ELFT>();
+ removeUnusedSyntheticSections();
+ sortSections();
+ // Now that we have the final list, create a list of all the
+ // OutputSections for convenience.
+ for (BaseCommand *Base : Script->SectionCommands)
+ if (auto *Sec = dyn_cast<OutputSection>(Base))
+ OutputSections.push_back(Sec);
+ // Prefer command line supplied address over other constraints.
+ for (OutputSection *Sec : OutputSections) {
+ auto I = Config->SectionStartMap.find(Sec->Name);
+ if (I != Config->SectionStartMap.end())
+ Sec->AddrExpr = [=] { return I->second; };
+ }
+ // This is a bit of a hack. A value of 0 means undef, so we set it
+ // to 1 to make __ehdr_start defined. The section number is not
+ // particularly relevant.
+ Out::ElfHeader->SectionIndex = 1;
+ for (size_t I = 0, E = OutputSections.size(); I != E; ++I) {
+ OutputSection *Sec = OutputSections[I];
+ Sec->SectionIndex = I + 1;
+ Sec->ShName = In.ShStrTab->addString(Sec->Name);
+ }
+ // Binary and relocatable output does not have PHDRS.
+ // The headers have to be created before finalize as that can influence the
+ // image base and the dynamic section on mips includes the image base.
+ if (!Config->Relocatable && !Config->OFormatBinary) {
+ Phdrs = Script->hasPhdrsCommands() ? Script->createPhdrs() : createPhdrs();
+ addPtArmExid(Phdrs);
+ Out::ProgramHeaders->Size = sizeof(Elf_Phdr) * Phdrs.size();
+ // Find the TLS segment. This happens before the section layout loop so that
+ // Android relocation packing can look up TLS symbol addresses.
+ for (PhdrEntry *P : Phdrs)
+ if (P->p_type == PT_TLS)
+ Out::TlsPhdr = P;
+ }
+ // Some symbols are defined in terms of program headers. Now that we
+ // have the headers, we can find out which sections they point to.
+ setReservedSymbolSections();
+ // Dynamic section must be the last one in this list and dynamic
+ // symbol table section (DynSymTab) must be the first one.
+ finalizeSynthetic(In.DynSymTab);
+ finalizeSynthetic(In.Bss);
+ finalizeSynthetic(In.BssRelRo);
+ finalizeSynthetic(In.GnuHashTab);
+ finalizeSynthetic(In.HashTab);
+ finalizeSynthetic(In.SymTabShndx);
+ finalizeSynthetic(In.ShStrTab);
+ finalizeSynthetic(In.StrTab);
+ finalizeSynthetic(In.VerDef);
+ finalizeSynthetic(In.DynStrTab);
+ finalizeSynthetic(In.Got);
+ finalizeSynthetic(In.MipsGot);
+ finalizeSynthetic(In.IgotPlt);
+ finalizeSynthetic(In.GotPlt);
+ finalizeSynthetic(In.RelaDyn);
+ finalizeSynthetic(In.RelrDyn);
+ finalizeSynthetic(In.RelaIplt);
+ finalizeSynthetic(In.RelaPlt);
+ finalizeSynthetic(In.Plt);
+ finalizeSynthetic(In.Iplt);
+ finalizeSynthetic(In.EhFrameHdr);
+ finalizeSynthetic(InX<ELFT>::VerSym);
+ finalizeSynthetic(InX<ELFT>::VerNeed);
+ finalizeSynthetic(In.Dynamic);
+ if (!Script->HasSectionsCommand && !Config->Relocatable)
+ fixSectionAlignments();
+ // After link order processing .ARM.exidx sections can be deduplicated, which
+ // needs to be resolved before any other address dependent operation.
+ resolveShfLinkOrder();
+ // Jump instructions in many ISAs have small displacements, and therefore they
+ // cannot jump to arbitrary addresses in memory. For example, RISC-V JAL
+ // instruction can target only +-1 MiB from PC. It is a linker's
+ // responsibility to create and insert small pieces of code between sections
+ // to extend the ranges if jump targets are out of range. Such code pieces are
+ // called "thunks".
+ //
+ // We add thunks at this stage. We couldn't do this before this point because
+ // this is the earliest point where we know sizes of sections and their
+ // layouts (that are needed to determine if jump targets are in range).
+ maybeAddThunks();
+ // maybeAddThunks may have added local symbols to the static symbol table.
+ finalizeSynthetic(In.SymTab);
+ finalizeSynthetic(In.PPC64LongBranchTarget);
+ // Fill other section headers. The dynamic table is finalized
+ // at the end because some tags like RELSZ depend on result
+ // of finalizing other sections.
+ for (OutputSection *Sec : OutputSections)
+ Sec->finalize<ELFT>();
+// Ensure data sections are not mixed with executable sections when
+// -execute-only is used. -execute-only is a feature to make pages executable
+// but not readable, and the feature is currently supported only on AArch64.
+template <class ELFT> void Writer<ELFT>::checkExecuteOnly() {
+ if (!Config->ExecuteOnly)
+ return;
+ for (OutputSection *OS : OutputSections)
+ if (OS->Flags & SHF_EXECINSTR)
+ for (InputSection *IS : getInputSections(OS))
+ if (!(IS->Flags & SHF_EXECINSTR))
+ error("cannot place " + toString(IS) + " into " + toString(OS->Name) +
+ ": -execute-only does not support intermingling data and code");
+// The linker is expected to define SECNAME_start and SECNAME_end
+// symbols for a few sections. This function defines them.
+template <class ELFT> void Writer<ELFT>::addStartEndSymbols() {
+ // If a section does not exist, there's ambiguity as to how we
+ // define _start and _end symbols for an init/fini section. Since
+ // the loader assumes that the symbols are always defined, we need to
+ // always define them. But what value? The loader iterates over all
+ // pointers between _start and _end to run global ctors/dtors, so if
+ // the section is empty, their symbol values don't actually matter
+ // as long as _start and _end point to the same location.
+ //
+ // That said, we don't want to set the symbols to 0 (which is
+ // probably the simplest value) because that could cause some
+ // program to fail to link due to relocation overflow, if their
+ // program text is above 2 GiB. We use the address of the .text
+ // section instead to prevent that failure.
+ //
+ // In a rare situation, the .text section may not exist. If that's the
+ // case, use the image base address as a last resort.
+ OutputSection *Default = findSection(".text");
+ if (!Default)
+ Default = Out::ElfHeader;
+ auto Define = [=](StringRef Start, StringRef End, OutputSection *OS) {
+ if (OS) {
+ addOptionalRegular(Start, OS, 0);
+ addOptionalRegular(End, OS, -1);
+ } else {
+ addOptionalRegular(Start, Default, 0);
+ addOptionalRegular(End, Default, 0);
+ }
+ };
+ Define("__preinit_array_start", "__preinit_array_end", Out::PreinitArray);
+ Define("__init_array_start", "__init_array_end", Out::InitArray);
+ Define("__fini_array_start", "__fini_array_end", Out::FiniArray);
+ if (OutputSection *Sec = findSection(".ARM.exidx"))
+ Define("__exidx_start", "__exidx_end", Sec);
+// If a section name is valid as a C identifier (which is rare because of
+// the leading '.'), linkers are expected to define __start_<secname> and
+// __stop_<secname> symbols at the beginning and the end of the section,
+// respectively. The ELF standard does not request this, but GNU ld and
+// gold provide the feature and many programs use it.
+template <class ELFT>
+void Writer<ELFT>::addStartStopSymbols(OutputSection *Sec) {
+  StringRef Name = Sec->Name;
+  if (isValidCIdentifier(Name)) {
+    addOptionalRegular(Saver.save("__start_" + Name), Sec, 0, STV_PROTECTED);
+    addOptionalRegular(Saver.save("__stop_" + Name), Sec, -1, STV_PROTECTED);
+  }
+// Returns true if Sec is an allocated, loadable section other than a TLS
+// NOBITS section.
+static bool needsPtLoad(OutputSection *Sec) {
+  bool IsAlloc = Sec->Flags & SHF_ALLOC;
+  if (!IsAlloc || Sec->Noload)
+    return false;
+  // Don't allocate VA space for TLS NOBITS sections. The PT_TLS PHDR is
+  // responsible for allocating space for them, not the PT_LOAD that
+  // contains the TLS initialization image.
+  bool IsTlsNobits = (Sec->Flags & SHF_TLS) && Sec->Type == SHT_NOBITS;
+  return !IsTlsNobits;
+// Adjusts segment permission flags according to the command line options.
+//
+// Linker scripts are responsible for aligning addresses. Unfortunately, most
+// linker scripts are designed for creating two PT_LOADs only, one RX and one
+// RW. This means that there is no alignment in the RO to RX transition and we
+// cannot create a PT_LOAD there. That is why Config->SingleRoRx makes every
+// non-writable segment executable.
+static uint64_t computeFlags(uint64_t Flags) {
+  if (Config->Omagic)
+    return PF_R | PF_W | PF_X;
+  bool IsExecutable = Flags & PF_X;
+  bool IsWritable = Flags & PF_W;
+  if (Config->ExecuteOnly && IsExecutable)
+    return Flags & ~PF_R;
+  if (Config->SingleRoRx && !IsWritable)
+    return Flags | PF_X;
+  return Flags;
+// Decide which program headers to create and which sections to include in each
+// one. Returns the list of headers that were created.
+template <class ELFT> std::vector<PhdrEntry *> Writer<ELFT>::createPhdrs() {
+  std::vector<PhdrEntry *> Ret;
+  auto AddHdr = [&](unsigned Type, unsigned Flags) -> PhdrEntry * {
+    Ret.push_back(make<PhdrEntry>(Type, Flags));
+    return Ret.back();
+  };
+  // The first phdr entry is PT_PHDR which describes the program header itself.
+  AddHdr(PT_PHDR, PF_R)->add(Out::ProgramHeaders);
+  // PT_INTERP must be the second entry if it exists.
+  if (OutputSection *Cmd = findSection(".interp"))
+    AddHdr(PT_INTERP, Cmd->getPhdrFlags())->add(Cmd);
+  // Add the first PT_LOAD segment for regular output sections.
+  uint64_t Flags = computeFlags(PF_R);
+  PhdrEntry *Load = AddHdr(PT_LOAD, Flags);
+  // Add the headers. We will remove them if they don't fit.
+  Load->add(Out::ElfHeader);
+  Load->add(Out::ProgramHeaders);
+  for (OutputSection *Sec : OutputSections) {
+    if (!(Sec->Flags & SHF_ALLOC))
+      break;
+    if (!needsPtLoad(Sec))
+      continue;
+    // Segments are contiguous memory regions that have the same attributes
+    // (e.g. executable or writable). There is one phdr for each segment.
+    // Therefore, we need to create a new phdr when the next section has
+    // different flags or is loaded at a discontiguous address or memory
+    // region using AT or AT> linker script command, respectively. At the same
+    // time, we don't want to create a separate load segment for the headers,
+    // even if the first output section has an AT or AT> attribute.
+    uint64_t NewFlags = computeFlags(Sec->getPhdrFlags());
+    if (((Sec->LMAExpr ||
+          (Sec->LMARegion && (Sec->LMARegion != Load->FirstSec->LMARegion))) &&
+         Load->LastSec != Out::ProgramHeaders) ||
+        Sec->MemRegion != Load->FirstSec->MemRegion || Flags != NewFlags) {
+      Load = AddHdr(PT_LOAD, NewFlags);
+      Flags = NewFlags;
+    }
+    Load->add(Sec);
+  }
+  // Add a TLS segment if any.
+  PhdrEntry *TlsHdr = make<PhdrEntry>(PT_TLS, PF_R);
+  for (OutputSection *Sec : OutputSections)
+    if (Sec->Flags & SHF_TLS)
+      TlsHdr->add(Sec);
+  if (TlsHdr->FirstSec)
+    Ret.push_back(TlsHdr);
+  // Add an entry for .dynamic.
+  if (OutputSection *Sec = In.Dynamic->getParent())
+    AddHdr(PT_DYNAMIC, Sec->getPhdrFlags())->add(Sec);
+  // PT_GNU_RELRO includes all sections that should be marked as
+  // read-only by dynamic linker after processing relocations.
+  // Current dynamic loaders only support one PT_GNU_RELRO PHDR, give
+  // an error message if more than one PT_GNU_RELRO PHDR is required.
+  PhdrEntry *RelRo = make<PhdrEntry>(PT_GNU_RELRO, PF_R);
+  bool InRelroPhdr = false;
+  bool IsRelroFinished = false;
+  for (OutputSection *Sec : OutputSections) {
+    if (!needsPtLoad(Sec))
+      continue;
+    if (isRelroSection(Sec)) {
+      InRelroPhdr = true;
+      if (!IsRelroFinished)
+        RelRo->add(Sec);
+      else
+        error("section: " + Sec->Name + " is not contiguous with other relro" +
+              " sections");
+    } else if (InRelroPhdr) {
+      InRelroPhdr = false;
+      IsRelroFinished = true;
+    }
+  }
+  if (RelRo->FirstSec)
+    Ret.push_back(RelRo);
+  // PT_GNU_EH_FRAME is a special section pointing on .eh_frame_hdr.
+  if (!In.EhFrame->empty() && In.EhFrameHdr && In.EhFrame->getParent() &&
+      In.EhFrameHdr->getParent())
+    AddHdr(PT_GNU_EH_FRAME, In.EhFrameHdr->getParent()->getPhdrFlags())
+        ->add(In.EhFrameHdr->getParent());
+  // PT_OPENBSD_RANDOMIZE is an OpenBSD-specific feature. That makes
+  // the dynamic linker fill the segment with random data.
+  if (OutputSection *Cmd = findSection(".openbsd.randomdata"))
+    AddHdr(PT_OPENBSD_RANDOMIZE, Cmd->getPhdrFlags())->add(Cmd);
+  // PT_GNU_STACK is a special section to tell the loader to make the
+  // pages for the stack non-executable. If you really want an executable
+  // stack, you can pass -z execstack, but that's not recommended for
+  // security reasons.
+  unsigned Perm = PF_R | PF_W;
+  if (Config->ZExecstack)
+    Perm |= PF_X;
+  AddHdr(PT_GNU_STACK, Perm)->p_memsz = Config->ZStackSize;
+  // PT_OPENBSD_WXNEEDED is an OpenBSD-specific header to mark that the
+  // executable is expected to perform W^X violations, such as calling
+  // mprotect(2) or mmap(2) with PROT_WRITE | PROT_EXEC, which is prohibited by
+  // default on OpenBSD. The header carries no sections; it is a pure marker.
+  if (Config->ZWxneeded)
+    AddHdr(PT_OPENBSD_WXNEEDED, PF_X);
+  // Create one PT_NOTE per a group of contiguous .note sections.
+  PhdrEntry *Note = nullptr;
+  for (OutputSection *Sec : OutputSections) {
+    if (Sec->Type == SHT_NOTE && (Sec->Flags & SHF_ALLOC)) {
+      if (!Note || Sec->LMAExpr)
+        Note = AddHdr(PT_NOTE, PF_R);
+      Note->add(Sec);
+    } else {
+      Note = nullptr;
+    }
+  }
+  return Ret;
+// On ARM, appends a PT_ARM_EXIDX program header covering the first output
+// section of type SHT_ARM_EXIDX to Phdrs. Does nothing for other machines or
+// when there is no such section.
+template <class ELFT>
+void Writer<ELFT>::addPtArmExid(std::vector<PhdrEntry *> &Phdrs) {
+  if (Config->EMachine != EM_ARM)
+    return;
+  auto IsExidx = [](OutputSection *Cmd) {
+    return Cmd->Type == SHT_ARM_EXIDX;
+  };
+  auto Found = llvm::find_if(OutputSections, IsExidx);
+  if (Found == OutputSections.end())
+    return;
+  // PT_ARM_EXIDX is the ARM EHABI equivalent of PT_GNU_EH_FRAME
+  PhdrEntry *ARMExidx = make<PhdrEntry>(PT_ARM_EXIDX, PF_R);
+  ARMExidx->add(*Found);
+  Phdrs.push_back(ARMExidx);
+// The first section of each PT_LOAD, the first section in PT_GNU_RELRO and the
+// first section after PT_GNU_RELRO have to be page aligned so that the dynamic
+// linker can set the permissions. Sections with an AddrExpr are left alone.
+template <class ELFT> void Writer<ELFT>::fixSectionAlignments() {
+ auto PageAlign = [](OutputSection *Cmd) {
+ if (Cmd && !Cmd->AddrExpr)
+ Cmd->AddrExpr = [=] {
+ return alignTo(Script->getDot(), Config->MaxPageSize);
+ };
+ };
+ for (const PhdrEntry *P : Phdrs)
+ if (P->p_type == PT_LOAD && P->FirstSec)
+ PageAlign(P->FirstSec);
+ for (const PhdrEntry *P : Phdrs) {
+ if (P->p_type != PT_GNU_RELRO)
+ continue;
+ if (P->FirstSec)
+ PageAlign(P->FirstSec);
+ // Find the first section after PT_GNU_RELRO. If it needs a PT_LOAD, we
+ // have to align it to a page boundary.
+ auto End = OutputSections.end();
+ auto I = std::find(OutputSections.begin(), End, P->LastSec);
+ if (I == End || (I + 1) == End)
+ continue;
+ OutputSection *Cmd = (*(I + 1));
+ if (needsPtLoad(Cmd))
+ PageAlign(Cmd);
+ }
+// Compute an in-file position for a given section. The file offset must be
+// congruent to its virtual address modulo the page size, so that the loader
+// can load executables without any address adjustment.
+static uint64_t computeFileOffset(OutputSection *OS, uint64_t Off) {
+ // File offsets are not significant for .bss sections. By convention, we keep
+ // section offsets monotonically increasing rather than setting to zero.
+ if (OS->Type == SHT_NOBITS)
+ return Off;
+ // If the section is not in a PT_LOAD, we just have to align it.
+ if (!OS->PtLoad)
+ return alignTo(Off, OS->Alignment);
+ // The first section in a PT_LOAD has to have congruent offset and address
+ // modulo the page size.
+ OutputSection *First = OS->PtLoad->FirstSec;
+ if (OS == First) {
+ uint64_t Alignment = std::max<uint64_t>(OS->Alignment, Config->MaxPageSize);
+ return alignTo(Off, Alignment, OS->Addr);
+ }
+ // If two sections share the same PT_LOAD the file offset is calculated
+ // using this formula: Off2 = Off1 + (VA2 - VA1).
+ return First->Offset + OS->Addr - First->Addr;
+// Stores the file offset computed for OS from the candidate offset Off and
+// returns the position right behind the section. SHT_NOBITS sections do not
+// advance the position.
+static uint64_t setFileOffset(OutputSection *OS, uint64_t Off) {
+  uint64_t Start = computeFileOffset(OS, Off);
+  OS->Offset = Start;
+  uint64_t Occupied = OS->Type == SHT_NOBITS ? 0 : OS->Size;
+  return Start + Occupied;
+// Assigns file offsets to the SHF_ALLOC output sections only and sets FileSize
+// to the end of the last one, rounded up to Config->Wordsize.
+template <class ELFT> void Writer<ELFT>::assignFileOffsetsBinary() {
+  uint64_t End = 0;
+  for (OutputSection *Sec : OutputSections) {
+    if (!(Sec->Flags & SHF_ALLOC))
+      continue;
+    End = setFileOffset(Sec, End);
+  }
+  FileSize = alignTo(End, Config->Wordsize);
+// Formats the Len bytes starting at Addr as an inclusive hexadecimal range,
+// "[0x<first>, 0x<last>]".
+static std::string rangeToString(uint64_t Addr, uint64_t Len) {
+  uint64_t Last = Addr + Len - 1;
+  return "[0x" + utohexstr(Addr) + ", 0x" + utohexstr(Last) + "]";
+// Assign file offsets to the headers and output sections and set FileSize.
+template <class ELFT> void Writer<ELFT>::assignFileOffsets() {
+ uint64_t Off = 0;
+ Off = setFileOffset(Out::ElfHeader, Off);
+ Off = setFileOffset(Out::ProgramHeaders, Off);
+ PhdrEntry *LastRX = nullptr;
+ for (PhdrEntry *P : Phdrs)
+ if (P->p_type == PT_LOAD && (P->p_flags & PF_X))
+ LastRX = P;
+ for (OutputSection *Sec : OutputSections) {
+ Off = setFileOffset(Sec, Off);
+ if (Script->HasSectionsCommand)
+ continue;
+ // If this is the last section of the last executable PT_LOAD segment,
+ // align the offset of the following section to the page size to avoid
+ // loading non-segment parts of the file.
+ if (LastRX && LastRX->LastSec == Sec)
+ Off = alignTo(Off, Target->PageSize);
+ }
+ SectionHeaderOff = alignTo(Off, Config->Wordsize);
+ FileSize = SectionHeaderOff + (OutputSections.size() + 1) * sizeof(Elf_Shdr);
+ // Our logic assumes that sections have rising VA within the same segment.
+ // With use of linker scripts it is possible to violate this rule and get file
+ // offset overlaps or overflows. That should never happen with a valid script
+ // which does not move the location counter backwards and usually scripts do
+ // not do that. Unfortunately, there are apps in the wild, for example, Linux
+ // kernel, which control segment distribution explicitly and move the counter
+ // backwards, so we have to allow doing that to support linking them. We
+ // perform non-critical checks for overlaps in checkSections(), but here
+ // we want to prevent file size overflows because it would crash the linker.
+ for (OutputSection *Sec : OutputSections) {
+ if (Sec->Type == SHT_NOBITS)
+ continue;
+ if ((Sec->Offset > FileSize) || (Sec->Offset + Sec->Size > FileSize))
+ error("unable to place section " + Sec->Name + " at file offset " +
+ rangeToString(Sec->Offset, Sec->Size) +
+ "; check your linker script for overflows");
+ }
+// Finalize the program headers by filling in their offset, address, size and
+// alignment fields. Call this after file offsets and VAs have been assigned.
+template <class ELFT> void Writer<ELFT>::setPhdrs() {
+ for (PhdrEntry *P : Phdrs) {
+ OutputSection *First = P->FirstSec;
+ OutputSection *Last = P->LastSec;
+ if (First) {
+ P->p_filesz = Last->Offset - First->Offset;
+ if (Last->Type != SHT_NOBITS)
+ P->p_filesz += Last->Size;
+ P->p_memsz = Last->Addr + Last->Size - First->Addr;
+ P->p_offset = First->Offset;
+ P->p_vaddr = First->Addr;
+ if (!P->HasLMA)
+ P->p_paddr = First->getLMA();
+ }
+ if (P->p_type == PT_LOAD) {
+ P->p_align = std::max<uint64_t>(P->p_align, Config->MaxPageSize);
+ } else if (P->p_type == PT_GNU_RELRO) {
+ P->p_align = 1;
+ // The glibc dynamic loader rounds the size down, so we need to round up
+ // to protect the last page. This is a no-op on FreeBSD which always
+ // rounds up.
+ P->p_memsz = alignTo(P->p_memsz, Target->PageSize);
+ }
+ if (P->p_type == PT_TLS && P->p_memsz) {
+ if (!Config->Shared &&
+ (Config->EMachine == EM_ARM || Config->EMachine == EM_AARCH64)) {
+ // On ARM/AArch64, reserve extra space (8 words) between the thread
+ // pointer and an executable's TLS segment by overaligning the segment.
+ // This reservation is needed for backwards compatibility with Android's
+ // TCB, which allocates several slots after the thread pointer (e.g.
+ // TLS_SLOT_STACK_GUARD==5). For simplicity, this overalignment is also
+ // done on other operating systems.
+ P->p_align = std::max<uint64_t>(P->p_align, Config->Wordsize * 8);
+ }
+ // The TLS pointer goes after PT_TLS for variant 2 targets. At least glibc
+ // will align it, so round up the size to make sure the offsets are
+ // correct.
+ P->p_memsz = alignTo(P->p_memsz, P->p_align);
+ }
+ }
+// A helper struct for checkOverlap: a section paired with its start position.
+namespace {
+struct SectionOffset {
+ OutputSection *Sec;
+ uint64_t Offset;
+} // namespace
+// Check whether sections overlap for a specific address range (file offsets,
+// load and virtual addresses). Sorts Sections by Offset as a side effect.
+static void checkOverlap(StringRef Name, std::vector<SectionOffset> &Sections,
+ bool IsVirtualAddr) {
+ llvm::sort(Sections, [=](const SectionOffset &A, const SectionOffset &B) {
+ return A.Offset < B.Offset;
+ });
+ // Finding overlap is easy given a vector is sorted by start position.
+ // If an element starts before the end of the previous element, they overlap.
+ for (size_t I = 1, End = Sections.size(); I < End; ++I) {
+ SectionOffset A = Sections[I - 1];
+ SectionOffset B = Sections[I];
+ if (B.Offset >= A.Offset + A.Sec->Size)
+ continue;
+ // If both sections are in OVERLAY we allow the overlapping of virtual
+ // addresses, because it is what OVERLAY was designed for.
+ if (IsVirtualAddr && A.Sec->InOverlay && B.Sec->InOverlay)
+ continue;
+ errorOrWarn("section " + A.Sec->Name + " " + Name +
+ " range overlaps with " + B.Sec->Name + "\n>>> " + A.Sec->Name +
+ " range is " + rangeToString(A.Offset, A.Sec->Size) + "\n>>> " +
+ B.Sec->Name + " range is " +
+ rangeToString(B.Offset, B.Sec->Size));
+ }
+// Check for overlapping sections and address overflows.
+// In this function we check that none of the output sections have overlapping
+// file offsets. For SHF_ALLOC sections we also check that the load address
+// ranges and the virtual address ranges don't overlap.
+template <class ELFT> void Writer<ELFT>::checkSections() {
+ // First, check that section's VAs fit in available address space for target.
+ for (OutputSection *OS : OutputSections)
+ if ((OS->Addr + OS->Size < OS->Addr) ||
+ (!ELFT::Is64Bits && OS->Addr + OS->Size > UINT32_MAX))
+ errorOrWarn("section " + OS->Name + " at 0x" + utohexstr(OS->Addr) +
+ " of size 0x" + utohexstr(OS->Size) +
+ " exceeds available address space");
+ // Check for overlapping file offsets. In this case we need to skip any
+ // section marked as SHT_NOBITS. These sections don't actually occupy space in
+ // the file so Sec->Offset + Sec->Size can overlap with others. If --oformat
+ // binary is specified, only SHF_ALLOC sections are added to the output
+ // file so we skip any non-allocated sections in that case.
+ std::vector<SectionOffset> FileOffs;
+ for (OutputSection *Sec : OutputSections)
+ if (Sec->Size > 0 && Sec->Type != SHT_NOBITS &&
+ (!Config->OFormatBinary || (Sec->Flags & SHF_ALLOC)))
+ FileOffs.push_back({Sec, Sec->Offset});
+ checkOverlap("file", FileOffs, false);
+ // When linking with -r there is no need to check for overlapping virtual/load
+ // addresses since those addresses will only be assigned when the final
+ // executable/shared object is created.
+ if (Config->Relocatable)
+ return;
+ // Checking for overlapping virtual and load addresses only needs to take
+ // into account SHF_ALLOC sections since others will not be loaded.
+ // Furthermore, we also need to skip SHF_TLS sections since these will be
+ // mapped to other addresses at runtime and can therefore have overlapping
+ // ranges in the file.
+ std::vector<SectionOffset> VMAs;
+ for (OutputSection *Sec : OutputSections)
+ if (Sec->Size > 0 && (Sec->Flags & SHF_ALLOC) && !(Sec->Flags & SHF_TLS))
+ VMAs.push_back({Sec, Sec->Addr});
+ checkOverlap("virtual address", VMAs, true);
+ // Finally, check that the load addresses don't overlap. This will usually be
+ // the same as the virtual addresses but can be different when using a linker
+ // script with AT().
+ std::vector<SectionOffset> LMAs;
+ for (OutputSection *Sec : OutputSections)
+ if (Sec->Size > 0 && (Sec->Flags & SHF_ALLOC) && !(Sec->Flags & SHF_TLS))
+ LMAs.push_back({Sec, Sec->getLMA()});
+ checkOverlap("load address", LMAs, false);
+// Returns the entry point address. The candidates are tried in this order:
+// 1. the '-e' entry command-line option;
+// 2. the ENTRY(symbol) command in a linker control script;
+// 3. the value of the symbol _start, if present;
+// 4. the number represented by the entry symbol, if it is a number;
+// 5. the address of the first byte of the .text section, if present;
+// 6. the address 0.
+// Cases 5 and 6 emit a warning if Config->WarnMissingEntry is set.
+static uint64_t getEntryAddr() {
+  // Case 1, 2 or 3
+  if (Symbol *Sym = Symtab->find(Config->Entry))
+    return Sym->getVA();
+  // Case 4
+  uint64_t Parsed;
+  if (to_integer(Config->Entry, Parsed))
+    return Parsed;
+  OutputSection *Text = findSection(".text");
+  if (!Text) {
+    // Case 6
+    if (Config->WarnMissingEntry)
+      warn("cannot find entry symbol " + Config->Entry +
+           "; not setting start address");
+    return 0;
+  }
+  // Case 5
+  if (Config->WarnMissingEntry)
+    warn("cannot find entry symbol " + Config->Entry + "; defaulting to 0x" +
+         utohexstr(Text->Addr));
+  return Text->Addr;
+// Returns the value for the e_type header field: ET_DYN if Config->Pic is
+// set, otherwise ET_REL if Config->Relocatable is set, otherwise ET_EXEC.
+static uint16_t getELFType() {
+  if (Config->Pic)
+    return ET_DYN;
+  return Config->Relocatable ? ET_REL : ET_EXEC;
+// Returns the value for the EI_ABIVERSION byte of the ELF identification.
+static uint8_t getAbiVersion() {
+  // MIPS non-PIC executable gets ABI version 1.
+  bool IsMipsExec = Config->EMachine == EM_MIPS && getELFType() == ET_EXEC;
+  bool IsCpicOnly =
+      (Config->EFlags & (EF_MIPS_PIC | EF_MIPS_CPIC)) == EF_MIPS_CPIC;
+  return (IsMipsExec && IsCpicOnly) ? 1 : 0;
+// Writes the ELF file header, the program header table and the section header
+// table into the output buffer.
+template <class ELFT> void Writer<ELFT>::writeHeader() {
+ uint8_t *Buf = Buffer->getBufferStart();
+ // For executable segments, the trap instructions are written before writing
+ // the header. Setting Elf header bytes to zero ensures that any unused bytes
+ // in header are zero-cleared, instead of having trap instructions.
+ memset(Buf, 0, sizeof(Elf_Ehdr));
+ memcpy(Buf, "\177ELF", 4);
+ // Write the ELF header.
+ auto *EHdr = reinterpret_cast<Elf_Ehdr *>(Buf);
+ EHdr->e_ident[EI_CLASS] = Config->Is64 ? ELFCLASS64 : ELFCLASS32;
+ EHdr->e_ident[EI_DATA] = Config->IsLE ? ELFDATA2LSB : ELFDATA2MSB;
+ EHdr->e_ident[EI_VERSION] = EV_CURRENT;
+ EHdr->e_ident[EI_OSABI] = Config->OSABI;
+ EHdr->e_ident[EI_ABIVERSION] = getAbiVersion();
+ EHdr->e_type = getELFType();
+ EHdr->e_machine = Config->EMachine;
+ EHdr->e_version = EV_CURRENT;
+ EHdr->e_entry = getEntryAddr();
+ EHdr->e_shoff = SectionHeaderOff;
+ EHdr->e_flags = Config->EFlags;
+ EHdr->e_ehsize = sizeof(Elf_Ehdr);
+ EHdr->e_phnum = Phdrs.size();
+ EHdr->e_shentsize = sizeof(Elf_Shdr);
+ // For relocatable output, e_phoff and e_phentsize keep the zero written by
+ // the memset above.
+ if (!Config->Relocatable) {
+ EHdr->e_phoff = sizeof(Elf_Ehdr);
+ EHdr->e_phentsize = sizeof(Elf_Phdr);
+ }
+ // Write the program header table.
+ // Entries are emitted consecutively starting at Buf + e_phoff, one per
+ // element of Phdrs.
+ auto *HBuf = reinterpret_cast<Elf_Phdr *>(Buf + EHdr->e_phoff);
+ for (PhdrEntry *P : Phdrs) {
+ HBuf->p_type = P->p_type;
+ HBuf->p_flags = P->p_flags;
+ HBuf->p_offset = P->p_offset;
+ HBuf->p_vaddr = P->p_vaddr;
+ HBuf->p_paddr = P->p_paddr;
+ HBuf->p_filesz = P->p_filesz;
+ HBuf->p_memsz = P->p_memsz;
+ HBuf->p_align = P->p_align;
+ ++HBuf;
+ }
+ // Write the section header table.
+ //
+ // The ELF header can only store numbers up to SHN_LORESERVE in the e_shnum
+ // and e_shstrndx fields. When the value of one of these fields exceeds
+ // SHN_LORESERVE ELF requires us to put sentinel values in the ELF header and
+ // use fields in the section header at index 0 to store
+ // the value. The sentinel values and fields are:
+ // e_shnum = 0, SHdrs[0].sh_size = number of sections.
+ // e_shstrndx = SHN_XINDEX, SHdrs[0].sh_link = .shstrtab section index.
+ auto *SHdrs = reinterpret_cast<Elf_Shdr *>(Buf + EHdr->e_shoff);
+ // The extra 1 counts the section header at index 0, which the loop at the
+ // end of this function skips by pre-incrementing SHdrs.
+ size_t Num = OutputSections.size() + 1;
+ if (Num >= SHN_LORESERVE)
+ SHdrs->sh_size = Num;
+ else
+ EHdr->e_shnum = Num;
+ uint32_t StrTabIndex = In.ShStrTab->getParent()->SectionIndex;
+ if (StrTabIndex >= SHN_LORESERVE) {
+ SHdrs->sh_link = StrTabIndex;
+ EHdr->e_shstrndx = SHN_XINDEX;
+ } else {
+ EHdr->e_shstrndx = StrTabIndex;
+ }
+ // Each output section writes its own header into the next slot, starting at
+ // index 1.
+ for (OutputSection *Sec : OutputSections)
+ Sec->writeHeaderTo<ELFT>(++SHdrs);
+// Open a result file.
+//
+// Reports an error and leaves Buffer unassigned when FileSize exceeds
+// INT64_MAX (Config->Is64) or UINT32_MAX (otherwise), or when the output
+// buffer cannot be created.
+template <class ELFT> void Writer<ELFT>::openFile() {
+ uint64_t MaxSize = Config->Is64 ? INT64_MAX : UINT32_MAX;
+ if (MaxSize < FileSize) {
+ error("output file too large: " + Twine(FileSize) + " bytes");
+ return;
+ }
+ // NOTE(review): presumably removes a pre-existing output file before the
+ // new buffer is created -- confirm against unlinkAsync's definition.
+ unlinkAsync(Config->OutputFile);
+ // Every output except a relocatable one requests F_executable.
+ unsigned Flags = 0;
+ if (!Config->Relocatable)
+ Flags = FileOutputBuffer::F_executable;
+ Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
+ FileOutputBuffer::create(Config->OutputFile, FileSize, Flags);
+ if (!BufferOrErr)
+ error("failed to open " + Config->OutputFile + ": " +
+ llvm::toString(BufferOrErr.takeError()));
+ else
+ Buffer = std::move(*BufferOrErr);
+// Writes the contents of every output section that has SHF_ALLOC set to its
+// file offset in the output buffer; sections without SHF_ALLOC are skipped.
+// NOTE(review): the name suggests this is the raw binary output path --
+// confirm against the caller.
+template <class ELFT> void Writer<ELFT>::writeSectionsBinary() {
+ uint8_t *Buf = Buffer->getBufferStart();
+ for (OutputSection *Sec : OutputSections)
+ if (Sec->Flags & SHF_ALLOC)
+ Sec->writeTo<ELFT>(Buf + Sec->Offset);
+// Fills [I, End) with consecutive 4-byte copies of Target->TrapInstr. A
+// trailing remainder shorter than 4 bytes is left untouched.
+static void fillTrap(uint8_t *I, uint8_t *End) {
+ for (; I + 4 <= End; I += 4)
+ memcpy(I, &Target->TrapInstr, 4);
+// Fill the last page of executable segments with trap instructions
+// instead of leaving them as zero. Even though it is not required by any
+// standard, it is in general a good thing to do for security reasons.
+// We'll leave other pages in segments as-is because the rest will be
+// overwritten by output sections.
+template <class ELFT> void Writer<ELFT>::writeTrapInstr() {
+ // Nothing is filled or resized when the linker script has a SECTIONS
+ // command.
+ // NOTE(review): presumably because the layout is then user-controlled --
+ // confirm.
+ if (Script->HasSectionsCommand)
+ return;
+ // Fill the last page.
+ // The filled range runs from the page boundary at or below the segment's
+ // file end up to the page boundary at or above it.
+ uint8_t *Buf = Buffer->getBufferStart();
+ for (PhdrEntry *P : Phdrs)
+ if (P->p_type == PT_LOAD && (P->p_flags & PF_X))
+ fillTrap(Buf + alignDown(P->p_offset + P->p_filesz, Target->PageSize),
+ Buf + alignTo(P->p_offset + P->p_filesz, Target->PageSize));
+ // Round up the file size of the last segment to the page boundary iff it is
+ // an executable segment to ensure that other tools don't accidentally
+ // trim the instruction padding (e.g. when stripping the file).
+ // "Last" is the final PT_LOAD entry in Phdrs order.
+ PhdrEntry *Last = nullptr;
+ for (PhdrEntry *P : Phdrs)
+ if (P->p_type == PT_LOAD)
+ Last = P;
+ if (Last && (Last->p_flags & PF_X))
+ Last->p_memsz = Last->p_filesz = alignTo(Last->p_filesz, Target->PageSize);
+// Write section contents to a mmap'ed file.
+//
+// Sections are written in three passes: SHT_REL/SHT_RELA sections first, then
+// every other section except the one holding .eh_frame_hdr, and finally the
+// .eh_frame_hdr section (if it exists and is non-empty).
+template <class ELFT> void Writer<ELFT>::writeSections() {
+ uint8_t *Buf = Buffer->getBufferStart();
+ OutputSection *EhFrameHdr = nullptr;
+ if (In.EhFrameHdr && !In.EhFrameHdr->empty())
+ EhFrameHdr = In.EhFrameHdr->getParent();
+ // In -r or -emit-relocs mode, write the relocation sections first as in
+ // Elf_Rel targets we might find out that we need to modify the relocated
+ // section while doing it.
+ for (OutputSection *Sec : OutputSections)
+ if (Sec->Type == SHT_REL || Sec->Type == SHT_RELA)
+ Sec->writeTo<ELFT>(Buf + Sec->Offset);
+ for (OutputSection *Sec : OutputSections)
+ if (Sec != EhFrameHdr && Sec->Type != SHT_REL && Sec->Type != SHT_RELA)
+ Sec->writeTo<ELFT>(Buf + Sec->Offset);
+ // The .eh_frame_hdr depends on .eh_frame section contents, therefore
+ // it should be written after .eh_frame is written.
+ if (EhFrameHdr)
+ EhFrameHdr->writeTo<ELFT>(Buf + EhFrameHdr->Offset);
+// Hands the whole output buffer, [Start, Start + FileSize), to
+// In.BuildId->writeBuildId(). Does nothing when there is no build-id section
+// or it has no parent output section.
+template <class ELFT> void Writer<ELFT>::writeBuildId() {
+ if (!In.BuildId || !In.BuildId->getParent())
+ return;
+ // Compute a hash of all sections of the output file.
+ uint8_t *Start = Buffer->getBufferStart();
+ uint8_t *End = Start + FileSize;
+ In.BuildId->writeBuildId({Start, End});
+// Explicit instantiations of writeResult for the four ELF type parameters
+// used here: ELF32LE, ELF32BE, ELF64LE and ELF64BE.
+template void elf::writeResult<ELF32LE>();
+template void elf::writeResult<ELF32BE>();
+template void elf::writeResult<ELF64LE>();
+template void elf::writeResult<ELF64BE>();
diff --git a/contrib/llvm/tools/lld/ELF/Writer.h b/contrib/llvm/tools/lld/ELF/Writer.h
new file mode 100644
index 000000000000..7806f824c58f
--- /dev/null
+++ b/contrib/llvm/tools/lld/ELF/Writer.h
@@ -0,0 +1,64 @@
+//===- Writer.h -------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include <cstdint>
+#include <memory>
+namespace lld {
+namespace elf {
+class InputFile;
+class OutputSection;
+class InputSectionBase;
+template <class ELFT> class ObjFile;
+class SymbolTable;
+template <class ELFT> void writeResult();
+// This describes a program header entry.
+// Each contains type, access flags and range of output sections that will be
+// placed in it.
+struct PhdrEntry {
+ // Only the type and flags are taken from the arguments; every other member
+ // starts at its zero/null default.
+ PhdrEntry(unsigned Type, unsigned Flags) : p_type(Type), p_flags(Flags) {}
+ void add(OutputSection *Sec);
+ // The p_* members are copied field by field into the output file's
+ // Elf_Phdr by Writer<ELFT>::writeHeader().
+ uint64_t p_paddr = 0;
+ uint64_t p_vaddr = 0;
+ uint64_t p_memsz = 0;
+ uint64_t p_filesz = 0;
+ uint64_t p_offset = 0;
+ uint32_t p_align = 0;
+ uint32_t p_type = 0;
+ uint32_t p_flags = 0;
+ // First and last output section of the range placed in this entry.
+ OutputSection *FirstSec = nullptr;
+ OutputSection *LastSec = nullptr;
+ // NOTE(review): presumably load-address (LMA) bookkeeping for linker
+ // scripts using AT(); not used in the code visible here -- confirm.
+ bool HasLMA = false;
+ uint64_t LMAOffset = 0;
+void addReservedSymbols();
+llvm::StringRef getOutputSectionName(const InputSectionBase *S);
+template <class ELFT> uint32_t calcMipsEFlags();
+uint8_t getMipsFpAbiFlag(uint8_t OldFlag, uint8_t NewFlag,
+ llvm::StringRef FileName);
+bool isMipsN32Abi(const InputFile *F);
+bool isMicroMips();
+bool isMipsR6();
+} // namespace elf
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/FREEBSD-Xlist b/contrib/llvm/tools/lld/FREEBSD-Xlist
new file mode 100644
index 000000000000..4330bf30bcc4
--- /dev/null
+++ b/contrib/llvm/tools/lld/FREEBSD-Xlist
@@ -0,0 +1,7 @@
+# $FreeBSD$
diff --git a/contrib/llvm/tools/lld/LICENSE.TXT b/contrib/llvm/tools/lld/LICENSE.TXT
new file mode 100644
index 000000000000..09b8b616c227
--- /dev/null
+++ b/contrib/llvm/tools/lld/LICENSE.TXT
@@ -0,0 +1,62 @@
+lld License
+University of Illinois/NCSA
+Open Source License
+Copyright (c) 2011-2019 by the contributors listed in CREDITS.TXT
+All rights reserved.
+Developed by:
+ LLVM Team
+ University of Illinois at Urbana-Champaign
+ http://llvm.org
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal with
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimers.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimers in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the names of the LLVM Team, University of Illinois at
+ Urbana-Champaign, nor the names of its contributors may be used to
+ endorse or promote products derived from this Software without specific
+ prior written permission.
+The lld software contains code written by third parties. Such software will
+have its own individual LICENSE.TXT file in the directory in which it appears.
+This file will describe the copyrights, license, and restrictions which apply
+to that code.
+The disclaimer of warranty in the University of Illinois Open Source License
+applies to all code in the lld Distribution, and nothing in any of the
+other licenses gives permission to use the names of the LLVM Team or the
+University of Illinois to endorse or promote products derived from this
+The following pieces of software have additional or alternate copyrights,
+licenses, and/or restrictions:
+Program Directory
+------- ---------
+<none yet>
diff --git a/contrib/llvm/tools/lld/README.md b/contrib/llvm/tools/lld/README.md
new file mode 100644
index 000000000000..3b8cd7a14948
--- /dev/null
+++ b/contrib/llvm/tools/lld/README.md
@@ -0,0 +1,19 @@
+LLVM Linker (lld)
+This directory and its subdirectories contain source code for the LLVM Linker, a
+modular cross platform linker which is built as part of the LLVM compiler
+infrastructure project.
+lld is open source software. You may freely distribute it under the terms of
+the license agreement found in LICENSE.TXT.
+In order to make sure various developers can evaluate patches over the
+same tests, we create a collection of self contained programs.
+It is hosted at https://s3-us-west-2.amazonaws.com/linker-tests/lld-speed-test.tar.xz
+The current sha256 is 10eec685463d5a8bbf08d77f4ca96282161d396c65bd97dc99dbde644a31610f.
diff --git a/contrib/llvm/tools/lld/docs/AtomLLD.rst b/contrib/llvm/tools/lld/docs/AtomLLD.rst
new file mode 100644
index 000000000000..614e568d1997
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/AtomLLD.rst
@@ -0,0 +1,62 @@
+ATOM-based lld
+Note: this document discusses the Mach-O port of LLD. For ELF and COFF,
+see :doc:`index`.
+ATOM-based lld is a new set of modular code for creating linker tools.
+Currently it supports Mach-O.
+* End-User Features:
+ * Compatible with existing linker options
+ * Reads standard Object Files
+ * Writes standard Executable Files
+ * Remove clang's reliance on "the system linker"
+ * Uses the LLVM `"UIUC" BSD-Style license`__.
+* Applications:
+ * Modular design
+ * Support cross linking
+ * Easy to add new CPU support
+ * Can be built as static tool or library
+* Design and Implementation:
+ * Extensive unit tests
+ * Internal linker model can be dumped/read to textual format
+ * Additional linking features can be plugged in as "passes"
+ * OS specific and CPU specific code factored out
+Why a new linker?
+The fact that clang relies on whatever linker tool you happen to have installed
+means that clang has been very conservative adopting features which require a
+recent linker.
+In the same way that the MC layer of LLVM has removed clang's reliance on the
+system assembler tool, the lld project will remove clang's reliance on the
+system linker tool.
+.. toctree::
+ :maxdepth: 2
+ design
+ getting_started
+ development
+ open_projects
+ sphinx_intro
+Indices and tables
+* :ref:`genindex`
+* :ref:`search`
+__ http://llvm.org/docs/DeveloperPolicy.html#license
diff --git a/contrib/llvm/tools/lld/docs/CMakeLists.txt b/contrib/llvm/tools/lld/docs/CMakeLists.txt
new file mode 100644
index 000000000000..112ce35e8cf4
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/CMakeLists.txt
@@ -0,0 +1,8 @@
+ include(AddSphinxTarget)
+ add_sphinx_target(html lld)
+ endif()
+ endif()
diff --git a/contrib/llvm/tools/lld/docs/Driver.rst b/contrib/llvm/tools/lld/docs/Driver.rst
new file mode 100644
index 000000000000..4ee6ce0c985f
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/Driver.rst
@@ -0,0 +1,82 @@
+Note: this document discusses the Mach-O port of LLD. For ELF and COFF,
+see :doc:`index`.
+.. contents::
+ :local:
+This document describes the lld driver. The purpose of this document is to
+describe both the motivation and design goals for the driver, as well as details
+of the internal implementation.
+The lld driver is designed to support a number of different command line
+interfaces. The main interfaces we plan to support are binutils' ld, Apple's
+ld, and Microsoft's link.exe.
+Each of these different interfaces is referred to as a flavor. There is also an
+extra flavor "core" which is used to exercise the core functionality of the
+linker in the test suite.
+* gnu
+* darwin
+* link
+* core
+Selecting a Flavor
+There are two different ways to tell lld which flavor to be. They are checked in
+order, so the second overrides the first. The first is to symlink :program:`lld`
+as :program:`lld-{flavor}` or just :program:`{flavor}`. You can also specify
+it as the first command line argument using ``-flavor``::
+ $ lld -flavor gnu
+There is a shortcut for ``-flavor core`` as ``-core``.
+Adding an Option to an existing Flavor
+#. Add the option to the desired :file:`lib/Driver/{flavor}Options.td`.
+#. Add to :cpp:class:`lld::FlavorLinkingContext` a getter and setter method
+ for the option.
+#. Modify :cpp:func:`lld::FlavorDriver::parse` in
+ :file:`lib/Driver/{Flavor}Driver.cpp` to call the targetInfo setter
+ corresponding to the option.
+#. Modify {Flavor}Reader and {Flavor}Writer to use the new targetInfo option.
+Adding a Flavor
+#. Add an entry for the flavor in :file:`include/lld/Common/Driver.h` to
+ :cpp:class:`lld::UniversalDriver::Flavor`.
+#. Add an entry in :file:`lib/Driver/UniversalDriver.cpp` to
+ :cpp:func:`lld::Driver::strToFlavor` and
+ :cpp:func:`lld::UniversalDriver::link`.
+ This allows the flavor to be selected via symlink and `-flavor`.
+#. Add a tablegen file called :file:`lib/Driver/{flavor}Options.td` that
+ describes the options. If the options are a superset of another driver, that
+ driver's td file can simply be included. The :file:`{flavor}Options.td` file
+ must also be added to :file:`lib/Driver/CMakeLists.txt`.
+#. Add a ``{flavor}Driver`` as a subclass of :cpp:class:`lld::Driver`
+ in :file:`lib/Driver/{flavor}Driver.cpp`.
diff --git a/contrib/llvm/tools/lld/docs/NewLLD.rst b/contrib/llvm/tools/lld/docs/NewLLD.rst
new file mode 100644
index 000000000000..79bdf90c6ccd
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/NewLLD.rst
@@ -0,0 +1,309 @@
+The ELF, COFF and Wasm Linkers
+The ELF Linker as a Library
+You can embed LLD to your program by linking against it and calling the linker's
+entry point function lld::elf::link.
+The current policy is that it is your responsibility to give trustworthy object
+files. The function is guaranteed to return as long as you do not pass corrupted
+or malicious object files. A corrupted file could cause a fatal error or SEGV.
+That being said, you don't need to worry too much about it if you create object
+files in the usual way and give them to the linker. It is naturally expected to
+work, or otherwise it's a linker's bug.
+We will describe the design of the linkers in the rest of the document.
+Key Concepts
+Linkers are fairly large pieces of software.
+There are many design choices you have to make to create a complete linker.
+This is a list of design choices we've made for ELF and COFF LLD.
+We believe that these high-level design choices achieved a right balance
+between speed, simplicity and extensibility.
+* Implement as native linkers
+ We implemented the linkers as native linkers for each file format.
+ The linkers share the same design but share very little code.
+ Sharing code makes sense if the benefit is worth its cost.
+ In our case, the object formats are different enough that we thought the layer
+ to abstract the differences wouldn't be worth its complexity and run-time
+ cost. Elimination of the abstract layer has greatly simplified the
+ implementation.
+* Speed by design
+ One of the most important things in achieving high performance is to
+ do less rather than do it efficiently.
+ Therefore, the high-level design matters more than local optimizations.
+ Since we are trying to create a high-performance linker,
+ it is very important to keep the design as efficient as possible.
+ Broadly speaking, we do not do anything until we have to do it.
+ For example, we do not read section contents or relocations
+ until we need them to continue linking.
+ When we need to do some costly operation (such as looking up
+ a hash table for each symbol), we do it only once.
+ We obtain a handle (which is typically just a pointer to actual data)
+ on the first operation and use it throughout the process.
+* Efficient archive file handling
+ LLD's handling of archive files (the files with ".a" file extension) is
+ different from the traditional Unix linkers and similar to Windows linkers.
+ We'll describe how the traditional Unix linker handles archive files, what the
+ problem is, and how LLD approached the problem.
+ The traditional Unix linker maintains a set of undefined symbols during
+ linking. The linker visits each file in the order as they appeared in the
+ command line until the set becomes empty. What the linker would do depends on
+ file type.
+ - If the linker visits an object file, the linker links object files to the
+ result, and undefined symbols in the object file are added to the set.
+ - If the linker visits an archive file, it checks for the archive file's
+ symbol table and extracts all object files that have definitions for any
+ symbols in the set.
+ This algorithm sometimes leads to a counter-intuitive behavior. If you give
+ archive files before object files, nothing will happen because when the linker
+ visits archives, there are no undefined symbols in the set. As a result, no
+ files are extracted from the first archive file, and the link is done at that
+ point because the set is empty after it visits one file.
+ You can fix the problem by reordering the files,
+ but that cannot fix the issue of mutually-dependent archive files.
+ Linking mutually-dependent archive files is tricky. You may specify the same
+ archive file multiple times to let the linker visit it more than once. Or,
+ you may use the special command line options, `--start-group` and
+ `--end-group`, to let the linker loop over the files between the options until
+ no new symbols are added to the set.
+ Visiting the same archive files multiple times makes the linker slower.
+ Here is how LLD approaches the problem. Instead of memorizing only undefined
+ symbols, we program LLD so that it memorizes all symbols. When it sees an
+ undefined symbol that can be resolved by extracting an object file from an
+ archive file it previously visited, it immediately extracts the file and links
+ it. It is doable because LLD does not forget symbols it has seen in archive
+ files.
+ We believe that LLD's way is efficient and easy to justify.
+ The semantics of LLD's archive handling are different from the traditional
+ Unix's. You can observe it if you carefully craft archive files to exploit
+ it. However, in reality, we don't know any program that cannot link with our
+ algorithm so far, so it's not going to cause trouble.
+Numbers You Want to Know
+To give you intuition about what kinds of data the linker is mainly working on,
+I'll give you the list of objects and their numbers LLD has to read and process
+in order to link a very large executable. In order to link Chrome with debug
+info, which is roughly 2 GB in output size, LLD reads
+- 17,000 files,
+- 1,800,000 sections,
+- 6,300,000 symbols, and
+- 13,000,000 relocations.
+LLD produces the 2 GB executable in 15 seconds.
+These numbers vary depending on your program, but in general,
+you have a lot of relocations and symbols for each file.
+If your program is written in C++, symbol names are likely to be
+pretty long because of name mangling.
+It is important to not waste time on relocations and symbols.
+In the above case, the total amount of symbol strings is 450 MB,
+and inserting all of them to a hash table takes 1.5 seconds.
+Therefore, if you casually add a hash table lookup for each symbol,
+it would slow down the linker by 10%. So, don't do that.
+On the other hand, you don't have to pursue efficiency
+when handling files.
+Important Data Structures
+We will describe the key data structures in LLD in this section. The linker can
+be understood as the interactions between them. Once you understand their
+functions, the code of the linker should look obvious to you.
+* Symbol
+ This class represents a symbol.
+ They are created for symbols in object files or archive files.
+ The linker creates linker-defined symbols as well.
+ There are basically three types of Symbols: Defined, Undefined, or Lazy.
+ - Defined symbols are for all symbols that are considered as "resolved",
+ including real defined symbols, COMDAT symbols, common symbols,
+ absolute symbols, linker-created symbols, etc.
+ - Undefined symbols represent undefined symbols, which need to be replaced by
+ Defined symbols by the resolver until the link is complete.
+ - Lazy symbols represent symbols we found in archive file headers
+ which can turn into Defined if we read archive members.
+ There's only one Symbol instance for each unique symbol name. This uniqueness
+ is guaranteed by the symbol table. As the resolver reads symbols from input
+ files, it replaces an existing Symbol with the "best" Symbol for its symbol
+ name using the placement new.
+ The above mechanism allows you to use pointers to Symbols as a very cheap way
+ to access name resolution results. Assume for example that you have a pointer
+ to an undefined symbol before name resolution. If the symbol is resolved to a
+ defined symbol by the resolver, the pointer will "automatically" point to the
+ defined symbol, because the undefined symbol the pointer pointed to will have
+ been replaced by the defined symbol in-place.
+* SymbolTable
+ SymbolTable is basically a hash table from strings to Symbols
+ with logic to resolve symbol conflicts. It resolves conflicts by symbol type.
+ - If we add Defined and Undefined symbols, the symbol table will keep the
+ former.
+ - If we add Defined and Lazy symbols, it will keep the former.
+ - If we add Lazy and Undefined, it will keep the former,
+ but it will also trigger the Lazy symbol to load the archive member
+ to actually resolve the symbol.
+* Chunk (COFF specific)
+ Chunk represents a chunk of data that will occupy space in an output.
+ Each regular section becomes a chunk.
+ Chunks created for common or BSS symbols are not backed by sections.
+ The linker may create chunks to append additional data to an output as well.
+ Chunks know about their size, how to copy their data to mmap'ed outputs,
+ and how to apply relocations to them.
+ Specifically, section-based chunks know how to read relocation tables
+ and how to apply them.
+* InputSection (ELF specific)
+ Since we have less synthesized data for ELF, we don't abstract slices of
+ input files as Chunks for ELF. Instead, we directly use the input section
+ as an internal data type.
+ InputSection knows about their size and how to copy themselves to
+ mmap'ed outputs, just like COFF Chunks.
+* OutputSection
+ OutputSection is a container of InputSections (ELF) or Chunks (COFF).
+ An InputSection or Chunk belongs to at most one OutputSection.
+There are mainly three actors in this linker.
+* InputFile
+ InputFile is a superclass of file readers.
+ We have a different subclass for each input file type,
+ such as regular object file, archive file, etc.
+ They are responsible for creating and owning Symbols and InputSections/Chunks.
+* Writer
+ The writer is responsible for writing file headers and InputSections/Chunks to
+ a file. It creates OutputSections, puts all InputSections/Chunks into them,
+ assigns unique, non-overlapping addresses and file offsets to them, and then
+ writes them down to a file.
+* Driver
+ The linking process is driven by the driver. The driver:
+ - processes command line options,
+ - creates a symbol table,
+ - creates an InputFile for each input file and puts all symbols within into
+ the symbol table,
+ - checks that there are no remaining undefined symbols,
+ - creates a writer,
+ - and passes the symbol table to the writer to write the result to a file.
+Link-Time Optimization
+LTO is implemented by handling LLVM bitcode files as object files.
+The linker resolves symbols in bitcode files normally. If all symbols
+are successfully resolved, it then runs LLVM passes
+with all bitcode files to convert them to one big regular ELF/COFF file.
+Finally, the linker replaces bitcode symbols with ELF/COFF symbols,
+so that they are linked as if they were in the native format from the beginning.
+The details are described in this document.
+ Short for Relative Virtual Address.
+ Windows executables or DLLs are not position-independent; they are
+ linked against a fixed address called an image base. RVAs are
+ offsets from an image base.
+ Default image bases are 0x140000000 for executables and 0x180000000
+ for DLLs. For example, when we are creating an executable, we assume
+ that the executable will be loaded at address 0x140000000 by the
+ loader, so we apply relocations accordingly. Result texts and data
+ will contain raw absolute addresses.
+* VA
+ Short for Virtual Address. For COFF, it is equivalent to RVA + image base.
+* Base relocations (COFF)
+ Relocation information for the loader. If the loader decides to map
+ an executable or a DLL to a different address than their image
+ bases, it fixes up binaries using information contained in the base
+ relocation table. A base relocation table consists of a list of
+ locations containing addresses. The loader adds a difference between
+ RVA and actual load address to all locations listed there.
+ Note that this run-time relocation mechanism is much simpler than ELF.
+ There's no PLT or GOT. Images are relocated as a whole just
+ by shifting entire images in memory by some offsets. Although doing
+ this breaks text sharing, I think this mechanism is not actually bad
+ on today's computers.
+* ICF
+ Short for Identical COMDAT Folding (COFF) or Identical Code Folding (ELF).
+ ICF is an optimization to reduce output size by merging read-only sections
+ by not only their names but by their contents. If two read-only sections
+ happen to have the same metadata, actual contents and relocations,
+ they are merged by ICF. It is known as an effective technique,
+ and it usually reduces C++ program's size by a few percent or more.
+ Note that this is not an entirely sound optimization. C/C++ require
+ that different functions have different addresses. If a program depends on
+ that property, it would fail at runtime.
+ On Windows, that's not really an issue because MSVC link.exe enabled
+ the optimization by default. As long as your program works
+ with the linker's default settings, your program should be safe with ICF.
+ On Unix, your program is generally not guaranteed to be safe with ICF,
+ although large programs happen to work correctly.
+ LLD works fine with ICF for example.
diff --git a/contrib/llvm/tools/lld/docs/README.txt b/contrib/llvm/tools/lld/docs/README.txt
new file mode 100644
index 000000000000..2ed016639de7
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/README.txt
@@ -0,0 +1,9 @@
+lld Documentation
+The lld documentation is written using the Sphinx documentation generator. It is
+currently tested with Sphinx 1.1.3.
+We currently use the 'nature' theme and a Beaker inspired structure.
+See sphinx_intro.rst for more details.
diff --git a/contrib/llvm/tools/lld/docs/Readers.rst b/contrib/llvm/tools/lld/docs/Readers.rst
new file mode 100644
index 000000000000..eae1717f6e5b
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/Readers.rst
@@ -0,0 +1,174 @@
+.. _Readers:
+Developing lld Readers
+Note: this document discusses the Mach-O port of LLD. For ELF and COFF,
+see :doc:`index`.
+The purpose of a "Reader" is to take an object file in a particular format
+and create an `lld::File`:cpp:class: (which is a graph of Atoms)
+representing the object file. A Reader inherits from
+`lld::Reader`:cpp:class: which lives in
+:file:`include/lld/Core/Reader.h` and
+The Reader infrastructure for an object format ``Foo`` requires the
+following pieces in order to fit into lld:
+ .. cpp:class:: ReaderOptionsFoo : public ReaderOptions
+ This Options class is the only way to configure how the Reader will
+ parse any file into an `lld::Reader`:cpp:class: object. This class
+ should be declared in the `lld`:cpp:class: namespace.
+ .. cpp:function:: Reader *createReaderFoo(ReaderOptionsFoo &reader)
+ This factory function configures and creates the Reader. This function
+ should be declared in the `lld`:cpp:class: namespace.
+ .. cpp:class:: ReaderFoo : public Reader
+ This is the concrete Reader class which can be called to parse
+ object files. It should be declared in an anonymous namespace or
+ if there is shared code with the `lld::WriterFoo`:cpp:class: you
+ can make a nested namespace (e.g. `lld::foo`:cpp:class:).
+You may have noticed that :cpp:class:`ReaderFoo` is not declared in the
+``.h`` file. An important design aspect of lld is that all Readers are
+created *only* through an object-format-specific
+:cpp:func:`createReaderFoo` factory function. The creation of the Reader is
+parametrized through a :cpp:class:`ReaderOptionsFoo` class. This options
+class is the one-and-only way to control how the Reader operates when
+parsing an input file into an Atom graph. For instance, you may want the
+Reader to only accept certain architectures. The options class can be
+instantiated from command line options or be programmatically configured.
+Where to start
+The lld project already has a skeleton of source code for Readers for
+``ELF``, ``PECOFF``, ``MachO``, and lld's native ``YAML`` graph format.
+If your file format is a variant of one of those, you should modify the
+existing Reader to support your variant. This is done by customizing the Options
+class for the Reader and making appropriate changes to the ``.cpp`` file to
+interpret those options and act accordingly.
+If your object file format is not a variant of any existing Reader, you'll need
+to create a new Reader subclass with the organization described above.
+Readers are factories
+The linker will usually only instantiate your Reader once. That one Reader will
+have its loadFile() method called many times with different input files.
+To support multithreaded linking, the Reader may be parsing multiple input
+files in parallel. Therefore, there should be no parsing state in your Reader
+object. Any parsing state should be in ivars of your File subclass or in
+some temporary object.
+The key function to implement in a reader is::
+ virtual error_code loadFile(LinkerInput &input,
+ std::vector<std::unique_ptr<File>> &result);
+It takes a memory buffer (which contains the contents of the object file
+being read) and returns an instantiated lld::File object which is
+a collection of Atoms. The result is a vector of File pointers (instead of
+simply a File pointer) because some file formats allow multiple object
+"files" to be encoded in one file system file.
+Memory Ownership
+Atoms are always owned by their File object. During core linking when Atoms
+are coalesced or stripped away, core linking does not delete them.
+Core linking just removes those unused Atoms from its internal list.
+The destructor of a File object is responsible for deleting all Atoms it
+owns, and if ownership of the MemoryBuffer was passed to it, the File
+destructor needs to delete that too.
+Making Atoms
+The internal model of lld is purely Atom based. But most object files do not
+have an explicit concept of Atoms, instead most have "sections". The way
+to think of this is that a section is just a list of Atoms with common
+The first step in parsing section-based object files is to cleave each
+section into a list of Atoms. The technique may vary by section type. For
+code sections (e.g. .text), there are usually symbols at the start of each
+function. Those symbol addresses are the points at which the section is
+cleaved into discrete Atoms. Some file formats (like ELF) also include the
+length of each symbol in the symbol table. Otherwise, the length of each
+Atom is calculated to run to the start of the next symbol or the end of the
+Other section types can be implicitly cleaved. For instance c-string literals
+or unwind info (e.g. .eh_frame) can be cleaved by having the Reader look at
+the content of the section. It is important to cleave sections into Atoms
+to remove false dependencies. For instance the .eh_frame section often
+has no symbols, but contains "pointers" to the functions for which it
+has unwind info. If the .eh_frame section was not cleaved (but left as one
+big Atom), there would always be a reference (from the eh_frame Atom) to
+each function. So the linker would be unable to coalesce or dead strip
+away the function atoms.
+The lld Atom model also requires that a reference to an undefined symbol be
+modeled as a Reference to an UndefinedAtom. So the Reader also needs to
+create an UndefinedAtom for each undefined symbol in the object file.
+Once all Atoms have been created, the second step is to create References
+(recall that Atoms are "nodes" and References are "edges"). Most References
+are created by looking at the "relocation records" in the object file. If
+a function contains a call to "malloc", there is usually a relocation record
+specifying the address in the section and the symbol table index. Your
+Reader will need to convert the address to an Atom and offset and the symbol
+table index into a target Atom. If "malloc" is not defined in the object file,
+the target Atom of the Reference will be an UndefinedAtom.
+Once you have the above working to parse an object file into Atoms and
+References, you'll want to look at performance. Some techniques that can
+help performance are:
+* Use llvm::BumpPtrAllocator or pre-allocate one big vector<Reference> and then
+ just have each atom point to its subrange of References in that vector.
+ This can be faster than allocating each Reference as a separate object.
+* Pre-scan the symbol table and determine how many atoms are in each section
+ then allocate space for all the Atom objects at once.
+* Don't copy symbol names or section content to each Atom, instead use
+ StringRef and ArrayRef in each Atom to point to its name and content in the
+ MemoryBuffer.
+We are still working on infrastructure to test Readers. The issue is that
+you don't want to check in binary files to the test suite. And the tools
+for creating your object file from assembly source may not be available on
+every OS.
+We are investigating a way to use YAML to describe the section, symbols,
+and content of a file. Then have some code which will write out an object
+file from that YAML description.
+Once that is in place, you can write test cases that contain section/symbols
+YAML and are run through the linker to produce Atom/References based YAML which
+is then run through FileCheck to verify the Atoms and References are as
diff --git a/contrib/llvm/tools/lld/docs/ReleaseNotes.rst b/contrib/llvm/tools/lld/docs/ReleaseNotes.rst
new file mode 100644
index 000000000000..0bebfb3fb1ce
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/ReleaseNotes.rst
@@ -0,0 +1,126 @@
+lld 8.0.0 Release Notes
+.. contents::
+ :local:
+.. warning::
+ These are in-progress notes for the upcoming LLVM 8.0.0 release.
+ Release notes for previous releases can be found on
+ `the Download Page <https://releases.llvm.org/download.html>`_.
+lld is a high-performance linker that supports ELF (Unix), COFF (Windows),
+Mach-O (macOS), MinGW and WebAssembly. lld is command-line-compatible with
+GNU linkers and Microsoft link.exe and is significantly faster than the
+system default linkers.
+lld 8.0.0 has lots of feature improvements and bug fixes.
+Non-comprehensive list of changes in this release
+ELF Improvements
+* lld now supports RISC-V. (`r339364
+ <https://reviews.llvm.org/rL339364>`_)
+* Default image base address has changed from 65536 to 2 MiB for i386
+ and 4 MiB for AArch64 to make lld-generated executables work better
+ with automatic superpage promotion. FreeBSD can promote contiguous
+ non-superpages to a superpage if they are aligned to the superpage
+ size. (`r342746 <https://reviews.llvm.org/rL342746>`_)
+* lld now attempts to place a ``.note`` segment in the first page of a
+ generated file, so that you can find some important information
+ (``.note.gnu.build-id`` in particular) in a core file even if a core
+ file is truncated by ulimit.
+ (`r349524 <https://reviews.llvm.org/rL349524>`_)
+* lld now reports an error if ``_GLOBAL_OFFSET_TABLE_`` symbol is
+ defined by an input object file, as the symbol is supposed to be
+ synthesized by the linker.
+ (`r347854 <https://reviews.llvm.org/rL347854>`_)
+* lld/Hexagon can now link Linux kernel and musl libc for Qualcomm
+ Hexagon ISA.
+* Initial MSP430 ISA support has landed.
+* lld now uses the ``sigrie`` instruction as a trap instruction for
+ MIPS targets.
+* lld now creates a TLS segment for AArch64 with a slightly larger
+ alignment requirement, so that the loader makes a few bytes room
+ before each TLS segment at runtime. The aim of this change is to
+ make room to accommodate nonstandard Android TLS slots while keeping
+ the compatibility with the standard AArch64 ABI.
+ (`r350681 <https://reviews.llvm.org/rL350681>`_)
+* The following flags have been added: ``--call-graph-profile``,
+ ``--no-call-graph-profile``, ``--warn-ifunc-textrel``,
+ ``-z interpose``, ``-z global``, ``-z nodefaultlib``
+COFF Improvements
+* PDB GUID is set to hash of PDB contents instead of a random byte
+ sequence for build reproducibility.
+* ``/pdbsourcepath:`` is now also used to make ``"cwd"``, ``"exe"``, ``"pdb"``
+ in the env block of PDB outputs absolute if they are relative, and to make
+ paths to obj files referenced in PDB outputs absolute if they are relative.
+ Together with the previous item, this makes it possible to generate
+ executables and PDBs that are fully deterministic and independent of the
+ absolute path to the build directory, so that different machines building
+ the same code in different directories can produce exactly the same output.
+* The following flags have been added: ``/force:multiple``
+* lld now can link against import libraries produced by GNU tools.
+* lld can create thunks for ARM and ARM64, to allow linking larger images
+ (over 16 MB for ARM and over 128 MB for ARM64)
+* Several speed and memory usage improvements.
+* lld now creates debug info for typedefs.
+* lld can now link obj files produced by ``cl.exe /Z7 /Yc``.
+* lld now understands ``%_PDB%`` and ``%_EXT%`` in ``/pdbaltpath:``.
+* Undefined symbols are now printed in demangled form in addition to raw form.
+MinGW Improvements
+* lld can now automatically import data variables from DLLs without the
+ use of the dllimport attribute.
+* lld can now use existing normal MinGW sysroots with import libraries and
+ CRT startup object files for GNU binutils. lld can handle most object
+ files produced by GCC, and thus works as a drop-in replacement for
+ ld.bfd in such environments. (There are known issues with linking crtend.o
+ from GCC in setups with DWARF exceptions though, where object files are
+ linked in a different order than with GNU ld, inserting a DWARF exception
+ table terminator too early.)
+* lld now supports COFF embedded directives for linking to nondefault
+ libraries, just like for the normal COFF target.
+* Actually generate a codeview build id signature, even if not creating a PDB.
+ Previously, the ``--build-id`` option did not actually generate a build id
+ unless ``--pdb`` was specified.
+WebAssembly Improvements
+* Add initial support for creating shared libraries (-shared).
+ Note: The shared library format is still under active development and may
+ undergo significant changes in future versions.
+ See: https://github.com/WebAssembly/tool-conventions/blob/master/DynamicLinking.md
diff --git a/contrib/llvm/tools/lld/docs/WebAssembly.rst b/contrib/llvm/tools/lld/docs/WebAssembly.rst
new file mode 100644
index 000000000000..424c1a10c7e7
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/WebAssembly.rst
@@ -0,0 +1,114 @@
+WebAssembly lld port
+The WebAssembly version of lld takes WebAssembly binaries as inputs and produces
+a WebAssembly binary as its output. For the most part it tries to mimic the
+behaviour of traditional ELF linkers and specifically the ELF lld port. Where
+possible the command line flags and the semantics should be the same.
+Object file format
+The format of the input object files that lld expects is specified as part of
+the WebAssembly tool conventions
+This is the object format that llvm will produce when run with the
+``wasm32-unknown-unknown`` target. To build llvm with WebAssembly support
+currently requires enabling the experimental backend using
+The WebAssembly version of lld is installed as **wasm-ld**. It shares many
+common linker flags with **ld.lld** but also includes several
+WebAssembly-specific options:
+.. option:: --no-entry
+ Don't search for the entry point symbol (by default ``_start``).
+.. option:: --export-table
+ Export the function table to the environment.
+.. option:: --import-table
+ Import the function table from the environment.
+.. option:: --export-all
+ Export all symbols (normally combined with --no-gc-sections)
+.. option:: --export-dynamic
+ When building an executable, export any non-hidden symbols. By default only
+ the entry point and any symbols marked with --export/--export-all are
+ exported.
+.. option:: --global-base=<value>
+ Address at which to place global data.
+.. option:: --no-merge-data-segments
+ Disable merging of data segments.
+.. option:: --stack-first
+ Place stack at start of linear memory rather than after data.
+.. option:: --compress-relocations
+ Relocation targets in the code section are 5-bytes wide in order to potentially
+ accommodate the largest LEB128 value. This option will cause the linker to
+ shrink the code section to remove any padding from the final output. However
+ because it affects code offsets, this option is not compatible with outputting
+ debug information.
+.. option:: --allow-undefined
+ Allow undefined symbols in linked binary.
+.. option:: --import-memory
+ Import memory from the environment.
+.. option:: --initial-memory=<value>
+ Initial size of the linear memory. Default: static data size.
+.. option:: --max-memory=<value>
+ Maximum size of the linear memory. Default: unlimited.
+By default the function table is neither imported nor exported, but defined
+for internal use only.
+When building shared libraries symbols are exported if they are marked
+as ``visibility=default``. When building executables only the entry point is
+exported by default. In addition any symbol included on the command line via
+``--export`` is also exported.
+Since WebAssembly is designed with size in mind the linker defaults to
+``--gc-sections`` which means that all unused functions and data segments will
+be stripped from the binary.
+The symbols which are preserved by default are:
+- The entry point (by default ``_start``).
+- Any symbol which is to be exported.
+- Any symbol transitively referenced by the above.
+Missing features
+- Merging of data section similar to ``SHF_MERGE`` in the ELF world is not
+ supported.
+- No support for creating shared libraries. The spec for shared libraries in
+ WebAssembly is still in flux:
+ https://github.com/WebAssembly/tool-conventions/blob/master/DynamicLinking.md
diff --git a/contrib/llvm/tools/lld/docs/_static/favicon.ico b/contrib/llvm/tools/lld/docs/_static/favicon.ico
new file mode 100644
index 000000000000..724ad6e12dd4
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/_static/favicon.ico
Binary files differ
diff --git a/contrib/llvm/tools/lld/docs/_templates/indexsidebar.html b/contrib/llvm/tools/lld/docs/_templates/indexsidebar.html
new file mode 100644
index 000000000000..588be9309bde
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/_templates/indexsidebar.html
@@ -0,0 +1,4 @@
+<p>lld bugs should be reported at the
+ LLVM <a href="https://bugs.llvm.org/">Bugzilla</a>.</p>
diff --git a/contrib/llvm/tools/lld/docs/_templates/layout.html b/contrib/llvm/tools/lld/docs/_templates/layout.html
new file mode 100644
index 000000000000..519a24bce63a
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/_templates/layout.html
@@ -0,0 +1,12 @@
+{% extends "!layout.html" %}
+{% block extrahead %}
+<style type="text/css">
+ table.right { float: right; margin-left: 20px; }
+ table.right td { border: 1px solid #ccc; }
+{% endblock %}
+{% block rootrellink %}
+ <li><a href="{{ pathto('index') }}">lld Home</a>&nbsp;|&nbsp;</li>
+{% endblock %}
diff --git a/contrib/llvm/tools/lld/docs/conf.py b/contrib/llvm/tools/lld/docs/conf.py
new file mode 100644
index 000000000000..62404b275450
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/conf.py
@@ -0,0 +1,255 @@
+# -*- coding: utf-8 -*-
+# lld documentation build configuration file.
+# This file is execfile()d with the current directory set to its containing dir.
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+import sys, os
+from datetime import date
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#sys.path.insert(0, os.path.abspath('.'))
+# -- General configuration -----------------------------------------------------
+# If your documentation needs a minimal Sphinx version, state it here.
+#needs_sphinx = '1.0'
+# Add any Sphinx extension module names here, as strings. They can be extensions
+# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = ['sphinx.ext.intersphinx', 'sphinx.ext.todo']
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+# The suffix of source filenames.
+source_suffix = '.rst'
+# The encoding of source files.
+#source_encoding = 'utf-8-sig'
+# The master toctree document.
+master_doc = 'index'
+# General information about the project.
+project = u'lld'
+copyright = u'2011-%d, LLVM Project' % date.today().year
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+# The short version.
+version = '8'
+# The full version, including alpha/beta/rc tags.
+release = '8'
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#language = None
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+today_fmt = '%Y-%m-%d'
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = ['_build']
+# The reST default role (used for this markup: `text`) to use for all documents.
+#default_role = None
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+show_authors = True
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'friendly'
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+# -- Options for HTML output ---------------------------------------------------
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+html_theme = 'llvm-theme'
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+# Add any paths that contain custom themes here, relative to this directory.
+html_theme_path = ["."]
+# The name for this set of Sphinx documents. If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+# A shorter title for the navigation bar. Default is the same as html_title.
+#html_short_title = None
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+# If given, this must be the name of an image file (path relative to the
+# configuration directory) that is the favicon of the docs. Modern browsers use
+# this as icon for tabs, windows and bookmarks. It should be a Windows-style
+# icon file (.ico), which is 16x16 or 32x32 pixels large. Default: None. The
+# image file will be copied to the _static directory of the output HTML, but
+# only if the file does not already exist there.
+html_favicon = '_static/favicon.ico'
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+html_last_updated_fmt = '%Y-%m-%d'
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+# Custom sidebar templates, maps document names to template names.
+html_sidebars = {'index': 'indexsidebar.html'}
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+# html_additional_pages = {'index': 'index.html'}
+# If false, no module index is generated.
+#html_domain_indices = True
+# If false, no index is generated.
+#html_use_index = True
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+# If true, links to the reST sources are added to the pages.
+html_show_sourcelink = True
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#html_show_sphinx = True
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#html_show_copyright = True
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it. The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = None
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'llddoc'
+# -- Options for LaTeX output --------------------------------------------------
+latex_elements = {
+# The paper size ('letterpaper' or 'a4paper').
+#'papersize': 'letterpaper',
+# The font size ('10pt', '11pt' or '12pt').
+#'pointsize': '10pt',
+# Additional stuff for the LaTeX preamble.
+#'preamble': '',
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title, author, documentclass [howto/manual]).
+latex_documents = [
+ ('contents', 'lld.tex', u'lld Documentation',
+ u'LLVM project', 'manual'),
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+# If true, show page references after internal links.
+#latex_show_pagerefs = False
+# If true, show URL addresses after external links.
+#latex_show_urls = False
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+# If false, no module index is generated.
+#latex_domain_indices = True
+# -- Options for manual page output --------------------------------------------
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+ ('contents', 'lld', u'lld Documentation',
+ [u'LLVM project'], 1)
+# If true, show URL addresses after external links.
+#man_show_urls = False
+# -- Options for Texinfo output ------------------------------------------------
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+# dir menu entry, description, category)
+texinfo_documents = [
+ ('contents', 'lld', u'lld Documentation',
+ u'LLVM project', 'lld', 'One line description of project.',
+ 'Miscellaneous'),
+# Documents to append as an appendix to all manuals.
+#texinfo_appendices = []
+# If false, no module index is generated.
+#texinfo_domain_indices = True
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#texinfo_show_urls = 'footnote'
+# FIXME: Define intersphinx configuration.
+intersphinx_mapping = {}
+# -- Options for extensions ----------------------------------------------------
+# Enable this if you want TODOs to show up in the generated documentation.
+todo_include_todos = True
diff --git a/contrib/llvm/tools/lld/docs/design.rst b/contrib/llvm/tools/lld/docs/design.rst
new file mode 100644
index 000000000000..1e111f979bb5
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/design.rst
@@ -0,0 +1,421 @@
+.. _design:
+Linker Design
+Note: this document discusses the Mach-O port of LLD. For ELF and COFF,
+see :doc:`index`.
+lld is a new generation of linker. It is not "section" based like traditional
+linkers which mostly just interlace sections from multiple object files into the
+output file. Instead, lld is based on "Atoms". Traditional section based
+linkers work well for simple linking, but their model makes advanced linking
+features difficult to implement. Features like dead code stripping, reordering
+functions for locality, and C++ coalescing require the linker to work at a finer
+grain.
+An atom is an indivisible chunk of code or data. An atom has a set of
+attributes, such as: name, scope, content-type, alignment, etc. An atom also
+has a list of References. A Reference contains: a kind, an optional offset, an
+optional addend, and an optional target atom.
+The Atom model allows the linker to use standard graph theory models for linking
+data structures. Each atom is a node, and each Reference is an edge. The
+feature of dead code stripping is implemented by following edges to mark all
+live atoms, and then delete the non-live atoms.
+Atom Model
+An atom is an indivisible chunk of code or data. Typically each user written
+function or global variable is an atom. In addition, the compiler may emit
+other atoms, such as for literal c-strings or floating point constants, or for
+runtime data structures like dwarf unwind info or pointers to initializers.
+A simple "hello world" object file would be modeled like this:
+.. image:: hello.png
+There are three atoms: main, a proxy for printf, and an anonymous atom
+containing the c-string literal "hello world". The Atom "main" has two
+references. One is the call site for the call to printf, and the other is a
+reference for the instruction that loads the address of the c-string literal.
+There are only four different types of atoms:
+ * DefinedAtom
+ 95% of all atoms. This is a chunk of code or data
+ * UndefinedAtom
+ This is a place holder in object files for a reference to some atom
+ outside the translation unit. During core linking it is usually replaced
+ by (coalesced into) another Atom.
+ * SharedLibraryAtom
+ If a required symbol name turns out to be defined in a dynamic shared
+ library (and not some object file). A SharedLibraryAtom is the
+ placeholder Atom used to represent that fact.
+ It is similar to an UndefinedAtom, but it also tracks information
+ about the associated shared library.
+ * AbsoluteAtom
+ This is for embedded support where some stuff is implemented in ROM at
+ some fixed address. This atom has no content. It is just an address
+ that the Writer needs to fix up any references to point to.
+File Model
+The linker views the input files as basically containers of Atoms and
+References, and just a few attributes of their own. The linker works with three
+kinds of files: object files, static libraries, and dynamic shared libraries.
+Each kind of file has a reader object which presents the file in the model
+expected by the linker.
+Object File
+An object file is just a container of atoms. When linking an object file, a
+reader is instantiated which parses the object file and instantiates a set of
+atoms representing all content in the .o file. The linker adds all those atoms
+to a master graph.
+Static Library (Archive)
+This is the traditional unix static archive which is just a collection of object
+files with a "table of contents". When linking with a static library, by default
+nothing is added to the master graph of atoms. Instead, after merging all
+atoms from object files into a master graph, if any "undefined" atoms are left
+remaining in the master graph, the linker reads the table of contents for each
+static library to see if any have the needed definitions. If so, the set of
+atoms from the specified object file in the static library is added to the
+master graph of atoms.
+Dynamic Library (Shared Object)
+Dynamic libraries are different than object files and static libraries in that
+they don't directly add any content. Their purpose is to check at build time
+that the remaining undefined references can be resolved at runtime, and provide
+a list of dynamic libraries (SO_NEEDED) that will be needed at runtime. The way
+this is modeled in the linker is that a dynamic library contributes no atoms to
+the initial graph of atoms. Instead, (like static libraries) if there are
+"undefined" atoms in the master graph of all atoms, then each dynamic library is
+checked to see if it exports the required symbol. If so, a "shared library" atom
+is instantiated by the reader which the linker uses to replace the
+"undefined" atom.
+Linking Steps
+Through the use of abstract Atoms, the core of linking is architecture
+independent and file format independent. All command line parsing is factored
+out into a separate "options" abstraction which enables the linker to be driven
+with different command line sets.
+The overall steps in linking are:
+ #. Command line processing
+ #. Parsing input files
+ #. Resolving
+ #. Passes/Optimizations
+ #. Generate output file
+The Resolving and Passes steps are done purely on the master graph of atoms, so
+they have no notion of file formats such as mach-o or ELF.
+Input Files
+Existing developer tools use different file formats for object files.
+A goal of lld is to be file format independent. This is done
+through a plug-in model for reading object files. The lld::Reader is the base
+class for all object file readers. A Reader follows the factory method pattern.
+A Reader instantiates an lld::File object (which is a graph of Atoms) from a
+given object file (on disk or in-memory).
+Every Reader subclass defines its own "options" class (for instance the mach-o
+Reader defines the class ReaderOptionsMachO). This options class is the
+one-and-only way to control how the Reader operates when parsing an input file
+into an Atom graph. For instance, you may want the Reader to only accept
+certain architectures. The options class can be instantiated from command
+line options, or it can be subclassed and the ivars programmatically set.
+The resolving step takes all the atoms' graphs from each object file and
+combines them into one master object graph. Unfortunately, it is not as simple
+as appending the atom list from each file into one big list. There are many
+cases where atoms need to be coalesced. That is, two or more atoms need to be
+coalesced into one atom. This is necessary to support: C language "tentative
+definitions", C++ weak symbols for templates and inlines defined in headers,
+replacing undefined atoms with actual definition atoms, and for merging copies
+of constants like c-strings and floating point constants.
+The linker supports coalescing by-name and by-content. By-name is used for
+tentative definitions and weak symbols. By-content is used for constant data
+that can be merged.
+The resolving process maintains some global linking "state", including a "symbol
+table" which is a map from llvm::StringRef to lld::Atom*. With these data
+structures, the linker iterates all atoms in all input files. For each atom, it
+checks if the atom is named and has a global or hidden scope. If so, the atom
+is added to the symbol table map. If there already is a matching atom in that
+table, that means the current atom needs to be coalesced with the found atom, or
+it is a multiple definition error.
+When all initial input file atoms have been processed by the resolver, a scan is
+made to see if there are any undefined atoms in the graph. If there are, the
+linker scans all libraries (both static and dynamic) looking for definitions to
+replace the undefined atoms. It is an error if any undefined atoms are left
+remaining.
+Dead code stripping (if requested) is done at the end of resolving. The linker
+does a simple mark-and-sweep. It starts with "root" atoms (like "main" in a main
+executable) and follows each reference and marks each Atom that it visits as
+"live". When done, all atoms not marked "live" are removed.
+The result of the Resolving phase is the creation of an lld::File object. The
+goal is that the lld::File model is **the** internal representation
+throughout the linker. The file readers parse (mach-o, ELF, COFF) into an
+lld::File. The file writers (mach-o, ELF, COFF) take an lld::File and produce
+their file kind, and every Pass only operates on an lld::File. This is not only
+a simpler, consistent model, but it enables the state of the linker to be dumped
+at any point in the link for testing purposes.
+The Passes step is an open ended set of routines that each get a chance to
+modify or enhance the current lld::File object. Some example Passes are:
+ * stub (PLT) generation
+ * GOT instantiation
+ * order_file optimization
+ * branch island generation
+ * branch shim generation
+ * Objective-C optimizations (Darwin specific)
+ * TLV instantiation (Darwin specific)
+ * DTrace probe processing (Darwin specific)
+ * compact unwind encoding (Darwin specific)
+Some of these passes are specific to Darwin's runtime environments. But many of
+the passes are applicable to any OS (such as generating branch island for out of
+range branch instructions).
+The general structure of a pass is to iterate through the atoms in the current
+lld::File object, inspecting each atom and doing something. For instance, the
+stub pass, looks for call sites to shared library atoms (e.g. call to printf).
+It then instantiates a "stub" atom (PLT entry) and a "lazy pointer" atom for
+each proxy atom needed, and these new atoms are added to the current lld::File
+object. Next, all the noted call sites to shared library atoms have their
+References altered to point to the stub atom instead of the shared library atom.
+Generate Output File
+Once the passes are done, the output file writer is given current lld::File
+object. The writer's job is to create the executable content file wrapper and
+place the content of the atoms into it.
+lld uses a plug-in model for writing output files. All concrete writers (e.g.
+ELF, mach-o, etc) are subclasses of the lld::Writer class.
+Unlike the Reader class which has just one method to instantiate an lld::File,
+the Writer class has multiple methods. The crucial method is to generate the
+output file, but there are also methods which allow the Writer to contribute
+Atoms to the resolver and specify passes to run.
+An example of contributing
+atoms is that if the Writer knows a main executable is being linked and such
+an executable requires a specially named entry point (e.g. "_main"), the Writer
+can add an UndefinedAtom with that special name to the resolver. This will
+cause the resolver to issue an error if that symbol is not defined.
+Sometimes a Writer supports lazily created symbols, such as names for the start
+of sections. To support this, the Writer can create a File object which vends
+no initial atoms, but does lazily supply atoms by name as needed.
+Every Writer subclass defines its own "options" class (for instance the mach-o
+Writer defines the class WriterOptionsMachO). This options class is the
+one-and-only way to control how the Writer operates when producing an output
+file from an Atom graph. For instance, you may want the Writer to optimize
+the output for certain OS versions, or strip local symbols, etc. The options
+class can be instantiated from command line options, or it can be subclassed
+and the ivars programmatically set.
+lld::File representations
+Just as LLVM has three representations of its IR model, lld has two
+representations of its File/Atom/Reference model:
+ * In memory, abstract C++ classes (lld::Atom, lld::Reference, and lld::File).
+ * textual (in YAML)
+Textual representations in YAML
+In designing a textual format we want something easy for humans to read and easy
+for the linker to parse. Since an atom has lots of attributes most of which are
+usually just the default, we should define default values for every attribute so
+that those can be omitted from the text representation. Here are the atoms for a
+simple hello world program expressed in YAML::
+ target-triple: x86_64-apple-darwin11
+ atoms:
+ - name: _main
+ scope: global
+ type: code
+ content: [ 55, 48, 89, e5, 48, 8d, 3d, 00, 00, 00, 00, 30, c0, e8, 00, 00,
+ 00, 00, 31, c0, 5d, c3 ]
+ fixups:
+ - offset: 07
+ kind: pcrel32
+ target: 2
+ - offset: 0E
+ kind: call32
+ target: _fprintf
+ - type: c-string
+ content: [ 73, 5A, 00 ]
+ ...
+The biggest use for the textual format will be writing test cases. Writing test
+cases in C is problematic because the compiler may vary its output over time for
+its own optimization reasons which may inadvertently disable or break the linker
+feature trying to be tested. By writing test cases in the linker's own textual
+format, we can exactly specify every attribute of every atom and thus target
+specific linker logic.
+The textual/YAML format follows the ReaderWriter patterns used in lld. The lld
+library comes with the classes: ReaderYAML and WriterYAML.
+The lld project contains a test suite which is being built up as new code is
+added to lld. All new lld functionality should have tests added to the test
+suite. The test suite is `lit <http://llvm.org/cmds/lit.html/>`_ driven. Each
+test is a text file with comments telling lit how to run the test and check the
+result. To facilitate testing, the lld project builds a tool called lld-core.
+This tool reads a YAML file (default from stdin), parses it into one or more
+lld::File objects in memory and then feeds those lld::File objects to the
+resolver phase.
+Resolver testing
+Basic testing is the "core linking" or resolving phase. That is where the
+linker merges object files. All test cases are written in YAML. One feature of
+YAML is that it allows multiple "documents" to be encoded in one YAML stream.
+That means one text file can appear to the linker as multiple .o files - the
+normal case for the linker.
+Here is a simple example of a core linking test case. It checks that an
+undefined atom from one file will be replaced by a definition from another
+file::
+ # RUN: lld-core %s | FileCheck %s
+ #
+ # Test that undefined atoms are replaced with defined atoms.
+ #
+ ---
+ atoms:
+ - name: foo
+ definition: undefined
+ ---
+ atoms:
+ - name: foo
+ scope: global
+ type: code
+ ...
+ # CHECK: name: foo
+ # CHECK: scope: global
+ # CHECK: type: code
+ # CHECK-NOT: name: foo
+ # CHECK: ...
+Passes testing
+Since Passes just operate on an lld::File object, the lld-core tool has the
+option to run a particular pass (after resolving). Thus, you can write a YAML
+test case with carefully crafted input to exercise areas of a Pass and then check
+the resulting lld::File object as represented in YAML.
+Design Issues
+There are a number of open issues in the design of lld. The plan is to wait and
+make these design decisions when we need to.
+Debug Info
+Currently, the lld model says nothing about debug info. But the most popular
+debug format is DWARF and there is some impedance mismatch with the lld model
+and DWARF. In lld there are just Atoms and only Atoms that need to be in a
+special section at runtime have an associated section. Also, Atoms do not have
+addresses. The way DWARF is spec'ed different parts of DWARF are supposed to go
+into specially named sections and the DWARF references function code by address.
+CPU and OS specific functionality
+Currently, lld has an abstract "Platform" that deals with any CPU or OS specific
+differences in linking. We just keep adding virtual methods to the base
+Platform class as we find linking areas that might need customization. At some
+point we'll need to structure this better.
+File Attributes
+Currently, lld::File just has a path and a way to iterate its atoms. We will
+need to add more attributes on a File. For example, some equivalent to the
+target triple. There is also a number of cached or computed attributes that
+could make various Passes more efficient. For instance, on Darwin there are a
+number of Objective-C optimizations that can be done by a Pass. But it would
+improve the plain C case if the Objective-C optimization Pass did not have to
+scan all atoms looking for any Objective-C data structures. This could be done
+if the lld::File object had an attribute that said if the file had any
+Objective-C data in it. The Resolving phase would then be required to "merge"
+that attribute as object files are added.
diff --git a/contrib/llvm/tools/lld/docs/development.rst b/contrib/llvm/tools/lld/docs/development.rst
new file mode 100644
index 000000000000..ce91341d665f
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/development.rst
@@ -0,0 +1,45 @@
+.. _development:
+Note: this document discusses the Mach-O port of LLD. For ELF and COFF,
+see :doc:`index`.
+lld is developed as part of the `LLVM <http://llvm.org>`_ project.
+Creating a Reader
+See the :ref:`Creating a Reader <Readers>` guide.
+Modifying the Driver
+See :doc:`Driver`.
+You can run lld with ``-mllvm -debug`` command line options to enable debugging
+printouts. If you want to enable debug information for some specific pass, you
+can run it with ``-mllvm '-debug-only=<pass>'``, where pass is a name used in
+the ``DEBUG_WITH_TYPE()`` macro.
+The project documentation is written in reStructuredText and generated using the
+`Sphinx <http://sphinx.pocoo.org/>`_ documentation generator. For more
+information on writing documentation for the project, see the
+.. toctree::
+ :hidden:
+ Readers
+ Driver
diff --git a/contrib/llvm/tools/lld/docs/getting_started.rst b/contrib/llvm/tools/lld/docs/getting_started.rst
new file mode 100644
index 000000000000..97c3d1bccbda
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/getting_started.rst
@@ -0,0 +1,106 @@
+.. _getting_started:
+Getting Started: Building and Running lld
+This page gives you the shortest path to checking out and building lld. If you
+run into problems, please file bugs in the `LLVM Bugzilla`__
+__ http://llvm.org/bugs/
+Building lld
+On Unix-like Systems
+1. Get the required tools.
+ * `CMake 2.8`_\+.
+ * make (or any build system CMake supports).
+ * `Clang 3.1`_\+ or GCC 4.7+ (C++11 support is required).
+ * If using Clang, you will also need `libc++`_.
+ * `Python 2.4`_\+ (not 3.x) for running tests.
+.. _CMake 2.8: http://www.cmake.org/cmake/resources/software.html
+.. _Clang 3.1: http://clang.llvm.org/
+.. _libc++: http://libcxx.llvm.org/
+.. _Python 2.4: http://python.org/download/
+2. Check out LLVM::
+ $ cd path/to/llvm-project
+ $ svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm
+3. Check out lld::
+ $ cd llvm/tools
+ $ svn co http://llvm.org/svn/llvm-project/lld/trunk lld
+ * lld can also be checked out to ``path/to/llvm-project`` and built as an external
+ project.
+4. Build LLVM and lld::
+ $ cd path/to/llvm-build/llvm (out of source build required)
+ $ cmake -G "Unix Makefiles" path/to/llvm-project/llvm
+ $ make
+ * If you want to build with clang and it is not the default compiler or
+ it is installed in an alternate location, you'll need to tell the cmake tool
+ the location of the C and C++ compiler via CMAKE_C_COMPILER and
+ CMAKE_CXX_COMPILER. For example::
+ $ cmake -DCMAKE_CXX_COMPILER=/path/to/clang++ -DCMAKE_C_COMPILER=/path/to/clang ...
+5. Test::
+ $ make check-lld
+Using Visual Studio
+#. Get the required tools.
+ * `CMake 2.8`_\+.
+ * `Visual Studio 12 (2013) or later`_ (required for C++11 support)
+ * `Python 2.4`_\+ (not 3.x) for running tests.
+.. _CMake 2.8: http://www.cmake.org/cmake/resources/software.html
+.. _Visual Studio 12 (2013) or later: http://www.microsoft.com/visualstudio/11/en-us
+.. _Python 2.4: http://python.org/download/
+#. Check out LLVM::
+ $ cd path/to/llvm-project
+ $ svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm
+#. Check out lld::
+ $ cd llvm/tools
+ $ svn co http://llvm.org/svn/llvm-project/lld/trunk lld
+ * lld can also be checked out to ``path/to/llvm-project`` and built as an external
+ project.
+#. Generate Visual Studio project files::
+ $ cd path/to/llvm-build/llvm (out of source build required)
+ $ cmake -G "Visual Studio 12" path/to/llvm-project/llvm
+#. Build
+ * Open LLVM.sln in Visual Studio.
+ * Build the ``ALL_BUILD`` target.
+#. Test
+ * Build the ``lld-test`` target.
+More Information
+For more information on using CMake see the `LLVM CMake guide`_.
+.. _LLVM CMake guide: http://llvm.org/docs/CMake.html
diff --git a/contrib/llvm/tools/lld/docs/hello.png b/contrib/llvm/tools/lld/docs/hello.png
new file mode 100644
index 000000000000..70df111f1abd
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/hello.png
Binary files differ
diff --git a/contrib/llvm/tools/lld/docs/index.rst b/contrib/llvm/tools/lld/docs/index.rst
new file mode 100644
index 000000000000..2564e9b6310f
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/index.rst
@@ -0,0 +1,177 @@
+LLD - The LLVM Linker
+LLD is a linker from the LLVM project that is a drop-in replacement
+for system linkers and runs much faster than them. It also provides
+features that are useful for toolchain developers.
+The linker supports ELF (Unix), PE/COFF (Windows), Mach-O (macOS) and
+WebAssembly in descending order of completeness. Internally, LLD consists of
+several different linkers. The ELF port is the one that will be described in
+this document. The PE/COFF port is complete, including
+Windows debug info (PDB) support. The WebAssembly port is still a work in
+progress (See :doc:`WebAssembly`). The Mach-O port is built based on a
+different architecture than the others. For the details about Mach-O, please
+read :doc:`AtomLLD`.
+- LLD is a drop-in replacement for the GNU linkers that accepts the
+ same command line arguments and linker scripts as GNU.
+ We are currently working closely with the FreeBSD project to make
+ LLD the default system linker in future versions of the operating
+ system, so we are serious about addressing compatibility issues. As
+ of February 2017, LLD is able to link the entire FreeBSD/amd64 base
+ system including the kernel. With a few work-in-progress patches it
+ can link approximately 95% of the ports collection on AMD64. For the
+ details, see `FreeBSD quarterly status report
+ <https://www.freebsd.org/news/status/report-2016-10-2016-12.html#Using-LLVM%27s-LLD-Linker-as-FreeBSD%27s-System-Linker>`_.
+- LLD is very fast. When you link a large program on a multicore
+ machine, you can expect that LLD runs more than twice as fast as the GNU
+ gold linker. Your mileage may vary, though.
+- It supports various CPUs/ABIs including x86-64, x86, x32, AArch64,
+ ARM, MIPS 32/64 big/little-endian, PowerPC, PowerPC 64 and AMDGPU.
+ Among these, x86-64, AArch64, and ARM (>= v6) are production quality.
+ MIPS seems decent too. x86 should be OK but is not well tested yet.
+- It is always a cross-linker, meaning that it always supports all the
+ above targets however it was built. In fact, we don't provide a
+ build-time option to enable/disable each target. This should make it
+ easy to use our linker as part of a cross-compile toolchain.
+- You can embed LLD in your program to eliminate dependencies on
+ external linkers. All you have to do is to construct object files
+ and command line arguments just like you would do to invoke an
+ external linker and then call the linker's main function,
+ ``lld::elf::link``, from your code.
+- It is small. We are using LLVM libObject library to read from object
+ files, so it is not a completely fair comparison, but as of February
+ 2017, LLD/ELF consists only of 21k lines of C++ code while GNU gold
+ consists of 198k lines of C++ code.
+- Link-time optimization (LTO) is supported by default. Essentially,
+ all you have to do to do LTO is to pass the ``-flto`` option to clang.
+ Then clang creates object files not in the native object file format
+ but in LLVM bitcode format. LLD reads bitcode object files, compiles
+ them using LLVM and emits an output file. Because in this way LLD can
+ see the entire program, it can do the whole program optimization.
+- Some very old features for ancient Unix systems (pre-90s or even
+ before that) have been removed. Some default settings have been
+ tuned for the 21st century. For example, the stack is marked as
+ non-executable by default to tighten security.
+This is a link time comparison on a 2-socket 20-core 40-thread Xeon
+E5-2680 2.80 GHz machine with an SSD drive. We ran gold and lld with
+or without multi-threading support. To disable multi-threading, we
+added ``-no-threads`` to the command lines.
+============ =========== ============ ==================== ================== =============== =============
+Program Output size GNU ld GNU gold w/o threads GNU gold w/threads lld w/o threads lld w/threads
+ffmpeg dbg 92 MiB 1.72s 1.16s 1.01s 0.60s 0.35s
+mysqld dbg 154 MiB 8.50s 2.96s 2.68s 1.06s 0.68s
+clang dbg 1.67 GiB 104.03s 34.18s 23.49s 14.82s 5.28s
+chromium dbg 1.14 GiB 209.05s [1]_ 64.70s 60.82s 27.60s 16.70s
+============ =========== ============ ==================== ================== =============== =============
+As you can see, lld is significantly faster than GNU linkers.
+Note that this is just a benchmark result of our environment.
+Depending on number of available cores, available amount of memory or
+disk latency/throughput, your results may vary.
+.. [1] Since GNU ld doesn't support the ``-icf=all`` and
+ ``-gdb-index`` options, we removed them from the command line
+ for GNU ld. GNU ld would have been slower than this if it had
+ these options.
+If you have already checked out LLVM using SVN, you can check out LLD
+under ``tools`` directory just like you probably did for clang. For the
+details, see `Getting Started with the LLVM System
+<http://llvm.org/docs/GettingStarted.html>`_.
+If you haven't checked out LLVM, the easiest way to build LLD is to
+check out the entire LLVM projects/sub-projects from a git mirror and
+build that tree. You need `cmake` and of course a C++ compiler.
+.. code-block:: console
+ $ git clone https://github.com/llvm-project/llvm-project-20170507 llvm-project
+ $ mkdir build
+ $ cd build
+ $ cmake -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_PROJECTS=lld -DCMAKE_INSTALL_PREFIX=/usr/local ../llvm-project/llvm
+ $ make install
+Using LLD
+LLD is installed as ``ld.lld``. On Unix, linkers are invoked by
+compiler drivers, so you are not expected to use that command
+directly. There are a few ways to tell compiler drivers to use ld.lld
+instead of the default linker.
+The easiest way to do that is to overwrite the default linker. After
+installing LLD to somewhere on your disk, you can create a symbolic
+link by doing ``ln -s /path/to/ld.lld /usr/bin/ld`` so that
+``/usr/bin/ld`` is resolved to LLD.
+If you don't want to change the system setting, you can use clang's
+``-fuse-ld`` option. In this way, you want to set ``-fuse-ld=lld`` to
+LDFLAGS when building your programs.
+LLD leaves its name and version number to a ``.comment`` section in an
+output. If you are in doubt whether you are successfully using LLD or
+not, run ``readelf --string-dump .comment <output-file>`` and examine the
+output. If the string "Linker: LLD" is included in the output, you are
+using LLD.
+Here is a brief project history of the ELF and COFF ports.
+- May 2015: We decided to rewrite the COFF linker and did that.
+ Noticed that the new linker is much faster than the MSVC linker.
+- July 2015: The new ELF port was developed based on the COFF linker
+ architecture.
+- September 2015: The first patches to support MIPS and AArch64 landed.
+- October 2015: Succeeded to self-host the ELF port. We have noticed
+ that the linker was faster than the GNU linkers, but we weren't sure
+ at the time if we would be able to keep the gap as we would add more
+ features to the linker.
+- July 2016: Started working on improving the linker script support.
+- December 2016: Succeeded to build the entire FreeBSD base system
+ including the kernel. We had widened the performance gap against the
+ GNU linkers.
+For the internals of the linker, please read :doc:`NewLLD`. It is a bit
+outdated but the fundamental concepts remain valid. We'll update the
+document soon.
+.. toctree::
+ :maxdepth: 1
+ NewLLD
+ AtomLLD
+ WebAssembly
+ windows_support
+ missingkeyfunction
+ ReleaseNotes
diff --git a/contrib/llvm/tools/lld/docs/ld.lld.1 b/contrib/llvm/tools/lld/docs/ld.lld.1
new file mode 100644
index 000000000000..04bf19d6b23c
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/ld.lld.1
@@ -0,0 +1,626 @@
+.\" This file is distributed under the University of Illinois Open Source
+.\" License. See LICENSE.TXT for details.
+.\" This man page documents only lld's ELF linking support, obtained originally
+.\" from FreeBSD.
+.Dd September 26, 2018
+.Dt LD.LLD 1
+.Nm ld.lld
+.Nd ELF linker from the LLVM project
+.Nm ld.lld
+.Op Ar options
+.Ar objfile ...
+A linker takes one or more object, archive, and library files, and combines
+them into an output file (an executable, a shared library, or another object
+It relocates code and data from the input files and resolves symbol
+references between them.
+is a drop-in replacement for the GNU BFD and gold linkers.
+It accepts most of the same command line arguments and linker scripts
+as GNU linkers.
+currently supports i386, x86-64, ARM, AArch64, PowerPC32, PowerPC64,
+MIPS32, MIPS64, RISC-V, AMDGPU, Hexagon and SPARC V9 targets.
+acts as a Microsoft link.exe-compatible linker if invoked as
+.Nm lld-link
+and as macOS's ld if invoked as
+.Nm ld.ld64 .
+All these targets are always supported however
+was built, so you can always use
+as a native linker as well as a cross linker.
+Many options have both a single-letter and long form.
+When using the long form options other than those beginning with the
+.Cm o
+may be specified using either one or two dashes preceding the option name.
+Long options beginning with
+.Cm o
+require two dashes to avoid confusion with the
+.Fl o Ar path
+.Bl -tag -width indent
+.It Fl -allow-multiple-definition
+Do not error if a symbol is defined multiple times.
+The first definition will be used.
+.It Fl -apply-dynamic-relocs
+Apply link-time values for dynamic relocations.
+.It Fl -as-needed
+Only set
+for shared libraries if used.
+.It Fl -auxiliary Ns = Ns Ar value
+Set the
+field to the specified name.
+.It Fl -Bdynamic , Fl -dy
+Link against shared libraries.
+.It Fl -Bstatic , Fl -static , Fl -dn
+Do not link against shared libraries.
+.It Fl -Bsymbolic
+Bind defined symbols locally.
+.It Fl -Bsymbolic-functions
+Bind defined function symbols locally.
+.It Fl -build-id Ns = Ns Ar value
+Generate a build ID note.
+.Ar value
+may be one of
+.Cm fast ,
+.Cm md5 ,
+.Cm sha1 ,
+.Cm tree ,
+.Cm uuid ,
+.Cm 0x Ns Ar hex-string ,
+.Cm none .
+.Cm tree
+is an alias for
+.Cm sha1 .
+Build-IDs of type
+.Cm fast ,
+.Cm md5 ,
+.Cm sha1 ,
+.Cm tree
+are calculated from the object contents.
+.Cm fast
+is not intended to be cryptographically secure.
+.It Fl -build-id
+Synonym for
+.Fl -build-id Ns = Ns Cm fast .
+.It Fl -color-diagnostics Ns = Ns Ar value
+Use colors in diagnostics.
+.Ar value
+may be one of
+.Cm always ,
+.Cm auto ,
+.Cm never .
+.Cm auto
+enables color if and only if output is to a terminal.
+.It Fl -color-diagnostics
+Alias for
+.Fl -color-diagnostics Ns = Ns Cm auto .
+.It Fl -compress-debug-sections Ns = Ns Ar value
+Compress DWARF debug sections.
+.Ar value
+may be
+.Cm none
+.Cm zlib .
+.It Fl -cref
+Output cross reference table.
+.It Fl -define-common , Fl d
+Assign space to common symbols.
+.It Fl -defsym Ns = Ns Ar symbol Ns = Ns Ar expression
+Define a symbol alias.
+.Ar expression
+may be another symbol or a linker script expression.
+For example,
+.Ql --defsym=foo=bar
+.Ql --defsym=foo=bar+0x100 .
+.It Fl -demangle
+Demangle symbol names.
+.It Fl -disable-new-dtags
+Disable new dynamic tags.
+.It Fl -discard-all , Fl x
+Delete all local symbols.
+.It Fl -discard-locals , Fl X
+Delete temporary local symbols.
+.It Fl -discard-none
+Keep all symbols in the symbol table.
+.It Fl -dynamic-linker Ns = Ns Ar value
+Specify the dynamic linker to be used for a dynamically linked executable.
+This is recorded in an ELF segment of type
+.It Fl -dynamic-list Ns = Ns Ar file
+Read a list of dynamic symbols from
+.Ar file .
+.It Fl -eh-frame-hdr
+Request creation of
+.Li .eh_frame_hdr
+section and
+segment header.
+.It Fl -emit-relocs , Fl q
+Generate relocations in the output.
+.It Fl -enable-new-dtags
+Enable new dynamic tags.
+.It Fl -end-lib
+End a grouping of objects that should be treated as if they were together
+in an archive.
+.It Fl -entry Ns = Ns Ar entry
+Name of entry point symbol.
+.It Fl -error-limit Ns = Ns Ar value
+Maximum number of errors to emit before stopping.
+A value of zero indicates that there is no limit.
+.It Fl -error-unresolved-symbols
+Report unresolved symbols as errors.
+.It Fl -execute-only
+Mark executable sections unreadable. This option is currently only
+supported on AArch64.
+.It Fl -exclude-libs Ns = Ns Ar value
+Exclude static libraries from automatic export.
+.It Fl -export-dynamic , Fl E
+Put symbols in the dynamic symbol table.
+.It Fl -export-dynamic-symbol Ns = Ns Ar symbol
+.Ar symbol
+in the dynamic symbol table.
+.It Fl -fatal-warnings
+Treat warnings as errors.
+.It Fl -filter Ns = Ns Ar value , Fl F Ar value
+Set the
+field to the specified value.
+.It Fl -fini Ns = Ns Ar symbol
+Specify a finalizer function.
+.It Fl -format Ns = Ns Ar input-format , Fl b Ar input-format
+Specify the format of the inputs following this option.
+.Ar input-format
+may be one of
+.Cm binary ,
+.Cm elf ,
+.Cm default .
+.Cm default
+is a synonym for
+.Cm elf .
+.It Fl -gc-sections
+Enable garbage collection of unused sections.
+.It Fl -gdb-index
+.Li .gdb_index
+.It Fl -hash-style Ns = Ns Ar value
+Specify hash style.
+.Ar value
+may be
+.Cm sysv ,
+.Cm gnu ,
+.Cm both .
+.Cm both
+is the default.
+.It Fl -help
+Print a help message.
+.It Fl -icf Ns = Ns Cm all
+Enable identical code folding.
+.It Fl -icf Ns = Ns Cm safe
+Enable safe identical code folding.
+.It Fl -icf Ns = Ns Cm none
+Disable identical code folding.
+.It Fl -image-base Ns = Ns Ar value
+Set the base address to
+.Ar value .
+.It Fl -init Ns = Ns Ar symbol
+Specify an initializer function.
+.It Fl -keep-unique Ns = Ns Ar symbol
+Do not fold
+.Ar symbol
+during ICF.
+.It Fl l Ar libName , Fl -library Ns = Ns Ar libName
+Root name of library to use.
+.It Fl L Ar dir , Fl -library-path Ns = Ns Ar dir
+Add a directory to the library search path.
+.It Fl -lto-aa-pipeline Ns = Ns Ar value
+AA pipeline to run during LTO.
+Used in conjunction with
+.Fl -lto-newpm-passes .
+.It Fl -lto-newpm-passes Ns = Ns Ar value
+Passes to run during LTO.
+.It Fl -lto-O Ns Ar opt-level
+Optimization level for LTO.
+.It Fl -lto-partitions Ns = Ns Ar value
+Number of LTO codegen partitions.
+.It Fl m Ar value
+Set target emulation.
+.It Fl -Map Ns = Ns Ar file , Fl M Ar file
+Print a link map to
+.Ar file .
+.It Fl -no-as-needed
+Always set
+for shared libraries.
+.It Fl -no-color-diagnostics
+Do not use colors in diagnostics.
+.It Fl -no-define-common
+Do not assign space to common symbols.
+.It Fl -no-demangle
+Do not demangle symbol names.
+.It Fl -no-dynamic-linker
+Inhibit output of an
+.Li .interp
+.It Fl -no-gc-sections
+Disable garbage collection of unused sections.
+.It Fl -no-gnu-unique
+Disable STB_GNU_UNIQUE symbol binding.
+.It Fl -no-rosegment
+Do not put read-only non-executable sections in their own segment.
+.It Fl -no-threads
+Do not run the linker multi-threaded.
+.It Fl -no-undefined-version
+Report version scripts that refer to undefined symbols.
+.It Fl -no-undefined
+Report unresolved symbols even if the linker is creating a shared library.
+.It Fl -no-whole-archive
+Restores the default behavior of loading archive members.
+.It Fl -no-pie
+Do not create a position independent executable.
+.It Fl -noinhibit-exec
+Retain the executable output file whenever it is still usable.
+.It Fl -nostdlib
+Only search directories specified on the command line.
+.It Fl o Ar path
+Write the output executable, library, or object to
+.Ar path .
+If not specified,
+.Dv a.out
+is used as a default.
+.It Fl O Ns Ar value
+Optimize output file size.
+.Ar value
+may be:
+.Bl -tag -width 2n -compact
+.It Cm 0
+Disable string merging.
+.It Cm 1
+Enable string merging.
+.It Cm 2
+Enable string tail merging.
+.Fl O Ns Cm 1
+is the default.
+.It Fl -oformat Ns = Ns Ar format
+Specify the format for the output object file.
+The only supported
+.Ar format
+.Cm binary ,
+which produces output with no ELF header.
+.It Fl -omagic , Fl N
+Set the text and data sections to be readable and writable.
+.It Fl -opt-remarks-filename Ar file
+Write optimization remarks in YAML format to
+.Ar file .
+.It Fl -opt-remarks-with-hotness
+Include hotness information in the optimization remarks file.
+.It Fl -pic-veneer
+Always generate position independent thunks.
+.It Fl -pie
+Create a position independent executable.
+.It Fl -print-gc-sections
+List removed unused sections.
+.It Fl -print-icf-sections
+List identical folded sections.
+.It Fl -print-map
+Print a link map to the standard output.
+.It Fl -push-state
+Save the current state of
+.Fl -as-needed ,
+.Fl -static ,
+.Fl -whole-archive .
+.It Fl -pop-state
+Undo the effect of
+.Fl -push-state .
+.It Fl -relocatable , Fl r
+Create relocatable object file.
+.It Fl -reproduce Ns = Ns Ar value
+Dump linker invocation and input files for debugging.
+.It Fl -retain-symbols-file Ns = Ns Ar file
+Retain only the symbols listed in the file.
+.It Fl -rpath Ns = Ns Ar value , Fl R Ar value
+Add a
+to the output.
+.It Fl -rsp-quoting Ns = Ns Ar value
+Quoting style for response files.
+The supported values are
+.Cm windows
+.Cm posix .
+.It Fl -script Ns = Ns Ar file , Fl T Ar file
+Read linker script from
+.Ar file .
+.It Fl -section-start Ns = Ns Ar section Ns = Ns Ar address
+Set address of section.
+.It Fl -shared , Fl -Bsharable
+Build a shared object.
+.It Fl -soname Ns = Ns Ar value , Fl h Ar value
+.Ar value .
+.It Fl -sort-section Ns = Ns Ar value
+Specifies the section sorting rule when a linker script is used.
+.It Fl -start-lib
+Start a grouping of objects that should be treated as if they were together
+in an archive.
+.It Fl -strip-all , Fl s
+Strip all symbols.
+.It Fl -strip-debug , Fl S
+Strip debugging information.
+.It Fl -symbol-ordering-file Ns = Ns Ar file
+Lay out sections in the order specified by
+.Ar file .
+.It Fl -sysroot Ns = Ns Ar value
+Set the system root.
+.It Fl -target1-abs
+.Dv R_ARM_ABS32 .
+.It Fl -target1-rel
+.Dv R_ARM_REL32 .
+.It Fl -target2 Ns = Ns Ar type
+.Ar type ,
+.Ar type
+is one of
+.Cm rel ,
+.Cm abs ,
+.Cm got-rel .
+.It Fl -Tbss Ns = Ns Ar value
+Same as
+.Fl -section-start
+.Li .bss
+as the sectionname.
+.It Fl -Tdata Ns = Ns Ar value
+Same as
+.Fl -section-start
+.Li .data
+as the sectionname.
+.It Fl -Ttext Ns = Ns Ar value
+Same as
+.Fl -section-start
+.Li .text
+as the sectionname.
+.It Fl -thinlto-cache-dir Ns = Ns Ar value
+Path to ThinLTO cached object file directory.
+.It Fl -thinlto-cache-policy Ns = Ns Ar value
+Pruning policy for the ThinLTO cache.
+.It Fl -thinlto-jobs Ns = Ns Ar value
+Number of ThinLTO jobs.
+.It Fl -threads
+Run the linker multi-threaded.
+This option is enabled by default.
+.It Fl -trace
+Print the names of the input files.
+.It Fl -trace-symbol Ns = Ns Ar symbol , Fl y Ar symbol
+Trace references to
+.Ar symbol .
+.It Fl -undefined Ns = Ns Ar symbol , Fl u Ar symbol
+.Ar symbol
+to be an undefined symbol during linking.
+.It Fl -unresolved-symbols Ns = Ns Ar value
+Determine how to handle unresolved symbols.
+.It Fl v
+Display the version number and proceed with linking if object files are
+.It Fl V , Fl -version
+Display the version number and exit.
+.It Fl -verbose
+Verbose mode.
+.It Fl -version-script Ns = Ns Ar file
+Read version script from
+.Ar file .
+.It Fl -warn-backrefs
+Warn about reverse or cyclic dependencies to or between static archives.
+This can be used to ensure linker invocation remains compatible with
+traditional Unix-like linkers.
+.It Fl -warn-common
+Warn about duplicate common symbols.
+.It Fl -warn-ifunc-textrel
+Warn about using ifunc symbols in conjunction with text relocations.
+Older versions of the glibc library (2.28 and earlier) have a bug that causes
+the segment that includes ifunc symbols to be marked as not executable when
+they are relocated. As a result, although the program compiles and links
+successfully, it gives segmentation fault when the instruction pointer reaches
+an ifunc symbol. Use -warn-ifunc-textrel to let lld give a warning, if the
+code may include ifunc symbols, may do text relocations and be linked with
+an older glibc version. Otherwise, there is no need to use it, as the default
+value does not give a warning. This flag was introduced in late 2018,
+has no counterpart in the ld and gold linkers, and may be removed in the future.
+.It Fl -warn-unresolved-symbols
+Report unresolved symbols as warnings.
+.It Fl -whole-archive
+Force load of all members in a static library.
+.It Fl -wrap Ns = Ns Ar symbol
+Use wrapper functions for symbol.
+.It Fl z Ar option
+Linker option extensions.
+.Bl -tag -width indent
+.It Cm execstack
+Make the main stack executable.
+Stack permissions are recorded in the
+.It Cm global
+Sets the
+.Dv DF_1_GLOBAL
+flag in the
+Different loaders can decide how to handle this flag on their own.
+.It Cm ifunc-noplt
+Do not emit PLT entries for GNU ifuncs.
+Instead, preserve relocations for ifunc call sites so that they may
+be applied by a run-time loader.
+Note that this feature requires special loader support and will
+generally result in application crashes when used outside of freestanding
+.It Cm initfirst
+Sets the
+flag to indicate the module should be initialized first.
+.It Cm interpose
+Set the
+flag to indicate to the runtime linker that the object is an interposer.
+During symbol resolution interposers are searched after the application
+but before other dependencies.
+.It Cm muldefs
+Do not error if a symbol is defined multiple times.
+The first definition will be used.
+This is a synonym for
+.Fl -allow-multiple-definition .
+.It Cm nocombreloc
+Disable combining and sorting multiple relocation sections.
+.It Cm nocopyreloc
+Disable the creation of copy relocations.
+.It Cm nodefaultlib
+Set the
+flag to indicate that default library search paths should be ignored.
+.It Cm nodelete
+Set the
+flag to indicate that the object cannot be unloaded from a process.
+.It Cm nodlopen
+Set the
+flag to indicate that the object may not be opened by
+.Xr dlopen 3 .
+.It Cm norelro
+Do not indicate that portions of the object should be mapped read-only
+after initial relocation processing.
+The object will omit the
+.It Cm notext
+Allow relocations against read-only segments.
+Sets the
+.Dv DT_TEXTREL
+flag in the
+.It Cm now
+Set the
+flag to indicate that the run-time loader should perform all relocation
+processing as part of object initialization.
+By default relocations may be performed on demand.
+.It Cm origin
+Set the
+flag to indicate that the object requires
+.It Cm retpolineplt
+Emit retpoline format PLT entries as a mitigation for CVE-2017-5715.
+.It Cm rodynamic
+Make the
+.Li .dynamic
+section read-only.
+tag will not be emitted.
+.It Cm stack-size Ns = Ns Ar size
+Set the main thread's stack size to
+.Ar size .
+The stack size is recorded as the size of the
+.Ar size .
+program segment.
+.It Cm text
+Do not allow relocations against read-only segments.
+This is the default.
+.It Cm wxneeded
+Create a
+.Nm Ap s
+handling of archive files (those with a
+.Pa .a
+file extension) is different from traditional linkers used on Unix-like
+Traditional linkers maintain a set of undefined symbols during linking.
+The linker processes each file in the order in which it appears on the
+command line, until the set of undefined symbols becomes empty.
+An object file is linked into the output object when it is encountered,
+with its undefined symbols added to the set.
+Upon encountering an archive file a traditional linker searches the objects
+contained therein, and processes those that satisfy symbols in the unresolved
+Handling mutually dependent archives may be awkward when using a traditional
+Archive files may have to be specified multiple times, or the special command
+line options
+.Fl -start-group
+.Fl -end-group
+may be used to have the linker loop over the files in the group until no new
+symbols are added to the set.
+records all symbols found in objects and archives as it iterates over
+command line arguments.
+encounters an undefined symbol that can be resolved by an object file
+contained in a previously processed archive file, it immediately extracts
+and links it into the output object.
+With certain archive inputs
+may produce different results compared to traditional linkers.
+In practice, large bodies of third party software have been linked with
+without material issues.
+.Fl -warn-backrefs
+option may be used to identify a linker invocation that may be incompatible
+with traditional Unix-like linker behavior.
diff --git a/contrib/llvm/tools/lld/docs/llvm-theme/layout.html b/contrib/llvm/tools/lld/docs/llvm-theme/layout.html
new file mode 100644
index 000000000000..0cd0918eac2a
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/llvm-theme/layout.html
@@ -0,0 +1,22 @@
+ sphinxdoc/layout.html
+ ~~~~~~~~~~~~~~~~~~~~~
+ Sphinx layout template for the sphinxdoc theme.
+ :copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+{% extends "basic/layout.html" %}
+{% block relbar1 %}
+<div class="logo">
+<a href="{{ pathto('index') }}"><img src="{{
+pathto("_static/logo.png", 1) }}" alt="LLVM Documentation"/></a>
+{{ super() }}
+{% endblock %}
+{# put the sidebar before the body #}
+{% block sidebar1 %}{{ sidebar() }}{% endblock %}
+{% block sidebar2 %}{% endblock %}
diff --git a/contrib/llvm/tools/lld/docs/llvm-theme/static/contents.png b/contrib/llvm/tools/lld/docs/llvm-theme/static/contents.png
new file mode 100644
index 000000000000..7fb82154a174
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/llvm-theme/static/contents.png
Binary files differ
diff --git a/contrib/llvm/tools/lld/docs/llvm-theme/static/llvm.css b/contrib/llvm/tools/lld/docs/llvm-theme/static/llvm.css
new file mode 100644
index 000000000000..32802bb6a2d0
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/llvm-theme/static/llvm.css
@@ -0,0 +1,345 @@
+ * sphinxdoc.css_t
+ * ~~~~~~~~~~~~~~~
+ *
+ * Sphinx stylesheet -- sphinxdoc theme. Originally created by
+ * Armin Ronacher for Werkzeug.
+ *
+ * :copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS.
+ * :license: BSD, see LICENSE for details.
+ *
+ */
+@import url("basic.css");
+/* -- page layout ----------------------------------------------------------- */
+body {
+ font-family: 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva',
+ 'Verdana', sans-serif;
+ font-size: 14px;
+ letter-spacing: -0.01em;
+ line-height: 150%;
+ text-align: center;
+ background-color: #BFD1D4;
+ color: black;
+ padding: 0;
+ border: 1px solid #aaa;
+ margin: 0px 80px 0px 80px;
+ min-width: 740px;
+div.logo {
+ background-color: white;
+ text-align: left;
+ padding: 10px 10px 15px 15px;
+div.document {
+ background-color: white;
+ text-align: left;
+ background-image: url(contents.png);
+ background-repeat: repeat-x;
+div.bodywrapper {
+ margin: 0 240px 0 0;
+ border-right: 1px solid #ccc;
+div.body {
+ margin: 0;
+ padding: 0.5em 20px 20px 20px;
+div.related {
+ font-size: 1em;
+div.related ul {
+ background-image: url(navigation.png);
+ height: 2em;
+ border-top: 1px solid #ddd;
+ border-bottom: 1px solid #ddd;
+div.related ul li {
+ margin: 0;
+ padding: 0;
+ height: 2em;
+ float: left;
+div.related ul li.right {
+ float: right;
+ margin-right: 5px;
+div.related ul li a {
+ margin: 0;
+ padding: 0 5px 0 5px;
+ line-height: 1.75em;
+ color: #EE9816;
+div.related ul li a:hover {
+ color: #3CA8E7;
+div.sphinxsidebarwrapper {
+ padding: 0;
+div.sphinxsidebar {
+ margin: 0;
+ padding: 0.5em 15px 15px 0;
+ width: 210px;
+ float: right;
+ font-size: 1em;
+ text-align: left;
+div.sphinxsidebar h3, div.sphinxsidebar h4 {
+ margin: 1em 0 0.5em 0;
+ font-size: 1em;
+ padding: 0.1em 0 0.1em 0.5em;
+ color: white;
+ border: 1px solid #86989B;
+ background-color: #AFC1C4;
+div.sphinxsidebar h3 a {
+ color: white;
+div.sphinxsidebar ul {
+ padding-left: 1.5em;
+ margin-top: 7px;
+ padding: 0;
+ line-height: 130%;
+div.sphinxsidebar ul ul {
+ margin-left: 20px;
+div.footer {
+ background-color: #E3EFF1;
+ color: #86989B;
+ padding: 3px 8px 3px 0;
+ clear: both;
+ font-size: 0.8em;
+ text-align: right;
+div.footer a {
+ color: #86989B;
+ text-decoration: underline;
+/* -- body styles ----------------------------------------------------------- */
+p {
+ margin: 0.8em 0 0.5em 0;
+a {
+ color: #CA7900;
+ text-decoration: none;
+a:hover {
+ color: #2491CF;
+div.body a {
+ text-decoration: underline;
+h1 {
+ margin: 0;
+ padding: 0.7em 0 0.3em 0;
+ font-size: 1.5em;
+ color: #11557C;
+h2 {
+ margin: 1.3em 0 0.2em 0;
+ font-size: 1.35em;
+ padding: 0;
+h3 {
+ margin: 1em 0 -0.3em 0;
+ font-size: 1.2em;
+div.body h1 a, div.body h2 a, div.body h3 a, div.body h4 a, div.body h5 a, div.body h6 a {
+ color: black!important;
+h1 a.anchor, h2 a.anchor, h3 a.anchor, h4 a.anchor, h5 a.anchor, h6 a.anchor {
+ display: none;
+ margin: 0 0 0 0.3em;
+ padding: 0 0.2em 0 0.2em;
+ color: #aaa!important;
+h1:hover a.anchor, h2:hover a.anchor, h3:hover a.anchor, h4:hover a.anchor,
+h5:hover a.anchor, h6:hover a.anchor {
+ display: inline;
+h1 a.anchor:hover, h2 a.anchor:hover, h3 a.anchor:hover, h4 a.anchor:hover,
+h5 a.anchor:hover, h6 a.anchor:hover {
+ color: #777;
+ background-color: #eee;
+a.headerlink {
+ color: #c60f0f!important;
+ font-size: 1em;
+ margin-left: 6px;
+ padding: 0 4px 0 4px;
+ text-decoration: none!important;
+a.headerlink:hover {
+ background-color: #ccc;
+ color: white!important;
+cite, code, tt {
+ font-family: 'Consolas', 'Deja Vu Sans Mono',
+ 'Bitstream Vera Sans Mono', monospace;
+ font-size: 0.95em;
+ letter-spacing: 0.01em;
+tt {
+ background-color: #f2f2f2;
+ border-bottom: 1px solid #ddd;
+ color: #333;
+tt.descname, tt.descclassname, tt.xref {
+ border: 0;
+hr {
+ border: 1px solid #abc;
+ margin: 2em;
+a tt {
+ border: 0;
+ color: #CA7900;
+a tt:hover {
+ color: #2491CF;
+pre {
+ font-family: 'Consolas', 'Deja Vu Sans Mono',
+ 'Bitstream Vera Sans Mono', monospace;
+ font-size: 0.95em;
+ letter-spacing: 0.015em;
+ line-height: 120%;
+ padding: 0.5em;
+ border: 1px solid #ccc;
+ background-color: #f8f8f8;
+pre a {
+ color: inherit;
+ text-decoration: underline;
+td.linenos pre {
+ padding: 0.5em 0;
+div.quotebar {
+ background-color: #f8f8f8;
+ max-width: 250px;
+ float: right;
+ padding: 2px 7px;
+ border: 1px solid #ccc;
+div.topic {
+ background-color: #f8f8f8;
+table {
+ border-collapse: collapse;
+ margin: 0 -0.5em 0 -0.5em;
+table td, table th {
+ padding: 0.2em 0.5em 0.2em 0.5em;
+div.admonition, div.warning {
+ font-size: 0.9em;
+ margin: 1em 0 1em 0;
+ border: 1px solid #86989B;
+ background-color: #f7f7f7;
+ padding: 0;
+div.admonition p, div.warning p {
+ margin: 0.5em 1em 0.5em 1em;
+ padding: 0;
+div.admonition pre, div.warning pre {
+ margin: 0.4em 1em 0.4em 1em;
+div.admonition p.admonition-title,
+div.warning p.admonition-title {
+ margin: 0;
+ padding: 0.1em 0 0.1em 0.5em;
+ color: white;
+ border-bottom: 1px solid #86989B;
+ font-weight: bold;
+ background-color: #AFC1C4;
+div.warning {
+ border: 1px solid #940000;
+div.warning p.admonition-title {
+ background-color: #CF0000;
+ border-bottom-color: #940000;
+div.admonition ul, div.admonition ol,
+div.warning ul, div.warning ol {
+ margin: 0.1em 0.5em 0.5em 3em;
+ padding: 0;
+div.versioninfo {
+ margin: 1em 0 0 0;
+ border: 1px solid #ccc;
+ background-color: #DDEAF0;
+ padding: 8px;
+ line-height: 1.3em;
+ font-size: 0.9em;
+.viewcode-back {
+ font-family: 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva',
+ 'Verdana', sans-serif;
+div.viewcode-block:target {
+ background-color: #f4debf;
+ border-top: 1px solid #ac9;
+ border-bottom: 1px solid #ac9;
diff --git a/contrib/llvm/tools/lld/docs/llvm-theme/static/logo.png b/contrib/llvm/tools/lld/docs/llvm-theme/static/logo.png
new file mode 100644
index 000000000000..4fc899028dc6
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/llvm-theme/static/logo.png
Binary files differ
diff --git a/contrib/llvm/tools/lld/docs/llvm-theme/static/navigation.png b/contrib/llvm/tools/lld/docs/llvm-theme/static/navigation.png
new file mode 100644
index 000000000000..1081dc1439fb
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/llvm-theme/static/navigation.png
Binary files differ
diff --git a/contrib/llvm/tools/lld/docs/llvm-theme/theme.conf b/contrib/llvm/tools/lld/docs/llvm-theme/theme.conf
new file mode 100644
index 000000000000..330fc92ffa18
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/llvm-theme/theme.conf
@@ -0,0 +1,4 @@
+inherit = basic
+stylesheet = llvm.css
+pygments_style = friendly
diff --git a/contrib/llvm/tools/lld/docs/make.bat b/contrib/llvm/tools/lld/docs/make.bat
new file mode 100644
index 000000000000..8471252d709f
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/make.bat
@@ -0,0 +1,190 @@
+REM Command file for Sphinx documentation
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+set BUILDDIR=_build
+if NOT "%PAPER%" == "" (
+ set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
+if "%1" == "" goto help
+if "%1" == "help" (
+ :help
+ echo.Please use `make ^<target^>` where ^<target^> is one of
+ echo. html to make standalone HTML files
+ echo. dirhtml to make HTML files named index.html in directories
+ echo. singlehtml to make a single large HTML file
+ echo. pickle to make pickle files
+ echo. json to make JSON files
+ echo. htmlhelp to make HTML files and a HTML help project
+ echo. qthelp to make HTML files and a qthelp project
+ echo. devhelp to make HTML files and a Devhelp project
+ echo. epub to make an epub
+ echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
+ echo. text to make text files
+ echo. man to make manual pages
+ echo. texinfo to make Texinfo files
+ echo. gettext to make PO message catalogs
+ echo. changes to make an overview over all changed/added/deprecated items
+ echo. linkcheck to check all external links for integrity
+ echo. doctest to run all doctests embedded in the documentation if enabled
+ goto end
+if "%1" == "clean" (
+ for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
+ del /q /s %BUILDDIR%\*
+ goto end
+if "%1" == "html" (
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The HTML pages are in %BUILDDIR%/html.
+ goto end
+if "%1" == "dirhtml" (
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
+ goto end
+if "%1" == "singlehtml" (
+ %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
+ goto end
+if "%1" == "pickle" (
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished; now you can process the pickle files.
+ goto end
+if "%1" == "json" (
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished; now you can process the JSON files.
+ goto end
+if "%1" == "htmlhelp" (
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished; now you can run HTML Help Workshop with the ^
+.hhp project file in %BUILDDIR%/htmlhelp.
+ goto end
+if "%1" == "qthelp" (
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished; now you can run "qcollectiongenerator" with the ^
+.qhcp project file in %BUILDDIR%/qthelp, like this:
+ echo.^> qcollectiongenerator %BUILDDIR%\qthelp\lld.qhcp
+ echo.To view the help file:
+ echo.^> assistant -collectionFile %BUILDDIR%\qthelp\lld.ghc
+ goto end
+if "%1" == "devhelp" (
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished.
+ goto end
+if "%1" == "epub" (
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The epub file is in %BUILDDIR%/epub.
+ goto end
+if "%1" == "latex" (
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
+ goto end
+if "%1" == "text" (
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The text files are in %BUILDDIR%/text.
+ goto end
+if "%1" == "man" (
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The manual pages are in %BUILDDIR%/man.
+ goto end
+if "%1" == "texinfo" (
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
+ goto end
+if "%1" == "gettext" (
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
+ goto end
+if "%1" == "changes" (
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.The overview file is in %BUILDDIR%/changes.
+ goto end
+if "%1" == "linkcheck" (
+ %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Link check complete; look for any errors in the above output ^
+or in %BUILDDIR%/linkcheck/output.txt.
+ goto end
+if "%1" == "doctest" (
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Testing of doctests in the sources finished, look at the ^
+results in %BUILDDIR%/doctest/output.txt.
+ goto end
diff --git a/contrib/llvm/tools/lld/docs/missingkeyfunction.rst b/contrib/llvm/tools/lld/docs/missingkeyfunction.rst
new file mode 100644
index 000000000000..54ad3251f794
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/missingkeyfunction.rst
@@ -0,0 +1,84 @@
+Missing Key Function
+If your build failed with a linker error something like this::
+ foo.cc:28: error: undefined reference to 'vtable for C'
+ the vtable symbol may be undefined because the class is missing its key function (see https://lld.llvm.org/missingkeyfunction)
+it's likely that your class C has a key function (defined by the ABI as the first
+non-pure, non-inline, virtual method), but you haven't actually defined it.
+When a class has a key function, the compiler emits the vtable (and some other
+things as well) only in the translation unit that defines that key function. Thus,
+if you're missing the key function, you'll also be missing the vtable. If no other
+function calls your missing method, you won't see any undefined reference errors
+for it, but you will see undefined references to the vtable symbol.
+When a class has no non-pure, non-inline, virtual methods, there is no key
+method, and the compiler is forced to emit the vtable in every translation unit
+that references the class. In this case, it is emitted in a COMDAT section,
+which allows the linker to eliminate all duplicate copies. This is still
+wasteful in terms of object file size and link time, so it's always advisable to
+ensure there is at least one eligible method that can serve as the key function.
+Here are the most common mistakes that lead to this error:
+Failing to define a virtual destructor
+Say you have a base class declared in a header file::
+ class B {
+ public:
+ B();
+ virtual ~B();
+ ...
+ };
+Here, ``~B`` is the first non-pure, non-inline, virtual method, so it is the key
+method. If you forget to define ``B::~B`` in your source file, the compiler will
+not emit the vtable for ``B``, and you'll get an undefined reference to "vtable
+for B".
+This is just an example of the more general mistake of forgetting to define the
+key function, but it's quite common because virtual destructors are likely to be
+the first eligible key function and it's easy to forget to implement them. It's
+also more likely that you won't have any direct references to the destructor, so
+you won't see any undefined reference errors that point directly to the problem.
+The solution in this case is to implement the missing method.
+Forgetting to declare a virtual method in an abstract class as pure
+Say you have an abstract base class declared in a header file::
+ class A {
+ public:
+ A();
+ virtual ~A() {}
+ virtual int foo() = 0;
+ ...
+ virtual int bar();
+ ...
+ };
+This base class is intended to be abstract, but you forgot to mark one of the
+methods pure. Here, ``A::bar``, being non-pure, is nominated as the key function,
+and as a result, the vtable for ``A`` is not emitted, because the compiler is
+waiting for a translation unit that defines ``A::bar``.
+The solution in this case is to add the missing ``= 0`` to the declaration of
+Key method is defined, but the linker doesn't see it
+It's also possible that you have defined the key function somewhere, but the
+object file containing the definition of that method isn't being linked into
+your application.
+The solution in this case is to check your dependencies to make sure that
+the object file or the library file containing the key function is given to
+the linker.
diff --git a/contrib/llvm/tools/lld/docs/open_projects.rst b/contrib/llvm/tools/lld/docs/open_projects.rst
new file mode 100644
index 000000000000..36edca4e96dc
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/open_projects.rst
@@ -0,0 +1,9 @@
+.. _open_projects:
+Open Projects
+Documentation TODOs
+.. todolist::
diff --git a/contrib/llvm/tools/lld/docs/sphinx_intro.rst b/contrib/llvm/tools/lld/docs/sphinx_intro.rst
new file mode 100644
index 000000000000..6bb9816b5ab4
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/sphinx_intro.rst
@@ -0,0 +1,127 @@
+.. _sphinx_intro:
+Sphinx Introduction for LLVM Developers
+This document is intended as a short and simple introduction to the Sphinx
+documentation generation system for LLVM developers.
+To get started writing documentation, you will need to:
+ 1. Have the Sphinx tools :ref:`installed <installing_sphinx>`.
+ 2. Understand how to :ref:`build the documentation
+ <building_the_documentation>`.
+ 3. Start :ref:`writing documentation <writing_documentation>`!
+.. _installing_sphinx:
+Installing Sphinx
+You should be able to install Sphinx using the standard Python package
+installation tool ``easy_install``, as follows::
+ $ sudo easy_install sphinx
+ Searching for sphinx
+ Reading http://pypi.python.org/simple/sphinx/
+ Reading http://sphinx.pocoo.org/
+ Best match: Sphinx 1.1.3
+ ... more lines here ..
+If you do not have root access (or otherwise want to avoid installing Sphinx in
+system directories) see the section on :ref:`installing_sphinx_in_a_venv` .
+If you do not have the ``easy_install`` tool on your system, you should be able
+to install it using:
+ Linux
+ Use your distribution's standard package management tool to install it,
+ e.g., ``apt-get install easy_install`` or ``yum install easy_install``.
+ Mac OS X
+ All modern Mac OS X systems come with ``easy_install`` as part of the base
+ system.
+ Windows
+ See the `setuptools <http://pypi.python.org/pypi/setuptools>`_ package web
+ page for instructions.
+.. _building_the_documentation:
+Building the documentation
+In order to build the documentation, you need to add ``-DLLVM_ENABLE_SPHINX=ON``
+to your ``cmake`` command. Once you do this you can build the docs using the
+``docs-lld-html`` build (``ninja`` or ``make``) target.
+That build target will invoke ``sphinx-build`` with the appropriate options for
+the project, and generate the HTML documentation in a ``tools/lld/docs/html``
+subdirectory.
+.. _writing_documentation:
+Writing documentation
+The documentation itself is written in the reStructuredText (ReST) format, and
+Sphinx defines additional tags to support features like cross-referencing.
+The ReST format itself is organized around documents mostly being readable
+plaintext documents. You should generally be able to write new documentation
+easily just by following the style of the existing documentation.
+If you want to understand the formatting of the documents more, the best place
+to start is Sphinx's own `ReST Primer <http://sphinx.pocoo.org/rest.html>`_.
+Learning More
+If you want to learn more about the Sphinx system, the best place to start is
+the Sphinx documentation itself, available `here <http://sphinx.pocoo.org/>`_.
+.. _installing_sphinx_in_a_venv:
+Installing Sphinx in a Virtual Environment
+Most Python developers prefer to work with tools inside a *virtualenv* (virtual
+environment) instance, which functions as an application sandbox. This avoids
+polluting your system installation with different packages used by various
+projects (and ensures that dependencies for different packages don't conflict
+with one another). Of course, you need to first have the virtualenv software
+itself which generally would be installed at the system level::
+ $ sudo easy_install virtualenv
+but after that you no longer need to install additional packages in the system
+directories.
+Once you have the *virtualenv* tool itself installed, you can create a
+virtualenv for Sphinx using::
+ $ virtualenv ~/my-sphinx-install
+ New python executable in /Users/dummy/my-sphinx-install/bin/python
+ Installing setuptools............done.
+ Installing pip...............done.
+ $ ~/my-sphinx-install/bin/easy_install sphinx
+ ... install messages here ...
+and from now on you can "activate" the *virtualenv* using::
+ $ source ~/my-sphinx-install/bin/activate
+which will change your PATH to ensure the sphinx-build tool from inside the
+virtual environment will be used. See the `virtualenv website
+<http://www.virtualenv.org/en/latest/index.html>`_ for more information on using
+virtual environments.
diff --git a/contrib/llvm/tools/lld/docs/windows_support.rst b/contrib/llvm/tools/lld/docs/windows_support.rst
new file mode 100644
index 000000000000..c9723c42fcc8
--- /dev/null
+++ b/contrib/llvm/tools/lld/docs/windows_support.rst
@@ -0,0 +1,97 @@
+.. raw:: html
+ <style type="text/css">
+ .none { background-color: #FFCCCC }
+ .partial { background-color: #FFFF99 }
+ .good { background-color: #CCFF99 }
+ </style>
+.. role:: none
+.. role:: partial
+.. role:: good
+Windows support
+LLD supports the Windows operating system. When invoked as ``lld-link.exe`` or
+with ``-flavor link``, the driver for the Windows operating system is used to parse
+command line options, and it drives further linking processes. LLD accepts
+almost all command line options that the linker shipped with Microsoft Visual
+C++ (link.exe) supports.
+The current status is that LLD is used to link production builds of large
+real-world binaries such as Firefox and Chromium.
+Development status
+ :good:`Mostly done`. Some exotic command line options that are not usually
+ used for application development, such as ``/DRIVER``, are not supported.
+Linking against DLL
+ :good:`Done`. LLD can read import libraries needed to link against DLL. Both
+ export-by-name and export-by-ordinal are supported.
+Linking against static library
+ :good:`Done`. The format of static library (.lib) on Windows is actually the
+ same as on Unix (.a). LLD can read it.
+Creating DLL
+ :good:`Done`. LLD creates a DLL if ``/DLL`` option is given. Exported
+ functions can be specified either via command line (``/EXPORT``) or via
+ module-definition file (.def). Both export-by-name and export-by-ordinal are
+ supported.
+Windows resource files support
+ :good:`Done`. If an ``.res`` file is given, LLD converts the file to a COFF
+ file using LLVM's Object library.
+Safe Structured Exception Handler (SEH)
+ :good:`Done` for both x86 and x64.
+Module-definition file
+ :partial:`Partially done`. LLD currently recognizes these directives:
+ ``EXPORTS``, ``HEAPSIZE``, ``STACKSIZE``, ``NAME``, and ``VERSION``.
+Debug info
+ :good:`Done`. LLD can emit PDBs that are at parity with those generated by
+ link.exe. However, LLD does not support /DEBUG:FASTLINK.
+Downloading LLD
+The Windows version of LLD is included in the `pre-built binaries of LLVM's
+releases <https://releases.llvm.org/download.html>`_ and in the `LLVM Snapshot
+Builds <https://llvm.org/builds/>`_.
+Building LLD
+Using Visual Studio IDE/MSBuild
+1. Check out LLVM and LLD from the LLVM SVN repository (or Git mirror),
+#. run ``cmake -G "Visual Studio 12" <llvm-source-dir>`` from VS command prompt,
+#. open LLVM.sln with Visual Studio, and
+#. build ``lld`` target in ``lld executables`` folder
+Alternatively, you can use msbuild if you don't like to work in an IDE::
+ msbuild LLVM.sln /m /target:"lld executables\lld"
+MSBuild.exe had been shipped as a component of the .NET framework, but since
+2013 it's part of Visual Studio. You can find it at "C:\\Program Files
+(x86)\\msbuild".
+You can build LLD as a 64 bit application. To do that, open VS2013 x64 command
+prompt and run cmake for "Visual Studio 12 Win64" target.
+Using Ninja
+1. Check out LLVM and LLD from the LLVM SVN repository (or Git mirror),
+#. run ``cmake -G Ninja <llvm-source-dir>`` from VS command prompt,
+#. run ``ninja lld``
diff --git a/contrib/llvm/tools/lld/include/lld/Common/Args.h b/contrib/llvm/tools/lld/include/lld/Common/Args.h
new file mode 100644
index 000000000000..769d4840cf06
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Common/Args.h
@@ -0,0 +1,38 @@
+//===- Args.h ---------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#ifndef LLD_ARGS_H
+#define LLD_ARGS_H
+#include "lld/Common/LLVM.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <vector>
+namespace llvm {
+namespace opt {
+class InputArgList;
+} // namespace llvm
+namespace lld {
+namespace args {
+int getInteger(llvm::opt::InputArgList &Args, unsigned Key, int Default);
+std::vector<StringRef> getStrings(llvm::opt::InputArgList &Args, int Id);
+uint64_t getZOptionValue(llvm::opt::InputArgList &Args, int Id, StringRef Key,
+ uint64_t Default);
+std::vector<StringRef> getLines(MemoryBufferRef MB);
+StringRef getFilenameWithoutExe(StringRef Path);
+} // namespace args
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/include/lld/Common/Driver.h b/contrib/llvm/tools/lld/include/lld/Common/Driver.h
new file mode 100644
index 000000000000..f6d92933af62
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Common/Driver.h
@@ -0,0 +1,43 @@
+//===- lld/Common/Driver.h - Linker Driver Emulator -----------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/raw_ostream.h"
+namespace lld {
+namespace coff {
+bool link(llvm::ArrayRef<const char *> Args, bool CanExitEarly,
+ llvm::raw_ostream &Diag = llvm::errs());
+namespace mingw {
+bool link(llvm::ArrayRef<const char *> Args,
+ llvm::raw_ostream &Diag = llvm::errs());
+namespace elf {
+bool link(llvm::ArrayRef<const char *> Args, bool CanExitEarly,
+ llvm::raw_ostream &Diag = llvm::errs());
+namespace mach_o {
+bool link(llvm::ArrayRef<const char *> Args, bool CanExitEarly,
+ llvm::raw_ostream &Diag = llvm::errs());
+namespace wasm {
+bool link(llvm::ArrayRef<const char *> Args, bool CanExitEarly,
+ llvm::raw_ostream &Diag = llvm::errs());
diff --git a/contrib/llvm/tools/lld/include/lld/Common/ErrorHandler.h b/contrib/llvm/tools/lld/include/lld/Common/ErrorHandler.h
new file mode 100644
index 000000000000..c169f7b50de8
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Common/ErrorHandler.h
@@ -0,0 +1,160 @@
+//===- ErrorHandler.h -------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// We designed lld's error handlers with the following goals in mind:
+// - Errors can occur at any place where we handle user input, but we don't
+// want them to affect the normal execution path too much. Ideally,
+// handling errors should be as simple as reporting them and exiting (but
+// without actually exiting).
+// In particular, the design to wrap all functions that could fail with
+// ErrorOr<T> is rejected because otherwise we would have to wrap a large
+// number of functions in lld with ErrorOr. With that approach, if some
+// function F can fail, not only F but all functions that transitively call
+// F have to be wrapped with ErrorOr. That seemed too much.
+// - Finding only one error at a time is not sufficient. We want to find as
+// many errors as possible with one execution of the linker. That means the
+// linker needs to keep running after a first error and give up at some
+// checkpoint (beyond which it would find cascading, false errors caused by
+// the previous errors).
+// - We want a simple interface to report errors. Unlike Clang, the data we
+// handle is compiled binary, so we don't need an error reporting mechanism
+// that's as sophisticated as the one that Clang has.
+// The current lld's error handling mechanism is simple:
+// - When you find an error, report it using error() and continue as far as
+// you can. An internal error counter is incremented by one every time you
+// call error().
+// A common idiom to handle an error is calling error() and then returning
+// a reasonable default value. For example, if your function handles a
+// user-supplied alignment value, and if you find an invalid alignment
+// (e.g. 17 which is not 2^n), you may report it using error() and continue
+// as if it were alignment 1 (which is the simplest reasonable value).
+// Note that you should not continue with an invalid value; that breaks the
+// internal consistency. You need to ensure that all variables have some sane
+// value even after an error has occurred. So, when you have to continue with
+// some value, always use a dummy value.
+// - Find a reasonable checkpoint where you want to stop the linker, and
+// add code to return from the function if errorCount() > 0. In most cases,
+// a checkpoint already exists, so you don't need to do anything for this.
+// This interface satisfies all the goals that we mentioned above.
+// You should never call fatal() except for reporting a corrupted input file.
+// fatal() immediately terminates the linker, so the function is not desirable
+// if you are using lld as a subroutine in another program, and with that you
+// can find only one error at a time.
+// warn() doesn't do anything but print out a given message.
+// It is not recommended to use llvm::outs() or llvm::errs() directly in lld
+// because they are not thread-safe. The functions declared in this file are
+// thread-safe.
+#include "lld/Common/LLVM.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileOutputBuffer.h"
+namespace llvm {
+class DiagnosticInfo;
+namespace lld {
+class ErrorHandler {
+ uint64_t ErrorCount = 0;
+ uint64_t ErrorLimit = 20;
+ StringRef ErrorLimitExceededMsg = "too many errors emitted, stopping now";
+ StringRef LogName = "lld";
+ llvm::raw_ostream *ErrorOS = &llvm::errs();
+ bool ColorDiagnostics = llvm::errs().has_colors();
+ bool ExitEarly = true;
+ bool FatalWarnings = false;
+ bool Verbose = false;
+ void error(const Twine &Msg);
+ LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg);
+ void log(const Twine &Msg);
+ void message(const Twine &Msg);
+ void warn(const Twine &Msg);
+ std::unique_ptr<llvm::FileOutputBuffer> OutputBuffer;
+ void print(StringRef S, raw_ostream::Colors C);
+/// Returns the default error handler.
+ErrorHandler &errorHandler();
+inline void error(const Twine &Msg) { errorHandler().error(Msg); }
+inline LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg) {
+ errorHandler().fatal(Msg);
+inline void log(const Twine &Msg) { errorHandler().log(Msg); }
+inline void message(const Twine &Msg) { errorHandler().message(Msg); }
+inline void warn(const Twine &Msg) { errorHandler().warn(Msg); }
+inline uint64_t errorCount() { return errorHandler().ErrorCount; }
+LLVM_ATTRIBUTE_NORETURN void exitLld(int Val);
+void diagnosticHandler(const llvm::DiagnosticInfo &DI);
+void checkError(Error E);
+// check functions are convenient functions to strip errors
+// from error-or-value objects.
+template <class T> T check(ErrorOr<T> E) {
+ if (auto EC = E.getError())
+ fatal(EC.message());
+ return std::move(*E);
+template <class T> T check(Expected<T> E) {
+ if (!E)
+ fatal(llvm::toString(E.takeError()));
+ return std::move(*E);
+template <class T>
+T check2(ErrorOr<T> E, llvm::function_ref<std::string()> Prefix) {
+ if (auto EC = E.getError())
+ fatal(Prefix() + ": " + EC.message());
+ return std::move(*E);
+template <class T>
+T check2(Expected<T> E, llvm::function_ref<std::string()> Prefix) {
+ if (!E)
+ fatal(Prefix() + ": " + toString(E.takeError()));
+ return std::move(*E);
+inline std::string toString(const Twine &S) { return S.str(); }
+// To evaluate the second argument lazily, we use a C macro.
+#define CHECK(E, S) check2((E), [&] { return toString(S); })
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/include/lld/Common/LLVM.h b/contrib/llvm/tools/lld/include/lld/Common/LLVM.h
new file mode 100644
index 000000000000..95a2aa903957
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Common/LLVM.h
@@ -0,0 +1,99 @@
+//===--- LLVM.h - Import various common LLVM datatypes ----------*- C++ -*-===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file forward declares and imports various common LLVM datatypes that
+// lld wants to use unqualified.
+// This should be the only #include, force #includes of all the others on
+// clients.
+#include "llvm/ADT/Hashing.h"
+#include "llvm/Support/Casting.h"
+#include <utility>
+namespace llvm {
+// ADT's.
+class raw_ostream;
+class Error;
+class StringRef;
+class Twine;
+class MemoryBuffer;
+class MemoryBufferRef;
+template <typename T> class ArrayRef;
+template <unsigned InternalLen> class SmallString;
+template <typename T, unsigned N> class SmallVector;
+template <typename T> class ErrorOr;
+template <typename T> class Expected;
+namespace object {
+class WasmObjectFile;
+struct WasmSection;
+struct WasmSegment;
+class WasmSymbol;
+} // namespace object
+namespace wasm {
+struct WasmEvent;
+struct WasmEventType;
+struct WasmFunction;
+struct WasmGlobal;
+struct WasmGlobalType;
+struct WasmRelocation;
+struct WasmSignature;
+} // namespace wasm
+} // namespace llvm
+namespace lld {
+// Casting operators.
+using llvm::cast;
+using llvm::cast_or_null;
+using llvm::dyn_cast;
+using llvm::dyn_cast_or_null;
+using llvm::isa;
+// ADT's.
+using llvm::ArrayRef;
+using llvm::Error;
+using llvm::ErrorOr;
+using llvm::Expected;
+using llvm::MemoryBuffer;
+using llvm::MemoryBufferRef;
+using llvm::raw_ostream;
+using llvm::SmallString;
+using llvm::SmallVector;
+using llvm::StringRef;
+using llvm::Twine;
+using llvm::object::WasmObjectFile;
+using llvm::object::WasmSection;
+using llvm::object::WasmSegment;
+using llvm::object::WasmSymbol;
+using llvm::wasm::WasmEvent;
+using llvm::wasm::WasmEventType;
+using llvm::wasm::WasmFunction;
+using llvm::wasm::WasmGlobal;
+using llvm::wasm::WasmGlobalType;
+using llvm::wasm::WasmRelocation;
+using llvm::wasm::WasmSignature;
+} // end namespace lld.
+namespace std {
+template <> struct hash<llvm::StringRef> {
+ size_t operator()(const llvm::StringRef &s) const {
+ return llvm::hash_value(s);
+ }
+} // namespace std
diff --git a/contrib/llvm/tools/lld/include/lld/Common/Memory.h b/contrib/llvm/tools/lld/include/lld/Common/Memory.h
new file mode 100644
index 000000000000..699f7c1654cd
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Common/Memory.h
@@ -0,0 +1,60 @@
+//===- Memory.h -------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file defines arena allocators.
+// Almost all large objects, such as files, sections or symbols, are
+// used for the entire lifetime of the linker once they are created.
+// This usage characteristic makes arena allocator an attractive choice
+// where the entire linker is one arena. With an arena, newly created
+// objects belong to the arena and freed all at once when everything is done.
+// Arena allocators are efficient and easy to understand.
+// Most objects are allocated using the arena allocators defined by this file.
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/StringSaver.h"
+#include <vector>
+namespace lld {
+// Use this arena if your object doesn't have a destructor.
+extern llvm::BumpPtrAllocator BAlloc;
+extern llvm::StringSaver Saver;
+void freeArena();
+// These two classes are a hack to keep track of all
+// SpecificBumpPtrAllocator instances.
+struct SpecificAllocBase {
+ SpecificAllocBase() { Instances.push_back(this); }
+ virtual ~SpecificAllocBase() = default;
+ virtual void reset() = 0;
+ static std::vector<SpecificAllocBase *> Instances;
+template <class T> struct SpecificAlloc : public SpecificAllocBase {
+ void reset() override { Alloc.DestroyAll(); }
+ llvm::SpecificBumpPtrAllocator<T> Alloc;
+// Use this arena if your object has a destructor.
+// Your destructor will be invoked from freeArena().
+template <typename T, typename... U> T *make(U &&... Args) {
+ static SpecificAlloc<T> Alloc;
+ return new (Alloc.Alloc.Allocate()) T(std::forward<U>(Args)...);
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/include/lld/Common/Reproduce.h b/contrib/llvm/tools/lld/include/lld/Common/Reproduce.h
new file mode 100644
index 000000000000..0f425de269c7
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Common/Reproduce.h
@@ -0,0 +1,39 @@
+//===- Reproduce.h - Utilities for creating reproducers ---------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/LLVM.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+namespace llvm {
+namespace opt { class Arg; }
+namespace lld {
+// Makes a given pathname an absolute path first, and then removes the
+// beginning /. For example, "../foo.o" is converted to "home/john/foo.o",
+// assuming that the current directory is "/home/john/bar".
+std::string relativeToRoot(StringRef Path);
+// Quote a given string if it contains a space character.
+std::string quote(StringRef S);
+// Rewrite the given path if a file exists with that pathname, otherwise
+// returns the original path.
+std::string rewritePath(StringRef S);
+// Returns the string form of the given argument.
+std::string toString(const llvm::opt::Arg &Arg);
diff --git a/contrib/llvm/tools/lld/include/lld/Common/Strings.h b/contrib/llvm/tools/lld/include/lld/Common/Strings.h
new file mode 100644
index 000000000000..566030e43aa6
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Common/Strings.h
@@ -0,0 +1,46 @@
+//===- Strings.h ------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#ifndef LLD_STRINGS_H
+#define LLD_STRINGS_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/GlobPattern.h"
+#include <string>
+#include <vector>
+namespace lld {
+// Returns a demangled C++ symbol name. If Name is not a mangled
+// name, it returns Optional::None.
+llvm::Optional<std::string> demangleItanium(llvm::StringRef Name);
+llvm::Optional<std::string> demangleMSVC(llvm::StringRef S);
+std::vector<uint8_t> parseHex(llvm::StringRef S);
+bool isValidCIdentifier(llvm::StringRef S);
+// Write the contents of a buffer to a file
+void saveBuffer(llvm::StringRef Buffer, const llvm::Twine &Path);
+// This class represents multiple glob patterns.
+class StringMatcher {
+ StringMatcher() = default;
+ explicit StringMatcher(llvm::ArrayRef<llvm::StringRef> Pat);
+ bool match(llvm::StringRef S) const;
+ std::vector<llvm::GlobPattern> Patterns;
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/include/lld/Common/TargetOptionsCommandFlags.h b/contrib/llvm/tools/lld/include/lld/Common/TargetOptionsCommandFlags.h
new file mode 100644
index 000000000000..2eaecb72759e
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Common/TargetOptionsCommandFlags.h
@@ -0,0 +1,23 @@
+//===-- TargetOptionsCommandFlags.h ----------------------------*- C++ -*-===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// Helper to create TargetOptions from command line flags.
+#include "llvm/ADT/Optional.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetOptions.h"
+namespace lld {
+llvm::TargetOptions InitTargetOptionsFromCodeGenFlags();
+llvm::Optional<llvm::CodeModel::Model> GetCodeModelFromCMModel();
+std::string GetCPUStr();
+std::vector<std::string> GetMAttrs();
diff --git a/contrib/llvm/tools/lld/include/lld/Common/Threads.h b/contrib/llvm/tools/lld/include/lld/Common/Threads.h
new file mode 100644
index 000000000000..1425abd12922
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Common/Threads.h
@@ -0,0 +1,86 @@
+//===- Threads.h ------------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// LLD supports threads to distribute workloads to multiple cores. Using
+// multicore is most effective when more than one core is idle. At the
+// last step of a build, it is often the case that a linker is the only
+// active process on a computer. So, we are naturally interested in using
+// threads wisely to reduce latency to deliver results to users.
+// That said, we don't want to do "too clever" things using threads.
+// Complex multi-threaded algorithms are sometimes extremely hard to
+// reason about and can easily mess up the entire design.
+// Fortunately, when a linker links large programs (when the link time is
+// most critical), it spends most of its time working on a massive number of
+// small pieces of data of the same kind, and there are opportunities for
+// large parallelism there. Here are examples:
+// - We have hundreds of thousands of input sections that need to be
+// copied to a result file at the last step of link. Once we fix a file
+// layout, each section can be copied to its destination and its
+// relocations can be applied independently.
+// - We have tens of millions of small strings when constructing a
+// mergeable string section.
+// For the cases such as the former, we can just use parallelForEach
+// instead of std::for_each (or a plain for loop). Because tasks are
+// completely independent from each other, we can run them in parallel
+// without any coordination between them. That's very easy to understand
+// and reason about.
+// For the cases such as the latter, we can use parallel algorithms to
+// deal with massive data. We have to write code for a tailored algorithm
+// for each problem, but the complexity of multi-threading is isolated in
+// a single pass and doesn't affect the linker's overall design.
+// The above approach seems to be working fairly well. As an example, when
+// linking Chromium (output size 1.6 GB), using 4 cores reduces latency to
+// 75% compared to single core (from 12.66 seconds to 9.55 seconds) on my
+// Ivy Bridge Xeon 2.8 GHz machine. Using 40 cores reduces it to 63% (from
+// 12.66 seconds to 7.95 seconds). Because of Amdahl's law, the
+// speedup is not linear, but as you add more cores, it gets faster.
+// On a final note, if you are trying to optimize, keep the axiom "don't
+// guess, measure!" in mind. Some important passes of the linker are not
+// that slow. For example, resolving all symbols is not a very heavy pass,
+// although it would be very hard to parallelize it. You want to first
+// identify a slow pass and then optimize it.
+#include "llvm/Support/Parallel.h"
+#include <functional>
+namespace lld {
+extern bool ThreadsEnabled;
+template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
+ if (ThreadsEnabled)
+ for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
+ else
+ for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
+inline void parallelForEachN(size_t Begin, size_t End,
+ llvm::function_ref<void(size_t)> Fn) {
+ if (ThreadsEnabled)
+ for_each_n(llvm::parallel::par, Begin, End, Fn);
+ else
+ for_each_n(llvm::parallel::seq, Begin, End, Fn);
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/include/lld/Common/Timer.h b/contrib/llvm/tools/lld/include/lld/Common/Timer.h
new file mode 100644
index 000000000000..6654af626919
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Common/Timer.h
@@ -0,0 +1,59 @@
+//===- Timer.h ----------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include <assert.h>
+#include <chrono>
+#include <map>
+#include <memory>
+namespace lld {
+class Timer;
+struct ScopedTimer {
+ explicit ScopedTimer(Timer &T);
+ ~ScopedTimer();
+ void stop();
+ Timer *T = nullptr;
+class Timer {
+ Timer(llvm::StringRef Name, Timer &Parent);
+ static Timer &root();
+ void start();
+ void stop();
+ void print();
+ double millis() const;
+ explicit Timer(llvm::StringRef Name);
+ void print(int Depth, double TotalDuration, bool Recurse = true) const;
+ std::chrono::time_point<std::chrono::high_resolution_clock> StartTime;
+ std::chrono::nanoseconds Total;
+ std::vector<Timer *> Children;
+ std::string Name;
+ Timer *Parent;
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/include/lld/Common/Version.h b/contrib/llvm/tools/lld/include/lld/Common/Version.h
new file mode 100644
index 000000000000..23a10ed6dbf3
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Common/Version.h
@@ -0,0 +1,25 @@
+//===- lld/Common/Version.h - LLD Version Number ----------------*- C++ -*-===//
+// The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// Defines a version-related utility function.
+#ifndef LLD_VERSION_H
+#define LLD_VERSION_H
+#include "lld/Common/Version.inc"
+#include "llvm/ADT/StringRef.h"
+namespace lld {
+/// Retrieves a string representing the complete lld version.
+std::string getLLDVersion();
+#endif // LLD_VERSION_H
diff --git a/contrib/llvm/tools/lld/include/lld/Common/Version.inc.in b/contrib/llvm/tools/lld/include/lld/Common/Version.inc.in
new file mode 100644
index 000000000000..2789a5c46089
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Common/Version.inc.in
@@ -0,0 +1,6 @@
diff --git a/contrib/llvm/tools/lld/include/lld/Core/AbsoluteAtom.h b/contrib/llvm/tools/lld/include/lld/Core/AbsoluteAtom.h
new file mode 100644
index 000000000000..ed25297cea81
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Core/AbsoluteAtom.h
@@ -0,0 +1,43 @@
+//===- Core/AbsoluteAtom.h - An absolute Atom -----------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Core/Atom.h"
+namespace lld {
+/// An AbsoluteAtom has no content.
+/// It exists to represent content at fixed addresses in memory.
+class AbsoluteAtom : public Atom {
+ virtual uint64_t value() const = 0;
+ /// scope - The visibility of this atom to other atoms. C static functions
+ /// have scope scopeTranslationUnit. Regular C functions have scope
+ /// scopeGlobal. Functions compiled with visibility=hidden have scope
+ /// scopeLinkageUnit so they can be seen by other atoms being linked but not
+ /// by the OS loader.
+ virtual Scope scope() const = 0;
+ static bool classof(const Atom *a) {
+ return a->definition() == definitionAbsolute;
+ }
+ static bool classof(const AbsoluteAtom *) { return true; }
+ AbsoluteAtom() : Atom(definitionAbsolute) {}
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/include/lld/Core/ArchiveLibraryFile.h b/contrib/llvm/tools/lld/include/lld/Core/ArchiveLibraryFile.h
new file mode 100644
index 000000000000..2c736e7d6c61
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Core/ArchiveLibraryFile.h
@@ -0,0 +1,47 @@
+//===- Core/ArchiveLibraryFile.h - Models static library ------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Core/File.h"
+#include <set>
+namespace lld {
+/// The ArchiveLibraryFile subclass of File is used to represent unix
+/// static library archives. These libraries provide no atoms to the
+/// initial set of atoms linked. Instead, the Resolver will query
+/// ArchiveLibraryFile instances for specific symbols names using the
+/// find() method. If the archive contains an object file which has a
+/// DefinedAtom whose scope is not translationUnit, then that entire
+/// object file File is returned.
+class ArchiveLibraryFile : public File {
+ /// Type-check hook: a File is an ArchiveLibraryFile exactly when its kind
+ /// is kindArchiveLibrary.
+ static bool classof(const File *f) {
+   const bool isArchive = (kindArchiveLibrary == f->kind());
+   return isArchive;
+ }
+ /// Check if any member of the archive contains an Atom with the
+ /// specified name and return the File object for that member, or nullptr.
+ virtual File *find(StringRef name) = 0;
+ virtual std::error_code
+ parseAllMembers(std::vector<std::unique_ptr<File>> &result) = 0;
+ /// only subclasses of ArchiveLibraryFile can be instantiated
+ ArchiveLibraryFile(StringRef path) : File(path, kindArchiveLibrary) {}
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/include/lld/Core/Atom.h b/contrib/llvm/tools/lld/include/lld/Core/Atom.h
new file mode 100644
index 000000000000..149c3d5ee2c5
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Core/Atom.h
@@ -0,0 +1,131 @@
+//===- Core/Atom.h - A node in linking graph --------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#ifndef LLD_CORE_ATOM_H
+#define LLD_CORE_ATOM_H
+#include "lld/Common/LLVM.h"
+#include "llvm/ADT/StringRef.h"
+namespace lld {
+class File;
+template<typename T>
+class OwningAtomPtr;
+/// The linker has a Graph Theory model of linking. An object file is seen
+/// as a set of Atoms with References to other Atoms. Each Atom is a node
+/// and each Reference is an edge. An Atom can be a DefinedAtom which has
+/// content or a UndefinedAtom which is a placeholder and represents an
+/// undefined symbol (extern declaration).
+class Atom {
+ template<typename T> friend class OwningAtomPtr;
+ /// Whether this atom is defined or a proxy for an undefined symbol
+ enum Definition {
+ definitionRegular, ///< Normal C/C++ function or global variable.
+ definitionAbsolute, ///< Asm-only (foo = 10). Not tied to any content.
+ definitionUndefined, ///< Only in .o files to model reference to undef.
+ definitionSharedLibrary ///< Only in shared libraries to model export.
+ };
+ /// The scope in which this atom is accessible to other atoms.
+ enum Scope {
+ scopeTranslationUnit, ///< Accessible only to atoms in the same translation
+ /// unit (e.g. a C static).
+ scopeLinkageUnit, ///< Accessible to atoms being linked but not visible
+ /// to runtime loader (e.g. visibility=hidden).
+ scopeGlobal ///< Accessible to all atoms and visible to runtime
+ /// loader (e.g. visibility=default).
+ };
+ /// file - returns the File that produced/owns this Atom
+ virtual const File& file() const = 0;
+ /// name - The name of the atom. For a function atom, it is the (mangled)
+ /// name of the function.
+ virtual StringRef name() const = 0;
+ /// definition - Whether this atom is a definition or represents an undefined
+ /// symbol.
+ Definition definition() const { return _definition; }
+ static bool classof(const Atom *a) { return true; }
+ /// Atom is an abstract base class. Only subclasses can access constructor.
+ explicit Atom(Definition def) : _definition(def) {}
+ /// The memory for Atom objects is always managed by the owning File
+ /// object. Therefore, no one but the owning File object should call
+ /// delete on an Atom. In fact, some File objects may bulk allocate
+ /// an array of Atoms, so they cannot be individually deleted by anyone.
+ virtual ~Atom() = default;
+ Definition _definition;
+/// Class which owns an atom pointer and runs the atom destructor when the
+/// owning pointer goes out of scope.
+template<typename T>
+class OwningAtomPtr {
+ OwningAtomPtr(const OwningAtomPtr &) = delete;
+ void operator=(const OwningAtomPtr &) = delete;
+ OwningAtomPtr() = default;
+ OwningAtomPtr(T *atom) : atom(atom) { }
+ /// Runs the destructor of the owned atom, if one is held.
+ ~OwningAtomPtr() {
+   if (!atom)
+     return;
+   runDestructor(atom);
+ }
+ /// Explicitly invokes the Atom destructor on \p atom. Only the destructor
+ /// is run; no deallocation function is called here.
+ void runDestructor(Atom *atom) {
+ atom->~Atom();
+ }
+ /// Move constructor: takes over the atom held by \p ptr and leaves \p ptr
+ /// holding nullptr.
+ OwningAtomPtr(OwningAtomPtr &&ptr) : atom(ptr.atom) {
+ ptr.atom = nullptr;
+ }
+ /// Move assignment: destroys the atom currently owned (if any), takes over
+ /// the atom held by \p ptr, and leaves \p ptr holding nullptr.
+ ///
+ /// Self-assignment is a no-op. Without this check, assigning an object to
+ /// itself would run the destructor of the owned atom and then drop the
+ /// pointer to it.
+ void operator=(OwningAtomPtr&& ptr) {
+   if (this == &ptr)
+     return;
+   if (atom)
+     runDestructor(atom);
+   atom = ptr.atom;
+   ptr.atom = nullptr;
+ }
+ /// Returns a const reference to the stored atom pointer; ownership is not
+ /// transferred.
+ T *const &get() const {
+ return atom;
+ }
+ /// Returns a mutable reference to the stored atom pointer, so the caller
+ /// can overwrite the pointer in place.
+ T *&get() {
+ return atom;
+ }
+ /// Gives up ownership: returns the stored atom pointer and resets this
+ /// object to nullptr without running the atom's destructor.
+ T *release() {
+   T *released = atom;
+   atom = nullptr;
+   return released;
+ }
+ T *atom = nullptr;
+} // end namespace lld
+#endif // LLD_CORE_ATOM_H
diff --git a/contrib/llvm/tools/lld/include/lld/Core/DefinedAtom.h b/contrib/llvm/tools/lld/include/lld/Core/DefinedAtom.h
new file mode 100644
index 000000000000..ba10b45411f1
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Core/DefinedAtom.h
@@ -0,0 +1,374 @@
+//===- Core/DefinedAtom.h - An Atom with content --------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/LLVM.h"
+#include "lld/Core/Atom.h"
+#include "lld/Core/Reference.h"
+#include "llvm/Support/ErrorHandling.h"
+namespace lld {
+class File;
+/// The fundamental unit of linking.
+/// A C function or global variable is an atom. An atom has content and
+/// attributes. The content of a function atom is the instructions that
+/// implement the function. The content of a global variable atom is its
+/// initial bytes.
+/// Here are some example attribute sets for common atoms. If a particular
+/// attribute is not listed, the default values are: definition=regular,
+/// sectionChoice=basedOnContent, scope=translationUnit, merge=no,
+/// deadStrip=normal, interposable=no
+/// C function: void foo() {} <br>
+/// name=foo, type=code, perm=r_x, scope=global
+/// C static function: static void func() {} <br>
+/// name=func, type=code, perm=r_x
+/// C global variable: int count = 1; <br>
+/// name=count, type=data, perm=rw_, scope=global
+/// C tentative definition: int bar; <br>
+/// name=bar, type=zerofill, perm=rw_, scope=global,
+/// merge=asTentative, interposable=yesAndRuntimeWeak
+/// Uninitialized C static variable: static int stuff; <br>
+/// name=stuff, type=zerofill, perm=rw_
+/// Weak C function: __attribute__((weak)) void foo() {} <br>
+/// name=foo, type=code, perm=r_x, scope=global, merge=asWeak
+/// Hidden C function: __attribute__((visibility("hidden"))) void foo() {}<br>
+/// name=foo, type=code, perm=r_x, scope=linkageUnit
+/// No-dead-strip function: __attribute__((used)) void foo() {} <br>
+/// name=foo, type=code, perm=r_x, scope=global, deadStrip=never
+/// Non-inlined C++ inline method: inline void Foo::doit() {} <br>
+/// name=_ZN3Foo4doitEv, type=code, perm=r_x, scope=global,
+/// mergeDupes=asWeak
+/// Non-inlined C++ inline method whose address is taken:
+/// inline void Foo::doit() {} <br>
+/// name=_ZN3Foo4doitEv, type=code, perm=r_x, scope=global,
+/// mergeDupes=asAddressedWeak
+/// literal c-string: "hello" <br>
+/// name="" type=cstring, perm=r__, scope=linkageUnit
+/// literal double: 1.234 <br>
+/// name="" type=literal8, perm=r__, scope=linkageUnit
+/// constant: { 1,2,3 } <br>
+/// name="" type=constant, perm=r__, scope=linkageUnit
+/// Pointer to initializer function: <br>
+/// name="" type=initializer, perm=rw_l,
+/// sectionChoice=customRequired
+/// C function place in custom section: __attribute__((section("__foo")))
+/// void foo() {} <br>
+/// name=foo, type=code, perm=r_x, scope=global,
+/// sectionChoice=customRequired, customSectionName=__foo
+class DefinedAtom : public Atom {
+ enum Interposable {
+ interposeNo, // linker can directly bind uses of this atom
+ interposeYes, // linker must indirect (through GOT) uses
+ interposeYesAndRuntimeWeak // must indirect and mark symbol weak in final
+ // linked image
+ };
+ /// How the linker should handle multiple atoms that have the same name.
+ enum Merge {
+ mergeNo, // Another atom with same name is error
+ mergeAsTentative, // Is ANSI C tentative definition, can be coalesced
+ mergeAsWeak, // Is C++ inline definition that was not inlined,
+ // but address was not taken, so atom can be hidden
+ // by linker
+ mergeAsWeakAndAddressUsed, // Is C++ inline definition whose
+ // address was taken.
+ mergeSameNameAndSize, // Another atom with different size is error
+ mergeByLargestSection, // Choose an atom whose section is the largest.
+ mergeByContent, // Merge with other constants with same content.
+ };
+ /// The type of content an atom holds, such as code or data.
+ /// Entries tagged [Darwin] are marked as Darwin-specific by their comments.
+ enum ContentType {
+ typeUnknown, // for use with definitionUndefined
+ typeMachHeader, // atom representing mach_header [Darwin]
+ typeCode, // executable code
+ typeResolver, // function which returns address of target
+ typeBranchIsland, // linker created for large binaries
+ typeBranchShim, // linker created to switch thumb mode
+ typeStub, // linker created for calling external function
+ typeStubHelper, // linker created for initial stub binding
+ typeConstant, // a read-only constant
+ typeCString, // a zero terminated UTF8 C string
+ typeUTF16String, // a zero terminated UTF16 string
+ typeCFI, // a FDE or CIE from dwarf unwind info
+ typeLSDA, // extra unwinding info
+ typeLiteral4, // a four-byte read-only constant
+ typeLiteral8, // an eight-byte read-only constant
+ typeLiteral16, // a sixteen-byte read-only constant
+ typeData, // read-write data
+ typeDataFast, // allow data to be quickly accessed
+ typeZeroFill, // zero-fill data
+ typeZeroFillFast, // allow zero-fill data to be quickly accessed
+ typeConstData, // read-only data after dynamic linker is done
+ typeObjC1Class, // ObjC1 class [Darwin]
+ typeLazyPointer, // pointer through which a stub jumps
+ typeLazyDylibPointer, // pointer through which a stub jumps [Darwin]
+ typeNonLazyPointer, // pointer to external symbol
+ typeCFString, // NS/CFString object [Darwin]
+ typeGOT, // pointer to external symbol
+ typeInitializerPtr, // pointer to initializer function
+ typeTerminatorPtr, // pointer to terminator function
+ typeCStringPtr, // pointer to UTF8 C string [Darwin]
+ typeObjCClassPtr, // pointer to ObjC class [Darwin]
+ typeObjC2CategoryList, // pointers to ObjC category [Darwin]
+ typeObjCImageInfo, // pointer to ObjC class [Darwin]
+ typeObjCMethodList, // pointer to ObjC method list [Darwin]
+ typeDTraceDOF, // runtime data for Dtrace [Darwin]
+ typeInterposingTuples, // tuples of interposing info for dyld [Darwin]
+ typeTempLTO, // temporary atom for bitcode reader
+ typeCompactUnwindInfo, // runtime data for unwinder [Darwin]
+ typeProcessedUnwindInfo,// compressed compact unwind info [Darwin]
+ typeThunkTLV, // thunk used to access a TLV [Darwin]
+ typeTLVInitialData, // initial data for a TLV [Darwin]
+ typeTLVInitialZeroFill, // TLV initial zero fill data [Darwin]
+ typeTLVInitializerPtr, // pointer to thread local initializer [Darwin]
+ typeDSOHandle, // atom representing DSO handle [Darwin]
+ typeSectCreate, // Created via the -sectcreate option [Darwin]
+ };
+ // Permission bits for atoms and segments. The order of these values is
+ // important, because the layout pass may sort atoms by permission if other
+ // attributes are the same.
+ enum ContentPermissions {
+ perm___ = 0, // mapped as unaccessible
+ permR__ = 8, // mapped read-only
+ permRW_ = 8 + 2, // mapped readable and writable
+ permRW_L = 8 + 2 + 1, // initially mapped r/w, then made read-only
+ // loader writable
+ permR_X = 8 + 4, // mapped readable and executable
+ permRWX = 8 + 2 + 4, // mapped readable and writable and executable
+ permUnknown = 16 // unknown or invalid permissions
+ };
+ enum SectionChoice {
+ sectionBasedOnContent, // linker infers final section based on content
+ sectionCustomPreferred, // linker may place in specific section
+ sectionCustomRequired // linker must place in specific section
+ };
+ enum DeadStripKind {
+ deadStripNormal, // linker may dead strip this atom
+ deadStripNever, // linker must never dead strip this atom
+ deadStripAlways // linker must remove this atom if unused
+ };
+ enum DynamicExport {
+ /// The linker may or may not export this atom dynamically depending
+ /// on the output type and other context of the link.
+ dynamicExportNormal,
+ /// The linker will always export this atom dynamically.
+ dynamicExportAlways,
+ };
+ // Attributes describe a code model used by the atom.
+ enum CodeModel {
+ codeNA, // no specific code model
+ // MIPS code models
+ codeMipsPIC, // PIC function in a PIC / non-PIC mixed file
+ codeMipsMicro, // microMIPS instruction encoding
+ codeMipsMicroPIC, // microMIPS instruction encoding + PIC
+ codeMips16, // MIPS-16 instruction encoding
+ // ARM code models
+ codeARMThumb, // ARM Thumb instruction set
+ codeARM_a, // $a-like mapping symbol (for ARM code)
+ codeARM_d, // $d-like mapping symbol (for data)
+ codeARM_t, // $t-like mapping symbol (for Thumb code)
+ };
+ /// Alignment constraint for an atom, stored as a (value, modulus) pair of
+ /// 16-bit fields.
+ struct Alignment {
+   // The int arguments are converted to the uint16_t members on
+   // initialization; the modulus defaults to 0.
+   Alignment(int v, int m = 0) : value(v), modulus(m) {}
+   uint16_t value;
+   uint16_t modulus;
+   /// Two alignments compare equal only when both fields match.
+   bool operator==(const Alignment &rhs) const {
+     if (value != rhs.value)
+       return false;
+     return modulus == rhs.modulus;
+   }
+ };
+ /// returns a value for the order of this Atom within its file.
+ ///
+ /// This is used by the linker to order the layout of Atoms so that the
+ /// resulting image is stable and reproducible.
+ virtual uint64_t ordinal() const = 0;
+ /// the number of bytes of space this atom's content will occupy in the
+ /// final linked image.
+ ///
+ /// For a function atom, it is the number of bytes of code in the function.
+ virtual uint64_t size() const = 0;
+ /// The size of the section from which the atom is instantiated.
+ ///
+ /// Merge::mergeByLargestSection is defined in terms of section size
+ /// and not in terms of atom size, so we need this function separate
+ /// from size().
+ virtual uint64_t sectionSize() const { return 0; }
+ /// The visibility of this atom to other atoms.
+ ///
+ /// C static functions have scope scopeTranslationUnit. Regular C functions
+ /// have scope scopeGlobal. Functions compiled with visibility=hidden have
+ /// scope scopeLinkageUnit so they can be seen by other atoms being linked but
+ /// not by the OS loader.
+ virtual Scope scope() const = 0;
+ /// Whether the linker should use direct or indirect access to this
+ /// atom.
+ virtual Interposable interposable() const = 0;
+ /// how the linker should handle if multiple atoms have the same name.
+ virtual Merge merge() const = 0;
+ /// The type of this atom, such as code or data.
+ virtual ContentType contentType() const = 0;
+ /// The alignment constraints on how this atom must be laid out in the
+ /// final linked image (e.g. 16-byte aligned).
+ virtual Alignment alignment() const = 0;
+ /// Whether this atom must be in a specially named section in the final
+ /// linked image, or if the linker can infer the section based on the
+ /// contentType().
+ virtual SectionChoice sectionChoice() const = 0;
+ /// If sectionChoice() != sectionBasedOnContent, then this returns the
+ /// name of the section the atom should be placed into.
+ virtual StringRef customSectionName() const = 0;
+ /// constraints on whether the linker may dead strip away this atom.
+ virtual DeadStripKind deadStrip() const = 0;
+ /// Under which conditions should this atom be dynamically exported.
+ virtual DynamicExport dynamicExport() const {
+ return dynamicExportNormal;
+ }
+ /// Code model used by the atom.
+ virtual CodeModel codeModel() const { return codeNA; }
+ /// Returns the OS memory protections required for this atom's content
+ /// at runtime.
+ ///
+ /// A function atom is R_X, a global variable is RW_, and a read-only constant
+ /// is R__.
+ virtual ContentPermissions permissions() const;
+ /// returns a reference to the raw (unrelocated) bytes of this Atom's
+ /// content.
+ virtual ArrayRef<uint8_t> rawContent() const = 0;
+ /// Forward iterator over the References of a DefinedAtom.
+ ///
+ /// The iterator itself only stores the atom and an opaque cursor; all
+ /// dereferencing and advancing is delegated to the atom's derefIterator()
+ /// and incrementIterator() methods, which concrete DefinedAtom subclasses
+ /// must implement.
+ class reference_iterator {
+ public:
+   reference_iterator(const DefinedAtom &a, const void *it)
+       : _atom(a), _it(it) {}
+
+   /// Returns the Reference the cursor currently designates.
+   const Reference *operator*() const {
+     return _atom.derefIterator(_it);
+   }
+
+   /// Member access; yields the same pointer as operator*().
+   const Reference *operator->() const {
+     return **this;
+   }
+
+   /// Iterators are compared by cursor only; the atom is not compared.
+   bool operator==(const reference_iterator &other) const {
+     return _it == other._it;
+   }
+
+   bool operator!=(const reference_iterator &other) const {
+     return _it != other._it;
+   }
+
+   /// Pre-increment: lets the atom advance the cursor in place.
+   reference_iterator &operator++() {
+     _atom.incrementIterator(_it);
+     return *this;
+   }
+
+ private:
+   const DefinedAtom &_atom;
+   const void *_it;
+ };
+ /// Returns an iterator to the beginning of this Atom's References.
+ virtual reference_iterator begin() const = 0;
+ /// Returns an iterator to the end of this Atom's References.
+ virtual reference_iterator end() const = 0;
+ /// Adds a reference to this atom.
+ ///
+ /// This default implementation adds nothing: it unconditionally hits
+ /// llvm_unreachable. Subclasses that permit adding references are expected
+ /// to override it.
+ virtual void addReference(Reference::KindNamespace ns,
+ Reference::KindArch arch,
+ Reference::KindValue kindValue, uint64_t off,
+ const Atom *target, Reference::Addend a) {
+ llvm_unreachable("Subclass does not permit adding references");
+ }
+ /// Type-check hook: an Atom is a DefinedAtom exactly when its definition
+ /// kind is definitionRegular.
+ static bool classof(const Atom *a) {
+ return a->definition() == definitionRegular;
+ }
+ /// Utility for deriving permissions from content type
+ static ContentPermissions permissions(ContentType type);
+ /// Reports whether the atom occupies file space. Every content type does,
+ /// except the three zero-fill types listed below.
+ bool occupiesDiskSpace() const {
+   switch (contentType()) {
+   case DefinedAtom::typeZeroFill:
+   case DefinedAtom::typeZeroFillFast:
+   case DefinedAtom::typeTLVInitialZeroFill:
+     return false;
+   default:
+     return true;
+   }
+ }
+ /// Reports whether relocations from this atom to other defined atoms can
+ /// be generated implicitly, so that they need not be emitted explicitly.
+ /// This holds only for atoms whose content type is typeCFI.
+ bool relocsToDefinedCanBeImplicit() const {
+   return contentType() == typeCFI;
+ }
+ // DefinedAtom is an abstract base class. Only subclasses can access
+ // constructor.
+ DefinedAtom() : Atom(definitionRegular) { }
+ ~DefinedAtom() override = default;
+ /// Returns a pointer to the Reference object that the abstract
+ /// iterator "points" to.
+ virtual const Reference *derefIterator(const void *iter) const = 0;
+ /// Adjusts the abstract iterator to "point" to the next Reference
+ /// object for this Atom.
+ virtual void incrementIterator(const void *&iter) const = 0;
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/include/lld/Core/Error.h b/contrib/llvm/tools/lld/include/lld/Core/Error.h
new file mode 100644
index 000000000000..36a36724987a
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Core/Error.h
@@ -0,0 +1,68 @@
+//===- Error.h - system_error extensions for lld ----------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This declares a new error_category for the lld library.
+#include "lld/Common/LLVM.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Error.h"
+#include <system_error>
+namespace lld {
+const std::error_category &YamlReaderCategory();
+enum class YamlReaderError {
+ unknown_keyword,
+ illegal_value
+inline std::error_code make_error_code(YamlReaderError e) {
+ return std::error_code(static_cast<int>(e), YamlReaderCategory());
+/// Creates an error_code object that has associated with it an arbitrary
+/// error message. The value() of the error_code will always be non-zero
+/// but its value is meaningless. The message() will be (a copy of) the
+/// supplied error string.
+/// Note: Once ErrorOr<> is updated to work with errors other than error_code,
+/// this can be updated to return some other kind of error.
+std::error_code make_dynamic_error_code(StringRef msg);
+/// Generic error.
+/// For errors that don't require their own specific sub-error (most errors)
+/// this class can be used to describe the error via a string message.
+class GenericError : public llvm::ErrorInfo<GenericError> {
+ static char ID;
+ GenericError(Twine Msg);
+ const std::string &getMessage() const { return Msg; }
+ void log(llvm::raw_ostream &OS) const override;
+ std::error_code convertToErrorCode() const override {
+ return make_dynamic_error_code(getMessage());
+ }
+ std::string Msg;
+} // end namespace lld
+namespace std {
+template <> struct is_error_code_enum<lld::YamlReaderError> : std::true_type {};
diff --git a/contrib/llvm/tools/lld/include/lld/Core/File.h b/contrib/llvm/tools/lld/include/lld/Core/File.h
new file mode 100644
index 000000000000..54f533576a4b
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Core/File.h
@@ -0,0 +1,276 @@
+//===- Core/File.h - A Container of Atoms ---------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#ifndef LLD_CORE_FILE_H
+#define LLD_CORE_FILE_H
+#include "lld/Core/AbsoluteAtom.h"
+#include "lld/Core/DefinedAtom.h"
+#include "lld/Core/SharedLibraryAtom.h"
+#include "lld/Core/UndefinedAtom.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <vector>
+namespace lld {
+class LinkingContext;
+/// Every Atom is owned by some File. A common scenario is for a single
+/// object file (.o) to be parsed by some reader and produce a single
+/// File object that represents the content of that object file.
+/// To iterate through the Atoms in a File there are four methods that
+/// return collections. For instance to iterate through all the DefinedAtoms
+/// in a File object use:
+/// for (const DefinedAtom *atom : file->defined()) {
+/// }
+/// The Atom objects in a File are owned by the File object. The Atom objects
+/// are destroyed when the File object is destroyed.
+class File {
+ virtual ~File();
+ /// Kinds of files that are supported.
+ enum Kind {
+ kindErrorObject, ///< an error object file (.o)
+ kindNormalizedObject, ///< a normalized file (.o)
+ kindMachObject, ///< a MachO object file (.o)
+ kindCEntryObject, ///< a file for CEntries
+ kindHeaderObject, ///< a file for file headers
+ kindEntryObject, ///< a file for the entry
+ kindUndefinedSymsObject, ///< a file for undefined symbols
+ kindStubHelperObject, ///< a file for stub helpers
+ kindResolverMergedObject, ///< the resolver merged file.
+ kindSectCreateObject, ///< a sect create object file (.o)
+ kindSharedLibrary, ///< shared library (.so)
+ kindArchiveLibrary ///< archive (.a)
+ };
+ /// Returns the kind of this file, as passed to the constructor. Needed
+ /// for dyn_cast<> on File objects.
+ Kind kind() const {
+ return _kind;
+ }
+ /// Returns the path of the file this object was created from
+ /// (e.g. "/tmp/foo.o"). When an archive path has been set, the result is
+ /// the combined form "archive(member)"; that string is built on first use
+ /// and cached in _archiveMemberPath.
+ StringRef path() const {
+   if (!_archivePath.empty()) {
+     if (_archiveMemberPath.empty())
+       _archiveMemberPath = (_archivePath + "(" + _path + ")").str();
+     return _archiveMemberPath;
+   }
+   return _path;
+ }
+ /// Returns the path of the archive file name if this file is instantiated
+ /// from an archive file. Otherwise returns the empty string.
+ StringRef archivePath() const { return _archivePath; }
+ void setArchivePath(StringRef path) { _archivePath = path; }
+ /// Returns the path name of this file. It doesn't include archive file name.
+ StringRef memberPath() const { return _path; }
+ /// Returns the command line order of the file.
+ /// Asserts that an ordinal has been assigned, i.e. that _ordinal no longer
+ /// holds the UINT64_MAX sentinel the constructor initializes it to.
+ uint64_t ordinal() const {
+ assert(_ordinal != UINT64_MAX);
+ return _ordinal;
+ }
+ /// Returns true/false depending on whether an ordinal has been set.
+ bool hasOrdinal() const { return (_ordinal != UINT64_MAX); }
+ /// Sets the command line order of the file.
+ void setOrdinal(uint64_t ordinal) const { _ordinal = ordinal; }
+ /// Returns the ordinal for the next atom to be defined in this file.
+ uint64_t getNextAtomOrdinalAndIncrement() const {
+ return _nextAtomOrdinal++;
+ }
+ /// For allocating any objects owned by this File.
+ llvm::BumpPtrAllocator &allocator() const {
+ return _allocator;
+ }
+ /// The type of atom mutable container.
+ template <typename T> using AtomVector = std::vector<OwningAtomPtr<T>>;
+ /// The range type for the atoms.
+ ///
+ /// A non-owning view over an AtomVector<T>: it stores only a reference to
+ /// the vector. Iterating with begin()/end() yields raw atom pointers
+ /// (T* or const T*), while owning_ptrs() exposes the underlying
+ /// OwningAtomPtr<T> elements.
+ template <typename T> class AtomRange {
+ public:
+ AtomRange(AtomVector<T> &v) : _v(v) {}
+ // Constness of the vector is cast away so that a single mutable reference
+ // member can serve both constructors.
+ AtomRange(const AtomVector<T> &v) : _v(const_cast<AtomVector<T> &>(v)) {}
+ // Function-pointer types used by the mapped iterators to turn an
+ // OwningAtomPtr<T> element into a raw atom pointer.
+ using ConstDerefFn = const T* (*)(const OwningAtomPtr<T>&);
+ using DerefFn = T* (*)(OwningAtomPtr<T>&);
+ typedef llvm::mapped_iterator<typename AtomVector<T>::const_iterator,
+ ConstDerefFn> ConstItTy;
+ typedef llvm::mapped_iterator<typename AtomVector<T>::iterator,
+ DerefFn> ItTy;
+ /// Mapping function for const iteration: returns the owned pointer.
+ static const T* DerefConst(const OwningAtomPtr<T> &p) {
+ return p.get();
+ }
+ /// Mapping function for mutable iteration: returns the owned pointer.
+ static T* Deref(OwningAtomPtr<T> &p) {
+ return p.get();
+ }
+ ConstItTy begin() const {
+ return ConstItTy(_v.begin(), ConstDerefFn(DerefConst));
+ }
+ ConstItTy end() const {
+ return ConstItTy(_v.end(), ConstDerefFn(DerefConst));
+ }
+ ItTy begin() {
+ return ItTy(_v.begin(), DerefFn(Deref));
+ }
+ ItTy end() {
+ return ItTy(_v.end(), DerefFn(Deref));
+ }
+ /// Returns the range of underlying OwningAtomPtr<T> elements.
+ llvm::iterator_range<typename AtomVector<T>::iterator> owning_ptrs() {
+ return llvm::make_range(_v.begin(), _v.end());
+ }
+ // Note: the const overload also hands out mutable iterators, because _v is
+ // a non-const reference.
+ llvm::iterator_range<typename AtomVector<T>::iterator> owning_ptrs() const {
+ return llvm::make_range(_v.begin(), _v.end());
+ }
+ bool empty() const {
+ return _v.empty();
+ }
+ size_t size() const {
+ return _v.size();
+ }
+ /// Unchecked element access; returns the owning pointer, not the raw atom.
+ const OwningAtomPtr<T> &operator[](size_t idx) const {
+ return _v[idx];
+ }
+ OwningAtomPtr<T> &operator[](size_t idx) {
+ return _v[idx];
+ }
+ private:
+ AtomVector<T> &_v;
+ };
+ /// Must be implemented to return the AtomVector object for
+ /// all DefinedAtoms in this File.
+ virtual const AtomRange<DefinedAtom> defined() const = 0;
+ /// Must be implemented to return the AtomVector object for
+ /// all UndefinedAtoms in this File.
+ virtual const AtomRange<UndefinedAtom> undefined() const = 0;
+ /// Must be implemented to return the AtomVector object for
+ /// all SharedLibraryAtoms in this File.
+ virtual const AtomRange<SharedLibraryAtom> sharedLibrary() const = 0;
+ /// Must be implemented to return the AtomVector object for
+ /// all AbsoluteAtoms in this File.
+ virtual const AtomRange<AbsoluteAtom> absolute() const = 0;
+ /// Drop all of the atoms owned by this file. This will result in all of
+ /// the atoms running their destructors.
+ /// This is required because atoms may be allocated on a BumpPtrAllocator
+ /// of a different file. We need to destruct all atoms before any files.
+ virtual void clearAtoms() = 0;
+ /// If a file is parsed using a different method than doParse(),
+ /// one must use this method to set the last error status, so that
+ /// doParse will not be called twice. Only YAML reader uses this
+ /// (because YAML reader does not read blobs but structured data).
+ void setLastError(std::error_code err) { _lastError = err; }
+ std::error_code parse();
+ // Usually each file owns a std::unique_ptr<MemoryBuffer>.
+ // However, there's one special case. If a file is an archive file,
+ // the archive file and its children all shares the same memory buffer.
+ // This method is used by the ArchiveFile to give its children
+ // co-ownership of the buffer.
+ void setSharedMemoryBuffer(std::shared_ptr<MemoryBuffer> mb) {
+ _sharedMemoryBuffer = mb;
+ }
+ /// only subclasses of File can be instantiated
+ File(StringRef p, Kind kind)
+ : _path(p), _kind(kind), _ordinal(UINT64_MAX),
+ _nextAtomOrdinal(0) {}
+ /// Subclasses should override this method to parse the
+ /// memory buffer passed to this file's constructor.
+ virtual std::error_code doParse() { return std::error_code(); }
+ static AtomVector<DefinedAtom> _noDefinedAtoms;
+ static AtomVector<UndefinedAtom> _noUndefinedAtoms;
+ static AtomVector<SharedLibraryAtom> _noSharedLibraryAtoms;
+ static AtomVector<AbsoluteAtom> _noAbsoluteAtoms;
+ mutable llvm::BumpPtrAllocator _allocator;
+ StringRef _path;
+ std::string _archivePath;
+ mutable std::string _archiveMemberPath;
+ Kind _kind;
+ mutable uint64_t _ordinal;
+ mutable uint64_t _nextAtomOrdinal;
+ std::shared_ptr<MemoryBuffer> _sharedMemoryBuffer;
+ llvm::Optional<std::error_code> _lastError;
+ std::mutex _parseMutex;
+/// An ErrorFile represents a file that doesn't exist.
+/// If you try to parse a file which doesn't exist, an instance of this
+/// class will be returned. Its parse method always returns an error.
+/// This is useful to delay erroring on non-existent files, so that we
+/// can do unit testing a driver using non-existing file paths.
+class ErrorFile : public File {
+ ErrorFile(StringRef path, std::error_code ec)
+ : File(path, kindErrorObject), _ec(ec) {}
+ std::error_code doParse() override { return _ec; }
+ // The four atom accessors below are not meant to be called on an
+ // ErrorFile: each one unconditionally hits llvm_unreachable.
+ const AtomRange<DefinedAtom> defined() const override {
+ llvm_unreachable("internal error");
+ }
+ const AtomRange<UndefinedAtom> undefined() const override {
+ llvm_unreachable("internal error");
+ }
+ const AtomRange<SharedLibraryAtom> sharedLibrary() const override {
+ llvm_unreachable("internal error");
+ }
+ const AtomRange<AbsoluteAtom> absolute() const override {
+ llvm_unreachable("internal error");
+ }
+ // Intentionally a no-op.
+ void clearAtoms() override {
+ }
+ std::error_code _ec;
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/include/lld/Core/Instrumentation.h b/contrib/llvm/tools/lld/include/lld/Core/Instrumentation.h
new file mode 100644
index 000000000000..939d64557587
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Core/Instrumentation.h
@@ -0,0 +1,132 @@
+//===- include/Core/Instrumentation.h - Instrumentation API ---------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+/// \file
+/// Provide an Instrumentation API that optionally uses VTune interfaces.
+#include "llvm/Support/Compiler.h"
+#include <utility>
+# include <ittnotify.h>
+namespace lld {
+/// A unique global scope for instrumentation data.
+/// Domains last for the lifetime of the application and cannot be destroyed.
+/// Multiple Domains created with the same name represent the same domain.
+class Domain {
+ __itt_domain *_domain;
+ explicit Domain(const char *name) : _domain(__itt_domain_createA(name)) {}
+ operator __itt_domain *() const { return _domain; }
+ __itt_domain *operator->() const { return _domain; }
+/// A global reference to a string constant.
+/// These are uniqued by the ITT runtime and cannot be deleted. They are not
+/// specific to a domain.
+/// Prefer reusing a single StringHandle over passing a ntbs when the same
+/// string will be used often.
+class StringHandle {
+ __itt_string_handle *_handle;
+ StringHandle(const char *name) : _handle(__itt_string_handle_createA(name)) {}
+ operator __itt_string_handle *() const { return _handle; }
+/// A task on a single thread. Nests within other tasks.
+/// Each thread has its own task stack and tasks nest recursively on that stack.
+/// A task cannot transfer threads.
+/// SBRM is used to ensure task starts and ends are balanced. The lifetime of
+/// a task is either the lifetime of this object, or until end is called.
+class ScopedTask {
+ __itt_domain *_domain;
+ ScopedTask(const ScopedTask &) = delete;
+ ScopedTask &operator=(const ScopedTask &) = delete;
+ /// Create a task in Domain \p d named \p s.
+ ScopedTask(const Domain &d, const StringHandle &s) : _domain(d) {
+ __itt_task_begin(d, __itt_null, __itt_null, s);
+ }
+ /// Move-construct from \p other, taking over responsibility for ending its
+ /// task. \p other is left without a task, so its destructor does nothing.
+ ScopedTask(ScopedTask &&other) : _domain(other._domain) {
+   other._domain = nullptr;
+ }
+ /// Move-assign from \p other.
+ ///
+ /// Any task still owned by this object is ended first; otherwise its
+ /// __itt_task_begin would never be matched by an __itt_task_end.
+ /// Self-assignment is a no-op, so the owned task is not lost.
+ ScopedTask &operator=(ScopedTask &&other) {
+   if (this != &other) {
+     end();
+     _domain = other._domain;
+     other._domain = nullptr;
+   }
+   return *this;
+ }
+ /// End the task before this object is destroyed. Calling it again, or on
+ /// an object whose _domain is already null, does nothing.
+ void end() {
+   if (!_domain)
+     return;
+   __itt_task_end(_domain);
+   _domain = nullptr;
+ }
+ ~ScopedTask() { end(); }
+/// A specific point in time. Allows metadata to be associated.
+class Marker {
+ Marker(const Domain &d, const StringHandle &s) {
+ __itt_marker(d, __itt_null, s, __itt_scope_global);
+ }
+class Domain {
+ Domain(const char *name) {}
+class StringHandle {
+ StringHandle(const char *name) {}
+class ScopedTask {
+ ScopedTask(const Domain &d, const StringHandle &s) {}
+ void end() {}
+class Marker {
+ Marker(const Domain &d, const StringHandle &s) {}
+inline const Domain &getDefaultDomain() {
+ static Domain domain("org.llvm.lld");
+ return domain;
+} // end namespace lld.
diff --git a/contrib/llvm/tools/lld/include/lld/Core/LinkingContext.h b/contrib/llvm/tools/lld/include/lld/Core/LinkingContext.h
new file mode 100644
index 000000000000..52ab1a2480e8
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Core/LinkingContext.h
@@ -0,0 +1,257 @@
+//===- lld/Core/LinkingContext.h - Linker Target Info Interface -*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Core/Node.h"
+#include "lld/Core/Reader.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Error.h"
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+namespace lld {
+class PassManager;
+class File;
+class Writer;
+class Node;
+class SharedLibraryFile;
+/// The LinkingContext class encapsulates "what and how" to link.
+/// The base class LinkingContext contains the options needed by core linking.
+/// Subclasses of LinkingContext have additional options needed by specific
+/// Writers.
+class LinkingContext {
+ virtual ~LinkingContext();
+ /// \name Methods needed by core linking
+ /// @{
+ /// Name of symbol linker should use as "entry point" to program,
+ /// usually "main" or "start".
+ virtual StringRef entrySymbolName() const { return _entrySymbolName; }
+ /// Whether core linking should remove Atoms not reachable by following
+ /// References from the entry point Atom or from all global scope Atoms
+ /// if globalsAreDeadStripRoots() is true.
+ bool deadStrip() const { return _deadStrip; }
+ /// Only used if deadStrip() returns true. Means all global scope Atoms
+ /// should be marked live (along with all Atoms they reference). Usually
+ /// this method returns false for main executables, but true for dynamic
+ /// shared libraries.
+ bool globalsAreDeadStripRoots() const { return _globalsAreDeadStripRoots; }
+ /// Only used if deadStrip() returns true. This method returns the names
+ /// of DefinedAtoms that should be marked live (along with all Atoms they
+ /// reference). Only Atoms with scope scopeLinkageUnit or scopeGlobal can
+ /// be kept live using this method.
+ ArrayRef<StringRef> deadStripRoots() const {
+ return _deadStripRoots;
+ }
+ /// Add the given symbol name to the dead strip root set. Only used if
+ /// deadStrip() returns true.
+ ///
+ /// Only the StringRef is stored, not a copy of the characters, so the
+ /// caller must keep the underlying string alive while this context uses it.
+ /// \param symbolName Name of the root symbol; must not be empty (asserted).
+ void addDeadStripRoot(StringRef symbolName) {
+ assert(!symbolName.empty() && "Empty symbol cannot be a dead strip root");
+ _deadStripRoots.push_back(symbolName);
+ }
+ /// Normally, every UndefinedAtom must be replaced by a DefinedAtom or a
+ /// SharedLibraryAtom for the link to be successful. This method controls
+ /// whether core linking prints out a list of remaining UndefinedAtoms.
+ ///
+ /// \todo This should be a method core linking calls with a list of the
+ /// UndefinedAtoms so that different drivers can format the error message
+ /// as needed.
+ bool printRemainingUndefines() const { return _printRemainingUndefines; }
+ /// Normally, every UndefinedAtom must be replaced by a DefinedAtom or a
+ /// SharedLibraryAtom for the link to be successful. This method controls
+ /// whether core linking considers remaining undefines to be an error.
+ bool allowRemainingUndefines() const { return _allowRemainingUndefines; }
+ /// Normally, every UndefinedAtom must be replaced by a DefinedAtom or a
+ /// SharedLibraryAtom for the link to be successful. This method controls
+ /// whether core linking considers remaining undefines from the shared library
+ /// to be an error.
+ bool allowShlibUndefines() const { return _allowShlibUndefines; }
+ /// If true, core linking will write the path to each input file to stdout
+ /// (i.e. llvm::outs()) as it is used. This is used to implement the -t
+ /// linker option.
+ ///
+ /// \todo This should be a method core linking calls so that drivers can
+ /// format the line as needed.
+ bool logInputFiles() const { return _logInputFiles; }
+ /// Parts of LLVM use global variables which are bound to command line
+ /// options (see llvm::cl::Options). This method returns "command line"
+ /// options which are used to configure LLVM's command line settings.
+ /// For instance the -debug-only XXX option can be used to dynamically
+ /// trace different parts of LLVM and lld.
+ ArrayRef<const char *> llvmOptions() const { return _llvmOptions; }
+ /// \name Methods used by Drivers to configure TargetInfo
+ /// @{
+ void setOutputPath(StringRef str) { _outputPath = str; }
+ // Set the entry symbol name. You may also need to call addDeadStripRoot() for
+ // the symbol if your platform supports dead-stripping, so that the symbol
+ // will not be removed from the output.
+ void setEntrySymbolName(StringRef name) {
+ _entrySymbolName = name;
+ }
+ void setDeadStripping(bool enable) { _deadStrip = enable; }
+ void setGlobalsAreDeadStripRoots(bool v) { _globalsAreDeadStripRoots = v; }
+ void setPrintRemainingUndefines(bool print) {
+ _printRemainingUndefines = print;
+ }
+ void setAllowRemainingUndefines(bool allow) {
+ _allowRemainingUndefines = allow;
+ }
+ void setAllowShlibUndefines(bool allow) { _allowShlibUndefines = allow; }
+ void setLogInputFiles(bool log) { _logInputFiles = log; }
+ void appendLLVMOption(const char *opt) { _llvmOptions.push_back(opt); }
+ std::vector<std::unique_ptr<Node>> &getNodes() { return _nodes; }
+ const std::vector<std::unique_ptr<Node>> &getNodes() const { return _nodes; }
+ /// This method adds undefined symbols specified by the -u option to the
+ /// list of undefined symbols known to the linker. This option essentially
+ /// forces an undefined symbol to be created. You may also need to call
+ /// addDeadStripRoot() for the symbol if your platform supports dead
+ /// stripping, so that the symbol will not be removed from the output.
+ void addInitialUndefinedSymbol(StringRef symbolName) {
+ _initialUndefinedSymbols.push_back(symbolName);
+ }
+ /// Iterators for symbols that appear on the command line.
+ typedef std::vector<StringRef> StringRefVector;
+ typedef StringRefVector::iterator StringRefVectorIter;
+ typedef StringRefVector::const_iterator StringRefVectorConstIter;
+ /// Create linker internal files containing atoms for the linker to include
+ /// during link. Flavors can override this function in their LinkingContext
+ /// to add more internal files. These internal files are positioned before
+ /// the actual input files.
+ virtual void createInternalFiles(std::vector<std::unique_ptr<File>> &) const;
+ /// Return the list of undefined symbols that are specified in the
+ /// linker command line, using the -u option.
+ ArrayRef<StringRef> initialUndefinedSymbols() const {
+ return _initialUndefinedSymbols;
+ }
+ /// After all set* methods are called, the Driver calls this method
+ /// to validate that there are no missing options or invalid combinations
+ /// of options. If there is a problem, a description of the problem
+ /// is written to the global error handler.
+ ///
+ /// \returns true if there is an error with the current settings.
+ bool validate();
+ /// Formats symbol name for use in error messages.
+ virtual std::string demangle(StringRef symbolName) const = 0;
+ /// @}
+ /// \name Methods used by Driver::link()
+ /// @{
+ /// Returns the file system path to which the linked output should be written.
+ ///
+ /// \todo To support in-memory linking, we need an abstraction that allows
+ /// the linker to write to an in-memory buffer.
+ StringRef outputPath() const { return _outputPath; }
+ /// Accessor for Registry object embedded in LinkingContext.
+ const Registry &registry() const { return _registry; }
+ Registry &registry() { return _registry; }
+ /// This method is called by core linking to give the Writer a chance
+ /// to add file format specific "files" to set of files to be linked. This is
+ /// how file format specific atoms can be added to the link.
+ virtual void createImplicitFiles(std::vector<std::unique_ptr<File>> &) = 0;
+ /// This method is called by core linking to build the list of Passes to be
+ /// run on the merged/linked graph of all input files.
+ virtual void addPasses(PassManager &pm) = 0;
+ /// Calls through to the writeFile() method on the specified Writer.
+ ///
+ /// \param linkedFile This is the merged/linked graph of all input file Atoms.
+ virtual llvm::Error writeFile(const File &linkedFile) const;
+ /// Return the next ordinal and Increment it.
+ virtual uint64_t getNextOrdinalAndIncrement() const { return _nextOrdinal++; }
+ // This function is called just before the Resolver kicks in.
+ // Derived classes may use it to change the list of input files.
+ virtual void finalizeInputFiles() = 0;
+ /// Callback invoked for each file the Resolver decides we are going to load.
+ /// This can be used to update context state based on the file, and emit
+ /// errors for any differences between the context state and a loaded file.
+ /// For example, we can error if we try to load a file which is a different
+ /// arch from that being linked.
+ virtual llvm::Error handleLoadedFile(File &file) = 0;
+ /// @}
+ LinkingContext(); // Must be subclassed
+ /// Abstract method to lazily instantiate the Writer.
+ virtual Writer &writer() const = 0;
+ /// Method to create an internal file for the entry symbol
+ virtual std::unique_ptr<File> createEntrySymbolFile() const;
+ std::unique_ptr<File> createEntrySymbolFile(StringRef filename) const;
+ /// Method to create an internal file for an undefined symbol
+ virtual std::unique_ptr<File> createUndefinedSymbolFile() const;
+ std::unique_ptr<File> createUndefinedSymbolFile(StringRef filename) const;
+ StringRef _outputPath;
+ StringRef _entrySymbolName;
+ bool _deadStrip = false;
+ bool _globalsAreDeadStripRoots = false;
+ bool _printRemainingUndefines = true;
+ bool _allowRemainingUndefines = false;
+ bool _logInputFiles = false;
+ bool _allowShlibUndefines = false;
+ std::vector<StringRef> _deadStripRoots;
+ std::vector<const char *> _llvmOptions;
+ StringRefVector _initialUndefinedSymbols;
+ std::vector<std::unique_ptr<Node>> _nodes;
+ mutable llvm::BumpPtrAllocator _allocator;
+ mutable uint64_t _nextOrdinal = 0;
+ Registry _registry;
+ /// Validate the subclass bits. Only called by validate.
+ virtual bool validateImpl() = 0;
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/include/lld/Core/Node.h b/contrib/llvm/tools/lld/include/lld/Core/Node.h
new file mode 100644
index 000000000000..c30482409e7a
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Core/Node.h
@@ -0,0 +1,75 @@
+//===- lld/Core/Node.h - Input file class -----------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+/// \file
+/// The classes in this file represent inputs to the linker.
+#ifndef LLD_CORE_NODE_H
+#define LLD_CORE_NODE_H
+#include "lld/Core/File.h"
+#include <algorithm>
+#include <memory>
+namespace lld {
+// A Node represents a FileNode or other type of Node. In the latter case,
+// the node contains meta information about the input file list.
+// Currently only GroupEnd node is defined as a meta node.
+class Node {
+ /// Identifies which concrete kind of Node this is.
+ enum class Kind { File, GroupEnd };
+ /// Construct a node tagged with \p type.
+ explicit Node(Kind type) : _kind(type) {}
+ virtual ~Node() = default;
+ /// Returns the stored kind; the classof() helpers of the subclasses compare
+ /// against this value.
+ virtual Kind kind() const { return _kind; }
+ /// Kind tag set by the constructor.
+ Kind _kind;
+// This is a marker for --end-group. getSize() returns the number of
+// files between the corresponding --start-group and this marker.
+class GroupEnd : public Node {
+ explicit GroupEnd(int size) : Node(Kind::GroupEnd), _size(size) {}
+ int getSize() const { return _size; }
+ static bool classof(const Node *a) {
+ return a->kind() == Kind::GroupEnd;
+ }
+ int _size;
+// A container of File.
+class FileNode : public Node {
+ explicit FileNode(std::unique_ptr<File> f)
+ : Node(Node::Kind::File), _file(std::move(f)) {}
+ static bool classof(const Node *a) {
+ return a->kind() == Node::Kind::File;
+ }
+ File *getFile() { return _file.get(); }
+ std::unique_ptr<File> _file;
+} // end namespace lld
+#endif // LLD_CORE_NODE_H
diff --git a/contrib/llvm/tools/lld/include/lld/Core/Pass.h b/contrib/llvm/tools/lld/include/lld/Core/Pass.h
new file mode 100644
index 000000000000..bfe3f9b10e0c
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Core/Pass.h
@@ -0,0 +1,43 @@
+//===------ Core/Pass.h - Base class for linker passes ----------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#ifndef LLD_CORE_PASS_H
+#define LLD_CORE_PASS_H
+#include "llvm/Support/Error.h"
+namespace lld {
+class SimpleFile;
+/// Once the core linking is done (which resolves references, coalesces atoms
+/// and produces a complete Atom graph), the linker runs a series of passes
+/// on the Atom graph. The graph is modeled as a File, which means the pass
+/// has access to all the atoms and to File level attributes. Each pass does
+/// a particular transformation to the Atom graph or to the File attributes.
+/// This is the abstract base class for all passes. A Pass does its
+/// actual work in its perform() method. It can iterate over Atoms in the
+/// graph using the *begin()/*end() atom iterator of the File. It can add
+/// new Atoms to the graph using the File's addAtom() method.
+class Pass {
+ virtual ~Pass() = default;
+ /// Do the actual work of the Pass.
+ virtual llvm::Error perform(SimpleFile &mergedFile) = 0;
+ // Only subclasses can be instantiated.
+ Pass() = default;
+} // end namespace lld
+#endif // LLD_CORE_PASS_H
diff --git a/contrib/llvm/tools/lld/include/lld/Core/PassManager.h b/contrib/llvm/tools/lld/include/lld/Core/PassManager.h
new file mode 100644
index 000000000000..f2ef10f406f2
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Core/PassManager.h
@@ -0,0 +1,48 @@
+//===- lld/Core/PassManager.h - Manage linker passes ----------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/LLVM.h"
+#include "lld/Core/Pass.h"
+#include "llvm/Support/Error.h"
+#include <memory>
+#include <vector>
+namespace lld {
+class SimpleFile;
+class Pass;
+/// Owns and runs a collection of passes.
+/// This class is currently just a container for passes and a way to run them.
+/// In the future this should handle timing pass runs, running parallel passes,
+/// and validate/satisfy pass dependencies.
+class PassManager {
+ /// Take ownership of \p pass and append it after the passes already added.
+ void add(std::unique_ptr<Pass> pass) {
+   _passes.emplace_back(std::move(pass));
+ }
+ /// Run each pass on \p file in the order the passes were added. Stops at,
+ /// and returns, the first error; returns success when every pass succeeds.
+ llvm::Error runOnFile(SimpleFile &file) {
+   for (auto &current : _passes) {
+     llvm::Error err = current->perform(file);
+     if (err)
+       return err;
+   }
+   return llvm::Error::success();
+ }
+ /// Passes in the order they should run.
+ std::vector<std::unique_ptr<Pass>> _passes;
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/include/lld/Core/Reader.h b/contrib/llvm/tools/lld/include/lld/Core/Reader.h
new file mode 100644
index 000000000000..6cf6282ff39c
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Core/Reader.h
@@ -0,0 +1,155 @@
+//===- lld/Core/Reader.h - Abstract File Format Reading Interface ---------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/LLVM.h"
+#include "lld/Core/Reference.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <memory>
+#include <vector>
+namespace llvm {
+namespace yaml {
+class IO;
+} // end namespace yaml
+} // end namespace llvm
+namespace lld {
+class File;
+class LinkingContext;
+class MachOLinkingContext;
+/// An abstract class for reading object files, library files, and
+/// executable files.
+/// Each file format (e.g. mach-o, etc) has a concrete subclass of Reader.
+class Reader {
+ virtual ~Reader() = default;
+ /// Sniffs the file to determine if this Reader can parse it.
+ /// The method is called with:
+ /// 1) the file_magic enumeration returned by identify_magic()
+ /// 2) the whole file content buffer if the above is not enough.
+ virtual bool canParse(llvm::file_magic magic, MemoryBufferRef mb) const = 0;
+ /// Parse a supplied buffer (already filled with the contents of a
+ /// file) and create a File object.
+ /// The resulting File object takes ownership of the MemoryBuffer.
+ virtual ErrorOr<std::unique_ptr<File>>
+ loadFile(std::unique_ptr<MemoryBuffer> mb, const class Registry &) const = 0;
+/// An abstract class for handling alternate yaml representations
+/// of object files.
+/// The YAML syntax allows "tags" which are used to specify the type of
+/// the YAML node. In lld, top level YAML documents can be in many YAML
+/// representations (e.g mach-o encoded as yaml, etc). A tag is used to
+/// specify which representation is used in the following YAML document.
+/// To work, there must be a YamlIOTaggedDocumentHandler registered that
+/// handles each tag type.
+class YamlIOTaggedDocumentHandler {
+ virtual ~YamlIOTaggedDocumentHandler();
+ /// This method is called on each registered YamlIOTaggedDocumentHandler
+ /// until one returns true. If the subclass handles tag type !xyz, then
+ /// this method should call io.mapTag("!xyz") to see if that is the current
+ /// document type, and if so, process the rest of the document using
+ /// YAML I/O, then convert the result into an lld::File* and return it.
+ virtual bool handledDocTag(llvm::yaml::IO &io, const lld::File *&f) const = 0;
+/// A registry to hold the list of currently registered Readers and
+/// tables which map Reference kind values to strings.
+/// The linker does not directly invoke Readers. Instead, it registers
+/// Readers based on its configuration and command line options, then calls
+/// the Registry object to parse files.
+class Registry {
+ Registry();
+ /// Walk the list of registered Readers and find one that can parse the
+ /// supplied file and parse it.
+ ErrorOr<std::unique_ptr<File>>
+ loadFile(std::unique_ptr<MemoryBuffer> mb) const;
+ /// Walk the list of registered kind tables to convert a Reference Kind
+ /// name to a value.
+ bool referenceKindFromString(StringRef inputStr, Reference::KindNamespace &ns,
+ Reference::KindArch &a,
+ Reference::KindValue &value) const;
+ /// Walk the list of registered kind tables to convert a Reference Kind
+ /// value to a string.
+ bool referenceKindToString(Reference::KindNamespace ns, Reference::KindArch a,
+ Reference::KindValue value, StringRef &) const;
+ /// Walk the list of registered tag handlers and have the one that handles
+ /// the current document type process the yaml into an lld::File*.
+ bool handleTaggedDoc(llvm::yaml::IO &io, const lld::File *&file) const;
+ // These methods are called to dynamically add support for various file
+ // formats. The methods are also implemented in the appropriate lib*.a
+ // library, so that the code for handling a format is only linked in, if this
+ // method is used. Any options that a Reader might need must be passed
+ // as parameters to the addSupport*() method.
+ void addSupportArchives(bool logLoading);
+ void addSupportYamlFiles();
+ void addSupportMachOObjects(MachOLinkingContext &);
+ /// To convert between kind values and names, the registry walks the list
+ /// of registered kind tables. Each table is a zero terminated array of
+ /// KindStrings elements.
+ struct KindStrings {
+ Reference::KindValue value;
+ StringRef name;
+ };
+ /// A Reference Kind value is a tuple of <namespace, arch, value>. All
+ /// entries in a conversion table have the same <namespace, arch>. The
+ /// array then contains the value/name pairs.
+ void addKindTable(Reference::KindNamespace ns, Reference::KindArch arch,
+ const KindStrings array[]);
+ struct KindEntry {
+ Reference::KindNamespace ns;
+ Reference::KindArch arch;
+ const KindStrings *array;
+ };
+ void add(std::unique_ptr<Reader>);
+ void add(std::unique_ptr<YamlIOTaggedDocumentHandler>);
+ std::vector<std::unique_ptr<Reader>> _readers;
+ std::vector<std::unique_ptr<YamlIOTaggedDocumentHandler>> _yamlHandlers;
+ std::vector<KindEntry> _kindEntries;
+// Utilities for building a KindString table. For instance:
+// static const Registry::KindStrings table[] = {
+// };
+#define LLD_KIND_STRING_ENTRY(name) { name, #name }
+#define LLD_KIND_STRING_END { 0, "" }
+} // end namespace lld
+#endif // LLD_CORE_READER_H
diff --git a/contrib/llvm/tools/lld/include/lld/Core/Reference.h b/contrib/llvm/tools/lld/include/lld/Core/Reference.h
new file mode 100644
index 000000000000..1d3003c84616
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Core/Reference.h
@@ -0,0 +1,119 @@
+//===- Core/Reference.h - A Reference to Another Atom -----------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include <cstdint>
+namespace lld {
+class Atom;
+/// The linker has a Graph Theory model of linking. An object file is seen
+/// as a set of Atoms with References to other Atoms. Each Atom is a node
+/// and each Reference is an edge.
+/// For example if a function contains a call site to "malloc" 40 bytes into
+/// the Atom, then the function Atom will have a Reference of: offsetInAtom=40,
+/// kind=callsite, target=malloc, addend=0.
+/// Besides supporting traditional "relocations", references are also used
+/// for forcing layout (one atom must follow another), marking data-in-code
+/// (jump tables or ARM constants), etc.
+/// The "kind" of a reference is a tuple of <namespace, arch, value>. This
+/// enables us to re-use existing relocation types defined for various
+/// file formats and architectures.
+/// References and atoms form a directed graph. The dead-stripping pass
+/// traverses them starting from dead-strip root atoms to garbage collect
+/// unreachable ones.
+/// References of any kind are considered as directed edges. In addition to
+/// that, references of some kinds are considered as bidirected edges.
+class Reference {
+ /// Which universe defines the kindValue().
+ enum class KindNamespace {
+ all = 0,
+ testing = 1,
+ mach_o = 2,
+ };
+ /// Returns the stored namespace, converted back to the KindNamespace enum.
+ KindNamespace kindNamespace() const {
+   return static_cast<KindNamespace>(_kindNamespace);
+ }
+ /// Stores \p ns in the 8-bit _kindNamespace field.
+ void setKindNamespace(KindNamespace ns) {
+   _kindNamespace = static_cast<uint8_t>(ns);
+ }
+ // Which architecture the kind value is for.
+ enum class KindArch { all, AArch64, ARM, x86, x86_64};
+ /// Returns the stored architecture, converted back to the KindArch enum.
+ KindArch kindArch() const {
+   return static_cast<KindArch>(_kindArch);
+ }
+ /// Stores \p a in the 8-bit _kindArch field.
+ void setKindArch(KindArch a) {
+   _kindArch = static_cast<uint8_t>(a);
+ }
+ typedef uint16_t KindValue;
+ KindValue kindValue() const { return _kindValue; }
+ /// setKindValue() is needed because during linking, some optimizations may
+ /// change the codegen and hence the reference kind.
+ void setKindValue(KindValue value) {
+ _kindValue = value;
+ }
+ /// KindValues used with KindNamespace::all and KindArch::all.
+ enum {
+ // kindLayoutAfter is treated as a bidirected edge by the dead-stripping
+ // pass.
+ kindLayoutAfter = 1,
+ kindAssociate,
+ };
+ // A value to be added to the value of a target
+ typedef int64_t Addend;
+ /// If the reference is a fixup in the Atom, then this returns the
+ /// byte offset into the Atom's content to do the fix up.
+ virtual uint64_t offsetInAtom() const = 0;
+ /// Returns the atom this reference refers to.
+ virtual const Atom *target() const = 0;
+ /// During linking, the linker may merge graphs which coalesces some nodes
+ /// (i.e. Atoms). To switch the target of a reference, this method is called.
+ virtual void setTarget(const Atom *) = 0;
+ /// Some relocations require a symbol and a value (e.g. foo + 4).
+ virtual Addend addend() const = 0;
+ /// During linking, some optimizations may change addend value.
+ virtual void setAddend(Addend) = 0;
+ /// Returns target specific attributes of the reference.
+ virtual uint32_t tag() const { return 0; }
+ /// Reference is an abstract base class, so this constructor is meant for
+ /// subclasses only. \p ns and \p a are stored as 8-bit values and \p value
+ /// is stored as given.
+ Reference(KindNamespace ns, KindArch a, KindValue value)
+     : _kindValue(value), _kindNamespace(static_cast<uint8_t>(ns)),
+       _kindArch(static_cast<uint8_t>(a)) {}
+ /// The memory for Reference objects is always managed by the owning File
+ /// object. Therefore, no one but the owning File object should call
+ /// delete on a Reference. In fact, some File objects may bulk allocate
+ /// an array of References, so they cannot be individually deleted by anyone.
+ virtual ~Reference() = default;
+ KindValue _kindValue;
+ uint8_t _kindNamespace;
+ uint8_t _kindArch;
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/include/lld/Core/Resolver.h b/contrib/llvm/tools/lld/include/lld/Core/Resolver.h
new file mode 100644
index 000000000000..5157c9fddc1a
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Core/Resolver.h
@@ -0,0 +1,106 @@
+//===- Core/Resolver.h - Resolves Atom References -------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Core/ArchiveLibraryFile.h"
+#include "lld/Core/File.h"
+#include "lld/Core/SharedLibraryFile.h"
+#include "lld/Core/Simple.h"
+#include "lld/Core/SymbolTable.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Support/ErrorOr.h"
+#include <set>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+namespace lld {
+class Atom;
+class LinkingContext;
+/// The Resolver is responsible for merging all input object files
+/// and producing a merged graph.
+class Resolver {
+ Resolver(LinkingContext &ctx) : _ctx(ctx), _result(new MergedFile()) {}
+ // InputFiles::Handler methods
+ void doDefinedAtom(OwningAtomPtr<DefinedAtom> atom);
+ bool doUndefinedAtom(OwningAtomPtr<UndefinedAtom> atom);
+ void doSharedLibraryAtom(OwningAtomPtr<SharedLibraryAtom> atom);
+ void doAbsoluteAtom(OwningAtomPtr<AbsoluteAtom> atom);
+ // Handle files, this adds atoms from the current file that's
+ // being processed by the resolver
+ llvm::Expected<bool> handleFile(File &);
+ // Handle an archive library file.
+ llvm::Expected<bool> handleArchiveFile(File &);
+ // Handle a shared library file.
+ llvm::Error handleSharedLibrary(File &);
+ /// do work of merging and resolving and return list
+ bool resolve();
+ std::unique_ptr<SimpleFile> resultFile() { return std::move(_result); }
+ typedef std::function<llvm::Expected<bool>(StringRef)> UndefCallback;
+ bool undefinesAdded(int begin, int end);
+ File *getFile(int &index);
+ /// The main function that iterates over the files to resolve
+ bool resolveUndefines();
+ void updateReferences();
+ void deadStripOptimize();
+ bool checkUndefines();
+ void removeCoalescedAwayAtoms();
+ llvm::Expected<bool> forEachUndefines(File &file, UndefCallback callback);
+ void markLive(const Atom *atom);
+ class MergedFile : public SimpleFile {
+ public:
+ MergedFile() : SimpleFile("<linker-internal>", kindResolverMergedObject) {}
+ void addAtoms(llvm::MutableArrayRef<OwningAtomPtr<Atom>> atoms);
+ };
+ LinkingContext &_ctx;
+ SymbolTable _symbolTable;
+ std::vector<OwningAtomPtr<Atom>> _atoms;
+ std::set<const Atom *> _deadStripRoots;
+ llvm::DenseSet<const Atom *> _liveAtoms;
+ llvm::DenseSet<const Atom *> _deadAtoms;
+ std::unique_ptr<MergedFile> _result;
+ std::unordered_multimap<const Atom *, const Atom *> _reverseRef;
+ // --start-group and --end-group
+ std::vector<File *> _files;
+ std::map<File *, bool> _newUndefinesAdded;
+ // List of undefined symbols.
+ std::vector<StringRef> _undefines;
+ // Start position in _undefines for each archive/shared library file.
+ // Symbols from index 0 to the start position are already searched before.
+ // Searching them again would never succeed. When we look for undefined
+ // symbols from an archive/shared library file, start from its start
+ // position to save time.
+ std::map<File *, size_t> _undefineIndex;
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/include/lld/Core/SharedLibraryAtom.h b/contrib/llvm/tools/lld/include/lld/Core/SharedLibraryAtom.h
new file mode 100644
index 000000000000..7fec7a3e3d29
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Core/SharedLibraryAtom.h
@@ -0,0 +1,53 @@
+//===- Core/SharedLibraryAtom.h - A Shared Library Atom -------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Core/Atom.h"
+namespace lld {
+/// A SharedLibraryAtom has no content.
+/// It exists to represent a symbol which will be bound at runtime.
+class SharedLibraryAtom : public Atom {
+ enum class Type : uint32_t {
+ Unknown,
+ Code,
+ Data,
+ };
+ /// Returns shared library name used to load it at runtime.
+ /// On Darwin it is the LC_DYLIB_LOAD dylib name.
+ virtual StringRef loadName() const = 0;
+ /// Returns if shared library symbol can be missing at runtime and if
+ /// so the loader should silently resolve address of symbol to be nullptr.
+ virtual bool canBeNullAtRuntime() const = 0;
+ virtual Type type() const = 0;
+ virtual uint64_t size() const = 0;
+ static bool classof(const Atom *a) {
+ return a->definition() == definitionSharedLibrary;
+ }
+ static inline bool classof(const SharedLibraryAtom *) { return true; }
+ SharedLibraryAtom() : Atom(definitionSharedLibrary) {}
+ ~SharedLibraryAtom() override = default;
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/include/lld/Core/SharedLibraryFile.h b/contrib/llvm/tools/lld/include/lld/Core/SharedLibraryFile.h
new file mode 100644
index 000000000000..53bf967b0236
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Core/SharedLibraryFile.h
@@ -0,0 +1,70 @@
+//===- Core/SharedLibraryFile.h - Models shared libraries as Atoms --------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Core/File.h"
+namespace lld {
+/// The SharedLibraryFile subclass of File is used to represent dynamic
+/// shared libraries being linked against.
+class SharedLibraryFile : public File {
+ static bool classof(const File *f) {
+ return f->kind() == kindSharedLibrary;
+ }
+ /// Check if the shared library exports a symbol with the specified name.
+ /// If so, return a SharedLibraryAtom which represents that exported
+ /// symbol. Otherwise return nullptr.
+ virtual OwningAtomPtr<SharedLibraryAtom> exports(StringRef name) const = 0;
+ // Returns the install name.
+ virtual StringRef getDSOName() const = 0;
+ const AtomRange<DefinedAtom> defined() const override {
+ return _definedAtoms;
+ }
+ const AtomRange<UndefinedAtom> undefined() const override {
+ return _undefinedAtoms;
+ }
+ const AtomRange<SharedLibraryAtom> sharedLibrary() const override {
+ return _sharedLibraryAtoms;
+ }
+ const AtomRange<AbsoluteAtom> absolute() const override {
+ return _absoluteAtoms;
+ }
+ void clearAtoms() override {
+ _definedAtoms.clear();
+ _undefinedAtoms.clear();
+ _sharedLibraryAtoms.clear();
+ _absoluteAtoms.clear();
+ }
+ /// only subclasses of SharedLibraryFile can be instantiated
+ explicit SharedLibraryFile(StringRef path) : File(path, kindSharedLibrary) {}
+ AtomVector<DefinedAtom> _definedAtoms;
+ AtomVector<UndefinedAtom> _undefinedAtoms;
+ AtomVector<SharedLibraryAtom> _sharedLibraryAtoms;
+ AtomVector<AbsoluteAtom> _absoluteAtoms;
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/include/lld/Core/Simple.h b/contrib/llvm/tools/lld/include/lld/Core/Simple.h
new file mode 100644
index 000000000000..feeed6ae473b
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Core/Simple.h
@@ -0,0 +1,271 @@
+//===- lld/Core/Simple.h - Simple implementations of Atom and File --------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+/// \file
+/// Provide simple implementations for Atoms and File.
+#include "lld/Core/AbsoluteAtom.h"
+#include "lld/Core/Atom.h"
+#include "lld/Core/DefinedAtom.h"
+#include "lld/Core/File.h"
+#include "lld/Core/Reference.h"
+#include "lld/Core/SharedLibraryAtom.h"
+#include "lld/Core/UndefinedAtom.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
+namespace lld {
+class SimpleFile : public File {
+ SimpleFile(StringRef path, File::Kind kind)
+ : File(path, kind) {}
+ ~SimpleFile() override {
+ _defined.clear();
+ _undefined.clear();
+ _shared.clear();
+ _absolute.clear();
+ }
+ void addAtom(DefinedAtom &a) {
+ _defined.push_back(OwningAtomPtr<DefinedAtom>(&a));
+ }
+ void addAtom(UndefinedAtom &a) {
+ _undefined.push_back(OwningAtomPtr<UndefinedAtom>(&a));
+ }
+ void addAtom(SharedLibraryAtom &a) {
+ _shared.push_back(OwningAtomPtr<SharedLibraryAtom>(&a));
+ }
+ void addAtom(AbsoluteAtom &a) {
+ _absolute.push_back(OwningAtomPtr<AbsoluteAtom>(&a));
+ }
+ /// Add an atom whose concrete kind is only known at runtime. The atom is
+ /// tested against each of the four atom kinds in turn and forwarded to the
+ /// matching typed addAtom() overload; constness is cast away because those
+ /// overloads take mutable references. Reaching the end means the atom is of
+ /// none of the known kinds, which is treated as unreachable.
+ void addAtom(const Atom &atom) {
+   if (auto *defined = dyn_cast<DefinedAtom>(&atom)) {
+     addAtom(const_cast<DefinedAtom &>(*defined));
+     return;
+   }
+   if (auto *undefined = dyn_cast<UndefinedAtom>(&atom)) {
+     addAtom(const_cast<UndefinedAtom &>(*undefined));
+     return;
+   }
+   if (auto *shared = dyn_cast<SharedLibraryAtom>(&atom)) {
+     addAtom(const_cast<SharedLibraryAtom &>(*shared));
+     return;
+   }
+   if (auto *absolute = dyn_cast<AbsoluteAtom>(&atom)) {
+     addAtom(const_cast<AbsoluteAtom &>(*absolute));
+     return;
+   }
+   llvm_unreachable("atom has unknown definition kind");
+ }
+ /// Remove every defined atom for which \p pred returns true. The predicate
+ /// receives the raw atom pointer held by each owning pointer; surviving
+ /// atoms keep their relative order (std::remove_if + erase).
+ void removeDefinedAtomsIf(std::function<bool(const DefinedAtom *)> pred) {
+   auto shouldRemove = [&pred](OwningAtomPtr<DefinedAtom> &owned) {
+     return pred(owned.get());
+   };
+   _defined.erase(
+       std::remove_if(_defined.begin(), _defined.end(), shouldRemove),
+       _defined.end());
+ }
+ const AtomRange<DefinedAtom> defined() const override { return _defined; }
+ const AtomRange<UndefinedAtom> undefined() const override {
+ return _undefined;
+ }
+ const AtomRange<SharedLibraryAtom> sharedLibrary() const override {
+ return _shared;
+ }
+ const AtomRange<AbsoluteAtom> absolute() const override {
+ return _absolute;
+ }
+ void clearAtoms() override {
+ _defined.clear();
+ _undefined.clear();
+ _shared.clear();
+ _absolute.clear();
+ }
+ AtomVector<DefinedAtom> _defined;
+ AtomVector<UndefinedAtom> _undefined;
+ AtomVector<SharedLibraryAtom> _shared;
+ AtomVector<AbsoluteAtom> _absolute;
+class SimpleReference : public Reference,
+ public llvm::ilist_node<SimpleReference> {
+ SimpleReference(Reference::KindNamespace ns, Reference::KindArch arch,
+ Reference::KindValue value, uint64_t off, const Atom *t,
+ Reference::Addend a)
+ : Reference(ns, arch, value), _target(t), _offsetInAtom(off), _addend(a) {
+ }
+ SimpleReference()
+ : Reference(Reference::KindNamespace::all, Reference::KindArch::all, 0),
+ _target(nullptr), _offsetInAtom(0), _addend(0) {}
+ uint64_t offsetInAtom() const override { return _offsetInAtom; }
+ const Atom *target() const override {
+ assert(_target); // the default constructor leaves _target null, so setTarget() must have run before this is called
+ return _target;
+ }
+ Addend addend() const override { return _addend; }
+ void setAddend(Addend a) override { _addend = a; }
+ void setTarget(const Atom *newAtom) override { _target = newAtom; }
+ const Atom *_target;
+ uint64_t _offsetInAtom;
+ Addend _addend;
+class SimpleDefinedAtom : public DefinedAtom {
+ explicit SimpleDefinedAtom(const File &f)
+ : _file(f), _ordinal(f.getNextAtomOrdinalAndIncrement()) {}
+ ~SimpleDefinedAtom() override {
+ _references.clearAndLeakNodesUnsafely(); // unlink only: nodes are placement-allocated from _file.allocator() in addReference(), so the list must not delete them (presumably reclaimed with that allocator - TODO confirm)
+ }
+ const File &file() const override { return _file; }
+ StringRef name() const override { return StringRef(); }
+ uint64_t ordinal() const override { return _ordinal; }
+ Scope scope() const override { return DefinedAtom::scopeLinkageUnit; }
+ Interposable interposable() const override {
+ return DefinedAtom::interposeNo;
+ }
+ Merge merge() const override { return DefinedAtom::mergeNo; }
+ Alignment alignment() const override { return 1; }
+ SectionChoice sectionChoice() const override {
+ return DefinedAtom::sectionBasedOnContent;
+ }
+ StringRef customSectionName() const override { return StringRef(); }
+ DeadStripKind deadStrip() const override {
+ return DefinedAtom::deadStripNormal;
+ }
+ DefinedAtom::reference_iterator begin() const override {
+ const void *it =
+ reinterpret_cast<const void *>(_references.begin().getNodePtr());
+ return reference_iterator(*this, it);
+ }
+ DefinedAtom::reference_iterator end() const override {
+ const void *it =
+ reinterpret_cast<const void *>(_references.end().getNodePtr());
+ return reference_iterator(*this, it);
+ }
+ const Reference *derefIterator(const void *it) const override {
+ return &*RefList::const_iterator(
+ *reinterpret_cast<const llvm::ilist_node<SimpleReference> *>(it));
+ }
+ void incrementIterator(const void *&it) const override {
+ RefList::const_iterator ref(
+ *reinterpret_cast<const llvm::ilist_node<SimpleReference> *>(it));
+ it = reinterpret_cast<const void *>(std::next(ref).getNodePtr());
+ }
+ void addReference(Reference::KindNamespace ns,
+ Reference::KindArch arch,
+ Reference::KindValue kindValue, uint64_t off,
+ const Atom *target, Reference::Addend a) override { // appends a new reference to this atom's list
+ assert(target && "trying to create reference to nothing");
+ auto node = new (_file.allocator()) // placement-new into the owning file's allocator, not the global heap
+ SimpleReference(ns, arch, kindValue, off, target, a);
+ _references.push_back(node); // appended in call order; sortReferences() can canonicalise the order later
+ }
+ /// Put this atom's references into canonical order: ascending offset in the
+ /// atom, with ties broken by kind namespace, then kind arch, then kind value.
+ /// The method is const because the reference list is a mutable member.
+ void sortReferences() const {
+   // The list is not sorted in place: gather pointers to its nodes, sort
+   // the pointers, then relink the very same nodes in sorted order.
+   llvm::SmallVector<SimpleReference *, 16> sorted;
+   for (auto it = _references.begin(), last = _references.end(); it != last;
+        ++it)
+     sorted.push_back(&*it);
+
+   auto refLess = [](const SimpleReference *a,
+                     const SimpleReference *b) -> bool {
+     const uint64_t aOffset = a->offsetInAtom();
+     const uint64_t bOffset = b->offsetInAtom();
+     if (aOffset != bOffset)
+       return aOffset < bOffset;
+     if (a->kindNamespace() != b->kindNamespace())
+       return a->kindNamespace() < b->kindNamespace();
+     if (a->kindArch() != b->kindArch())
+       return a->kindArch() < b->kindArch();
+     return a->kindValue() < b->kindValue();
+   };
+   std::sort(sorted.begin(), sorted.end(), refLess);
+
+   // Unlink every node without destroying it; they are re-appended below.
+   _references.clearAndLeakNodesUnsafely();
+   for (SimpleReference *ref : sorted)
+     _references.push_back(ref);
+ }
+ void setOrdinal(uint64_t ord) { _ordinal = ord; }
+ typedef llvm::ilist<SimpleReference> RefList;
+ const File &_file;
+ uint64_t _ordinal;
+ mutable RefList _references;
+class SimpleUndefinedAtom : public UndefinedAtom {
+ SimpleUndefinedAtom(const File &f, StringRef name) : _file(f), _name(name) {
+ assert(!name.empty() && "UndefinedAtoms must have a name");
+ }
+ ~SimpleUndefinedAtom() override = default;
+ /// file - returns the File that produced/owns this Atom
+ const File &file() const override { return _file; }
+ /// name - The name of the atom. For a function atom, it is the (mangled)
+ /// name of the function.
+ StringRef name() const override { return _name; }
+ CanBeNull canBeNull() const override { return UndefinedAtom::canBeNullNever; }
+ const File &_file;
+ StringRef _name;
+} // end namespace lld
+#endif // LLD_CORE_SIMPLE_H
diff --git a/contrib/llvm/tools/lld/include/lld/Core/SymbolTable.h b/contrib/llvm/tools/lld/include/lld/Core/SymbolTable.h
new file mode 100644
index 000000000000..156c56eafbf7
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Core/SymbolTable.h
@@ -0,0 +1,96 @@
+//===- Core/SymbolTable.h - Main Symbol Table -----------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/LLVM.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Support/DJB.h"
+#include <cstring>
+#include <map>
+#include <vector>
+namespace lld {
+class AbsoluteAtom;
+class Atom;
+class DefinedAtom;
+class LinkingContext;
+class ResolverOptions;
+class SharedLibraryAtom;
+class UndefinedAtom;
+/// The SymbolTable class is responsible for coalescing atoms.
+/// All atoms coalescable by-name or by-content should be added.
+/// The method replacement() can be used to find the replacement atom
+/// if an atom has been coalesced away.
+class SymbolTable {
+ /// add atom to symbol table
+ bool add(const DefinedAtom &);
+ /// add atom to symbol table
+ bool add(const UndefinedAtom &);
+ /// add atom to symbol table
+ bool add(const SharedLibraryAtom &);
+ /// add atom to symbol table
+ bool add(const AbsoluteAtom &);
+ /// returns atom in symbol table for specified name (or nullptr)
+ const Atom *findByName(StringRef sym);
+ /// returns vector of remaining UndefinedAtoms
+ std::vector<const UndefinedAtom *> undefines();
+ /// if atom has been coalesced away, return replacement, else return atom
+ const Atom *replacement(const Atom *);
+ /// if atom has been coalesced away, return true
+ bool isCoalescedAway(const Atom *);
+ typedef llvm::DenseMap<const Atom *, const Atom *> AtomToAtom;
+ /// Key traits used by the NameToAtom DenseMap: symbol names are hashed with
+ /// djbHash (seed 0) and compared by content. The default-constructed
+ /// StringRef is the empty-key sentinel and the one-character string " " is
+ /// the tombstone sentinel.
+ struct StringRefMappingInfo {
+   static StringRef getEmptyKey() {
+     return StringRef();
+   }
+   static StringRef getTombstoneKey() {
+     return StringRef(" ", 1);
+   }
+   static unsigned getHashValue(const StringRef val) {
+     return llvm::djbHash(val, 0);
+   }
+   static bool isEqual(const StringRef lhs, const StringRef rhs) {
+     return lhs == rhs;
+   }
+ };
+ typedef llvm::DenseMap<StringRef, const Atom *,
+ StringRefMappingInfo> NameToAtom;
+ /// Key traits used by the AtomContentSet DenseSet. nullptr is the empty-key
+ /// sentinel and the pointer value -1 is the tombstone sentinel; hashing and
+ /// equality are declared here and defined out of line.
+ struct AtomMappingInfo {
+   static const DefinedAtom *getEmptyKey() {
+     return nullptr;
+   }
+   static const DefinedAtom *getTombstoneKey() {
+     return reinterpret_cast<const DefinedAtom *>(-1);
+   }
+   static unsigned getHashValue(const DefinedAtom *const Val);
+   static bool isEqual(const DefinedAtom *const LHS,
+                       const DefinedAtom *const RHS);
+ };
+ typedef llvm::DenseSet<const DefinedAtom*, AtomMappingInfo> AtomContentSet;
+ bool addByName(const Atom &);
+ bool addByContent(const DefinedAtom &);
+ AtomToAtom _replacedAtoms;
+ NameToAtom _nameTable;
+ AtomContentSet _contentTable;
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/include/lld/Core/UndefinedAtom.h b/contrib/llvm/tools/lld/include/lld/Core/UndefinedAtom.h
new file mode 100644
index 000000000000..f45d6ecda6b0
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Core/UndefinedAtom.h
@@ -0,0 +1,68 @@
+//===- Core/UndefinedAtom.h - An Undefined Atom ---------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Core/Atom.h"
+namespace lld {
+/// An UndefinedAtom has no content.
+/// It exists as a placeholder for a future atom.
+class UndefinedAtom : public Atom {
+ /// Whether this undefined symbol needs to be resolved,
+ /// or whether it can just evaluate to nullptr.
+ /// This concept is often called "weak", but that term
+ /// is overloaded to mean other things too.
+ enum CanBeNull {
+ /// Normal symbols must be resolved at build time
+ canBeNullNever,
+ /// This symbol can be missing at runtime and will evaluate to nullptr.
+ /// That is, the static linker still must find a definition (usually
+ /// in some shared library), but at runtime, the dynamic loader
+ /// will allow the symbol to be missing and resolved to nullptr.
+ ///
+ /// On Darwin this is generated using a function prototype with
+ /// __attribute__((weak_import)).
+ /// On linux this is generated using a function prototype with
+ /// __attribute__((weak)).
+ /// On Windows this feature is not supported.
+ canBeNullAtRuntime,
+ /// This symbol can be missing at build time.
+ /// That is, the static linker will not error if a definition for
+ /// this symbol is not found at build time. Instead, the linker
+ /// will build an executable that lets the dynamic loader find the
+ /// symbol at runtime.
+ /// This feature is not supported on Darwin nor Windows.
+ /// On linux this is generated using a function prototype with
+ /// __attribute__((weak)).
+ canBeNullAtBuildtime
+ };
+ virtual CanBeNull canBeNull() const = 0;
+ static bool classof(const Atom *a) {
+ return a->definition() == definitionUndefined;
+ }
+ static bool classof(const UndefinedAtom *) { return true; }
+ UndefinedAtom() : Atom(definitionUndefined) {}
+ ~UndefinedAtom() override = default;
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/include/lld/Core/Writer.h b/contrib/llvm/tools/lld/include/lld/Core/Writer.h
new file mode 100644
index 000000000000..1cdfabefebd7
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/Core/Writer.h
@@ -0,0 +1,47 @@
+//===- lld/Core/Writer.h - Abstract File Format Interface -----------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/LLVM.h"
+#include "llvm/Support/Error.h"
+#include <memory>
+#include <vector>
+namespace lld {
+class File;
+class LinkingContext;
+class MachOLinkingContext;
+/// The Writer is an abstract class for writing object files, shared
+/// library files, and executable files. Each file format (e.g. mach-o, etc)
+/// has a concrete subclass of Writer.
+class Writer {
+ virtual ~Writer();
+ /// Write a file from the supplied File object
+ virtual llvm::Error writeFile(const File &linkedFile, StringRef path) = 0;
+ /// This method is called by Core Linking to give the Writer a chance
+ /// to add file format specific "files" to set of files to be linked. This is
+ /// how file format specific atoms can be added to the link.
+ virtual void createImplicitFiles(std::vector<std::unique_ptr<File>> &) {}
+ // only concrete subclasses can be instantiated
+ Writer();
+std::unique_ptr<Writer> createWriterMachO(const MachOLinkingContext &);
+std::unique_ptr<Writer> createWriterYAML(const LinkingContext &);
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/include/lld/ReaderWriter/MachOLinkingContext.h b/contrib/llvm/tools/lld/include/lld/ReaderWriter/MachOLinkingContext.h
new file mode 100644
index 000000000000..fde65880c3e3
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/ReaderWriter/MachOLinkingContext.h
@@ -0,0 +1,508 @@
+//===- lld/ReaderWriter/MachOLinkingContext.h -----------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Core/LinkingContext.h"
+#include "lld/Core/Reader.h"
+#include "lld/Core/Writer.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <set>
+using llvm::MachO::HeaderFileType;
+namespace lld {
+namespace mach_o {
+class ArchHandler;
+class MachODylibFile;
+class MachOFile;
+class SectCreateFile;
+class MachOLinkingContext : public LinkingContext {
+ MachOLinkingContext();
+ ~MachOLinkingContext() override;
+ enum Arch {
+ arch_unknown,
+ arch_ppc,
+ arch_x86,
+ arch_x86_64,
+ arch_armv6,
+ arch_armv7,
+ arch_armv7s,
+ arch_arm64,
+ };
+ enum class OS {
+ unknown,
+ macOSX,
+ iOS,
+ iOS_simulator
+ };
+ enum class ExportMode {
+ globals, // Default, all global symbols exported.
+ whiteList, // -exported_symbol[s_list], only listed symbols exported.
+ blackList // -unexported_symbol[s_list], no listed symbol exported.
+ };
+ enum class DebugInfoMode {
+ addDebugMap, // Default
+ noDebugMap // -S option
+ };
+ enum class UndefinedMode {
+ error,
+ warning,
+ suppress,
+ dynamicLookup
+ };
+ enum ObjCConstraint {
+ objc_unknown = 0,
+ objc_supports_gc = 2,
+ objc_gc_only = 4,
+ // Image optimized by dyld = 8
+ // GC compaction = 16
+ objc_retainReleaseForSimulator = 32,
+ objc_retainRelease
+ };
+ /// Initializes the context to sane default values given the specified output
+ /// file type, arch, os, and minimum os version. This should be called before
+ /// other setXXX() methods.
+ void configure(HeaderFileType type, Arch arch, OS os, uint32_t minOSVersion,
+ bool exportDynamicSymbols);
+ void addPasses(PassManager &pm) override;
+ bool validateImpl() override;
+ std::string demangle(StringRef symbolName) const override;
+ void createImplicitFiles(std::vector<std::unique_ptr<File>> &) override;
+ /// Construct a new file of type T from \p args and hand ownership to this
+ /// context by wrapping it in a FileNode appended to the context's node list.
+ /// Returns a non-owning pointer to the new file. The method is const, so
+ /// constness is cast away in order to reach the mutable node list.
+ template <class T, class... Args>
+ typename std::enable_if<!std::is_array<T>::value, T *>::type
+ make_file(Args &&... args) const {
+   std::unique_ptr<T> owned(new T(std::forward<Args>(args)...));
+   T *raw = owned.get();
+   const_cast<MachOLinkingContext *>(this)->getNodes().push_back(
+       llvm::make_unique<FileNode>(std::move(owned)));
+   return raw;
+ }
+ uint32_t getCPUType() const;
+ uint32_t getCPUSubType() const;
+ bool addEntryPointLoadCommand() const;
+ bool addUnixThreadLoadCommand() const;
+ bool outputTypeHasEntry() const;
+ bool is64Bit() const;
+ virtual uint64_t pageZeroSize() const { return _pageZeroSize; }
+ virtual uint64_t pageSize() const { return _pageSize; }
+ mach_o::ArchHandler &archHandler() const;
+ HeaderFileType outputMachOType() const { return _outputMachOType; }
+ Arch arch() const { return _arch; }
+ StringRef archName() const { return nameFromArch(_arch); }
+ OS os() const { return _os; }
+ ExportMode exportMode() const { return _exportMode; }
+ void setExportMode(ExportMode mode) { _exportMode = mode; }
+ void addExportSymbol(StringRef sym);
+ bool exportRestrictMode() const { return _exportMode != ExportMode::globals; }
+ bool exportSymbolNamed(StringRef sym) const;
+ DebugInfoMode debugInfoMode() const { return _debugInfoMode; }
+ void setDebugInfoMode(DebugInfoMode mode) {
+ _debugInfoMode = mode;
+ }
+ void appendOrderedSymbol(StringRef symbol, StringRef filename);
+ bool keepPrivateExterns() const { return _keepPrivateExterns; }
+ void setKeepPrivateExterns(bool v) { _keepPrivateExterns = v; }
+ bool demangleSymbols() const { return _demangle; }
+ void setDemangleSymbols(bool d) { _demangle = d; }
+ bool mergeObjCCategories() const { return _mergeObjCCategories; }
+ void setMergeObjCCategories(bool v) { _mergeObjCCategories = v; }
+ /// Create file at specified path which will contain a binary encoding
+ /// of all input and output file paths.
+ std::error_code createDependencyFile(StringRef path);
+ void addInputFileDependency(StringRef path) const;
+ void addInputFileNotFound(StringRef path) const;
+ void addOutputFileDependency(StringRef path) const;
+ bool minOS(StringRef mac, StringRef iOS) const;
+ void setDoNothing(bool value) { _doNothing = value; }
+ bool doNothing() const { return _doNothing; }
+ bool printAtoms() const { return _printAtoms; }
+ bool testingFileUsage() const { return _testingFileUsage; }
+ const StringRefVector &searchDirs() const { return _searchDirs; }
+ const StringRefVector &frameworkDirs() const { return _frameworkDirs; }
+ void setSysLibRoots(const StringRefVector &paths);
+ const StringRefVector &sysLibRoots() const { return _syslibRoots; }
+ bool PIE() const { return _pie; }
+ void setPIE(bool pie) { _pie = pie; }
+ bool generateVersionLoadCommand() const {
+ return _generateVersionLoadCommand;
+ }
+ void setGenerateVersionLoadCommand(bool v) {
+ _generateVersionLoadCommand = v;
+ }
+ bool generateFunctionStartsLoadCommand() const {
+ return _generateFunctionStartsLoadCommand;
+ }
+ void setGenerateFunctionStartsLoadCommand(bool v) {
+ _generateFunctionStartsLoadCommand = v;
+ }
+ bool generateDataInCodeLoadCommand() const {
+ return _generateDataInCodeLoadCommand;
+ }
+ void setGenerateDataInCodeLoadCommand(bool v) {
+ _generateDataInCodeLoadCommand = v;
+ }
+ uint64_t stackSize() const { return _stackSize; }
+ void setStackSize(uint64_t stackSize) { _stackSize = stackSize; }
+ uint64_t baseAddress() const { return _baseAddress; }
+ void setBaseAddress(uint64_t baseAddress) { _baseAddress = baseAddress; }
+ ObjCConstraint objcConstraint() const { return _objcConstraint; }
+ uint32_t osMinVersion() const { return _osMinVersion; }
+ uint32_t sdkVersion() const { return _sdkVersion; }
+ void setSdkVersion(uint64_t v) { _sdkVersion = v; } // NOTE(review): v is uint64_t but _sdkVersion (and sdkVersion()) is uint32_t, so the upper 32 bits are silently dropped - confirm callers only pass packed 32-bit versions
+ uint64_t sourceVersion() const { return _sourceVersion; }
+ void setSourceVersion(uint64_t v) { _sourceVersion = v; }
+ uint32_t swiftVersion() const { return _swiftVersion; }
+ /// Checks whether a given path on the filesystem exists.
+ ///
+ /// When running in -test_file_usage mode, this method consults an
+ /// internally maintained list of files that exist (provided by -path_exists)
+ /// instead of the actual filesystem.
+ bool pathExists(StringRef path) const;
+ /// Like pathExists() but only used on files - not directories.
+ bool fileExists(StringRef path) const;
+ /// Adds any library search paths derived from the given base, possibly
+ /// modified by -syslibroots.
+ ///
+ /// The set of paths added consists of approximately all syslibroot-prepended
+ /// versions of libPath that exist, or the original libPath if there are none
+ /// for whatever reason. With various edge-cases for compatibility.
+ void addModifiedSearchDir(StringRef libPath, bool isSystemPath = false);
+ /// Determine whether -lFoo can be resolved within the given path, and
+ /// return the filename if so.
+ ///
+ /// The -lFoo option is documented to search for libFoo.dylib and libFoo.a in
+ /// that order, unless Foo ends in ".o", in which case only the exact file
+ /// matches (e.g. -lfoo.o would only find foo.o).
+ llvm::Optional<StringRef> searchDirForLibrary(StringRef path,
+ StringRef libName) const;
+ /// Iterates through all search path entries looking for libName (as
+ /// specified by -lFoo).
+ llvm::Optional<StringRef> searchLibrary(StringRef libName) const;
+ /// Add a framework search path. Internally, this method may prepend
+ /// syslibroot to the path.
+ void addFrameworkSearchDir(StringRef fwPath, bool isSystemPath = false);
+ /// Iterates through all framework directories looking for
+ /// Foo.framework/Foo (when fwName = "Foo").
+ llvm::Optional<StringRef> findPathForFramework(StringRef fwName) const;
+ /// The dylib's binary compatibility version, in the raw uint32 format.
+ ///
+ /// When building a dynamic library, this is the compatibility version that
+ /// gets embedded into the result. Other Mach-O binaries that link against
+ /// this library will store the compatibility version in its load command. At
+ /// runtime, the loader will verify that the binary is compatible with the
+ /// installed dynamic library.
+ uint32_t compatibilityVersion() const { return _compatibilityVersion; }
+ /// The dylib's current version, in the raw uint32 format.
+ ///
+ /// When building a dynamic library, this is the current version that gets
+ /// embedded into the result. Other Mach-O binaries that link against
+ /// this library will store the compatibility version in its load command.
+ uint32_t currentVersion() const { return _currentVersion; }
+ /// The dylib's install name.
+ ///
+ /// Binaries that link against the dylib will embed this path into the dylib
+ /// load command. When loading the binaries at runtime, this is the location
+ /// on disk that the loader will look for the dylib.
+ StringRef installName() const { return _installName; }
+ /// Whether or not the dylib has side effects during initialization.
+ ///
+ /// Dylibs marked as being dead strippable provide the guarantee that loading
+ /// the dylib has no side effects, allowing the linker to strip out the dylib
+ /// when linking a binary that does not use any of its symbols.
+ bool deadStrippableDylib() const { return _deadStrippableDylib; }
+ /// Whether or not to use flat namespace.
+ ///
+ /// MachO usually uses a two-level namespace, where each external symbol
+ /// referenced by the target is associated with the dylib that will provide
+ /// the symbol's definition at runtime. Using flat namespace overrides this
+ /// behavior: the linker searches all dylibs on the command line and all
+ /// dylibs those original dylibs depend on, but does not record which dylib
+ /// an external symbol came from. At runtime dyld again searches all images
+ /// and uses the first definition it finds. In addition, any undefines in
+ /// loaded flat_namespace dylibs must be resolvable at build time.
+ bool useFlatNamespace() const { return _flatNamespace; }
+ /// How to handle undefined symbols.
+ ///
+ /// Options are:
+ /// * error: Report an error and terminate linking.
+ /// * warning: Report a warning, but continue linking.
+ /// * suppress: Ignore and continue linking.
+ /// * dynamic_lookup: For use with -twolevel namespace: Records source dylibs
+ /// for symbols that are defined in a linked dylib at static link time.
+ /// Undefined symbols are handled by searching all loaded images at
+ /// runtime.
+ UndefinedMode undefinedMode() const { return _undefinedMode; }
+ /// The path to the executable that will load the bundle at runtime.
+ ///
+ /// When building a Mach-O bundle, this executable will be examined if there
+ /// are undefined symbols after the main link phase. It is expected that this
+ /// binary will be loading the bundle at runtime and will provide the symbols
+ /// at that point.
+ StringRef bundleLoader() const { return _bundleLoader; }
+ void setCompatibilityVersion(uint32_t vers) { _compatibilityVersion = vers; }
+ void setCurrentVersion(uint32_t vers) { _currentVersion = vers; }
+ void setInstallName(StringRef name) { _installName = name; }
+ void setDeadStrippableDylib(bool deadStrippable) {
+ _deadStrippableDylib = deadStrippable;
+ }
+ void setUseFlatNamespace(bool flatNamespace) {
+ _flatNamespace = flatNamespace;
+ }
+ void setUndefinedMode(UndefinedMode undefinedMode) {
+ _undefinedMode = undefinedMode;
+ }
+ void setBundleLoader(StringRef loader) { _bundleLoader = loader; }
+ void setPrintAtoms(bool value=true) { _printAtoms = value; }
+ void setTestingFileUsage(bool value = true) {
+ _testingFileUsage = value;
+ }
+ void addExistingPathForDebug(StringRef path) {
+ _existingPaths.insert(path);
+ }
+ void addRpath(StringRef rpath);
+ const StringRefVector &rpaths() const { return _rpaths; }
+ /// Add section alignment constraint on final layout.
+ void addSectionAlignment(StringRef seg, StringRef sect, uint16_t align);
+ /// Add a section based on a command-line sectcreate option.
+ void addSectCreateSection(StringRef seg, StringRef sect,
+ std::unique_ptr<MemoryBuffer> content);
+ /// Returns true if specified section had alignment constraints.
+ bool sectionAligned(StringRef seg, StringRef sect, uint16_t &align) const;
+ StringRef dyldPath() const { return "/usr/lib/dyld"; }
+ /// Stub creation Pass should be run.
+ bool needsStubsPass() const;
+ /// GOT creation Pass should be run.
+ bool needsGOTPass() const;
+ /// Pass to add TLV sections.
+ bool needsTLVPass() const;
+ /// Pass to transform __compact_unwind into __unwind_info should be run.
+ bool needsCompactUnwindPass() const;
+ /// Pass to add shims switching between thumb and arm mode.
+ bool needsShimPass() const;
+ /// Pass to add objc image info and optimized objc data.
+ bool needsObjCPass() const;
+ /// Magic symbol name stubs will need to help lazy bind.
+ StringRef binderSymbolName() const;
+ /// Used to keep track of direct and indirect dylibs.
+ void registerDylib(mach_o::MachODylibFile *dylib, bool upward) const;
+ // Reads a file from disk to memory. Returns only a needed chunk
+ // if a fat binary.
+ ErrorOr<std::unique_ptr<MemoryBuffer>> getMemoryBuffer(StringRef path);
+ /// Used to find indirect dylibs. Instantiates a MachODylibFile if one
+ /// has not already been made for the requested dylib. Uses -L and -F
+ /// search paths to allow indirect dylibs to be overridden.
+ mach_o::MachODylibFile* findIndirectDylib(StringRef path);
+ uint32_t dylibCurrentVersion(StringRef installName) const;
+ uint32_t dylibCompatVersion(StringRef installName) const;
+ ArrayRef<mach_o::MachODylibFile*> allDylibs() const {
+ return _allDylibs;
+ }
+ /// Creates a copy (owned by this MachOLinkingContext) of a string.
+ StringRef copy(StringRef str) { return str.copy(_allocator); }
+ /// If the memoryBuffer is a fat file with a slice for the current arch,
+ /// this method will return the offset and size of that slice.
+ bool sliceFromFatFile(MemoryBufferRef mb, uint32_t &offset, uint32_t &size);
+ /// Returns if a command line option specified dylib is an upward link.
+ bool isUpwardDylib(StringRef installName) const;
+ static bool isThinObjectFile(StringRef path, Arch &arch);
+ static Arch archFromCpuType(uint32_t cputype, uint32_t cpusubtype);
+ static Arch archFromName(StringRef archName);
+ static StringRef nameFromArch(Arch arch);
+ static uint32_t cpuTypeFromArch(Arch arch);
+ static uint32_t cpuSubtypeFromArch(Arch arch);
+ static bool is64Bit(Arch arch);
+ static bool isHostEndian(Arch arch);
+ static bool isBigEndian(Arch arch);
+ /// Construct 32-bit value from string "X.Y.Z" where
+ /// bits are xxxx.yy.zz. Largest number is 65535.255.255
+ static bool parsePackedVersion(StringRef str, uint32_t &result);
+ /// Construct 64-bit value from string "A.B.C.D.E" where
+ /// bits are aaaa.bb.cc.dd.ee. Largest number is 16777215.1023.1023.1023.1023
+ static bool parsePackedVersion(StringRef str, uint64_t &result);
+ void finalizeInputFiles() override;
+ llvm::Error handleLoadedFile(File &file) override;
+ bool customAtomOrderer(const DefinedAtom *left, const DefinedAtom *right,
+ bool &leftBeforeRight) const;
+ /// Return the 'flat namespace' file. This is the file that supplies
+ /// atoms for otherwise undefined symbols when the -flat_namespace or
+ /// -undefined dynamic_lookup options are used.
+ File* flatNamespaceFile() const { return _flatNamespaceFile; }
+ Writer &writer() const override;
+ mach_o::MachODylibFile* loadIndirectDylib(StringRef path);
+ void checkExportWhiteList(const DefinedAtom *atom) const;
+ void checkExportBlackList(const DefinedAtom *atom) const;
+ struct ArchInfo {
+ StringRef archName;
+ MachOLinkingContext::Arch arch;
+ bool littleEndian;
+ uint32_t cputype;
+ uint32_t cpusubtype;
+ };
+ struct SectionAlign {
+ StringRef segmentName;
+ StringRef sectionName;
+ uint16_t align;
+ };
+ struct OrderFileNode {
+ StringRef fileFilter;
+ unsigned order;
+ };
+ static bool findOrderOrdinal(const std::vector<OrderFileNode> &nodes,
+ const DefinedAtom *atom, unsigned &ordinal);
+ static ArchInfo _s_archInfos[];
+ std::set<StringRef> _existingPaths; // For testing only.
+ StringRefVector _searchDirs;
+ StringRefVector _syslibRoots;
+ StringRefVector _frameworkDirs;
+ HeaderFileType _outputMachOType = llvm::MachO::MH_EXECUTE;
+ bool _outputMachOTypeStatic = false; // Disambiguate static vs dynamic prog
+ bool _doNothing = false; // for -help and -v which just print info
+ bool _pie = false;
+ Arch _arch = arch_unknown;
+ OS _os = OS::macOSX;
+ uint32_t _osMinVersion = 0;
+ uint32_t _sdkVersion = 0;
+ uint64_t _sourceVersion = 0;
+ uint64_t _pageZeroSize = 0;
+ uint64_t _pageSize = 4096;
+ uint64_t _baseAddress = 0;
+ uint64_t _stackSize = 0;
+ uint32_t _compatibilityVersion = 0;
+ uint32_t _currentVersion = 0;
+ ObjCConstraint _objcConstraint = objc_unknown;
+ uint32_t _swiftVersion = 0;
+ StringRef _installName;
+ StringRefVector _rpaths;
+ bool _flatNamespace = false;
+ UndefinedMode _undefinedMode = UndefinedMode::error;
+ bool _deadStrippableDylib = false;
+ bool _printAtoms = false;
+ bool _testingFileUsage = false;
+ bool _keepPrivateExterns = false;
+ bool _demangle = false;
+ bool _mergeObjCCategories = true;
+ bool _generateVersionLoadCommand = false;
+ bool _generateFunctionStartsLoadCommand = false;
+ bool _generateDataInCodeLoadCommand = false;
+ StringRef _bundleLoader;
+ mutable std::unique_ptr<mach_o::ArchHandler> _archHandler;
+ mutable std::unique_ptr<Writer> _writer;
+ std::vector<SectionAlign> _sectAligns;
+ mutable llvm::StringMap<mach_o::MachODylibFile*> _pathToDylibMap;
+ mutable std::vector<mach_o::MachODylibFile*> _allDylibs;
+ mutable std::set<mach_o::MachODylibFile*> _upwardDylibs;
+ mutable std::vector<std::unique_ptr<File>> _indirectDylibs;
+ mutable std::mutex _dylibsMutex;
+ ExportMode _exportMode = ExportMode::globals;
+ llvm::StringSet<> _exportedSymbols;
+ DebugInfoMode _debugInfoMode = DebugInfoMode::addDebugMap;
+ std::unique_ptr<llvm::raw_fd_ostream> _dependencyInfo;
+ llvm::StringMap<std::vector<OrderFileNode>> _orderFiles;
+ unsigned _orderFileEntries = 0;
+ File *_flatNamespaceFile = nullptr;
+ mach_o::SectCreateFile *_sectCreateFile = nullptr;
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/include/lld/ReaderWriter/YamlContext.h b/contrib/llvm/tools/lld/include/lld/ReaderWriter/YamlContext.h
new file mode 100644
index 000000000000..b97d21f68e55
--- /dev/null
+++ b/contrib/llvm/tools/lld/include/lld/ReaderWriter/YamlContext.h
@@ -0,0 +1,43 @@
+//===- lld/ReaderWriter/YamlContext.h - object used in YAML I/O context ---===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/LLVM.h"
+#include <functional>
+#include <memory>
+#include <vector>
+namespace lld {
+class File;
+class LinkingContext;
+class Registry;
+namespace mach_o {
+namespace normalized {
+struct NormalizedFile;
+using lld::mach_o::normalized::NormalizedFile;
+/// When YAML I/O is used in lld, the yaml context always holds a YamlContext
+/// object. We need to support heterogeneous yaml documents which each require
+/// different context info. This struct supports all clients.
+struct YamlContext {
+ const LinkingContext *_ctx = nullptr;
+ const Registry *_registry = nullptr;
+ File *_file = nullptr;
+ NormalizedFile *_normalizeMachOFile = nullptr;
+ StringRef _path;
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/lib/CMakeLists.txt b/contrib/llvm/tools/lld/lib/CMakeLists.txt
new file mode 100644
index 000000000000..8884efcfe9ba
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/CMakeLists.txt
@@ -0,0 +1,3 @@
diff --git a/contrib/llvm/tools/lld/lib/Core/CMakeLists.txt b/contrib/llvm/tools/lld/lib/Core/CMakeLists.txt
new file mode 100644
index 000000000000..2d4d9ded0886
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/Core/CMakeLists.txt
@@ -0,0 +1,28 @@
+ set(tablegen_deps intrinsics_gen)
+ DefinedAtom.cpp
+ Error.cpp
+ File.cpp
+ LinkingContext.cpp
+ Reader.cpp
+ Resolver.cpp
+ SymbolTable.cpp
+ Writer.cpp
+ ${LLD_INCLUDE_DIR}/lld/Core
+ BinaryFormat
+ MC
+ Support
+ ${tablegen_deps}
+ )
diff --git a/contrib/llvm/tools/lld/lib/Core/DefinedAtom.cpp b/contrib/llvm/tools/lld/lib/Core/DefinedAtom.cpp
new file mode 100644
index 000000000000..177cae7fcbf0
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/Core/DefinedAtom.cpp
@@ -0,0 +1,82 @@
+//===- DefinedAtom.cpp ------------------------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "llvm/Support/ErrorHandling.h"
+#include "lld/Core/DefinedAtom.h"
+#include "lld/Core/File.h"
+namespace lld {
+DefinedAtom::ContentPermissions DefinedAtom::permissions() const {
+ // By default base permissions on content type.
+ return permissions(this->contentType());
+// Utility function for deriving permissions from content type
+DefinedAtom::ContentPermissions DefinedAtom::permissions(ContentType type) {
+ switch (type) {
+ case typeCode:
+ case typeResolver:
+ case typeBranchIsland:
+ case typeBranchShim:
+ case typeStub:
+ case typeStubHelper:
+ case typeMachHeader:
+ return permR_X;
+ case typeConstant:
+ case typeCString:
+ case typeUTF16String:
+ case typeCFI:
+ case typeLSDA:
+ case typeLiteral4:
+ case typeLiteral8:
+ case typeLiteral16:
+ case typeDTraceDOF:
+ case typeCompactUnwindInfo:
+ case typeProcessedUnwindInfo:
+ case typeObjCImageInfo:
+ case typeObjCMethodList:
+ return permR__;
+ case typeData:
+ case typeDataFast:
+ case typeZeroFill:
+ case typeZeroFillFast:
+ case typeObjC1Class:
+ case typeLazyPointer:
+ case typeLazyDylibPointer:
+ case typeNonLazyPointer:
+ case typeThunkTLV:
+ return permRW_;
+ case typeGOT:
+ case typeConstData:
+ case typeCFString:
+ case typeInitializerPtr:
+ case typeTerminatorPtr:
+ case typeCStringPtr:
+ case typeObjCClassPtr:
+ case typeObjC2CategoryList:
+ case typeInterposingTuples:
+ case typeTLVInitialData:
+ case typeTLVInitialZeroFill:
+ case typeTLVInitializerPtr:
+ return permRW_L;
+ case typeUnknown:
+ case typeTempLTO:
+ case typeSectCreate:
+ case typeDSOHandle:
+ return permUnknown;
+ }
+ llvm_unreachable("unknown content type");
+} // namespace
diff --git a/contrib/llvm/tools/lld/lib/Core/Error.cpp b/contrib/llvm/tools/lld/lib/Core/Error.cpp
new file mode 100644
index 000000000000..6fc76f7ca3d0
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/Core/Error.cpp
@@ -0,0 +1,93 @@
+//===- Error.cpp - system_error extensions for lld --------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Core/Error.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <mutex>
+#include <string>
+#include <vector>
+using namespace lld;
+namespace {
+class _YamlReaderErrorCategory : public std::error_category {
+ const char* name() const noexcept override {
+ return "lld.yaml.reader";
+ }
+ std::string message(int ev) const override {
+ switch (static_cast<YamlReaderError>(ev)) {
+ case YamlReaderError::unknown_keyword:
+ return "Unknown keyword found in yaml file";
+ case YamlReaderError::illegal_value:
+ return "Bad value found in yaml file";
+ }
+ llvm_unreachable("An enumerator of YamlReaderError does not have a "
+ "message defined.");
+ }
+} // end anonymous namespace
+const std::error_category &lld::YamlReaderCategory() {
+ static _YamlReaderErrorCategory o;
+ return o;
+namespace lld {
+/// Temporary class to enable make_dynamic_error_code() until
+/// llvm::ErrorOr<> is updated to work with error encapsulations
+/// other than error_code.
+class dynamic_error_category : public std::error_category {
+ ~dynamic_error_category() override = default;
+ const char *name() const noexcept override {
+ return "lld.dynamic_error";
+ }
+ std::string message(int ev) const override {
+ assert(ev >= 0);
+ assert(ev < (int)_messages.size());
+ // The value is an index into the string vector.
+ return _messages[ev];
+ }
+ int add(std::string msg) {
+ std::lock_guard<std::recursive_mutex> lock(_mutex);
+ // Value zero is always the success value, so seed the table with it first.
+ if (_messages.empty())
+ _messages.push_back("Success");
+ _messages.push_back(msg);
+ // Return the index of the string just appended; it serves as the error value.
+ return _messages.size() - 1;
+ }
+ std::vector<std::string> _messages;
+ std::recursive_mutex _mutex;
+static dynamic_error_category categorySingleton;
+std::error_code make_dynamic_error_code(StringRef msg) {
+ return std::error_code(categorySingleton.add(msg), categorySingleton);
+char GenericError::ID = 0;
+GenericError::GenericError(Twine Msg) : Msg(Msg.str()) { }
+void GenericError::log(raw_ostream &OS) const {
+ OS << Msg;
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/Core/File.cpp b/contrib/llvm/tools/lld/lib/Core/File.cpp
new file mode 100644
index 000000000000..30ded091a92a
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/Core/File.cpp
@@ -0,0 +1,29 @@
+//===- Core/File.cpp - A Container of Atoms -------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Core/File.h"
+#include <mutex>
+namespace lld {
+File::~File() = default;
+File::AtomVector<DefinedAtom> File::_noDefinedAtoms;
+File::AtomVector<UndefinedAtom> File::_noUndefinedAtoms;
+File::AtomVector<SharedLibraryAtom> File::_noSharedLibraryAtoms;
+File::AtomVector<AbsoluteAtom> File::_noAbsoluteAtoms;
+std::error_code File::parse() {
+ std::lock_guard<std::mutex> lock(_parseMutex);
+ if (!_lastError.hasValue())
+ _lastError = doParse();
+ return _lastError.getValue();
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/lib/Core/LinkingContext.cpp b/contrib/llvm/tools/lld/lib/Core/LinkingContext.cpp
new file mode 100644
index 000000000000..0f225c322122
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/Core/LinkingContext.cpp
@@ -0,0 +1,70 @@
+//===- lib/Core/LinkingContext.cpp - Linker Context Object Interface ------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Core/LinkingContext.h"
+#include "lld/Core/File.h"
+#include "lld/Core/Node.h"
+#include "lld/Core/Simple.h"
+#include "lld/Core/Writer.h"
+#include <algorithm>
+namespace lld {
+LinkingContext::LinkingContext() = default;
+LinkingContext::~LinkingContext() = default;
+bool LinkingContext::validate() {
+ return validateImpl();
+llvm::Error LinkingContext::writeFile(const File &linkedFile) const {
+ return this->writer().writeFile(linkedFile, _outputPath);
+std::unique_ptr<File> LinkingContext::createEntrySymbolFile() const {
+ return createEntrySymbolFile("<command line option -e>");
+LinkingContext::createEntrySymbolFile(StringRef filename) const {
+ if (entrySymbolName().empty())
+ return nullptr;
+ std::unique_ptr<SimpleFile> entryFile(new SimpleFile(filename,
+ File::kindEntryObject));
+ entryFile->addAtom(
+ *(new (_allocator) SimpleUndefinedAtom(*entryFile, entrySymbolName())));
+ return std::move(entryFile);
+std::unique_ptr<File> LinkingContext::createUndefinedSymbolFile() const {
+ return createUndefinedSymbolFile("<command line option -u or --defsym>");
+LinkingContext::createUndefinedSymbolFile(StringRef filename) const {
+ if (_initialUndefinedSymbols.empty())
+ return nullptr;
+ std::unique_ptr<SimpleFile> undefinedSymFile(
+ new SimpleFile(filename, File::kindUndefinedSymsObject));
+ for (StringRef undefSym : _initialUndefinedSymbols)
+ undefinedSymFile->addAtom(*(new (_allocator) SimpleUndefinedAtom(
+ *undefinedSymFile, undefSym)));
+ return std::move(undefinedSymFile);
+void LinkingContext::createInternalFiles(
+ std::vector<std::unique_ptr<File>> &result) const {
+ if (std::unique_ptr<File> file = createEntrySymbolFile())
+ result.push_back(std::move(file));
+ if (std::unique_ptr<File> file = createUndefinedSymbolFile())
+ result.push_back(std::move(file));
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/lib/Core/Reader.cpp b/contrib/llvm/tools/lld/lib/Core/Reader.cpp
new file mode 100644
index 000000000000..5d8bbbbfe4d7
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/Core/Reader.cpp
@@ -0,0 +1,114 @@
+//===- lib/Core/Reader.cpp ------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Core/Reader.h"
+#include "lld/Core/File.h"
+#include "lld/Core/Reference.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <algorithm>
+#include <memory>
+using llvm::file_magic;
+using llvm::identify_magic;
+namespace lld {
+YamlIOTaggedDocumentHandler::~YamlIOTaggedDocumentHandler() = default;
+void Registry::add(std::unique_ptr<Reader> reader) {
+ _readers.push_back(std::move(reader));
+void Registry::add(std::unique_ptr<YamlIOTaggedDocumentHandler> handler) {
+ _yamlHandlers.push_back(std::move(handler));
+Registry::loadFile(std::unique_ptr<MemoryBuffer> mb) const {
+ // Get file magic.
+ StringRef content(mb->getBufferStart(), mb->getBufferSize());
+ file_magic fileType = identify_magic(content);
+ // Ask each registered reader if it can handle this file type or extension.
+ for (const std::unique_ptr<Reader> &reader : _readers) {
+ if (!reader->canParse(fileType, mb->getMemBufferRef()))
+ continue;
+ return reader->loadFile(std::move(mb), *this);
+ }
+ // No Reader could parse this file.
+ return make_error_code(llvm::errc::executable_format_error);
+static const Registry::KindStrings kindStrings[] = {
+ {Reference::kindLayoutAfter, "layout-after"},
+ {Reference::kindAssociate, "associate"},
+Registry::Registry() {
+ addKindTable(Reference::KindNamespace::all, Reference::KindArch::all,
+ kindStrings);
+bool Registry::handleTaggedDoc(llvm::yaml::IO &io,
+ const lld::File *&file) const {
+ for (const std::unique_ptr<YamlIOTaggedDocumentHandler> &h : _yamlHandlers)
+ if (h->handledDocTag(io, file))
+ return true;
+ return false;
+void Registry::addKindTable(Reference::KindNamespace ns,
+ Reference::KindArch arch,
+ const KindStrings array[]) {
+ KindEntry entry = { ns, arch, array };
+ _kindEntries.push_back(entry);
+bool Registry::referenceKindFromString(StringRef inputStr,
+ Reference::KindNamespace &ns,
+ Reference::KindArch &arch,
+ Reference::KindValue &value) const {
+ for (const KindEntry &entry : _kindEntries) {
+ for (const KindStrings *pair = entry.array; !pair->name.empty(); ++pair) {
+ if (!inputStr.equals(pair->name))
+ continue;
+ ns = entry.ns;
+ arch = entry.arch;
+ value = pair->value;
+ return true;
+ }
+ }
+ return false;
+bool Registry::referenceKindToString(Reference::KindNamespace ns,
+ Reference::KindArch arch,
+ Reference::KindValue value,
+ StringRef &str) const {
+ for (const KindEntry &entry : _kindEntries) {
+ if (entry.ns != ns)
+ continue;
+ if (entry.arch != arch)
+ continue;
+ for (const KindStrings *pair = entry.array; !pair->name.empty(); ++pair) {
+ if (pair->value != value)
+ continue;
+ str = pair->name;
+ return true;
+ }
+ }
+ return false;
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/lib/Core/Resolver.cpp b/contrib/llvm/tools/lld/lib/Core/Resolver.cpp
new file mode 100644
index 000000000000..9c51c6cdb19c
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/Core/Resolver.cpp
@@ -0,0 +1,505 @@
+//===- Core/Resolver.cpp - Resolves Atom References -----------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Core/Resolver.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Core/ArchiveLibraryFile.h"
+#include "lld/Core/Atom.h"
+#include "lld/Core/File.h"
+#include "lld/Core/Instrumentation.h"
+#include "lld/Core/LinkingContext.h"
+#include "lld/Core/SharedLibraryFile.h"
+#include "lld/Core/SymbolTable.h"
+#include "lld/Core/UndefinedAtom.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <utility>
+#include <vector>
+namespace lld {
+llvm::Expected<bool> Resolver::handleFile(File &file) {
+ if (auto ec = _ctx.handleLoadedFile(file))
+ return std::move(ec);
+ bool undefAdded = false;
+ for (auto &atom : file.defined().owning_ptrs())
+ doDefinedAtom(std::move(atom));
+ for (auto &atom : file.undefined().owning_ptrs()) {
+ if (doUndefinedAtom(std::move(atom)))
+ undefAdded = true;
+ }
+ for (auto &atom : file.sharedLibrary().owning_ptrs())
+ doSharedLibraryAtom(std::move(atom));
+ for (auto &atom : file.absolute().owning_ptrs())
+ doAbsoluteAtom(std::move(atom));
+ return undefAdded;
+llvm::Expected<bool> Resolver::forEachUndefines(File &file,
+ UndefCallback callback) {
+ size_t i = _undefineIndex[&file];
+ bool undefAdded = false;
+ do {
+ for (; i < _undefines.size(); ++i) {
+ StringRef undefName = _undefines[i];
+ if (undefName.empty())
+ continue;
+ const Atom *atom = _symbolTable.findByName(undefName);
+ if (!isa<UndefinedAtom>(atom) || _symbolTable.isCoalescedAway(atom)) {
+ // The symbol was resolved by some other file. Cache the result.
+ _undefines[i] = "";
+ continue;
+ }
+ auto undefAddedOrError = callback(undefName);
+ if (auto ec = undefAddedOrError.takeError())
+ return std::move(ec);
+ undefAdded |= undefAddedOrError.get();
+ }
+ } while (i < _undefines.size());
+ _undefineIndex[&file] = i;
+ return undefAdded;
+llvm::Expected<bool> Resolver::handleArchiveFile(File &file) {
+ ArchiveLibraryFile *archiveFile = cast<ArchiveLibraryFile>(&file);
+ return forEachUndefines(file,
+ [&](StringRef undefName) -> llvm::Expected<bool> {
+ if (File *member = archiveFile->find(undefName)) {
+ member->setOrdinal(_ctx.getNextOrdinalAndIncrement());
+ return handleFile(*member);
+ }
+ return false;
+ });
+llvm::Error Resolver::handleSharedLibrary(File &file) {
+ // Add all the atoms from the shared library
+ SharedLibraryFile *sharedLibrary = cast<SharedLibraryFile>(&file);
+ auto undefAddedOrError = handleFile(*sharedLibrary);
+ if (auto ec = undefAddedOrError.takeError())
+ return ec;
+ undefAddedOrError =
+ forEachUndefines(file, [&](StringRef undefName) -> llvm::Expected<bool> {
+ auto atom = sharedLibrary->exports(undefName);
+ if (atom.get())
+ doSharedLibraryAtom(std::move(atom));
+ return false;
+ });
+ if (auto ec = undefAddedOrError.takeError())
+ return ec;
+ return llvm::Error::success();
+bool Resolver::doUndefinedAtom(OwningAtomPtr<UndefinedAtom> atom) {
+ DEBUG_WITH_TYPE("resolver", llvm::dbgs()
+ << " UndefinedAtom: "
+ << llvm::format("0x%09lX", atom.get())
+ << ", name=" << atom.get()->name() << "\n");
+ // tell symbol table
+ bool newUndefAdded = _symbolTable.add(*atom.get());
+ if (newUndefAdded)
+ _undefines.push_back(atom.get()->name());
+ // add to list of known atoms
+ _atoms.push_back(OwningAtomPtr<Atom>(atom.release()));
+ return newUndefAdded;
+// Called on each defined atom when a file is added. Registers the atom with
+// the symbol table and appends it to the list of known atoms.
+void Resolver::doDefinedAtom(OwningAtomPtr<DefinedAtom> atom) {
+ DEBUG_WITH_TYPE("resolver", llvm::dbgs()
+ << " DefinedAtom: "
+ << llvm::format("0x%09lX", atom.get())
+ << ", file=#"
+ << atom.get()->file().ordinal()
+ << ", atom=#"
+ << atom.get()->ordinal()
+ << ", name="
+ << atom.get()->name()
+ << ", type="
+ << atom.get()->contentType()
+ << "\n");
+ // An atom that should never be dead-stripped is a dead-strip root.
+ if (_ctx.deadStrip() &&
+ atom.get()->deadStrip() == DefinedAtom::deadStripNever) {
+ _deadStripRoots.insert(atom.get());
+ }
+ // add to list of known atoms
+ _symbolTable.add(*atom.get());
+ _atoms.push_back(OwningAtomPtr<Atom>(atom.release()));
+void Resolver::doSharedLibraryAtom(OwningAtomPtr<SharedLibraryAtom> atom) {
+ DEBUG_WITH_TYPE("resolver", llvm::dbgs()
+ << " SharedLibraryAtom: "
+ << llvm::format("0x%09lX", atom.get())
+ << ", name="
+ << atom.get()->name()
+ << "\n");
+ // tell symbol table
+ _symbolTable.add(*atom.get());
+ // add to list of known atoms
+ _atoms.push_back(OwningAtomPtr<Atom>(atom.release()));
+void Resolver::doAbsoluteAtom(OwningAtomPtr<AbsoluteAtom> atom) {
+ DEBUG_WITH_TYPE("resolver", llvm::dbgs()
+ << " AbsoluteAtom: "
+ << llvm::format("0x%09lX", atom.get())
+ << ", name="
+ << atom.get()->name()
+ << "\n");
+ // tell symbol table
+ if (atom.get()->scope() != Atom::scopeTranslationUnit)
+ _symbolTable.add(*atom.get());
+ // add to list of known atoms
+ _atoms.push_back(OwningAtomPtr<Atom>(atom.release()));
+// Returns true if at least one of N previous files has created an
+// undefined symbol.
+bool Resolver::undefinesAdded(int begin, int end) {
+ std::vector<std::unique_ptr<Node>> &inputs = _ctx.getNodes();
+ for (int i = begin; i < end; ++i)
+ if (FileNode *node = dyn_cast<FileNode>(inputs[i].get()))
+ if (_newUndefinesAdded[node->getFile()])
+ return true;
+ return false;
+File *Resolver::getFile(int &index) {
+ std::vector<std::unique_ptr<Node>> &inputs = _ctx.getNodes();
+ if ((size_t)index >= inputs.size())
+ return nullptr;
+ if (GroupEnd *group = dyn_cast<GroupEnd>(inputs[index].get())) {
+ // We are at the end of the current group. If one or more new
+ // undefined atom has been added in the last groupSize files, we
+ // reiterate over the files.
+ int size = group->getSize();
+ if (undefinesAdded(index - size, index)) {
+ index -= size;
+ return getFile(index);
+ }
+ ++index;
+ return getFile(index);
+ }
+ return cast<FileNode>(inputs[index++].get())->getFile();
+// Keep adding atoms until getFile() reports that there are no more input
+// files. This function is where undefined atoms are resolved.
+bool Resolver::resolveUndefines() {
+ DEBUG_WITH_TYPE("resolver",
+ llvm::dbgs() << "******** Resolving undefines:\n");
+ ScopedTask task(getDefaultDomain(), "resolveUndefines");
+ int index = 0;
+ std::set<File *> seen;
+ for (;;) {
+ bool undefAdded = false;
+ DEBUG_WITH_TYPE("resolver",
+ llvm::dbgs() << "Loading file #" << index << "\n");
+ File *file = getFile(index);
+ if (!file)
+ return true;
+ if (std::error_code ec = file->parse()) {
+ llvm::errs() << "Cannot open " + file->path()
+ << ": " << ec.message() << "\n";
+ return false;
+ }
+ DEBUG_WITH_TYPE("resolver",
+ llvm::dbgs() << "Loaded file: " << file->path() << "\n");
+ switch (file->kind()) {
+ case File::kindErrorObject:
+ case File::kindNormalizedObject:
+ case File::kindMachObject:
+ case File::kindCEntryObject:
+ case File::kindHeaderObject:
+ case File::kindEntryObject:
+ case File::kindUndefinedSymsObject:
+ case File::kindStubHelperObject:
+ case File::kindResolverMergedObject:
+ case File::kindSectCreateObject: {
+ // The same file may be visited more than once if the file is
+ // in --start-group and --end-group. Only library files should
+ // be processed more than once.
+ if (seen.count(file))
+ break;
+ seen.insert(file);
+ assert(!file->hasOrdinal());
+ file->setOrdinal(_ctx.getNextOrdinalAndIncrement());
+ auto undefAddedOrError = handleFile(*file);
+ if (auto EC = undefAddedOrError.takeError()) {
+ // FIXME: This should be passed to logAllUnhandledErrors but it needs
+ // to be passed a Twine instead of a string.
+ llvm::errs() << "Error in " + file->path() << ": ";
+ logAllUnhandledErrors(std::move(EC), llvm::errs(), std::string());
+ return false;
+ }
+ undefAdded = undefAddedOrError.get();
+ break;
+ }
+ case File::kindArchiveLibrary: {
+ if (!file->hasOrdinal())
+ file->setOrdinal(_ctx.getNextOrdinalAndIncrement());
+ auto undefAddedOrError = handleArchiveFile(*file);
+ if (auto EC = undefAddedOrError.takeError()) {
+ // FIXME: This should be passed to logAllUnhandledErrors but it needs
+ // to be passed a Twine instead of a string.
+ llvm::errs() << "Error in " + file->path() << ": ";
+ logAllUnhandledErrors(std::move(EC), llvm::errs(), std::string());
+ return false;
+ }
+ undefAdded = undefAddedOrError.get();
+ break;
+ }
+ case File::kindSharedLibrary:
+ if (!file->hasOrdinal())
+ file->setOrdinal(_ctx.getNextOrdinalAndIncrement());
+ if (auto EC = handleSharedLibrary(*file)) {
+ // FIXME: This should be passed to logAllUnhandledErrors but it needs
+ // to be passed a Twine instead of a string.
+ llvm::errs() << "Error in " + file->path() << ": ";
+ logAllUnhandledErrors(std::move(EC), llvm::errs(), std::string());
+ return false;
+ }
+ break;
+ }
+ _newUndefinesAdded[file] = undefAdded;
+ }
+// switch all references to undefined or coalesced away atoms
+// to the new defined atom
+void Resolver::updateReferences() {
+ DEBUG_WITH_TYPE("resolver",
+ llvm::dbgs() << "******** Updating references:\n");
+ ScopedTask task(getDefaultDomain(), "updateReferences");
+ for (const OwningAtomPtr<Atom> &atom : _atoms) {
+ if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(atom.get())) {
+ for (const Reference *ref : *defAtom) {
+ // A reference of type kindAssociate shouldn't be updated.
+ // Instead, an atom having such reference will be removed
+ // if the target atom is coalesced away, so that they will
+ // go away as a group.
+ if (ref->kindNamespace() == lld::Reference::KindNamespace::all &&
+ ref->kindValue() == lld::Reference::kindAssociate) {
+ if (_symbolTable.isCoalescedAway(atom.get()))
+ _deadAtoms.insert(ref->target());
+ continue;
+ }
+ const Atom *newTarget = _symbolTable.replacement(ref->target());
+ const_cast<Reference *>(ref)->setTarget(newTarget);
+ }
+ }
+ }
+// For dead code stripping, recursively mark atoms "live"
+void Resolver::markLive(const Atom *atom) {
+ // Mark the atom as live. If it's already marked live, then stop recursion.
+ auto exists = _liveAtoms.insert(atom);
+ if (!exists.second)
+ return;
+ // Mark all atoms it references as live
+ if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(atom)) {
+ for (const Reference *ref : *defAtom)
+ markLive(ref->target());
+ for (auto &p : llvm::make_range(_reverseRef.equal_range(defAtom))) {
+ const Atom *target = p.second;
+ markLive(target);
+ }
+ }
+static bool isBackref(const Reference *ref) {
+ if (ref->kindNamespace() != lld::Reference::KindNamespace::all)
+ return false;
+ return (ref->kindValue() == lld::Reference::kindLayoutAfter);
+// remove all atoms not actually used
+void Resolver::deadStripOptimize() {
+ DEBUG_WITH_TYPE("resolver",
+ llvm::dbgs() << "******** Dead stripping unused atoms:\n");
+ ScopedTask task(getDefaultDomain(), "deadStripOptimize");
+ // only do this optimization with -dead_strip
+ if (!_ctx.deadStrip())
+ return;
+ // Some types of references prevent referring atoms from being dead-stripped.
+ // Make a reverse map of such references before traversing the graph.
+ // While traversing the list of atoms, mark AbsoluteAtoms as live
+ // in order to avoid reclaim.
+ for (const OwningAtomPtr<Atom> &atom : _atoms) {
+ if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(atom.get()))
+ for (const Reference *ref : *defAtom)
+ if (isBackref(ref))
+ _reverseRef.insert(std::make_pair(ref->target(), atom.get()));
+ if (const AbsoluteAtom *absAtom = dyn_cast<AbsoluteAtom>(atom.get()))
+ markLive(absAtom);
+ }
+ // By default, shared libraries are built with all globals as dead strip roots
+ if (_ctx.globalsAreDeadStripRoots())
+ for (const OwningAtomPtr<Atom> &atom : _atoms)
+ if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(atom.get()))
+ if (defAtom->scope() == DefinedAtom::scopeGlobal)
+ _deadStripRoots.insert(defAtom);
+ // Or, use list of names that are dead strip roots.
+ for (const StringRef &name : _ctx.deadStripRoots()) {
+ const Atom *symAtom = _symbolTable.findByName(name);
+ assert(symAtom);
+ _deadStripRoots.insert(symAtom);
+ }
+ // mark all roots as live, and recursively all atoms they reference
+ for (const Atom *dsrAtom : _deadStripRoots)
+ markLive(dsrAtom);
+ // now remove all non-live atoms from _atoms
+ _atoms.erase(std::remove_if(_atoms.begin(), _atoms.end(),
+ [&](OwningAtomPtr<Atom> &a) {
+ return _liveAtoms.count(a.get()) == 0;
+ }),
+ _atoms.end());
+// error out if some undefines remain
+bool Resolver::checkUndefines() {
+ DEBUG_WITH_TYPE("resolver",
+ llvm::dbgs() << "******** Checking for undefines:\n");
+ // build vector of remaining undefined symbols
+ std::vector<const UndefinedAtom *> undefinedAtoms = _symbolTable.undefines();
+ if (_ctx.deadStrip()) {
+ // When dead code stripping, we don't care if dead atoms are undefined.
+ undefinedAtoms.erase(
+ std::remove_if(undefinedAtoms.begin(), undefinedAtoms.end(),
+ [&](const Atom *a) { return _liveAtoms.count(a) == 0; }),
+ undefinedAtoms.end());
+ }
+ if (undefinedAtoms.empty())
+ return false;
+ // Warn about unresolved symbols.
+ bool foundUndefines = false;
+ for (const UndefinedAtom *undef : undefinedAtoms) {
+ // Skip over a weak symbol.
+ if (undef->canBeNull() != UndefinedAtom::canBeNullNever)
+ continue;
+ // If this is a library and undefined symbols are allowed on the
+ // target platform, skip over it.
+ if (isa<SharedLibraryFile>(undef->file()) && _ctx.allowShlibUndefines())
+ continue;
+ // If the undefine is coalesced away, skip over it.
+ if (_symbolTable.isCoalescedAway(undef))
+ continue;
+ // Seems like this symbol is undefined. Warn about it.
+ foundUndefines = true;
+ if (_ctx.printRemainingUndefines()) {
+ llvm::errs() << "Undefined symbol: " << undef->file().path()
+ << ": " << _ctx.demangle(undef->name())
+ << "\n";
+ }
+ }
+ if (!foundUndefines)
+ return false;
+ if (_ctx.printRemainingUndefines())
+ llvm::errs() << "symbol(s) not found\n";
+ return true;
+// remove from _atoms all coalesced away atoms
+void Resolver::removeCoalescedAwayAtoms() {
+ DEBUG_WITH_TYPE("resolver",
+ llvm::dbgs() << "******** Removing coalesced away atoms:\n");
+ ScopedTask task(getDefaultDomain(), "removeCoalescedAwayAtoms");
+ _atoms.erase(std::remove_if(_atoms.begin(), _atoms.end(),
+ [&](OwningAtomPtr<Atom> &a) {
+ return _symbolTable.isCoalescedAway(a.get()) ||
+ _deadAtoms.count(a.get());
+ }),
+ _atoms.end());
+bool Resolver::resolve() {
+ DEBUG_WITH_TYPE("resolver",
+ llvm::dbgs() << "******** Resolving atom references:\n");
+ if (!resolveUndefines())
+ return false;
+ updateReferences();
+ deadStripOptimize();
+ if (checkUndefines()) {
+ DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "Found undefines... ");
+ if (!_ctx.allowRemainingUndefines()) {
+ DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "which we don't allow\n");
+ return false;
+ }
+ DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "which we are ok with\n");
+ }
+ removeCoalescedAwayAtoms();
+ _result->addAtoms(_atoms);
+ DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "******** Finished resolver\n");
+ return true;
+void Resolver::MergedFile::addAtoms(
+ llvm::MutableArrayRef<OwningAtomPtr<Atom>> all) {
+ ScopedTask task(getDefaultDomain(), "addAtoms");
+ DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "Resolver final atom list:\n");
+ for (OwningAtomPtr<Atom> &atom : all) {
+#ifndef NDEBUG
+ if (auto *definedAtom = dyn_cast<DefinedAtom>(atom.get())) {
+ DEBUG_WITH_TYPE("resolver", llvm::dbgs()
+ << llvm::format(" 0x%09lX", definedAtom)
+ << ", file=#"
+ << definedAtom->file().ordinal()
+ << ", atom=#"
+ << definedAtom->ordinal()
+ << ", name="
+ << definedAtom->name()
+ << ", type="
+ << definedAtom->contentType()
+ << "\n");
+ } else {
+ DEBUG_WITH_TYPE("resolver", llvm::dbgs()
+ << llvm::format(" 0x%09lX", atom.get())
+ << ", name="
+ << atom.get()->name()
+ << "\n");
+ }
+ addAtom(*atom.release());
+ }
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/Core/SymbolTable.cpp b/contrib/llvm/tools/lld/lib/Core/SymbolTable.cpp
new file mode 100644
index 000000000000..51ae8d17181d
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/Core/SymbolTable.cpp
@@ -0,0 +1,291 @@
+//===- Core/SymbolTable.cpp - Main Symbol Table ---------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Core/SymbolTable.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Core/AbsoluteAtom.h"
+#include "lld/Core/Atom.h"
+#include "lld/Core/DefinedAtom.h"
+#include "lld/Core/File.h"
+#include "lld/Core/LinkingContext.h"
+#include "lld/Core/Resolver.h"
+#include "lld/Core/SharedLibraryAtom.h"
+#include "lld/Core/UndefinedAtom.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdlib>
+#include <vector>
+namespace lld {
+bool SymbolTable::add(const UndefinedAtom &atom) { return addByName(atom); }
+bool SymbolTable::add(const SharedLibraryAtom &atom) { return addByName(atom); }
+bool SymbolTable::add(const AbsoluteAtom &atom) { return addByName(atom); }
+// Adds a defined atom to the table.
+//  - A named atom whose scope is not scopeTranslationUnit is tracked by name.
+//  - A mergeByContent atom with permR__ permissions is tracked by content.
+//  - Anything else is not tracked and false is returned.
+// The return value is whatever addByName()/addByContent() returned.
+bool SymbolTable::add(const DefinedAtom &atom) {
+  const bool isNamed = !atom.name().empty();
+  if (isNamed && atom.scope() != DefinedAtom::scopeTranslationUnit) {
+    // A named atom must never request content-based merging.
+    assert(atom.merge() != DefinedAtom::mergeByContent);
+    return addByName(atom);
+  }
+  if (atom.merge() != DefinedAtom::mergeByContent)
+    return false;
+  // Content-merged atoms must be anonymous.
+  assert(atom.name().empty());
+  // Only read-only constants are merged for now.
+  // TODO: support mergeByContent of data atoms by comparing content & fixups.
+  const bool isReadOnly = atom.permissions() == DefinedAtom::permR__;
+  return isReadOnly ? addByContent(atom) : false;
+// Outcome of a name collision between the atom already in the symbol table
+// ("first") and a newly added atom ("second"). Consumed by the switch in
+// SymbolTable::addByName().
+enum NameCollisionResolution {
+ NCR_First, // keep the existing atom
+ NCR_Second, // replace the existing atom with the new one
+ NCR_DupDef, // both are defined atoms: decide via their merge attributes
+ NCR_DupUndef, // both are undefined atoms: decide via canBeNull()
+ NCR_DupShLib, // both are shared-library atoms: the existing one is kept
+ NCR_Error // incompatible pair: reported as a fatal error
+// Lookup table for name collisions. The row is chosen by the definition kind
+// of the atom already in the table ("first"), the column by the definition
+// kind of the incoming atom ("second"); see collide().
+// The table is declared const: it is fixed data and must never be written.
+static const NameCollisionResolution cases[4][4] = {
+  //regular absolute undef sharedLib
+  {
+    // first is regular
+    NCR_DupDef, NCR_Error, NCR_First, NCR_First
+  },
+  {
+    // first is absolute
+    NCR_Error, NCR_Error, NCR_First, NCR_First
+  },
+  {
+    // first is undef
+    NCR_Second, NCR_Second, NCR_DupUndef, NCR_Second
+  },
+  {
+    // first is sharedLib
+    NCR_Second, NCR_Second, NCR_First, NCR_DupShLib
+  }
+// Looks up how a collision between an existing atom of kind `first` and a new
+// atom of kind `second` is resolved. Both values are used directly as indices
+// into the `cases` table.
+static NameCollisionResolution collide(Atom::Definition first,
+                                       Atom::Definition second) {
+  const NameCollisionResolution verdict = cases[first][second];
+  return verdict;
+// Outcome of merging two defined atoms with the same name, as handled by the
+// NCR_DupDef case in SymbolTable::addByName().
+enum MergeResolution {
+ MCR_First, // keep the existing atom
+ MCR_Second, // use the new atom
+ MCR_Largest, // use the new atom if its sectionSize() is >= the existing one
+ MCR_SameSize, // sizes must match (new atom is then used), otherwise an error
+ MCR_Error // duplicate symbol: reported as a fatal error
+// Lookup table for merging two same-named defined atoms. The row is chosen by
+// the merge attribute of the existing atom, the column by the merge attribute
+// of the new atom; see mergeSelect().
+// The table is declared const: it is fixed data and must never be written.
+static const MergeResolution mergeCases[][6] = {
+  // no tentative weak weakAddress sameNameAndSize largest
+  {MCR_Error, MCR_First, MCR_First, MCR_First, MCR_SameSize, MCR_Largest}, // no
+  {MCR_Second, MCR_Largest, MCR_Second, MCR_Second, MCR_SameSize, MCR_Largest}, // tentative
+  {MCR_Second, MCR_First, MCR_First, MCR_Second, MCR_SameSize, MCR_Largest}, // weak
+  {MCR_Second, MCR_First, MCR_First, MCR_First, MCR_SameSize, MCR_Largest}, // weakAddress
+  {MCR_SameSize, MCR_SameSize, MCR_SameSize, MCR_SameSize, MCR_SameSize, MCR_SameSize}, // sameSize
+  {MCR_Largest, MCR_Largest, MCR_Largest, MCR_Largest, MCR_SameSize, MCR_Largest}, // largest
+// Returns the merge resolution for an existing atom with merge attribute
+// `first` and a new atom with merge attribute `second`. Neither may be
+// mergeByContent; both values are used as indices into `mergeCases`.
+static MergeResolution mergeSelect(DefinedAtom::Merge first,
+                                   DefinedAtom::Merge second) {
+  assert(first != DefinedAtom::mergeByContent);
+  assert(second != DefinedAtom::mergeByContent);
+  const MergeResolution verdict = mergeCases[first][second];
+  return verdict;
+// Registers `newAtom` under its (non-empty) name.
+// Returns true only when the name was not in the table before. If the name is
+// already bound, the collision is resolved via collide()/mergeSelect(), the
+// losing atom is recorded in _replacedAtoms (loser -> winner) and false is
+// returned. Irreconcilable collisions end in llvm::report_fatal_error().
+bool SymbolTable::addByName(const Atom &newAtom) {
+ StringRef name = newAtom.name();
+ assert(!name.empty());
+ const Atom *existing = findByName(name);
+ if (existing == nullptr) {
+ // Name is not in the symbol table yet; associate it with this atom.
+ _nameTable[name] = &newAtom;
+ return true;
+ }
+ // Do nothing if the same object is added more than once.
+ if (existing == &newAtom)
+ return false;
+ // Name is already in symbol table and associated with another atom.
+ bool useNew = true;
+ switch (collide(existing->definition(), newAtom.definition())) {
+ case NCR_First:
+ useNew = false;
+ break;
+ case NCR_Second:
+ useNew = true;
+ break;
+ case NCR_DupDef: {
+ // Two defined atoms: the merge attributes of both decide the winner.
+ const auto *existingDef = cast<DefinedAtom>(existing);
+ const auto *newDef = cast<DefinedAtom>(&newAtom);
+ switch (mergeSelect(existingDef->merge(), newDef->merge())) {
+ case MCR_First:
+ useNew = false;
+ break;
+ case MCR_Second:
+ useNew = true;
+ break;
+ case MCR_Largest: {
+ // On equal sizes the new atom wins (>=).
+ uint64_t existingSize = existingDef->sectionSize();
+ uint64_t newSize = newDef->sectionSize();
+ useNew = (newSize >= existingSize);
+ break;
+ }
+ case MCR_SameSize: {
+ uint64_t existingSize = existingDef->sectionSize();
+ uint64_t newSize = newDef->sectionSize();
+ if (existingSize == newSize) {
+ useNew = true;
+ break;
+ }
+ llvm::errs() << "Size mismatch: "
+ << existing->name() << " (" << existingSize << ") "
+ << newAtom.name() << " (" << newSize << ")\n";
+ // NOTE(review): there is no break here, so a size mismatch continues
+ // into MCR_Error below and ends in the fatal duplicate-symbol error.
+ // Presumably intentional -- confirm and annotate the fall-through.
+ }
+ case MCR_Error:
+ llvm::errs() << "Duplicate symbols: "
+ << existing->name()
+ << ":"
+ << existing->file().path()
+ << " and "
+ << newAtom.name()
+ << ":"
+ << newAtom.file().path()
+ << "\n";
+ llvm::report_fatal_error("duplicate symbol error");
+ break;
+ }
+ break;
+ }
+ case NCR_DupUndef: {
+ // Two undefined atoms: keep the existing one when canBeNull() matches,
+ // otherwise the new atom wins only if its canBeNull() value is smaller.
+ const UndefinedAtom* existingUndef = cast<UndefinedAtom>(existing);
+ const UndefinedAtom* newUndef = cast<UndefinedAtom>(&newAtom);
+ bool sameCanBeNull = (existingUndef->canBeNull() == newUndef->canBeNull());
+ if (sameCanBeNull)
+ useNew = false;
+ else
+ useNew = (newUndef->canBeNull() < existingUndef->canBeNull());
+ break;
+ }
+ case NCR_DupShLib: {
+ // Two shared-library atoms: the first one seen is kept.
+ useNew = false;
+ break;
+ }
+ case NCR_Error:
+ llvm::errs() << "SymbolTable: error while merging " << name << "\n";
+ llvm::report_fatal_error("duplicate symbol error");
+ break;
+ }
+ if (useNew) {
+ // Update name table to use new atom.
+ _nameTable[name] = &newAtom;
+ // Add existing atom to replacement table.
+ _replacedAtoms[existing] = &newAtom;
+ } else {
+ // New atom is not being used. Add it to replacement table.
+ _replacedAtoms[&newAtom] = existing;
+ }
+ return false;
+// Hash used for content-based merging: combines the atom's size, its content
+// type and a hash over its raw content bytes.
+unsigned SymbolTable::AtomMappingInfo::getHashValue(const DefinedAtom *atom) {
+  const auto bytes = atom->rawContent();
+  const llvm::hash_code contentHash =
+      llvm::hash_combine_range(bytes.begin(), bytes.end());
+  return llvm::hash_combine(atom->size(), atom->contentType(), contentHash);
+// Equality predicate for content-based merging. Two atoms are equal when they
+// are the same object, or when neither is the empty/tombstone key and they
+// agree on content type, size, section choice (plus custom section name for
+// sectionCustomRequired) and raw content bytes.
+bool SymbolTable::AtomMappingInfo::isEqual(const DefinedAtom * const l,
+                                           const DefinedAtom * const r) {
+  if (l == r)
+    return true;
+  // The sentinel keys are not real atoms and must never be dereferenced.
+  if (l == getEmptyKey() || r == getEmptyKey())
+    return false;
+  if (l == getTombstoneKey() || r == getTombstoneKey())
+    return false;
+  if (l->contentType() != r->contentType())
+    return false;
+  if (l->size() != r->size())
+    return false;
+  if (l->sectionChoice() != r->sectionChoice())
+    return false;
+  if (l->sectionChoice() == DefinedAtom::sectionCustomRequired) {
+    if (!l->customSectionName().equals(r->customSectionName()))
+      return false;
+  }
+  // Compare lengths as well as bytes. The previous memcmp used only the left
+  // buffer's length and would read past the right buffer if the raw content
+  // lengths ever differed.
+  return l->rawContent().equals(r->rawContent());
+// Registers `newAtom` in the content table. Returns true when no atom with
+// equal content was present and the atom was inserted. Otherwise records
+// newAtom -> existing atom in _replacedAtoms and returns false.
+bool SymbolTable::addByContent(const DefinedAtom &newAtom) {
+  AtomContentSet::iterator match = _contentTable.find(&newAtom);
+  const bool isNew = (match == _contentTable.end());
+  if (isNew) {
+    _contentTable.insert(&newAtom);
+  } else {
+    // The new atom is redundant; remember which atom stands in for it.
+    const Atom *existing = *match;
+    _replacedAtoms[&newAtom] = existing;
+  }
+  return isNew;
+// Returns the atom currently bound to `sym` in the name table, or nullptr if
+// the name is not present.
+const Atom *SymbolTable::findByName(StringRef sym) {
+  NameToAtom::iterator entry = _nameTable.find(sym);
+  return entry == _nameTable.end() ? nullptr : entry->second;
+// Follows the _replacedAtoms chain starting at `atom` and returns the last
+// atom in it. An atom without an entry in _replacedAtoms is returned as is.
+const Atom *SymbolTable::replacement(const Atom *atom) {
+  AtomToAtom::iterator link = _replacedAtoms.find(atom);
+  while (link != _replacedAtoms.end()) {
+    // Replacements can themselves have been replaced; keep walking.
+    atom = link->second;
+    link = _replacedAtoms.find(atom);
+  }
+  return atom;
+// True when `atom` has an entry in _replacedAtoms, i.e. another atom is used
+// in its place.
+bool SymbolTable::isCoalescedAway(const Atom *atom) {
+  return _replacedAtoms.find(atom) != _replacedAtoms.end();
+// Collects every name-table entry that is an UndefinedAtom and has no entry in
+// _replacedAtoms, in name-table iteration order.
+std::vector<const UndefinedAtom *> SymbolTable::undefines() {
+  std::vector<const UndefinedAtom *> pending;
+  for (const auto &entry : _nameTable) {
+    const Atom *candidate = entry.second;
+    assert(candidate != nullptr);
+    const auto *undef = dyn_cast<const UndefinedAtom>(candidate);
+    if (undef == nullptr || _replacedAtoms.count(undef) != 0)
+      continue;
+    pending.push_back(undef);
+  }
+  return pending;
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/Core/Writer.cpp b/contrib/llvm/tools/lld/lib/Core/Writer.cpp
new file mode 100644
index 000000000000..51f95bc5053a
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/Core/Writer.cpp
@@ -0,0 +1,18 @@
+//===- lib/Core/Writer.cpp ------------------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Core/Writer.h"
+namespace lld {
+// Out-of-line definitions of Writer's defaulted constructor and destructor.
+Writer::Writer() = default;
+Writer::~Writer() = default;
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/lib/Driver/CMakeLists.txt b/contrib/llvm/tools/lld/lib/Driver/CMakeLists.txt
new file mode 100644
index 000000000000..ff67c282f47e
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/Driver/CMakeLists.txt
@@ -0,0 +1,23 @@
+# Run TableGen over DarwinLdOptions.td with the -gen-opt-parser-defs backend
+# to produce DarwinLdOptions.inc.
+set(LLVM_TARGET_DEFINITIONS DarwinLdOptions.td)
+tablegen(LLVM DarwinLdOptions.inc -gen-opt-parser-defs)
+ DarwinLdDriver.cpp
+ ${LLD_INCLUDE_DIR}/lld/Driver
+ Option
+ Support
+ lldCommon
+ lldCore
+ lldMachO
+ lldReaderWriter
+ lldYAML
+ )
+# Order the build so that DriverOptionsTableGen runs before lldDriver
+# (presumably the target that produces DarwinLdOptions.inc -- confirm).
+add_dependencies(lldDriver DriverOptionsTableGen)
diff --git a/contrib/llvm/tools/lld/lib/Driver/DarwinLdDriver.cpp b/contrib/llvm/tools/lld/lib/Driver/DarwinLdDriver.cpp
new file mode 100644
index 000000000000..bbac230df453
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/Driver/DarwinLdDriver.cpp
@@ -0,0 +1,1230 @@
+//===- lib/Driver/DarwinLdDriver.cpp --------------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+/// \file
+/// Concrete instance of the Driver for darwin's ld.
+#include "lld/Common/Args.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Core/ArchiveLibraryFile.h"
+#include "lld/Core/Error.h"
+#include "lld/Core/File.h"
+#include "lld/Core/Instrumentation.h"
+#include "lld/Core/LinkingContext.h"
+#include "lld/Core/Node.h"
+#include "lld/Core/PassManager.h"
+#include "lld/Core/Resolver.h"
+#include "lld/Core/SharedLibraryFile.h"
+#include "lld/Core/Simple.h"
+#include "lld/ReaderWriter/MachOLinkingContext.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/OptTable.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <utility>
+#include <vector>
+using namespace lld;
+namespace {
+// DarwinLdOptions.inc is included several times below. Each inclusion is
+// followed by an #undef of the macro (OPTION or PREFIX) that selected which
+// piece of the option description that pass emits.
+//
+// Create enum with OPT_xxx values for each option in DarwinLdOptions.td
+// (OPT_##ID pastes each option's ID onto the OPT_ prefix).
+enum {
+ OPT_##ID,
+#include "DarwinLdOptions.inc"
+#undef OPTION
+// Create prefix string literals used in DarwinLdOptions.td
+#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
+#include "DarwinLdOptions.inc"
+#undef PREFIX
+// Create table mapping all options defined in DarwinLdOptions.td
+static const llvm::opt::OptTable::Info InfoTable[] = {
+ METAVAR, OPT_##ID, llvm::opt::Option::KIND##Class, \
+#include "DarwinLdOptions.inc"
+#undef OPTION
+// Create OptTable class for parsing actual command line arguments.
+// The constructor passes InfoTable to the llvm::opt::OptTable base class.
+class DarwinLdOptTable : public llvm::opt::OptTable {
+ DarwinLdOptTable() : OptTable(InfoTable) {}
+// Builds a one-element file list holding an ErrorFile constructed from `path`
+// and `ec`. The loaders below return this instead of real files on failure.
+static std::vector<std::unique_ptr<File>>
+makeErrorFile(StringRef path, std::error_code ec) {
+  std::unique_ptr<File> placeholder = llvm::make_unique<ErrorFile>(path, ec);
+  std::vector<std::unique_ptr<File>> files;
+  files.push_back(std::move(placeholder));
+  return files;
+// If `file` is an ArchiveLibraryFile, returns all of its members (or an
+// error-file list when parseAllMembers() fails). Any other file is returned
+// unchanged as the only element of the result.
+static std::vector<std::unique_ptr<File>>
+parseMemberFiles(std::unique_ptr<File> file) {
+  std::vector<std::unique_ptr<File>> members;
+  auto *archive = dyn_cast<ArchiveLibraryFile>(file.get());
+  if (archive == nullptr) {
+    members.push_back(std::move(file));
+    return members;
+  }
+  if (std::error_code ec = archive->parseAllMembers(members))
+    return makeErrorFile(file->path(), ec);
+  return members;
+// Loads the input at `path` through the linking context's registry.
+//  - Logs the path first when ctx.logInputFiles() is set.
+//  - A SharedLibraryFile is parsed immediately and registered with the context
+//    as a dylib; `upwardDylib` is passed through to registerDylib().
+//  - With `wholeArchive`, the result is expanded via parseMemberFiles().
+// Any failure is reported by returning the list from makeErrorFile().
+std::vector<std::unique_ptr<File>> loadFile(MachOLinkingContext &ctx,
+                                            StringRef path, bool wholeArchive,
+                                            bool upwardDylib) {
+  if (ctx.logInputFiles())
+    message(path);
+  ErrorOr<std::unique_ptr<MemoryBuffer>> buffer = ctx.getMemoryBuffer(path);
+  if (std::error_code ec = buffer.getError())
+    return makeErrorFile(path, ec);
+  ErrorOr<std::unique_ptr<File>> parsed =
+      ctx.registry().loadFile(std::move(buffer.get()));
+  if (std::error_code ec = parsed.getError())
+    return makeErrorFile(path, ec);
+  std::unique_ptr<File> &file = parsed.get();
+  // If file is a dylib, inform LinkingContext about it.
+  SharedLibraryFile *dylib = dyn_cast<SharedLibraryFile>(file.get());
+  if (dylib != nullptr) {
+    if (std::error_code ec = dylib->parse())
+      return makeErrorFile(path, ec);
+    ctx.registerDylib(reinterpret_cast<mach_o::MachODylibFile *>(dylib),
+                      upwardDylib);
+  }
+  if (wholeArchive)
+    return parseMemberFiles(std::move(file));
+  std::vector<std::unique_ptr<File>> single;
+  single.push_back(std::move(file));
+  return single;
+} // end anonymous namespace
+// Tests may be running on Windows. Canonicalize the path separator to '/' so
+// that test output is the same on every host.
+// Returns a copy of `path` in which every occurrence of the host's separator
+// character (first character of llvm::sys::path::get_separator()) is replaced
+// by '/'; when the host separator already is '/', the copy is unchanged.
+static std::string canonicalizePath(StringRef path) {
+  // Convert explicitly rather than relying on an implicit StringRef ->
+  // std::string conversion.
+  std::string canonical = path.str();
+  const char sep = llvm::sys::path::get_separator().front();
+  if (sep != '/')
+    std::replace(canonical.begin(), canonical.end(), sep, '/');
+  return canonical;
+// Loads `path` via loadFile() and appends one FileNode per resulting file to
+// the linking context's node list.
+static void addFile(StringRef path, MachOLinkingContext &ctx,
+                    bool loadWholeArchive, bool upwardDylib) {
+  for (std::unique_ptr<File> &loaded :
+       loadFile(ctx, path, loadWholeArchive, upwardDylib)) {
+    ctx.getNodes().push_back(llvm::make_unique<FileNode>(std::move(loaded)));
+  }
+// Reads an export list: one symbol per line, blank lines ignored, anything
+// from '#' to the end of a line treated as a comment. Every symbol found is
+// passed to ctx.addExportSymbol(), and the list file itself is recorded as an
+// input-file dependency.
+// Returns the error from opening the file, or a default (success) error_code.
+static std::error_code parseExportsList(StringRef exportFilePath,
+                                        MachOLinkingContext &ctx) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> contents =
+      MemoryBuffer::getFileOrSTDIN(exportFilePath);
+  if (std::error_code ec = contents.getError())
+    return ec;
+  ctx.addInputFileDependency(exportFilePath);
+  StringRef remaining = contents->get()->getBuffer();
+  while (!remaining.empty()) {
+    // Peel the next line off the front of the buffer.
+    const std::pair<StringRef, StringRef> lineAndRest = remaining.split('\n');
+    remaining = lineAndRest.second;
+    // Drop a trailing comment, then surrounding whitespace.
+    const StringRef symbol = lineAndRest.first.split('#').first.trim();
+    if (symbol.empty())
+      continue;
+    ctx.addExportSymbol(symbol);
+  }
+  return std::error_code();
+/// Reads an order file: one symbol per line, blank lines ignored, anything
+/// from '#' to the end of a line treated as a comment. A symbol may carry a
+/// prefix separated by ':' that is either an architecture name or a .o leaf
+/// name. Examples:
+/// _foo
+/// bar.o:_bar
+/// libfrob.a(bar.o):_bar
+/// x86_64:_foo64
+/// A prefix ending in ".o" or ".o)" is forwarded with the symbol. Any other
+/// prefix is compared against ctx.archName(): on mismatch the line is skipped,
+/// on match the symbol is forwarded with an empty prefix.
+/// Returns the error from opening the file, or a default (success) error_code.
+static std::error_code parseOrderFile(StringRef orderFilePath,
+                                      MachOLinkingContext &ctx) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> contents =
+      MemoryBuffer::getFileOrSTDIN(orderFilePath);
+  if (std::error_code ec = contents.getError())
+    return ec;
+  ctx.addInputFileDependency(orderFilePath);
+  StringRef remaining = contents->get()->getBuffer();
+  while (!remaining.empty()) {
+    // Peel the next line off the front of the buffer.
+    const std::pair<StringRef, StringRef> lineAndRest = remaining.split('\n');
+    remaining = lineAndRest.second;
+    // Drop a trailing comment, then surrounding whitespace.
+    StringRef sym = lineAndRest.first.split('#').first.trim();
+    if (sym.empty())
+      continue;
+    // Separate an optional "<prefix>:" from the symbol name.
+    StringRef prefix;
+    const std::pair<StringRef, StringRef> parts = sym.split(':');
+    if (parts.second.empty()) {
+      sym = parts.first;
+    } else {
+      sym = parts.second;
+      prefix = parts.first;
+      const bool isObjectPrefix =
+          prefix.endswith(".o") || prefix.endswith(".o)");
+      if (!isObjectPrefix) {
+        // If arch name prefix does not match arch being linked, ignore symbol.
+        if (!ctx.archName().equals(prefix))
+          continue;
+        prefix = "";
+      }
+    }
+    if (!sym.empty())
+      ctx.appendOrderedSymbol(sym, prefix);
+  }
+  return std::error_code();
+// There are two variants of the -filelist option:
+//   -filelist <path>
+//     The path names a text file which contains one file path per line.
+//     There are no comments and no trimming of whitespace.
+//   -filelist <path>,<dir>
+//     The path names a text file which contains a partial path per line.
+//     The <dir> prefix is prepended to each partial path.
+// Every listed file must exist (ctx.pathExists()); the first missing one stops
+// processing with a "File not found" error. Each existing file is added to
+// the link via addFile(), with `forceLoad` passed as its whole-archive flag.
+static llvm::Error loadFileList(StringRef fileListPath,
+                                MachOLinkingContext &ctx, bool forceLoad) {
+  // If there is a comma, split off <dir>.
+  const std::pair<StringRef, StringRef> pathAndDir = fileListPath.split(',');
+  const StringRef filePath = pathAndDir.first;
+  const StringRef dirName = pathAndDir.second;
+  ctx.addInputFileDependency(filePath);
+  ErrorOr<std::unique_ptr<MemoryBuffer>> listing =
+      MemoryBuffer::getFileOrSTDIN(filePath);
+  if (std::error_code ec = listing.getError())
+    return llvm::errorCodeToError(ec);
+  StringRef remaining = listing->get()->getBuffer();
+  while (!remaining.empty()) {
+    // Peel the next line off the front of the buffer.
+    const std::pair<StringRef, StringRef> lineAndRest = remaining.split('\n');
+    const StringRef line = lineAndRest.first;
+    remaining = lineAndRest.second;
+    StringRef path;
+    if (dirName.empty()) {
+      // No <dir>: the whole line is the input file path.
+      path = ctx.copy(line);
+    } else {
+      // Prepend <dir> to the line.
+      SmallString<256> joined;
+      joined.assign(dirName);
+      llvm::sys::path::append(joined, Twine(line));
+      path = ctx.copy(joined.str());
+    }
+    if (!ctx.pathExists(path))
+      return llvm::make_error<GenericError>(Twine("File not found '")
+                                            + path
+                                            + "'");
+    if (ctx.testingFileUsage())
+      message("Found filelist entry " + canonicalizePath(path));
+    addFile(path, ctx, forceLoad, false);
+  }
+  return llvm::Error::success();
+/// Parses `numStr` as a base-16 number into `baseAddress`; an optional
+/// case-insensitive "0x" prefix is skipped first.
+/// Returns the result of StringRef::getAsInteger(); callers in this file treat
+/// a true result as a parse failure.
+static bool parseNumberBase16(StringRef numStr, uint64_t &baseAddress) {
+  const StringRef digits =
+      numStr.startswith_lower("0x") ? numStr.drop_front(2) : numStr;
+  return digits.getAsInteger(16, baseAddress);
+// Honors -mllvm: forwards the options collected in ctx.llvmOptions() to
+// llvm::cl::ParseCommandLineOptions(). Does nothing when there are none.
+// The argument array lives in a std::vector so it is released on return; the
+// previous `new[]` allocation was never freed.
+static void parseLLVMOptions(const LinkingContext &ctx) {
+  if (ctx.llvmOptions().empty())
+    return;
+  const unsigned numArgs = ctx.llvmOptions().size();
+  std::vector<const char *> args;
+  args.reserve(numArgs + 2);
+  // argv[0] is a synthetic program name, shown in option-parsing diagnostics.
+  args.push_back("lld (LLVM option parsing)");
+  for (unsigned i = 0; i != numArgs; ++i)
+    args.push_back(ctx.llvmOptions()[i]);
+  // Keep the conventional null terminator; it is not counted in argc.
+  args.push_back(nullptr);
+  llvm::cl::ParseCommandLineOptions(numArgs + 1, args.data());
+namespace lld {
+namespace mach_o {
+bool parse(llvm::ArrayRef<const char *> args, MachOLinkingContext &ctx) {
+ // Parse command line options using DarwinLdOptions.td
+ DarwinLdOptTable table;
+ unsigned missingIndex;
+ unsigned missingCount;
+ llvm::opt::InputArgList parsedArgs =
+ table.ParseArgs(args.slice(1), missingIndex, missingCount);
+ if (missingCount) {
+ error("missing arg value for '" +
+ Twine(parsedArgs.getArgString(missingIndex)) + "' expected " +
+ Twine(missingCount) + " argument(s).");
+ return false;
+ }
+ for (auto unknownArg : parsedArgs.filtered(OPT_UNKNOWN)) {
+ warn("ignoring unknown argument: " +
+ Twine(unknownArg->getAsString(parsedArgs)));
+ }
+ errorHandler().Verbose = parsedArgs.hasArg(OPT_v);
+ errorHandler().ErrorLimit = args::getInteger(parsedArgs, OPT_error_limit, 20);
+ // Figure out output kind ( -dylib, -r, -bundle, -preload, or -static )
+ llvm::MachO::HeaderFileType fileType = llvm::MachO::MH_EXECUTE;
+ bool isStaticExecutable = false;
+ if (llvm::opt::Arg *kind = parsedArgs.getLastArg(
+ OPT_dylib, OPT_relocatable, OPT_bundle, OPT_static, OPT_preload)) {
+ switch (kind->getOption().getID()) {
+ case OPT_dylib:
+ fileType = llvm::MachO::MH_DYLIB;
+ break;
+ case OPT_relocatable:
+ fileType = llvm::MachO::MH_OBJECT;
+ break;
+ case OPT_bundle:
+ fileType = llvm::MachO::MH_BUNDLE;
+ break;
+ case OPT_static:
+ fileType = llvm::MachO::MH_EXECUTE;
+ isStaticExecutable = true;
+ break;
+ case OPT_preload:
+ fileType = llvm::MachO::MH_PRELOAD;
+ break;
+ }
+ }
+ // Handle -arch xxx
+ MachOLinkingContext::Arch arch = MachOLinkingContext::arch_unknown;
+ if (llvm::opt::Arg *archStr = parsedArgs.getLastArg(OPT_arch)) {
+ arch = MachOLinkingContext::archFromName(archStr->getValue());
+ if (arch == MachOLinkingContext::arch_unknown) {
+ error("unknown arch named '" + Twine(archStr->getValue()) + "'");
+ return false;
+ }
+ }
+ // If no -arch specified, scan input files to find first non-fat .o file.
+ if (arch == MachOLinkingContext::arch_unknown) {
+ for (auto &inFile : parsedArgs.filtered(OPT_INPUT)) {
+ // This is expensive because it opens and maps the file. But that is
+ // ok because no -arch is rare.
+ if (MachOLinkingContext::isThinObjectFile(inFile->getValue(), arch))
+ break;
+ }
+ if (arch == MachOLinkingContext::arch_unknown &&
+ !parsedArgs.getLastArg(OPT_test_file_usage)) {
+ // If no -arch and no options at all, print usage message.
+ if (parsedArgs.size() == 0) {
+ table.PrintHelp(llvm::outs(),
+ (std::string(args[0]) + " [options] file...").c_str(),
+ "LLVM Linker", false);
+ } else {
+ error("-arch not specified and could not be inferred");
+ }
+ return false;
+ }
+ }
+ // Handle -macosx_version_min or -ios_version_min
+ MachOLinkingContext::OS os = MachOLinkingContext::OS::unknown;
+ uint32_t minOSVersion = 0;
+ if (llvm::opt::Arg *minOS =
+ parsedArgs.getLastArg(OPT_macosx_version_min, OPT_ios_version_min,
+ OPT_ios_simulator_version_min)) {
+ switch (minOS->getOption().getID()) {
+ case OPT_macosx_version_min:
+ os = MachOLinkingContext::OS::macOSX;
+ if (MachOLinkingContext::parsePackedVersion(minOS->getValue(),
+ minOSVersion)) {
+ error("malformed macosx_version_min value");
+ return false;
+ }
+ break;
+ case OPT_ios_version_min:
+ os = MachOLinkingContext::OS::iOS;
+ if (MachOLinkingContext::parsePackedVersion(minOS->getValue(),
+ minOSVersion)) {
+ error("malformed ios_version_min value");
+ return false;
+ }
+ break;
+ case OPT_ios_simulator_version_min:
+ os = MachOLinkingContext::OS::iOS_simulator;
+ if (MachOLinkingContext::parsePackedVersion(minOS->getValue(),
+ minOSVersion)) {
+ error("malformed ios_simulator_version_min value");
+ return false;
+ }
+ break;
+ }
+ } else {
+ // No min-os version on command line, check environment variables
+ }
+ // Handle export_dynamic
+ // FIXME: Should we warn when this applies to something other than a static
+ // executable or dylib? Those are the only cases where this has an effect.
+ // Note, this has to come before ctx.configure() so that we get the correct
+ // value for _globalsAreDeadStripRoots.
+ bool exportDynamicSymbols = parsedArgs.hasArg(OPT_export_dynamic);
+ // Now that there's enough information parsed in, let the linking context
+ // set up default values.
+ ctx.configure(fileType, arch, os, minOSVersion, exportDynamicSymbols);
+ // Handle -e xxx
+ if (llvm::opt::Arg *entry = parsedArgs.getLastArg(OPT_entry))
+ ctx.setEntrySymbolName(entry->getValue());
+ // Handle -o xxx
+ if (llvm::opt::Arg *outpath = parsedArgs.getLastArg(OPT_output))
+ ctx.setOutputPath(outpath->getValue());
+ else
+ ctx.setOutputPath("a.out");
+ // Handle -image_base XXX and -seg1addr XXXX
+ if (llvm::opt::Arg *imageBase = parsedArgs.getLastArg(OPT_image_base)) {
+ uint64_t baseAddress;
+ if (parseNumberBase16(imageBase->getValue(), baseAddress)) {
+ error("image_base expects a hex number");
+ return false;
+ } else if (baseAddress < ctx.pageZeroSize()) {
+ error("image_base overlaps with __PAGEZERO");
+ return false;
+ } else if (baseAddress % ctx.pageSize()) {
+ error("image_base must be a multiple of page size (0x" +
+ llvm::utohexstr(ctx.pageSize()) + ")");
+ return false;
+ }
+ ctx.setBaseAddress(baseAddress);
+ }
+ // Handle -dead_strip
+ if (parsedArgs.getLastArg(OPT_dead_strip))
+ ctx.setDeadStripping(true);
+ bool globalWholeArchive = false;
+ // Handle -all_load
+ if (parsedArgs.getLastArg(OPT_all_load))
+ globalWholeArchive = true;
+ // Handle -install_name
+ if (llvm::opt::Arg *installName = parsedArgs.getLastArg(OPT_install_name))
+ ctx.setInstallName(installName->getValue());
+ else
+ ctx.setInstallName(ctx.outputPath());
+ // Handle -mark_dead_strippable_dylib
+ if (parsedArgs.getLastArg(OPT_mark_dead_strippable_dylib))
+ ctx.setDeadStrippableDylib(true);
+ // Handle -compatibility_version and -current_version
+ if (llvm::opt::Arg *vers = parsedArgs.getLastArg(OPT_compatibility_version)) {
+ if (ctx.outputMachOType() != llvm::MachO::MH_DYLIB) {
+ error("-compatibility_version can only be used with -dylib");
+ return false;
+ }
+ uint32_t parsedVers;
+ if (MachOLinkingContext::parsePackedVersion(vers->getValue(), parsedVers)) {
+ error("-compatibility_version value is malformed");
+ return false;
+ }
+ ctx.setCompatibilityVersion(parsedVers);
+ }
+ if (llvm::opt::Arg *vers = parsedArgs.getLastArg(OPT_current_version)) {
+ if (ctx.outputMachOType() != llvm::MachO::MH_DYLIB) {
+ error("-current_version can only be used with -dylib");
+ return false;
+ }
+ uint32_t parsedVers;
+ if (MachOLinkingContext::parsePackedVersion(vers->getValue(), parsedVers)) {
+ error("-current_version value is malformed");
+ return false;
+ }
+ ctx.setCurrentVersion(parsedVers);
+ }
+ // Handle -bundle_loader
+ if (llvm::opt::Arg *loader = parsedArgs.getLastArg(OPT_bundle_loader))
+ ctx.setBundleLoader(loader->getValue());
+ // Handle -sectalign segname sectname align
+ for (auto &alignArg : parsedArgs.filtered(OPT_sectalign)) {
+ const char* segName = alignArg->getValue(0);
+ const char* sectName = alignArg->getValue(1);
+ const char* alignStr = alignArg->getValue(2);
+ if ((alignStr[0] == '0') && (alignStr[1] == 'x'))
+ alignStr += 2;
+ unsigned long long alignValue;
+ if (llvm::getAsUnsignedInteger(alignStr, 16, alignValue)) {
+ error("-sectalign alignment value '" + Twine(alignStr) +
+ "' not a valid number");
+ return false;
+ }
+ uint16_t align = 1 << llvm::countTrailingZeros(alignValue);
+ if (!llvm::isPowerOf2_64(alignValue)) {
+ std::string Msg;
+ llvm::raw_string_ostream OS(Msg);
+ OS << "alignment for '-sectalign " << segName << " " << sectName
+ << llvm::format(" 0x%llX", alignValue)
+ << "' is not a power of two, using " << llvm::format("0x%08X", align);
+ OS.flush();
+ warn(Msg);
+ }
+ ctx.addSectionAlignment(segName, sectName, align);
+ }
+ // Handle -mllvm
+ for (auto &llvmArg : parsedArgs.filtered(OPT_mllvm)) {
+ ctx.appendLLVMOption(llvmArg->getValue());
+ }
+ // Handle -print_atoms
+ if (parsedArgs.getLastArg(OPT_print_atoms))
+ ctx.setPrintAtoms();
+ // Handle -t (trace) option.
+ if (parsedArgs.getLastArg(OPT_t))
+ ctx.setLogInputFiles(true);
+ // Handle -demangle option.
+ if (parsedArgs.getLastArg(OPT_demangle))
+ ctx.setDemangleSymbols(true);
+ // Handle -keep_private_externs
+ if (parsedArgs.getLastArg(OPT_keep_private_externs)) {
+ ctx.setKeepPrivateExterns(true);
+ if (ctx.outputMachOType() != llvm::MachO::MH_OBJECT)
+ warn("-keep_private_externs only used in -r mode");
+ }
+ // Handle -dependency_info <path> used by Xcode.
+ if (llvm::opt::Arg *depInfo = parsedArgs.getLastArg(OPT_dependency_info))
+ if (std::error_code ec = ctx.createDependencyFile(depInfo->getValue()))
+ warn(ec.message() + ", processing '-dependency_info " +
+ depInfo->getValue());
+ // In -test_file_usage mode, we'll be given an explicit list of paths that
+ // exist. We'll also be expected to print out information about how we located
+ // libraries and so on that the user specified, but not to actually do any
+ // linking.
+ if (parsedArgs.getLastArg(OPT_test_file_usage)) {
+ ctx.setTestingFileUsage();
+ // With paths existing by fiat, linking is not going to end well.
+ ctx.setDoNothing(true);
+ // Only bother looking for an existence override if we're going to use it.
+ for (auto existingPath : parsedArgs.filtered(OPT_path_exists)) {
+ ctx.addExistingPathForDebug(existingPath->getValue());
+ }
+ }
+ // Register possible input file parsers.
+ if (!ctx.doNothing()) {
+ ctx.registry().addSupportMachOObjects(ctx);
+ ctx.registry().addSupportArchives(ctx.logInputFiles());
+ ctx.registry().addSupportYamlFiles();
+ }
+ // Now construct the set of library search directories, following ld64's
+ // baroque set of accumulated hacks. Mostly, the algorithm constructs
+ // { syslibroots } x { libpaths }
+ //
+ // Unfortunately, there are numerous exceptions:
+ // 1. Only absolute paths get modified by syslibroot options.
+ // 2. If there is just 1 -syslibroot, system paths not found in it are
+ // skipped.
+ // 3. If the last -syslibroot is "/", all of them are ignored entirely.
+ // 4. If { syslibroots } x path == {}, the original path is kept.
+ std::vector<StringRef> sysLibRoots;
+ for (auto syslibRoot : parsedArgs.filtered(OPT_syslibroot)) {
+ sysLibRoots.push_back(syslibRoot->getValue());
+ }
+ if (!sysLibRoots.empty()) {
+ // Ignore all if last -syslibroot is "/".
+ if (sysLibRoots.back() != "/")
+ ctx.setSysLibRoots(sysLibRoots);
+ }
+ // Paths specified with -L come first, and are not considered system paths for
+ // the case where there is precisely 1 -syslibroot.
+ for (auto libPath : parsedArgs.filtered(OPT_L)) {
+ ctx.addModifiedSearchDir(libPath->getValue());
+ }
+ // Process -F directories (where to look for frameworks).
+ for (auto fwPath : parsedArgs.filtered(OPT_F)) {
+ ctx.addFrameworkSearchDir(fwPath->getValue());
+ }
+ // -Z suppresses the standard search paths.
+ if (!parsedArgs.hasArg(OPT_Z)) {
+ ctx.addModifiedSearchDir("/usr/lib", true);
+ ctx.addModifiedSearchDir("/usr/local/lib", true);
+ ctx.addFrameworkSearchDir("/Library/Frameworks", true);
+ ctx.addFrameworkSearchDir("/System/Library/Frameworks", true);
+ }
+ // Now that we've constructed the final set of search paths, print out those
+ // search paths in verbose mode.
+ if (errorHandler().Verbose) {
+ message("Library search paths:");
+ for (auto path : ctx.searchDirs()) {
+ message(" " + path);
+ }
+ message("Framework search paths:");
+ for (auto path : ctx.frameworkDirs()) {
+ message(" " + path);
+ }
+ }
+ // Handle -exported_symbols_list <file>
+ for (auto expFile : parsedArgs.filtered(OPT_exported_symbols_list)) {
+ if (ctx.exportMode() == MachOLinkingContext::ExportMode::blackList) {
+ error("-exported_symbols_list cannot be combined with "
+ "-unexported_symbol[s_list]");
+ return false;
+ }
+ ctx.setExportMode(MachOLinkingContext::ExportMode::whiteList);
+ if (std::error_code ec = parseExportsList(expFile->getValue(), ctx)) {
+ error(ec.message() + ", processing '-exported_symbols_list " +
+ expFile->getValue());
+ return false;
+ }
+ }
+ // Handle -exported_symbol <symbol>
+ for (auto symbol : parsedArgs.filtered(OPT_exported_symbol)) {
+ if (ctx.exportMode() == MachOLinkingContext::ExportMode::blackList) {
+ error("-exported_symbol cannot be combined with "
+ "-unexported_symbol[s_list]");
+ return false;
+ }
+ ctx.setExportMode(MachOLinkingContext::ExportMode::whiteList);
+ ctx.addExportSymbol(symbol->getValue());
+ }
+ // Handle -unexported_symbols_list <file>
+ for (auto expFile : parsedArgs.filtered(OPT_unexported_symbols_list)) {
+ if (ctx.exportMode() == MachOLinkingContext::ExportMode::whiteList) {
+ error("-unexported_symbols_list cannot be combined with "
+ "-exported_symbol[s_list]");
+ return false;
+ }
+ ctx.setExportMode(MachOLinkingContext::ExportMode::blackList);
+ if (std::error_code ec = parseExportsList(expFile->getValue(), ctx)) {
+ error(ec.message() + ", processing '-unexported_symbols_list " +
+ expFile->getValue());
+ return false;
+ }
+ }
+ // Handle -unexported_symbol <symbol>
+ for (auto symbol : parsedArgs.filtered(OPT_unexported_symbol)) {
+ if (ctx.exportMode() == MachOLinkingContext::ExportMode::whiteList) {
+ error("-unexported_symbol cannot be combined with "
+ "-exported_symbol[s_list]");
+ return false;
+ }
+ ctx.setExportMode(MachOLinkingContext::ExportMode::blackList);
+ ctx.addExportSymbol(symbol->getValue());
+ }
+ // Handle obsolete -multi_module and -single_module
+ if (llvm::opt::Arg *mod =
+ parsedArgs.getLastArg(OPT_multi_module, OPT_single_module)) {
+ if (mod->getOption().getID() == OPT_multi_module)
+ warn("-multi_module is obsolete and being ignored");
+ else if (ctx.outputMachOType() != llvm::MachO::MH_DYLIB)
+ warn("-single_module being ignored. It is only for use when producing a "
+ "dylib");
+ }
+ // Handle obsolete ObjC options: -objc_gc_compaction, -objc_gc, -objc_gc_only
+ if (parsedArgs.getLastArg(OPT_objc_gc_compaction)) {
+ error("-objc_gc_compaction is not supported");
+ return false;
+ }
+ if (parsedArgs.getLastArg(OPT_objc_gc)) {
+ error("-objc_gc is not supported");
+ return false;
+ }
+ if (parsedArgs.getLastArg(OPT_objc_gc_only)) {
+ error("-objc_gc_only is not supported");
+ return false;
+ }
+ // Handle -pie or -no_pie
+ if (llvm::opt::Arg *pie = parsedArgs.getLastArg(OPT_pie, OPT_no_pie)) {
+ switch (ctx.outputMachOType()) {
+ case llvm::MachO::MH_EXECUTE:
+ switch (ctx.os()) {
+ case MachOLinkingContext::OS::macOSX:
+ if ((minOSVersion < 0x000A0500) &&
+ (pie->getOption().getID() == OPT_pie)) {
+ error("-pie can only be used when targeting Mac OS X 10.5 or later");
+ return false;
+ }
+ break;
+ case MachOLinkingContext::OS::iOS:
+ if ((minOSVersion < 0x00040200) &&
+ (pie->getOption().getID() == OPT_pie)) {
+ error("-pie can only be used when targeting iOS 4.2 or later");
+ return false;
+ }
+ break;
+ case MachOLinkingContext::OS::iOS_simulator:
+ if (pie->getOption().getID() == OPT_no_pie) {
+ error("iOS simulator programs must be built PIE");
+ return false;
+ }
+ break;
+ case MachOLinkingContext::OS::unknown:
+ break;
+ }
+ ctx.setPIE(pie->getOption().getID() == OPT_pie);
+ break;
+ case llvm::MachO::MH_PRELOAD:
+ break;
+ case llvm::MachO::MH_DYLIB:
+ case llvm::MachO::MH_BUNDLE:
+ warn(pie->getSpelling() +
+ " being ignored. It is only used when linking main executables");
+ break;
+ default:
+ error(pie->getSpelling() +
+ " can only used when linking main executables");
+ return false;
+ }
+ }
+ // Handle -version_load_command or -no_version_load_command
+ {
+ bool flagOn = false;
+ bool flagOff = false;
+ if (auto *arg = parsedArgs.getLastArg(OPT_version_load_command,
+ OPT_no_version_load_command)) {
+ flagOn = arg->getOption().getID() == OPT_version_load_command;
+ flagOff = arg->getOption().getID() == OPT_no_version_load_command;
+ }
+ // default to adding version load command for dynamic code,
+ // static code must opt-in
+ switch (ctx.outputMachOType()) {
+ case llvm::MachO::MH_OBJECT:
+ ctx.setGenerateVersionLoadCommand(false);
+ break;
+ case llvm::MachO::MH_EXECUTE:
+ // dynamic executables default to generating a version load command,
+ // while static executables only generate it when explicitly requested.
+ if (isStaticExecutable) {
+ if (flagOn)
+ ctx.setGenerateVersionLoadCommand(true);
+ } else {
+ if (!flagOff)
+ ctx.setGenerateVersionLoadCommand(true);
+ }
+ break;
+ case llvm::MachO::MH_PRELOAD:
+ case llvm::MachO::MH_KEXT_BUNDLE:
+ if (flagOn)
+ ctx.setGenerateVersionLoadCommand(true);
+ break;
+ case llvm::MachO::MH_DYLINKER:
+ case llvm::MachO::MH_DYLIB:
+ case llvm::MachO::MH_BUNDLE:
+ if (!flagOff)
+ ctx.setGenerateVersionLoadCommand(true);
+ break;
+ case llvm::MachO::MH_FVMLIB:
+ case llvm::MachO::MH_DYLDLINK:
+ case llvm::MachO::MH_DYLIB_STUB:
+ case llvm::MachO::MH_DSYM:
+ // We don't generate load commands for these file types, even if
+ // forced on.
+ break;
+ }
+ }
+ // Handle -function_starts or -no_function_starts
+ {
+ bool flagOn = false;
+ bool flagOff = false;
+ if (auto *arg = parsedArgs.getLastArg(OPT_function_starts,
+ OPT_no_function_starts)) {
+ flagOn = arg->getOption().getID() == OPT_function_starts;
+ flagOff = arg->getOption().getID() == OPT_no_function_starts;
+ }
+ // default to adding function starts for dynamic code, static code must
+ // opt-in
+ switch (ctx.outputMachOType()) {
+ case llvm::MachO::MH_OBJECT:
+ ctx.setGenerateFunctionStartsLoadCommand(false);
+ break;
+ case llvm::MachO::MH_EXECUTE:
+ // dynamic executables default to generating a function starts load
+ // command, while static executables only generate it when explicitly requested.
+ if (isStaticExecutable) {
+ if (flagOn)
+ ctx.setGenerateFunctionStartsLoadCommand(true);
+ } else {
+ if (!flagOff)
+ ctx.setGenerateFunctionStartsLoadCommand(true);
+ }
+ break;
+ case llvm::MachO::MH_PRELOAD:
+ case llvm::MachO::MH_KEXT_BUNDLE:
+ if (flagOn)
+ ctx.setGenerateFunctionStartsLoadCommand(true);
+ break;
+ case llvm::MachO::MH_DYLINKER:
+ case llvm::MachO::MH_DYLIB:
+ case llvm::MachO::MH_BUNDLE:
+ if (!flagOff)
+ ctx.setGenerateFunctionStartsLoadCommand(true);
+ break;
+ case llvm::MachO::MH_FVMLIB:
+ case llvm::MachO::MH_DYLDLINK:
+ case llvm::MachO::MH_DYLIB_STUB:
+ case llvm::MachO::MH_DSYM:
+ // We don't generate load commands for these file types, even if
+ // forced on.
+ break;
+ }
+ }
+ // Handle -data_in_code_info or -no_data_in_code_info
+ {
+ bool flagOn = false;
+ bool flagOff = false;
+ if (auto *arg = parsedArgs.getLastArg(OPT_data_in_code_info,
+ OPT_no_data_in_code_info)) {
+ flagOn = arg->getOption().getID() == OPT_data_in_code_info;
+ flagOff = arg->getOption().getID() == OPT_no_data_in_code_info;
+ }
+ // default to adding data in code for dynamic code, static code must
+ // opt-in
+ switch (ctx.outputMachOType()) {
+ case llvm::MachO::MH_OBJECT:
+ if (!flagOff)
+ ctx.setGenerateDataInCodeLoadCommand(true);
+ break;
+ case llvm::MachO::MH_EXECUTE:
+ // dynamic executables default to generating a data in code load command,
+ // while static executables only generate it when explicitly requested.
+ if (isStaticExecutable) {
+ if (flagOn)
+ ctx.setGenerateDataInCodeLoadCommand(true);
+ } else {
+ if (!flagOff)
+ ctx.setGenerateDataInCodeLoadCommand(true);
+ }
+ break;
+ case llvm::MachO::MH_PRELOAD:
+ case llvm::MachO::MH_KEXT_BUNDLE:
+ if (flagOn)
+ ctx.setGenerateDataInCodeLoadCommand(true);
+ break;
+ case llvm::MachO::MH_DYLINKER:
+ case llvm::MachO::MH_DYLIB:
+ case llvm::MachO::MH_BUNDLE:
+ if (!flagOff)
+ ctx.setGenerateDataInCodeLoadCommand(true);
+ break;
+ case llvm::MachO::MH_FVMLIB:
+ case llvm::MachO::MH_DYLDLINK:
+ case llvm::MachO::MH_DYLIB_STUB:
+ case llvm::MachO::MH_DSYM:
+ // We don't generate load commands for these file types, even if
+ // forced on.
+ break;
+ }
+ }
+ // Handle sdk_version
+ if (llvm::opt::Arg *arg = parsedArgs.getLastArg(OPT_sdk_version)) {
+ uint32_t sdkVersion = 0;
+ if (MachOLinkingContext::parsePackedVersion(arg->getValue(),
+ sdkVersion)) {
+ error("malformed sdkVersion value");
+ return false;
+ }
+ ctx.setSdkVersion(sdkVersion);
+ } else if (ctx.generateVersionLoadCommand()) {
+ // If we don't have an sdk version, but were going to emit a load command
+ // with min_version, then we need to give a warning as we have no sdk
+ // version to put in that command.
+ // FIXME: We need to decide whether to make this an error.
+ warn("-sdk_version is required when emitting min version load command. "
+ "Setting sdk version to match provided min version");
+ ctx.setSdkVersion(ctx.osMinVersion());
+ }
+ // Handle source_version
+ if (llvm::opt::Arg *arg = parsedArgs.getLastArg(OPT_source_version)) {
+ uint64_t version = 0;
+ if (MachOLinkingContext::parsePackedVersion(arg->getValue(),
+ version)) {
+ error("malformed source_version value");
+ return false;
+ }
+ ctx.setSourceVersion(version);
+ }
+ // Handle stack_size
+ if (llvm::opt::Arg *stackSize = parsedArgs.getLastArg(OPT_stack_size)) {
+ uint64_t stackSizeVal;
+ if (parseNumberBase16(stackSize->getValue(), stackSizeVal)) {
+ error("stack_size expects a hex number");
+ return false;
+ }
+ if ((stackSizeVal % ctx.pageSize()) != 0) {
+ error("stack_size must be a multiple of page size (0x" +
+ llvm::utohexstr(ctx.pageSize()) + ")");
+ return false;
+ }
+ ctx.setStackSize(stackSizeVal);
+ }
+ // Handle debug info handling options: -S
+ if (parsedArgs.hasArg(OPT_S))
+ ctx.setDebugInfoMode(MachOLinkingContext::DebugInfoMode::noDebugMap);
+ // Handle -order_file <file>
+ for (auto orderFile : parsedArgs.filtered(OPT_order_file)) {
+ if (std::error_code ec = parseOrderFile(orderFile->getValue(), ctx)) {
+ error(ec.message() + ", processing '-order_file " + orderFile->getValue()
+ + "'");
+ return false;
+ }
+ }
+ // Handle -flat_namespace.
+ if (llvm::opt::Arg *ns =
+ parsedArgs.getLastArg(OPT_flat_namespace, OPT_twolevel_namespace)) {
+ if (ns->getOption().getID() == OPT_flat_namespace)
+ ctx.setUseFlatNamespace(true);
+ }
+ // Handle -undefined
+ if (llvm::opt::Arg *undef = parsedArgs.getLastArg(OPT_undefined)) {
+ MachOLinkingContext::UndefinedMode UndefMode;
+ if (StringRef(undef->getValue()).equals("error"))
+ UndefMode = MachOLinkingContext::UndefinedMode::error;
+ else if (StringRef(undef->getValue()).equals("warning"))
+ UndefMode = MachOLinkingContext::UndefinedMode::warning;
+ else if (StringRef(undef->getValue()).equals("suppress"))
+ UndefMode = MachOLinkingContext::UndefinedMode::suppress;
+ else if (StringRef(undef->getValue()).equals("dynamic_lookup"))
+ UndefMode = MachOLinkingContext::UndefinedMode::dynamicLookup;
+ else {
+ error("invalid option to -undefined [ warning | error | suppress | "
+ "dynamic_lookup ]");
+ return false;
+ }
+ if (ctx.useFlatNamespace()) {
+ // If we're using -flat_namespace then 'warning', 'suppress' and
+ // 'dynamic_lookup' are all equivalent, so map them to 'suppress'.
+ if (UndefMode != MachOLinkingContext::UndefinedMode::error)
+ UndefMode = MachOLinkingContext::UndefinedMode::suppress;
+ } else {
+ // If we're using -twolevel_namespace then 'warning' and 'suppress' are
+ // illegal. Emit a diagnostic if they've been (mis)used.
+ if (UndefMode == MachOLinkingContext::UndefinedMode::warning ||
+ UndefMode == MachOLinkingContext::UndefinedMode::suppress) {
+ error("can't use -undefined warning or suppress with "
+ "-twolevel_namespace");
+ return false;
+ }
+ }
+ ctx.setUndefinedMode(UndefMode);
+ }
+ // Handle -no_objc_category_merging.
+ if (parsedArgs.getLastArg(OPT_no_objc_category_merging))
+ ctx.setMergeObjCCategories(false);
+ // Handle -rpath <path>
+ if (parsedArgs.hasArg(OPT_rpath)) {
+ switch (ctx.outputMachOType()) {
+ case llvm::MachO::MH_EXECUTE:
+ case llvm::MachO::MH_DYLIB:
+ case llvm::MachO::MH_BUNDLE:
+ if (!ctx.minOS("10.5", "2.0")) {
+ if (ctx.os() == MachOLinkingContext::OS::macOSX)
+ error("-rpath can only be used when targeting OS X 10.5 or later");
+ else
+ error("-rpath can only be used when targeting iOS 2.0 or later");
+ return false;
+ }
+ break;
+ default:
+ error("-rpath can only be used when creating a dynamic final linked "
+ "image");
+ return false;
+ }
+ for (auto rPath : parsedArgs.filtered(OPT_rpath)) {
+ ctx.addRpath(rPath->getValue());
+ }
+ }
+ // Parse the LLVM options before we process files in case the file handling
+ // makes use of things like LLVM_DEBUG().
+ parseLLVMOptions(ctx);
+ // Handle input files and sectcreate.
+ for (auto &arg : parsedArgs) {
+ bool upward;
+ llvm::Optional<StringRef> resolvedPath;
+ switch (arg->getOption().getID()) {
+ default:
+ continue;
+ case OPT_INPUT:
+ addFile(arg->getValue(), ctx, globalWholeArchive, false);
+ break;
+ case OPT_upward_library:
+ addFile(arg->getValue(), ctx, false, true);
+ break;
+ case OPT_force_load:
+ addFile(arg->getValue(), ctx, true, false);
+ break;
+ case OPT_l:
+ case OPT_upward_l:
+ upward = (arg->getOption().getID() == OPT_upward_l);
+ resolvedPath = ctx.searchLibrary(arg->getValue());
+ if (!resolvedPath) {
+ error("Unable to find library for " + arg->getSpelling() +
+ arg->getValue());
+ return false;
+ } else if (ctx.testingFileUsage()) {
+ message(Twine("Found ") + (upward ? "upward " : " ") + "library " +
+ canonicalizePath(resolvedPath.getValue()));
+ }
+ addFile(resolvedPath.getValue(), ctx, globalWholeArchive, upward);
+ break;
+ case OPT_framework:
+ case OPT_upward_framework:
+ upward = (arg->getOption().getID() == OPT_upward_framework);
+ resolvedPath = ctx.findPathForFramework(arg->getValue());
+ if (!resolvedPath) {
+ error("Unable to find framework for " + arg->getSpelling() + " " +
+ arg->getValue());
+ return false;
+ } else if (ctx.testingFileUsage()) {
+ message(Twine("Found ") + (upward ? "upward " : " ") + "framework " +
+ canonicalizePath(resolvedPath.getValue()));
+ }
+ addFile(resolvedPath.getValue(), ctx, globalWholeArchive, upward);
+ break;
+ case OPT_filelist:
+ if (auto ec = loadFileList(arg->getValue(), ctx, globalWholeArchive)) {
+ handleAllErrors(std::move(ec), [&](const llvm::ErrorInfoBase &EI) {
+ error(EI.message() + ", processing '-filelist " + arg->getValue());
+ });
+ return false;
+ }
+ break;
+ case OPT_sectcreate: {
+ const char* seg = arg->getValue(0);
+ const char* sect = arg->getValue(1);
+ const char* fileName = arg->getValue(2);
+ ErrorOr<std::unique_ptr<MemoryBuffer>> contentOrErr =
+ MemoryBuffer::getFile(fileName);
+ if (!contentOrErr) {
+ error("can't open -sectcreate file " + Twine(fileName));
+ return false;
+ }
+ ctx.addSectCreateSection(seg, sect, std::move(*contentOrErr));
+ }
+ break;
+ }
+ }
+ if (ctx.getNodes().empty()) {
+ error("No input files");
+ return false;
+ }
+ // Validate the combination of options used.
+ return ctx.validate();
+/// Asks the linking context for either its implicit files (Implicit == true)
+/// or its internal files (Implicit == false), wraps each one in a FileNode and
+/// inserts it at the head of the context's node list. The new files are
+/// visited last to first, so they land ahead of the existing nodes while
+/// keeping the order in which the context produced them.
+static void createFiles(MachOLinkingContext &ctx, bool Implicit) {
+  std::vector<std::unique_ptr<File>> created;
+  if (!Implicit)
+    ctx.createInternalFiles(created);
+  else
+    ctx.createImplicitFiles(created);
+  for (size_t remaining = created.size(); remaining != 0; --remaining) {
+    std::unique_ptr<File> &file = created[remaining - 1];
+    auto &nodes = ctx.getNodes();
+    nodes.insert(nodes.begin(), llvm::make_unique<FileNode>(std::move(file)));
+  }
+/// This is where the link is actually performed: parse args into a MachOLinkingContext, load inputs, resolve, run passes, write the output. Returns true on success or when the context reports there is nothing to do; false if any step fails.
+bool link(llvm::ArrayRef<const char *> args, bool CanExitEarly, // args[0] supplies the log name; CanExitEarly allows exitLld() after a successful write
+ raw_ostream &Error) { // Error is installed below as the error handler's output stream
+ errorHandler().LogName = args::getFilenameWithoutExe(args[0]); // log name is derived from args[0]
+ errorHandler().ErrorLimitExceededMsg =
+ "too many errors emitted, stopping now (use "
+ "'-error-limit 0' to see all errors)";
+ errorHandler().ErrorOS = &Error; // the failure messages further down are written through this pointer
+ errorHandler().ExitEarly = CanExitEarly;
+ errorHandler().ColorDiagnostics = Error.has_colors();
+ MachOLinkingContext ctx;
+ if (!parse(args, ctx)) // parse() fills ctx from the command line; false means it rejected the arguments
+ return false;
+ if (ctx.doNothing()) // context says there is nothing to link: report success immediately
+ return true;
+ if (ctx.getNodes().empty()) // no input nodes to work on
+ return false;
+ for (std::unique_ptr<Node> &ie : ctx.getNodes()) // parse the file behind every node that is a FileNode
+ if (FileNode *node = dyn_cast<FileNode>(ie.get()))
+ node->getFile()->parse();
+ createFiles(ctx, false /* Implicit */); // prepend the context's internal (non-implicit) files
+ // Give target a chance to add files
+ createFiles(ctx, true /* Implicit */); // prepended too, so these end up ahead of the internal files
+ // Give target a chance to postprocess input files.
+ // Mach-O uses this chance to move all object files before library files.
+ ctx.finalizeInputFiles();
+ // Do core linking.
+ ScopedTask resolveTask(getDefaultDomain(), "Resolve");
+ Resolver resolver(ctx);
+ if (!resolver.resolve())
+ return false;
+ SimpleFile *merged = nullptr; // non-owning handle; ownership moves into the node list in the scope below
+ {
+ std::unique_ptr<SimpleFile> mergedFile = resolver.resultFile();
+ merged = mergedFile.get(); // grab the raw pointer before the unique_ptr is moved away
+ auto &members = ctx.getNodes();
+ members.insert(members.begin(),
+ llvm::make_unique<FileNode>(std::move(mergedFile)));
+ }
+ resolveTask.end();
+ // Run passes on linked atoms.
+ ScopedTask passTask(getDefaultDomain(), "Passes");
+ PassManager pm;
+ ctx.addPasses(pm); // the context decides which passes are registered
+ if (auto ec = pm.runOnFile(*merged)) {
+ // FIXME: This should be passed to logAllUnhandledErrors but it needs
+ // to be passed a Twine instead of a string.
+ *errorHandler().ErrorOS << "Failed to run passes on file '"
+ << ctx.outputPath() << "': ";
+ logAllUnhandledErrors(std::move(ec), *errorHandler().ErrorOS,
+ std::string());
+ return false;
+ }
+ passTask.end();
+ // Give linked atoms to Writer to generate output file.
+ ScopedTask writeTask(getDefaultDomain(), "Write");
+ if (auto ec = ctx.writeFile(*merged)) {
+ // FIXME: This should be passed to logAllUnhandledErrors but it needs
+ // to be passed a Twine instead of a string.
+ *errorHandler().ErrorOS << "Failed to write file '" << ctx.outputPath()
+ << "': ";
+ logAllUnhandledErrors(std::move(ec), *errorHandler().ErrorOS,
+ std::string());
+ return false;
+ }
+ // Call exit() if we can to avoid calling destructors.
+ if (CanExitEarly)
+ exitLld(errorCount() ? 1 : 0); // exit status is 1 if any error was counted, otherwise 0
+ return true;
+} // end namespace mach_o
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/lib/Driver/DarwinLdOptions.td b/contrib/llvm/tools/lld/lib/Driver/DarwinLdOptions.td
new file mode 100644
index 000000000000..3bbde8bf1c1c
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/Driver/DarwinLdOptions.td
@@ -0,0 +1,250 @@
+include "llvm/Option/OptParser.td"
+// output kinds: single-dash flags, all in grp_kind (help heading "OUTPUT KIND"), that name the type of file to create
+def grp_kind : OptionGroup<"outs">, HelpText<"OUTPUT KIND">;
+def relocatable : Flag<["-"], "r">,
+ HelpText<"Create relocatable object file">, Group<grp_kind>;
+def static : Flag<["-"], "static">,
+ HelpText<"Create static executable">, Group<grp_kind>;
+def dynamic : Flag<["-"], "dynamic">,
+ HelpText<"Create dynamic executable (default)">,Group<grp_kind>;
+def dylib : Flag<["-"], "dylib">,
+ HelpText<"Create dynamic library">, Group<grp_kind>;
+def bundle : Flag<["-"], "bundle">,
+ HelpText<"Create dynamic bundle">, Group<grp_kind>;
+def execute : Flag<["-"], "execute">,
+ HelpText<"Create main executable (default)">, Group<grp_kind>;
+def preload : Flag<["-"], "preload">,
+ HelpText<"Create binary for use with embedded systems">, Group<grp_kind>;
+// optimizations: options attached to grp_opts, shown under the "OPTIMIZATIONS" help heading
+def grp_opts : OptionGroup<"opts">, HelpText<"OPTIMIZATIONS">;
+def dead_strip : Flag<["-"], "dead_strip">,
+ HelpText<"Remove unreferenced code and data">, Group<grp_opts>;
+def macosx_version_min : Separate<["-"], "macosx_version_min">, // version options: each takes one separate <version> value
+ MetaVarName<"<version>">,
+ HelpText<"Minimum Mac OS X version">, Group<grp_opts>;
+def ios_version_min : Separate<["-"], "ios_version_min">,
+ MetaVarName<"<version>">,
+ HelpText<"Minimum iOS version">, Group<grp_opts>;
+def iphoneos_version_min : Separate<["-"], "iphoneos_version_min">, // alternate spelling, aliased to -ios_version_min
+ Alias<ios_version_min>;
+def ios_simulator_version_min : Separate<["-"], "ios_simulator_version_min">,
+ MetaVarName<"<version>">,
+ HelpText<"Minimum iOS simulator version">, Group<grp_opts>;
+def sdk_version : Separate<["-"], "sdk_version">,
+ MetaVarName<"<version>">,
+ HelpText<"SDK version">, Group<grp_opts>;
+def source_version : Separate<["-"], "source_version">,
+ MetaVarName<"<version>">,
+ HelpText<"Source version">, Group<grp_opts>;
+def version_load_command : Flag<["-"], "version_load_command">, // load-command toggles: each feature has a force flag and a matching no_ flag
+ HelpText<"Force generation of a version load command">, Group<grp_opts>;
+def no_version_load_command : Flag<["-"], "no_version_load_command">,
+ HelpText<"Disable generation of a version load command">, Group<grp_opts>;
+def function_starts : Flag<["-"], "function_starts">,
+ HelpText<"Force generation of a function starts load command">,
+ Group<grp_opts>;
+def no_function_starts : Flag<["-"], "no_function_starts">,
+ HelpText<"Disable generation of a function starts load command">,
+ Group<grp_opts>;
+def data_in_code_info : Flag<["-"], "data_in_code_info">,
+ HelpText<"Force generation of a data in code load command">,
+ Group<grp_opts>;
+def no_data_in_code_info : Flag<["-"], "no_data_in_code_info">,
+ HelpText<"Disable generation of a data in code load command">,
+ Group<grp_opts>;
+def mllvm : Separate<["-"], "mllvm">,
+ MetaVarName<"<option>">,
+ HelpText<"Options to pass to LLVM during LTO">, Group<grp_opts>;
+def exported_symbols_list : Separate<["-"], "exported_symbols_list">, // export control: the _list forms take a file path, the singular forms take one symbol
+ MetaVarName<"<file-path>">,
+ HelpText<"Restricts which symbols will be exported">, Group<grp_opts>;
+def exported_symbol : Separate<["-"], "exported_symbol">,
+ MetaVarName<"<symbol>">,
+ HelpText<"Restricts which symbols will be exported">, Group<grp_opts>;
+def unexported_symbols_list : Separate<["-"], "unexported_symbols_list">,
+ MetaVarName<"<file-path>">,
+ HelpText<"Lists symbols that should not be exported">, Group<grp_opts>;
+def unexported_symbol : Separate<["-"], "unexported_symbol">,
+ MetaVarName<"<symbol>">,
+ HelpText<"A symbol which should not be exported">, Group<grp_opts>;
+def keep_private_externs : Flag<["-"], "keep_private_externs">,
+ HelpText<"Private extern (hidden) symbols should not be transformed "
+ "into local symbols">, Group<grp_opts>;
+def order_file : Separate<["-"], "order_file">,
+ MetaVarName<"<file-path>">,
+ HelpText<"re-order and move specified symbols to start of their section">,
+ Group<grp_opts>;
+def flat_namespace : Flag<["-"], "flat_namespace">, // namespace selection: -flat_namespace / -twolevel_namespace
+ HelpText<"Resolves symbols in any (transitively) linked dynamic libraries. "
+ "Source libraries are not recorded: dyld will re-search all "
+ "images at runtime and use the first definition found.">,
+ Group<grp_opts>;
+def twolevel_namespace : Flag<["-"], "twolevel_namespace">,
+ HelpText<"Resolves symbols in listed libraries only. Source libraries are "
+ "recorded in the symbol table.">,
+ Group<grp_opts>;
+def undefined : Separate<["-"], "undefined">, // takes one separate value; the help text does not list the accepted values
+ MetaVarName<"<undefined>">,
+ HelpText<"Determines how undefined symbols are handled.">,
+ Group<grp_opts>;
+def no_objc_category_merging : Flag<["-"], "no_objc_category_merging">,
+ HelpText<"Disables the optimisation which merges Objective-C categories "
+ "on a class in to the class itself.">,
+ Group<grp_opts>;
+// main executable options: options attached to grp_main, shown under the "MAIN EXECUTABLE OPTIONS" help heading
+def grp_main : OptionGroup<"opts">, HelpText<"MAIN EXECUTABLE OPTIONS">; // NOTE(review): group name "opts" is the same string grp_opts uses - confirm this is intended
+def entry : Separate<["-"], "e">, // spelled -e on the command line
+ MetaVarName<"<entry-name>">,
+ HelpText<"entry symbol name">,Group<grp_main>;
+def pie : Flag<["-"], "pie">,
+ HelpText<"Create Position Independent Executable (for ASLR)">,
+ Group<grp_main>;
+def no_pie : Flag<["-"], "no_pie">,
+ HelpText<"Do not create Position Independent Executable">,
+ Group<grp_main>;
+def stack_size : Separate<["-"], "stack_size">, // takes one separate value; no MetaVarName is declared for it
+ HelpText<"Specifies the maximum stack size for the main thread in a program. "
+ "Must be a page-size multiple. (default=8Mb)">,
+ Group<grp_main>;
+def export_dynamic : Flag<["-"], "export_dynamic">,
+ HelpText<"Preserves all global symbols in main executables during LTO">,
+ Group<grp_main>;
+// dylib executable options: options attached to grp_dylib, shown under the "DYLIB EXECUTABLE OPTIONS" help heading
+def grp_dylib : OptionGroup<"opts">, HelpText<"DYLIB EXECUTABLE OPTIONS">; // NOTE(review): group name "opts" is the same string grp_opts uses - confirm this is intended
+def install_name : Separate<["-"], "install_name">,
+ MetaVarName<"<path>">,
+ HelpText<"The dylib's install name">, Group<grp_dylib>;
+def mark_dead_strippable_dylib : Flag<["-"], "mark_dead_strippable_dylib">,
+ HelpText<"Marks the dylib as having no side effects during initialization">,
+ Group<grp_dylib>;
+def compatibility_version : Separate<["-"], "compatibility_version">,
+ MetaVarName<"<version>">,
+ HelpText<"The dylib's compatibility version">, Group<grp_dylib>;
+def current_version : Separate<["-"], "current_version">,
+ MetaVarName<"<version>">,
+ HelpText<"The dylib's current version">, Group<grp_dylib>;
+// dylib executable options - compatibility aliases: dylib_-prefixed spellings, each an Alias of the option defined above
+def dylib_install_name : Separate<["-"], "dylib_install_name">,
+ Alias<install_name>;
+def dylib_compatibility_version : Separate<["-"], "dylib_compatibility_version">,
+ MetaVarName<"<version>">, Alias<compatibility_version>;
+def dylib_current_version : Separate<["-"], "dylib_current_version">,
+ MetaVarName<"<version>">, Alias<current_version>;
+// bundle executable options: options attached to grp_bundle, shown under the "BUNDLE EXECUTABLE OPTIONS" help heading
+def grp_bundle : OptionGroup<"opts">, HelpText<"BUNDLE EXECUTABLE OPTIONS">; // NOTE(review): group name "opts" is the same string grp_opts uses - confirm this is intended
+def bundle_loader : Separate<["-"], "bundle_loader">,
+ MetaVarName<"<path>">,
+ HelpText<"The executable that will be loading this Mach-O bundle">,
+ Group<grp_bundle>;
+// library options: search-path and archive-loading options, shown under the "LIBRARY OPTIONS" help heading
+def grp_libs : OptionGroup<"libs">, HelpText<"LIBRARY OPTIONS">;
+def L : JoinedOrSeparate<["-"], "L">, // JoinedOrSeparate: the directory may be joined to the flag or given as the next argument
+ MetaVarName<"<dir>">,
+ HelpText<"Add directory to library search path">, Group<grp_libs>;
+def F : JoinedOrSeparate<["-"], "F">,
+ MetaVarName<"<dir>">,
+ HelpText<"Add directory to framework search path">, Group<grp_libs>;
+def Z : Flag<["-"], "Z">, // unlike its neighbours, this def carries no Group<>
+ HelpText<"Do not search standard directories for libraries or frameworks">;
+def all_load : Flag<["-"], "all_load">,
+ HelpText<"Forces all members of all static libraries to be loaded">,
+ Group<grp_libs>;
+def force_load : Separate<["-"], "force_load">,
+ MetaVarName<"<library-path>">,
+ HelpText<"Forces all members of specified static libraries to be loaded">,
+ Group<grp_libs>;
+def syslibroot : Separate<["-"], "syslibroot">, MetaVarName<"<dir>">,
+ HelpText<"Add path to SDK to all absolute library search paths">,
+ Group<grp_libs>;
+// Input options: ways of naming libraries, frameworks and file lists to link; none of these defs is placed in a group
+def l : Joined<["-"], "l">, // Joined: the library name is written directly after the flag
+ MetaVarName<"<libname>">,
+ HelpText<"Base name of library searched for in -L directories">;
+def upward_l : Joined<["-"], "upward-l">, // note the spelling uses a hyphen (-upward-l), unlike the underscore in the def name
+ MetaVarName<"<libname>">,
+ HelpText<"Base name of upward library searched for in -L directories">;
+def framework : Separate<["-"], "framework">,
+ MetaVarName<"<name>">,
+ HelpText<"Base name of framework searched for in -F directories">;
+def upward_framework : Separate<["-"], "upward_framework">,
+ MetaVarName<"<name>">,
+ HelpText<"Base name of upward framework searched for in -F directories">;
+def upward_library : Separate<["-"], "upward_library">,
+ MetaVarName<"<path>">,
+ HelpText<"path to upward dylib to link with">;
+def filelist : Separate<["-"], "filelist">,
+ MetaVarName<"<path>">,
+ HelpText<"file containing paths to input files">;
+// test case options: -print_atoms changes the output form; -test_file_usage and -path_exists are used as a pair
+def print_atoms : Flag<["-"], "print_atoms">,
+ HelpText<"Emit output as yaml atoms">;
+def test_file_usage : Flag<["-"], "test_file_usage">,
+ HelpText<"Only files specified by -path_exists are considered to exist. "
+ "Print which files would be used">;
+def path_exists : Separate<["-"], "path_exists">,
+ MetaVarName<"<path>">,
+ HelpText<"Used with -test_file_usage to declare a path">;
+// general options: output path, architecture, section layout, diagnostics and other ungrouped options
+def output : Separate<["-"], "o">, // spelled -o on the command line
+ MetaVarName<"<path>">,
+ HelpText<"Output file path">;
+def arch : Separate<["-"], "arch">,
+ MetaVarName<"<arch-name>">,
+ HelpText<"Architecture to link">;
+def sectalign : MultiArg<["-"], "sectalign", 3>, // MultiArg with 3: consumes three following arguments
+ MetaVarName<"<segname> <sectname> <alignment>">,
+ HelpText<"Alignment for segment/section">;
+def sectcreate : MultiArg<["-"], "sectcreate", 3>,
+ MetaVarName<"<segname> <sectname> <file>">,
+ HelpText<"Create section <segname>/<sectname> from contents of <file>">;
+def image_base : Separate<["-"], "image_base">; // no help text or metavar declared
+def seg1addr : Separate<["-"], "seg1addr">, Alias<image_base>; // alternate spelling of -image_base
+def demangle : Flag<["-"], "demangle">,
+ HelpText<"Demangles symbol names in errors and warnings">;
+def dependency_info : Separate<["-"], "dependency_info">,
+ MetaVarName<"<file>">,
+ HelpText<"Write binary list of files used during link">;
+def S : Flag<["-"], "S">,
+ HelpText<"Remove debug information (STABS or DWARF) from the output file">;
+def rpath : Separate<["-"], "rpath">,
+ MetaVarName<"<path>">,
+ HelpText<"Add path to the runpath search path list for image being created">;
+def t : Flag<["-"], "t">,
+ HelpText<"Print the names of the input files as ld processes them">;
+def v : Flag<["-"], "v">,
+ HelpText<"Print linker information">;
+def error_limit : Separate<["-", "--"], "error-limit">, // accepted with either a "-" or a "--" prefix
+ MetaVarName<"<number>">,
+ HelpText<"Maximum number of errors to emit before stopping (0 = no limit)">;
+// Ignored options: still declared so the command line parses, but documented as having no effect
+def lto_library : Separate<["-"], "lto_library">,
+ MetaVarName<"<path>">,
+ HelpText<"Ignored for compatibility with other linkers">;
+// Obsolete options: flags attached to grp_obsolete, shown under the "OBSOLETE OPTIONS" help heading
+def grp_obsolete : OptionGroup<"obsolete">, HelpText<"OBSOLETE OPTIONS">;
+def single_module : Flag<["-"], "single_module">,
+ HelpText<"Default for dylibs">, Group<grp_obsolete>;
+def multi_module : Flag<["-"], "multi_module">,
+ HelpText<"Unsupported way to build dylibs">, Group<grp_obsolete>;
+def objc_gc_compaction : Flag<["-"], "objc_gc_compaction">, // the three objc_gc* flags share the same "unsupported" help text
+ HelpText<"Unsupported ObjC GC option">, Group<grp_obsolete>;
+def objc_gc : Flag<["-"], "objc_gc">,
+ HelpText<"Unsupported ObjC GC option">, Group<grp_obsolete>;
+def objc_gc_only : Flag<["-"], "objc_gc_only">,
+ HelpText<"Unsupported ObjC GC option">, Group<grp_obsolete>;
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/CMakeLists.txt b/contrib/llvm/tools/lld/lib/ReaderWriter/CMakeLists.txt
new file mode 100644
index 000000000000..bedb836d2c1e
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/CMakeLists.txt
@@ -0,0 +1,20 @@
+if (MSVC)
+ add_definitions(-wd4062) # Suppress 'warning C4062: Enumerator has no associated handler in a switch statement.'
+ FileArchive.cpp
+ ${LLD_INCLUDE_DIR}/lld/ReaderWriter
+ Object
+ Support
+ lldCore
+ )
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/FileArchive.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/FileArchive.cpp
new file mode 100644
index 000000000000..2f52d9d34312
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/FileArchive.cpp
@@ -0,0 +1,228 @@
+//===- lib/ReaderWriter/FileArchive.cpp -----------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/LLVM.h"
+#include "lld/Core/ArchiveLibraryFile.h"
+#include "lld/Core/File.h"
+#include "lld/Core/Reader.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <memory>
+#include <set>
+#include <string>
+#include <system_error>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+using llvm::object::Archive;
+using llvm::file_magic;
+using llvm::identify_magic;
+namespace lld {
+namespace {
+/// The FileArchive class represents an Archive Library file
+class FileArchive : public lld::ArchiveLibraryFile {
+  /// Takes ownership of \p mb. The buffer is kept in a shared_ptr because it
+  /// is later handed to every member File instantiated from this archive.
+  FileArchive(std::unique_ptr<MemoryBuffer> mb, const Registry &reg,
+              StringRef path, bool logLoading)
+      // Converting unique_ptr -> shared_ptr directly transfers ownership in
+      // one step; release() plus a raw-pointer constructor would leak the
+      // buffer if allocating the shared_ptr control block failed.
+      : ArchiveLibraryFile(path), _mb(std::move(mb)), _registry(reg),
+        _logLoading(logLoading) {}
+ /// Check if any member of the archive contains an Atom with the
+ /// specified name and return the File object for that member, or nullptr.
+  File *find(StringRef name) override {
+    auto entry = _symbolMemberMap.find(name);
+    if (entry == _symbolMemberMap.end())
+      return nullptr;
+    Archive::Child child = entry->second;
+
+    Expected<StringRef> contents = child.getBuffer();
+    if (!contents) {
+      // TODO: Actually report errors helpfully.
+      consumeError(contents.takeError());
+      return nullptr;
+    }
+
+    // Don't return a member already returned. A member is identified by the
+    // address of its data; insert() reports whether that address was new, so
+    // one set lookup both detects a repeat and records this member. The
+    // member stays recorded even if instantiation below fails.
+    if (!_membersInstantiated.insert(contents->data()).second)
+      return nullptr;
+
+    std::unique_ptr<File> loaded;
+    if (instantiateMember(child, loaded))
+      return nullptr;
+
+    // _filesReturned keeps ownership, so the File is destroyed together with
+    // this FileArchive; the caller only receives a non-owning pointer.
+    File *file = loaded.get();
+    _filesReturned.push_back(std::move(loaded));
+    return file;
+  }
+ /// Parse the archive and append a File for every member to \p result, stopping at the first error.
+ std::error_code
+ parseAllMembers(std::vector<std::unique_ptr<File>> &result) override {
+ if (std::error_code ec = parse())
+ return ec;
+ llvm::Error err = llvm::Error::success();
+ for (auto mf = _archive->child_begin(err), me = _archive->child_end();
+ mf != me; ++mf) {
+ std::unique_ptr<File> file;
+ if (std::error_code ec = instantiateMember(*mf, file)) {
+ // err is Success (or we wouldn't be in the loop body) but we can't
+ // return without testing or consuming it.
+ consumeError(std::move(err));
+ return ec; // Files appended by earlier iterations stay in result.
+ }
+ result.push_back(std::move(file));
+ }
+ if (err) // presumably set by the child iterator when walking the archive fails
+ return errorToErrorCode(std::move(err));
+ return std::error_code();
+ }
+ const AtomRange<DefinedAtom> defined() const override {
+ return _noDefinedAtoms;
+ }
+ const AtomRange<UndefinedAtom> undefined() const override {
+ return _noUndefinedAtoms;
+ }
+ const AtomRange<SharedLibraryAtom> sharedLibrary() const override {
+ return _noSharedLibraryAtoms;
+ }
+ const AtomRange<AbsoluteAtom> absolute() const override {
+ return _noAbsoluteAtoms;
+ }
+ void clearAtoms() override {
+ _noDefinedAtoms.clear();
+ _noUndefinedAtoms.clear();
+ _noSharedLibraryAtoms.clear();
+ _noAbsoluteAtoms.clear();
+ }
+  std::error_code doParse() override {
+    // The Archive built here is owned by this FileArchive through _archive.
+    llvm::Error archiveErr = llvm::Error::success();
+    _archive.reset(new Archive(_mb->getMemBufferRef(), archiveErr));
+    if (archiveErr)
+      return errorToErrorCode(std::move(archiveErr));
+    // Index the symbol table; its result (success or failure) is the result
+    // of the whole parse.
+    return buildTableOfContents();
+  }
+ std::error_code instantiateMember(Archive::Child member,
+ std::unique_ptr<File> &result) const {
+ Expected<llvm::MemoryBufferRef> mbOrErr = member.getMemoryBufferRef();
+ if (!mbOrErr)
+ return errorToErrorCode(mbOrErr.takeError());
+ llvm::MemoryBufferRef mb = mbOrErr.get();
+ std::string memberPath = (_archive->getFileName() + "(" // "archive(member)"
+ + mb.getBufferIdentifier() + ")").str();
+ if (_logLoading) // trace every member that gets loaded
+ llvm::errs() << memberPath << "\n";
+ std::unique_ptr<MemoryBuffer> memberMB(MemoryBuffer::getMemBuffer(
+ mb.getBuffer(), mb.getBufferIdentifier(), false));
+ ErrorOr<std::unique_ptr<File>> fileOrErr =
+ _registry.loadFile(std::move(memberMB));
+ if (std::error_code ec = fileOrErr.getError())
+ return ec;
+ result = std::move(fileOrErr.get());
+ if (std::error_code ec = result->parse())
+ return ec; // NOTE: result still holds the File whose parse failed.
+ result->setArchivePath(_archive->getFileName());
+ // The memory buffer is co-owned by the archive file and the children,
+ // so that the buffer is deallocated when all the members are destructed.
+ result->setSharedMemoryBuffer(_mb);
+ return std::error_code();
+ }
+  /// Populate _symbolMemberMap from the archive's symbol table.
+  ///
+  /// \returns the first error hit while resolving a symbol's member, or a
+  /// default-constructed error_code once every symbol has been recorded.
+  std::error_code buildTableOfContents() {
+    DEBUG_WITH_TYPE("FileArchive", llvm::dbgs()
+                                       << "Table of contents for archive '"
+                                       << _archive->getFileName() << "':\n");
+    for (const Archive::Symbol &sym : _archive->symbols()) {
+      StringRef name = sym.getName();
+      Expected<Archive::Child> memberOrErr = sym.getMember();
+      if (!memberOrErr)
+        return errorToErrorCode(memberOrErr.takeError());
+      Archive::Child member = memberOrErr.get();
+      // Debug-only trace. getBuffer() returns an Expected, which has to be
+      // tested before it is dereferenced, and the address is converted to an
+      // integer because "%llX" expects an unsigned long long, not a pointer.
+      DEBUG_WITH_TYPE("FileArchive", {
+        Expected<StringRef> buf = member.getBuffer();
+        if (buf) {
+          llvm::dbgs() << llvm::format("0x%08llX ",
+                                       reinterpret_cast<unsigned long long>(
+                                           buf->data()))
+                       << "'" << name << "'\n";
+        } else {
+          // Tracing must not change the outcome: drop the error and still
+          // list the symbol.
+          consumeError(buf.takeError());
+          llvm::dbgs() << "<unreadable> '" << name << "'\n";
+        }
+      });
+      _symbolMemberMap.insert(std::make_pair(name, member));
+    }
+    return std::error_code();
+  }
+ typedef std::unordered_map<StringRef, Archive::Child> MemberMap;
+ typedef std::set<const char *> InstantiatedSet;
+ std::shared_ptr<MemoryBuffer> _mb;
+ const Registry &_registry;
+ std::unique_ptr<Archive> _archive;
+ MemberMap _symbolMemberMap;
+ InstantiatedSet _membersInstantiated;
+ bool _logLoading;
+ std::vector<std::unique_ptr<MemoryBuffer>> _memberBuffers;
+ std::vector<std::unique_ptr<File>> _filesReturned;
+class ArchiveReader : public Reader {
+ ArchiveReader(bool logLoading) : _logLoading(logLoading) {}
+ bool canParse(file_magic magic, MemoryBufferRef) const override {
+ return magic == file_magic::archive;
+ }
+ ErrorOr<std::unique_ptr<File>> loadFile(std::unique_ptr<MemoryBuffer> mb,
+ const Registry &reg) const override {
+ StringRef path = mb->getBufferIdentifier();
+ std::unique_ptr<File> ret =
+ llvm::make_unique<FileArchive>(std::move(mb), reg, path, _logLoading);
+ return std::move(ret);
+ }
+ bool _logLoading;
+} // anonymous namespace
+void Registry::addSupportArchives(bool logLoading) {
+ add(std::unique_ptr<Reader>(new ArchiveReader(logLoading)));
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler.cpp
new file mode 100644
index 000000000000..cb20907b3e30
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler.cpp
@@ -0,0 +1,172 @@
+//===- lib/FileFormat/MachO/ArchHandler.cpp -------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "ArchHandler.h"
+#include "Atoms.h"
+#include "MachONormalizedFileBinaryUtils.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm::MachO;
+using namespace lld::mach_o::normalized;
+namespace lld {
+namespace mach_o {
+ArchHandler::ArchHandler() {
+ArchHandler::~ArchHandler() {
+std::unique_ptr<mach_o::ArchHandler> ArchHandler::create(
+ MachOLinkingContext::Arch arch) {
+ switch (arch) {
+ case MachOLinkingContext::arch_x86_64:
+ return create_x86_64();
+ case MachOLinkingContext::arch_x86:
+ return create_x86();
+ case MachOLinkingContext::arch_armv6:
+ case MachOLinkingContext::arch_armv7:
+ case MachOLinkingContext::arch_armv7s:
+ return create_arm();
+ case MachOLinkingContext::arch_arm64:
+ return create_arm64();
+ default:
+ llvm_unreachable("Unknown arch");
+ }
+bool ArchHandler::isLazyPointer(const Reference &ref) {
+ // A lazy bind entry is needed for a lazy pointer.
+ const StubInfo &info = stubInfo();
+ if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+ return false;
+ if (ref.kindArch() != info.lazyPointerReferenceToFinal.arch)
+ return false;
+ return (ref.kindValue() == info.lazyPointerReferenceToFinal.kind);
+ArchHandler::RelocPattern ArchHandler::relocPattern(const Relocation &reloc) {
+ assert((reloc.type & 0xFFF0) == 0);
+ uint16_t result = reloc.type;
+ if (reloc.scattered)
+ result |= rScattered;
+ if (reloc.pcRel)
+ result |= rPcRel;
+ if (reloc.isExtern)
+ result |= rExtern;
+ switch(reloc.length) {
+ case 0:
+ break;
+ case 1:
+ result |= rLength2;
+ break;
+ case 2:
+ result |= rLength4;
+ break;
+ case 3:
+ result |= rLength8;
+ break;
+ default:
+ llvm_unreachable("bad r_length");
+ }
+ return result;
+ArchHandler::relocFromPattern(ArchHandler::RelocPattern pattern) {
+ normalized::Relocation result;
+ result.offset = 0;
+ result.scattered = (pattern & rScattered);
+ result.type = (RelocationInfoType)(pattern & 0xF);
+ result.pcRel = (pattern & rPcRel);
+ result.isExtern = (pattern & rExtern);
+ result.value = 0;
+ result.symbol = 0;
+ switch (pattern & 0x300) {
+ case rLength1:
+ result.length = 0;
+ break;
+ case rLength2:
+ result.length = 1;
+ break;
+ case rLength4:
+ result.length = 2;
+ break;
+ case rLength8:
+ result.length = 3;
+ break;
+ }
+ return result;
+void ArchHandler::appendReloc(normalized::Relocations &relocs, uint32_t offset,
+ uint32_t symbol, uint32_t value,
+ RelocPattern pattern) {
+ normalized::Relocation reloc = relocFromPattern(pattern);
+ reloc.offset = offset;
+ reloc.symbol = symbol;
+ reloc.value = value;
+ relocs.push_back(reloc);
+int16_t ArchHandler::readS16(const uint8_t *addr, bool isBig) {
+ return read16(addr, isBig);
+int32_t ArchHandler::readS32(const uint8_t *addr, bool isBig) {
+ return read32(addr, isBig);
+uint32_t ArchHandler::readU32(const uint8_t *addr, bool isBig) {
+ return read32(addr, isBig);
+ int64_t ArchHandler::readS64(const uint8_t *addr, bool isBig) {
+ return read64(addr, isBig);
+/// Returns true when the typeCFI \p atom is a CIE, i.e. when the 32-bit id
+/// word that follows the length field(s) is zero. Content too short to hold
+/// that id word is reported as "not a CIE".
+bool ArchHandler::isDwarfCIE(bool isBig, const DefinedAtom *atom) {
+  assert(atom->contentType() == DefinedAtom::typeCFI);
+  const auto content = atom->rawContent();
+  if (content.size() < sizeof(uint32_t))
+    return false;
+  uint32_t size = read32(content.data(), isBig);
+  uint32_t idOffset = sizeof(uint32_t);
+  // A length of 0xffffffff means a 64-bit length follows, pushing the id
+  // word eight bytes further out.
+  if (size == 0xffffffffU)
+    idOffset += sizeof(uint64_t);
+  // The check above only covers the first word; make sure the id word is
+  // inside the content as well before reading it.
+  if (content.size() < idOffset + sizeof(uint32_t))
+    return false;
+  return read32(content.data() + idOffset, isBig) == 0;
+const Atom *ArchHandler::fdeTargetFunction(const DefinedAtom *fde) {
+ for (auto ref : *fde) {
+ if (ref->kindNamespace() == Reference::KindNamespace::mach_o &&
+ ref->kindValue() == unwindRefToFunctionKind()) {
+ assert(ref->kindArch() == kindArch() && "unexpected Reference arch");
+ return ref->target();
+ }
+ }
+ return nullptr;
+} // namespace mach_o
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler.h
new file mode 100644
index 000000000000..80840b561701
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler.h
@@ -0,0 +1,323 @@
+//===- lib/FileFormat/MachO/ArchHandler.h ---------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Atoms.h"
+#include "File.h"
+#include "MachONormalizedFile.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Core/Error.h"
+#include "lld/Core/Reference.h"
+#include "lld/Core/Simple.h"
+#include "lld/ReaderWriter/MachOLinkingContext.h"
+#include "llvm/ADT/Triple.h"
+namespace lld {
+namespace mach_o {
+/// The ArchHandler class handles all architecture specific aspects of
+/// mach-o linking.
+class ArchHandler {
+ virtual ~ArchHandler();
+ /// There is no public interface to subclasses of ArchHandler, so this
+ /// is the only way to instantiate an ArchHandler.
+ static std::unique_ptr<ArchHandler> create(MachOLinkingContext::Arch arch);
+ /// Get (arch specific) kind strings used by Registry.
+ virtual const Registry::KindStrings *kindStrings() = 0;
+ /// Convert mach-o Arch to Reference::KindArch.
+ virtual Reference::KindArch kindArch() = 0;
+ /// Used by StubPass to update References to shared library functions
+ /// to be references to a stub.
+ virtual bool isCallSite(const Reference &) = 0;
+ /// Used by GOTPass to locate GOT References
+ virtual bool isGOTAccess(const Reference &, bool &canBypassGOT) {
+ return false;
+ }
+ /// Used by TLVPass to locate TLV References.
+ virtual bool isTLVAccess(const Reference &) const { return false; }
+ /// Used by the TLVPass to update TLV References.
+ virtual void updateReferenceToTLV(const Reference *) {}
+ /// Used by ShimPass to insert shims in branches that switch mode.
+ virtual bool isNonCallBranch(const Reference &) = 0;
+ /// Used by GOTPass to update GOT References
+ virtual void updateReferenceToGOT(const Reference *, bool targetIsNowGOT) {}
+ /// Does this architecture make use of __unwind_info sections for exception
+ /// handling? If so, it will need a separate pass to create them.
+ virtual bool needsCompactUnwind() = 0;
+ /// Returns the kind of reference to use to synthesize a 32-bit image-offset
+ /// value, used in the __unwind_info section.
+ virtual Reference::KindValue imageOffsetKind() = 0;
+ /// Returns the kind of reference to use to synthesize a 32-bit image-offset
+ /// indirect value. Used for personality functions in the __unwind_info
+ /// section.
+ virtual Reference::KindValue imageOffsetKindIndirect() = 0;
+ /// Architecture specific compact unwind type that signals __eh_frame should
+ /// actually be used.
+ virtual uint32_t dwarfCompactUnwindType() = 0;
+ /// Reference from an __eh_frame CIE atom to its personality function it's
+ /// describing. Usually pointer-sized and PC-relative, but differs in whether
+ /// it needs to be in relocatable objects.
+ virtual Reference::KindValue unwindRefToPersonalityFunctionKind() = 0;
+ /// Reference from an __eh_frame FDE to the CIE it's based on.
+ virtual Reference::KindValue unwindRefToCIEKind() = 0;
+ /// Reference from an __eh_frame FDE atom to the function it's
+ /// describing. Usually pointer-sized and PC-relative, but differs in whether
+ /// it needs to be in relocatable objects.
+ virtual Reference::KindValue unwindRefToFunctionKind() = 0;
+ /// Reference from an __unwind_info entry of dwarfCompactUnwindType to the
+ /// required __eh_frame entry. On current architectures, the low 24 bits
+ /// represent the offset of the function's FDE entry from the start of
+ /// __eh_frame.
+ virtual Reference::KindValue unwindRefToEhFrameKind() = 0;
+ /// Returns a pointer sized reference kind. On 64-bit targets this will
+ /// likely be something like pointer64, and pointer32 on 32-bit targets.
+ virtual Reference::KindValue pointerKind() = 0;
+ virtual const Atom *fdeTargetFunction(const DefinedAtom *fde);
+ /// Used by normalizedFromAtoms() to know where to generate rebasing and
+ /// binding info in final executables.
+ virtual bool isPointer(const Reference &) = 0;
+ /// Used by normalizedFromAtoms() to know where to generate lazy binding
+ /// info in final executables.
+ virtual bool isLazyPointer(const Reference &);
+ /// Reference from an __stub_helper entry to the required offset of the
+ /// lazy bind commands.
+ virtual Reference::KindValue lazyImmediateLocationKind() = 0;
+ /// Returns true if the specified relocation is paired to the next relocation.
+ virtual bool isPairedReloc(const normalized::Relocation &) = 0;
+ /// Prototype for a helper function. Given a sectionIndex and address,
+ /// finds the atom containing that address and the offset within that atom.
+ typedef std::function<llvm::Error (uint32_t sectionIndex, uint64_t addr,
+ const lld::Atom **, Reference::Addend *)>
+ FindAtomBySectionAndAddress;
+ /// Prototype for a helper function. Given a symbolIndex, finds the atom
+ /// representing that symbol.
+ typedef std::function<llvm::Error (uint32_t symbolIndex,
+ const lld::Atom **)> FindAtomBySymbolIndex;
+ /// Analyzes a relocation from a .o file and returns the info
+ /// (kind, target, addend) needed to instantiate a Reference.
+ /// Two helper functions are passed as parameters to find the target atom
+ /// given a symbol index or address.
+ virtual llvm::Error
+ getReferenceInfo(const normalized::Relocation &reloc,
+ const DefinedAtom *inAtom,
+ uint32_t offsetInAtom,
+ uint64_t fixupAddress, bool isBigEndian,
+ FindAtomBySectionAndAddress atomFromAddress,
+ FindAtomBySymbolIndex atomFromSymbolIndex,
+ Reference::KindValue *kind,
+ const lld::Atom **target,
+ Reference::Addend *addend) = 0;
+ /// Analyzes a pair of relocations from a .o file and returns the info
+ /// (kind, target, addend) needed to instantiate a Reference.
+ /// Two helper functions are passed as parameters to find the target atom
+ /// given a symbol index or address.
+ virtual llvm::Error
+ getPairReferenceInfo(const normalized::Relocation &reloc1,
+ const normalized::Relocation &reloc2,
+ const DefinedAtom *inAtom,
+ uint32_t offsetInAtom,
+ uint64_t fixupAddress, bool isBig, bool scatterable,
+ FindAtomBySectionAndAddress atomFromAddress,
+ FindAtomBySymbolIndex atomFromSymbolIndex,
+ Reference::KindValue *kind,
+ const lld::Atom **target,
+ Reference::Addend *addend) = 0;
+ /// Prototype for a helper function. Given an atom, finds the symbol table
+ /// index for it in the output file.
+ typedef std::function<uint32_t (const Atom &atom)> FindSymbolIndexForAtom;
+ /// Prototype for a helper function. Given an atom, finds the index
+ /// of the section that will contain the atom.
+ typedef std::function<uint32_t (const Atom &atom)> FindSectionIndexForAtom;
+ /// Prototype for a helper function. Given an atom, finds the address
+ /// assigned to it in the output file.
+ typedef std::function<uint64_t (const Atom &atom)> FindAddressForAtom;
+ /// Some architectures require local symbols on anonymous atoms.
+ virtual bool needsLocalSymbolInRelocatableFile(const DefinedAtom *atom) {
+ return false;
+ }
+ /// Copy raw content then apply all fixup References on an Atom.
+ virtual void generateAtomContent(const DefinedAtom &atom, bool relocatable,
+ FindAddressForAtom findAddress,
+ FindAddressForAtom findSectionAddress,
+ uint64_t imageBaseAddress,
+ llvm::MutableArrayRef<uint8_t> atomContentBuffer) = 0;
+ /// Used in -r mode to convert a Reference to a mach-o relocation.
+ virtual void appendSectionRelocations(const DefinedAtom &atom,
+ uint64_t atomSectionOffset,
+ const Reference &ref,
+ FindSymbolIndexForAtom,
+ FindSectionIndexForAtom,
+ FindAddressForAtom,
+ normalized::Relocations&) = 0;
+ /// Add arch-specific References.
+ virtual void addAdditionalReferences(MachODefinedAtom &atom) { }
+ // Add Reference for data-in-code marker.
+ virtual void addDataInCodeReference(MachODefinedAtom &atom, uint32_t atomOff,
+ uint16_t length, uint16_t kind) { }
+ /// Returns true if the specified Reference value marks the start or end
+ /// of a data-in-code range in an atom.
+ virtual bool isDataInCodeTransition(Reference::KindValue refKind) {
+ return false;
+ }
+ /// Returns the Reference value for a Reference that marks the start of
+ /// a data-in-code range.
+ virtual Reference::KindValue dataInCodeTransitionStart(
+ const MachODefinedAtom &atom) {
+ return 0;
+ }
+ /// Returns the Reference value for a Reference that marks the end of
+ /// a data-in-code range.
+ virtual Reference::KindValue dataInCodeTransitionEnd(
+ const MachODefinedAtom &atom) {
+ return 0;
+ }
+ /// Only relevant for 32-bit arm archs.
+ virtual bool isThumbFunction(const DefinedAtom &atom) { return false; }
+ /// Only relevant for 32-bit arm archs.
+ virtual const DefinedAtom *createShim(MachOFile &file, bool thumbToArm,
+ const DefinedAtom &) {
+ llvm_unreachable("shims only support on arm");
+ }
+ /// Does a given unwind-cfi atom represent a CIE (as opposed to an FDE).
+ static bool isDwarfCIE(bool isBig, const DefinedAtom *atom);
+ struct ReferenceInfo {
+ Reference::KindArch arch;
+ uint16_t kind;
+ uint32_t offset;
+ int32_t addend;
+ };
+ struct OptionalRefInfo {
+ bool used;
+ uint16_t kind;
+ uint32_t offset;
+ int32_t addend;
+ };
+ /// Table of architecture specific information for creating stubs.
+ struct StubInfo {
+ const char* binderSymbolName;
+ ReferenceInfo lazyPointerReferenceToHelper;
+ ReferenceInfo lazyPointerReferenceToFinal;
+ ReferenceInfo nonLazyPointerReferenceToBinder;
+ uint8_t codeAlignment;
+ uint32_t stubSize;
+ uint8_t stubBytes[16];
+ ReferenceInfo stubReferenceToLP;
+ OptionalRefInfo optStubReferenceToLP;
+ uint32_t stubHelperSize;
+ uint8_t stubHelperBytes[16];
+ ReferenceInfo stubHelperReferenceToImm;
+ ReferenceInfo stubHelperReferenceToHelperCommon;
+ DefinedAtom::ContentType stubHelperImageCacheContentType;
+ uint32_t stubHelperCommonSize;
+ uint8_t stubHelperCommonAlignment;
+ uint8_t stubHelperCommonBytes[36];
+ ReferenceInfo stubHelperCommonReferenceToCache;
+ OptionalRefInfo optStubHelperCommonReferenceToCache;
+ ReferenceInfo stubHelperCommonReferenceToBinder;
+ OptionalRefInfo optStubHelperCommonReferenceToBinder;
+ };
+ virtual const StubInfo &stubInfo() = 0;
+ ArchHandler();
+ static std::unique_ptr<mach_o::ArchHandler> create_x86_64();
+ static std::unique_ptr<mach_o::ArchHandler> create_x86();
+ static std::unique_ptr<mach_o::ArchHandler> create_arm();
+ static std::unique_ptr<mach_o::ArchHandler> create_arm64();
+ // Handy way to pack mach-o r_type and other bit fields into one 16-bit value.
+ typedef uint16_t RelocPattern;
+ enum {
+ rScattered = 0x8000,
+ rPcRel = 0x4000,
+ rExtern = 0x2000,
+ rLength1 = 0x0000,
+ rLength2 = 0x0100,
+ rLength4 = 0x0200,
+ rLength8 = 0x0300,
+ rLenArmLo = rLength1,
+ rLenArmHi = rLength2,
+ rLenThmbLo = rLength4,
+ rLenThmbHi = rLength8
+ };
+ /// Extract RelocPattern from normalized mach-o relocation.
+ static RelocPattern relocPattern(const normalized::Relocation &reloc);
+ /// Create normalized Relocation initialized from pattern.
+ static normalized::Relocation relocFromPattern(RelocPattern pattern);
+ /// One liner to add a relocation.
+ static void appendReloc(normalized::Relocations &relocs, uint32_t offset,
+ uint32_t symbol, uint32_t value,
+ RelocPattern pattern);
+ static int16_t readS16(const uint8_t *addr, bool isBig);
+ static int32_t readS32(const uint8_t *addr, bool isBig);
+ static uint32_t readU32(const uint8_t *addr, bool isBig);
+ static int64_t readS64(const uint8_t *addr, bool isBig);
+} // namespace mach_o
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp
new file mode 100644
index 000000000000..2f663c660f5c
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp
@@ -0,0 +1,1523 @@
+//===- lib/FileFormat/MachO/ArchHandler_arm.cpp ---------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "ArchHandler.h"
+#include "Atoms.h"
+#include "MachONormalizedFileBinaryUtils.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm::MachO;
+using namespace lld::mach_o::normalized;
+namespace lld {
+namespace mach_o {
+using llvm::support::ulittle32_t;
+using llvm::support::little32_t;
+class ArchHandler_arm : public ArchHandler {
+ ArchHandler_arm() = default;
+ ~ArchHandler_arm() override = default;
+ const Registry::KindStrings *kindStrings() override { return _sKindStrings; }
+ Reference::KindArch kindArch() override { return Reference::KindArch::ARM; }
+ const ArchHandler::StubInfo &stubInfo() override;
+ bool isCallSite(const Reference &) override;
+ bool isPointer(const Reference &) override;
+ bool isPairedReloc(const normalized::Relocation &) override;
+ bool isNonCallBranch(const Reference &) override;
+ bool needsCompactUnwind() override {
+ return false;
+ }
+ Reference::KindValue imageOffsetKind() override {
+ return invalid;
+ }
+ Reference::KindValue imageOffsetKindIndirect() override {
+ return invalid;
+ }
+ Reference::KindValue unwindRefToPersonalityFunctionKind() override {
+ return invalid;
+ }
+ Reference::KindValue unwindRefToCIEKind() override {
+ return invalid;
+ }
+ Reference::KindValue unwindRefToFunctionKind() override {
+ return invalid;
+ }
+ Reference::KindValue unwindRefToEhFrameKind() override {
+ return invalid;
+ }
+ Reference::KindValue lazyImmediateLocationKind() override {
+ return lazyImmediateLocation;
+ }
+ Reference::KindValue pointerKind() override {
+ return invalid;
+ }
+ uint32_t dwarfCompactUnwindType() override {
+ // FIXME
+ return -1;
+ }
+ llvm::Error getReferenceInfo(const normalized::Relocation &reloc,
+ const DefinedAtom *inAtom,
+ uint32_t offsetInAtom,
+ uint64_t fixupAddress, bool swap,
+ FindAtomBySectionAndAddress atomFromAddress,
+ FindAtomBySymbolIndex atomFromSymbolIndex,
+ Reference::KindValue *kind,
+ const lld::Atom **target,
+ Reference::Addend *addend) override;
+ llvm::Error
+ getPairReferenceInfo(const normalized::Relocation &reloc1,
+ const normalized::Relocation &reloc2,
+ const DefinedAtom *inAtom,
+ uint32_t offsetInAtom,
+ uint64_t fixupAddress, bool swap, bool scatterable,
+ FindAtomBySectionAndAddress atomFromAddress,
+ FindAtomBySymbolIndex atomFromSymbolIndex,
+ Reference::KindValue *kind,
+ const lld::Atom **target,
+ Reference::Addend *addend) override;
+ void generateAtomContent(const DefinedAtom &atom, bool relocatable,
+ FindAddressForAtom findAddress,
+ FindAddressForAtom findSectionAddress,
+ uint64_t imageBaseAddress,
+ llvm::MutableArrayRef<uint8_t> atomContentBuffer) override;
+ void appendSectionRelocations(const DefinedAtom &atom,
+ uint64_t atomSectionOffset,
+ const Reference &ref,
+ FindSymbolIndexForAtom,
+ FindSectionIndexForAtom,
+ FindAddressForAtom,
+ normalized::Relocations &) override;
+ void addAdditionalReferences(MachODefinedAtom &atom) override;
+  bool isDataInCodeTransition(Reference::KindValue refKind) override {
+    // Exactly the three mode-marker kinds count as transitions; every other
+    // kind does not.
+    return refKind == modeThumbCode || refKind == modeArmCode ||
+           refKind == modeData;
+  }
+ Reference::KindValue dataInCodeTransitionStart(
+ const MachODefinedAtom &atom) override {
+ return modeData;
+ }
+ Reference::KindValue dataInCodeTransitionEnd(
+ const MachODefinedAtom &atom) override {
+ return atom.isThumb() ? modeThumbCode : modeArmCode;
+ }
+ bool isThumbFunction(const DefinedAtom &atom) override;
+ const DefinedAtom *createShim(MachOFile &file, bool thumbToArm,
+ const DefinedAtom &) override;
+ friend class Thumb2ToArmShimAtom;
+ friend class ArmToThumbShimAtom;
+ static const Registry::KindStrings _sKindStrings[];
+ static const StubInfo _sStubInfoArmPIC;
+ enum ArmKind : Reference::KindValue {
+ invalid, /// for error condition
+ modeThumbCode, /// Content starting at this offset is thumb.
+ modeArmCode, /// Content starting at this offset is arm.
+ modeData, /// Content starting at this offset is data.
+ // Kinds found in mach-o .o files:
+ thumb_bl22, /// ex: bl _foo
+ thumb_b22, /// ex: b _foo
+ thumb_movw, /// ex: movw r1, :lower16:_foo
+ thumb_movt, /// ex: movt r1, :upper16:_foo
+ thumb_movw_funcRel, /// ex: movw r1, :lower16:(_foo-(L1+4))
+ thumb_movt_funcRel, /// ex: movt r1, :upper16:(_foo-(L1+4))
+ arm_bl24, /// ex: bl _foo
+ arm_b24, /// ex: b _foo
+ arm_movw, /// ex: movw r1, :lower16:_foo
+ arm_movt, /// ex: movt r1, :upper16:_foo
+ arm_movw_funcRel, /// ex: movw r1, :lower16:(_foo-(L1+4))
+ arm_movt_funcRel, /// ex: movt r1, :upper16:(_foo-(L1+4))
+ pointer32, /// ex: .long _foo
+ delta32, /// ex: .long _foo - .
+ // Kinds introduced by Passes:
+ lazyPointer, /// Location contains a lazy pointer.
+ lazyImmediateLocation, /// Location contains immediate value used in stub.
+ };
+ // Utility functions for inspecting/updating instructions.
+ static bool isThumbMovw(uint32_t instruction);
+ static bool isThumbMovt(uint32_t instruction);
+ static bool isArmMovw(uint32_t instruction);
+ static bool isArmMovt(uint32_t instruction);
+ static int32_t getDisplacementFromThumbBranch(uint32_t instruction, uint32_t);
+ static int32_t getDisplacementFromArmBranch(uint32_t instruction);
+ static uint16_t getWordFromThumbMov(uint32_t instruction);
+ static uint16_t getWordFromArmMov(uint32_t instruction);
+ static uint32_t clearThumbBit(uint32_t value, const Atom *target);
+ static uint32_t setDisplacementInArmBranch(uint32_t instr, int32_t disp,
+ bool targetIsThumb);
+ static uint32_t setDisplacementInThumbBranch(uint32_t instr, uint32_t ia,
+ int32_t disp, bool targetThumb);
+ static uint32_t setWordFromThumbMov(uint32_t instruction, uint16_t word);
+ static uint32_t setWordFromArmMov(uint32_t instruction, uint16_t word);
+ StringRef stubName(const DefinedAtom &);
+ bool useExternalRelocationTo(const Atom &target);
+ void applyFixupFinal(const Reference &ref, uint8_t *location,
+ uint64_t fixupAddress, uint64_t targetAddress,
+ uint64_t inAtomAddress, bool &thumbMode,
+ bool targetIsThumb);
+ void applyFixupRelocatable(const Reference &ref, uint8_t *location,
+ uint64_t fixupAddress,
+ uint64_t targetAddress,
+ uint64_t inAtomAddress, bool &thumbMode,
+ bool targetIsThumb);
+// ArchHandler_arm
+const Registry::KindStrings ArchHandler_arm::_sKindStrings[] = {
+ LLD_KIND_STRING_ENTRY(thumb_bl22),
+ LLD_KIND_STRING_ENTRY(thumb_movw),
+ LLD_KIND_STRING_ENTRY(thumb_movt),
+ LLD_KIND_STRING_ENTRY(thumb_movw_funcRel),
+ LLD_KIND_STRING_ENTRY(thumb_movt_funcRel),
+ LLD_KIND_STRING_ENTRY(arm_movw_funcRel),
+ LLD_KIND_STRING_ENTRY(arm_movt_funcRel),
+ LLD_KIND_STRING_ENTRY(lazyImmediateLocation),
+const ArchHandler::StubInfo ArchHandler_arm::_sStubInfoArmPIC = {
+ "dyld_stub_binder",
+ // References in lazy pointer
+ { Reference::KindArch::ARM, pointer32, 0, 0 },
+ { Reference::KindArch::ARM, lazyPointer, 0, 0 },
+ // GOT pointer to dyld_stub_binder
+ { Reference::KindArch::ARM, pointer32, 0, 0 },
+ // arm code alignment 2^2
+ 2,
+ // Stub size and code
+ 16,
+ { 0x04, 0xC0, 0x9F, 0xE5, // ldr ip, pc + 12
+ 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip
+ 0x00, 0xF0, 0x9C, 0xE5, // ldr pc, [ip]
+ 0x00, 0x00, 0x00, 0x00 }, // .long L_foo$lazy_ptr - (L1$scv + 8)
+ { Reference::KindArch::ARM, delta32, 12, 0 },
+ { false, 0, 0, 0 },
+ // Stub Helper size and code
+ 12,
+ { 0x00, 0xC0, 0x9F, 0xE5, // ldr ip, [pc, #0]
+ 0x00, 0x00, 0x00, 0xEA, // b _helperhelper
+ 0x00, 0x00, 0x00, 0x00 }, // .long lazy-info-offset
+ { Reference::KindArch::ARM, lazyImmediateLocation, 8, 0 },
+ { Reference::KindArch::ARM, arm_b24, 4, 0 },
+ // Stub helper image cache content type
+ DefinedAtom::typeGOT,
+ // Stub Helper-Common size and code
+ 36,
+ // Stub helper alignment
+ 2,
+ { // push lazy-info-offset
+ 0x04, 0xC0, 0x2D, 0xE5, // str ip, [sp, #-4]!
+ // push address of dyld_ImageLoaderCache
+ 0x10, 0xC0, 0x9F, 0xE5, // ldr ip, L1
+ 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip
+ 0x04, 0xC0, 0x2D, 0xE5, // str ip, [sp, #-4]!
+ // jump through dyld_stub_binder
+ 0x08, 0xC0, 0x9F, 0xE5, // ldr ip, L2
+ 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip
+ 0x00, 0xF0, 0x9C, 0xE5, // ldr pc, [ip]
+ 0x00, 0x00, 0x00, 0x00, // L1: .long fFastStubGOTAtom - (helper+16)
+ 0x00, 0x00, 0x00, 0x00 }, // L2: .long dyld_stub_binder - (helper+28)
+ { Reference::KindArch::ARM, delta32, 28, 0xC },
+ { false, 0, 0, 0 },
+ { Reference::KindArch::ARM, delta32, 32, 0x04 },
+ { false, 0, 0, 0 }
+const ArchHandler::StubInfo &ArchHandler_arm::stubInfo() {
+ // Only one stub table exists for ARM at the moment; a choice between
+ // several StubInfo tables would be made at this point.
+ const ArchHandler::StubInfo &selected = _sStubInfoArmPIC;
+ return selected;
+bool ArchHandler_arm::isCallSite(const Reference &ref) {
+ // Every ARM and Thumb b/bl reference kind is treated as a call site.
+ const auto kind = ref.kindValue();
+ return kind == thumb_b22 || kind == thumb_bl22 || kind == arm_b24 ||
+ kind == arm_bl24;
+bool ArchHandler_arm::isPointer(const Reference &ref) {
+ // True only for pointer32 references.
+ const bool isPointer32 = ref.kindValue() == pointer32;
+ return isPointer32;
+bool ArchHandler_arm::isNonCallBranch(const Reference &ref) {
+ // Only the plain branch kinds qualify; the bl kinds do not.
+ const auto kind = ref.kindValue();
+ return kind == thumb_b22 || kind == arm_b24;
+bool ArchHandler_arm::isPairedReloc(const Relocation &reloc) { // Reports whether reloc.type is a relocation type that comes as a pair.
+ switch (reloc.type) { // NOTE(review): no case labels are visible before the next line in this listing - confirm against the full file
+ return true;
+ default:
+ return false;
+ }
+/// Follow the chain stub atom -> lazy pointer atom -> final target and return
+/// the final target's name; falls back to "stub" when no such chain exists.
+StringRef ArchHandler_arm::stubName(const DefinedAtom &stubAtom) {
+ assert(stubAtom.contentType() == DefinedAtom::typeStub);
+ for (const Reference *stubRef : stubAtom) {
+ // Only references that land on a lazy-pointer atom are of interest.
+ const DefinedAtom *lazyPtr = dyn_cast<DefinedAtom>(stubRef->target());
+ if (!lazyPtr || lazyPtr->contentType() != DefinedAtom::typeLazyPointer)
+ continue;
+ // The lazyPointer reference inside that atom names the real target.
+ for (const Reference *lazyRef : *lazyPtr)
+ if (lazyRef->kindValue() == lazyPointer)
+ return lazyRef->target()->name();
+ }
+ return "stub";
+/// Decode the signed byte displacement stored in an ARM b/bl/blx instruction.
+int32_t ArchHandler_arm::getDisplacementFromArmBranch(uint32_t instruction) {
+ // imm24 is a word count: scale it to bytes, then sign-extend from bit 25.
+ uint32_t bits = (instruction & 0x00FFFFFF) << 2;
+ const bool isNegative = (bits & 0x02000000) != 0;
+ if (isNegative)
+ bits |= 0xFC000000;
+ const int32_t displacement = static_cast<int32_t>(bits);
+ // A BLX with its H bit set targets an address two bytes further on.
+ const bool isBlxWithH = (instruction >> 24) == 0xFB;
+ return isBlxWithH ? displacement + 2 : displacement;
+/// Re-encode an ARM b/bl/blx with a new displacement, converting between bl
+/// and blx so the instruction matches the instruction set of its target.
+uint32_t ArchHandler_arm::setDisplacementInArmBranch(uint32_t instruction,
+ int32_t displacement,
+ bool targetIsThumb) {
+ assert((displacement <= 33554428) && (displacement > (-33554432))
+ && "arm branch out of range");
+ const bool wasBlx = (instruction >> 28) == 0xF;
+ // imm24 carries the word-granular part of the displacement.
+ const uint32_t imm24 = (displacement >> 2) & 0x00FFFFFF;
+ if (!targetIsThumb) {
+ // ARM target: a blx is turned back into bl, anything else keeps its
+ // original condition/opcode byte.
+ const uint32_t opcode = wasBlx ? 0xEB000000 : (instruction & 0xFF000000);
+ return opcode | imm24;
+ }
+ // Thumb target: the result is always a blx.
+ if (!wasBlx) {
+ assert(((instruction & 0xF0000000) == 0xE0000000)
+ && "no conditional arm blx");
+ assert(((instruction & 0xFF000000) == 0xEB000000)
+ && "no arm pc-rel BX instruction");
+ }
+ // The H bit (bit 24) carries bit 1 of the displacement.
+ const uint32_t hBit = (displacement & 2) ? 1 : 0;
+ return 0xFA000000 | (hBit << 24) | imm24;
+/// Extract displacement from a thumb b/bl/blx instruction. The displacement is rebuilt from the S, J1, J2, imm10 and imm11 fields of the 32-bit value; instrAddr is only consulted for the blx alignment adjustment.
+int32_t ArchHandler_arm::getDisplacementFromThumbBranch(uint32_t instruction,
+ uint32_t instrAddr) {
+ bool is_blx = ((instruction & 0xD000F800) == 0xC000F000);
+ uint32_t s = (instruction >> 10) & 0x1; // sign bit S
+ uint32_t j1 = (instruction >> 29) & 0x1;
+ uint32_t j2 = (instruction >> 27) & 0x1;
+ uint32_t imm10 = instruction & 0x3FF;
+ uint32_t imm11 = (instruction >> 16) & 0x7FF;
+ uint32_t i1 = (j1 == s); // I1 = NOT(J1 XOR S)
+ uint32_t i2 = (j2 == s); // I2 = NOT(J2 XOR S)
+ uint32_t dis =
+ (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
+ int32_t sdis = dis;
+ int32_t result = s ? (sdis | 0xFE000000) : sdis; // sign-extend from bit 24
+ if (is_blx && (instrAddr & 0x2)) {
+ // The thumb blx instruction always has low bit of imm11 as zero. The way
+ // a 2-byte aligned blx can branch to a 4-byte aligned ARM target is that
+ // the blx instruction always 4-byte aligns the pc before adding the
+ // displacement from the blx. We must emulate that when decoding this.
+ result -= 2;
+ }
+ return result;
+/// Update a thumb b/bl/blx instruction, switching bl <-> blx as needed. Returns the re-encoded 32-bit value; a plain b cannot switch modes and is only asserted to have a thumb target.
+uint32_t ArchHandler_arm::setDisplacementInThumbBranch(uint32_t instruction,
+ uint32_t instrAddr,
+ int32_t displacement,
+ bool targetIsThumb) {
+ assert((displacement <= 16777214) && (displacement > (-16777216))
+ && "thumb branch out of range");
+ bool is_bl = ((instruction & 0xD000F800) == 0xD000F000);
+ bool is_blx = ((instruction & 0xD000F800) == 0xC000F000);
+ bool is_b = ((instruction & 0xD000F800) == 0x9000F000);
+ uint32_t newInstruction = (instruction & 0xD000F800); // keep only the opcode bits; all displacement fields start cleared
+ if (is_bl || is_blx) {
+ if (targetIsThumb) {
+ newInstruction = 0xD000F000; // Use bl
+ } else {
+ newInstruction = 0xC000F000; // Use blx
+ // See note in getDisplacementFromThumbBranch() about blx.
+ if (instrAddr & 0x2)
+ displacement += 2; // inverse of the -2 adjustment applied when decoding a 2-byte-aligned blx
+ }
+ } else if (is_b) {
+ assert(targetIsThumb && "no pc-rel thumb branch instruction that "
+ "switches to arm mode");
+ }
+ else {
+ llvm_unreachable("thumb branch22 reloc on a non-branch instruction");
+ }
+ uint32_t s = (uint32_t)(displacement >> 24) & 0x1; // sign bit S
+ uint32_t i1 = (uint32_t)(displacement >> 23) & 0x1;
+ uint32_t i2 = (uint32_t)(displacement >> 22) & 0x1;
+ uint32_t imm10 = (uint32_t)(displacement >> 12) & 0x3FF;
+ uint32_t imm11 = (uint32_t)(displacement >> 1) & 0x7FF;
+ uint32_t j1 = (i1 == s); // J1 = NOT(I1 XOR S)
+ uint32_t j2 = (i2 == s); // J2 = NOT(I2 XOR S)
+ uint32_t nextDisp = (j1 << 13) | (j2 << 11) | imm11; // fields placed in the upper 16 bits of the result
+ uint32_t firstDisp = (s << 10) | imm10; // fields placed in the lower 16 bits of the result
+ newInstruction |= (nextDisp << 16) | firstDisp;
+ return newInstruction;
+bool ArchHandler_arm::isThumbMovw(uint32_t instruction) {
+ // Keep only the fixed opcode bits and compare with the thumb movw pattern.
+ const uint32_t opcodeBits = instruction & 0x8000FBF0;
+ return opcodeBits == 0x0000F240;
+bool ArchHandler_arm::isThumbMovt(uint32_t instruction) {
+ // Keep only the fixed opcode bits and compare with the thumb movt pattern.
+ const uint32_t opcodeBits = instruction & 0x8000FBF0;
+ return opcodeBits == 0x0000F2C0;
+bool ArchHandler_arm::isArmMovw(uint32_t instruction) {
+ // The condition field and operands are masked out; only the opcode byte counts.
+ const uint32_t opcodeBits = instruction & 0x0FF00000;
+ return opcodeBits == 0x03000000;
+bool ArchHandler_arm::isArmMovt(uint32_t instruction) {
+ // The condition field and operands are masked out; only the opcode byte counts.
+ const uint32_t opcodeBits = instruction & 0x0FF00000;
+ return opcodeBits == 0x03400000;
+uint16_t ArchHandler_arm::getWordFromThumbMov(uint32_t instruction) {
+ assert(isThumbMovw(instruction) || isThumbMovt(instruction));
+ // Gather the scattered immediate fields; the value is imm4:i:imm3:imm8.
+ const uint32_t imm4 = instruction & 0xF;
+ const uint32_t iBit = (instruction >> 10) & 0x1;
+ const uint32_t imm3 = (instruction >> 28) & 0x7;
+ const uint32_t imm8 = (instruction >> 16) & 0xFF;
+ uint32_t word = imm8;
+ word |= imm3 << 8;
+ word |= iBit << 11;
+ word |= imm4 << 12;
+ return word;
+uint16_t ArchHandler_arm::getWordFromArmMov(uint32_t instruction) {
+ assert(isArmMovw(instruction) || isArmMovt(instruction));
+ // imm4 sits in bits 16-19 and imm12 in bits 0-11; the value is imm4:imm12.
+ const uint32_t high4 = (instruction >> 16) & 0xF;
+ const uint32_t low12 = instruction & 0xFFF;
+ return (high4 << 12) | low12;
+uint32_t ArchHandler_arm::setWordFromThumbMov(uint32_t instr, uint16_t word) {
+ assert(isThumbMovw(instr) || isThumbMovt(instr));
+ // Split the 16-bit value into its imm4:i:imm3:imm8 fields.
+ const uint32_t imm4 = (word >> 12) & 0xF;
+ const uint32_t iBit = (word >> 11) & 0x1;
+ const uint32_t imm3 = (word >> 8) & 0x7;
+ const uint32_t imm8 = word & 0xFF;
+ // Clear the old immediate fields, then scatter the new ones into place.
+ uint32_t encoded = instr & 0x8F00FBF0;
+ encoded |= imm4;
+ encoded |= iBit << 10;
+ encoded |= imm3 << 28;
+ encoded |= imm8 << 16;
+ return encoded;
+uint32_t ArchHandler_arm::setWordFromArmMov(uint32_t instr, uint16_t word) {
+ assert(isArmMovw(instr) || isArmMovt(instr));
+ // The value is stored as imm4 (bits 16-19) and imm12 (bits 0-11).
+ const uint32_t high4 = (word >> 12) & 0xF;
+ const uint32_t low12 = word & 0xFFF;
+ const uint32_t cleared = instr & 0xFFF0F000;
+ return cleared | (high4 << 16) | low12;
+uint32_t ArchHandler_arm::clearThumbBit(uint32_t value, const Atom *target) {
+ // The assembler often adds one to the address of a thumb function.
+ // We need to undo that so it does not look like an addend.
+ // Returns \p value with bit 0 cleared when bit 0 is set and \p target is a
+ // defined atom whose isThumb() is true; otherwise \p value is unchanged.
+ if ((value & 1) == 0)
+ return value;
+ if (!isa<DefinedAtom>(target))
+ return value;
+ // static_cast is the proper downcast within a class hierarchy: unlike
+ // reinterpret_cast it applies any base-subobject adjustment and is rejected
+ // at compile time if the two types are unrelated.
+ // NOTE(review): as before, this assumes every DefinedAtom reaching here is
+ // a MachODefinedAtom - confirm against the callers.
+ const MachODefinedAtom *machoTarget =
+ static_cast<const MachODefinedAtom *>(target);
+ if (machoTarget->isThumb())
+ value &= -2; // mask off thumb-bit
+ return value;
+llvm::Error ArchHandler_arm::getReferenceInfo( // Decodes one stand-alone relocation into *kind, *target and *addend; unknown patterns yield a GenericError.
+ const Relocation &reloc, const DefinedAtom *inAtom, uint32_t offsetInAtom,
+ uint64_t fixupAddress, bool isBig,
+ FindAtomBySectionAndAddress atomFromAddress,
+ FindAtomBySymbolIndex atomFromSymbolIndex, Reference::KindValue *kind,
+ const lld::Atom **target, Reference::Addend *addend) {
+ const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom];
+ uint64_t targetAddress;
+ uint32_t instruction = *(const ulittle32_t *)fixupContent; // always read as little-endian; isBig is not consulted
+ int32_t displacement;
+ switch (relocPattern(reloc)) { // each case is a relocation type combined with its extern/scattered/pc-rel/length flags
+ case ARM_THUMB_RELOC_BR22 | rPcRel | rExtern | rLength4:
+ // ex: bl _foo (and _foo is undefined)
+ if ((instruction & 0xD000F800) == 0x9000F000) // same mask/value as is_b in setDisplacementInThumbBranch()
+ *kind = thumb_b22;
+ else
+ *kind = thumb_bl22;
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ // Instruction contains branch to addend.
+ displacement = getDisplacementFromThumbBranch(instruction, fixupAddress);
+ *addend = fixupAddress + 4 + displacement; // thumb branch displacements are relative to fixupAddress + 4
+ return llvm::Error::success();
+ case ARM_THUMB_RELOC_BR22 | rPcRel | rLength4:
+ // ex: bl _foo (and _foo is defined)
+ if ((instruction & 0xD000F800) == 0x9000F000)
+ *kind = thumb_b22;
+ else
+ *kind = thumb_bl22;
+ displacement = getDisplacementFromThumbBranch(instruction, fixupAddress);
+ targetAddress = fixupAddress + 4 + displacement;
+ return atomFromAddress(reloc.symbol, targetAddress, target, addend);
+ case ARM_THUMB_RELOC_BR22 | rScattered | rPcRel | rLength4:
+ // ex: bl _foo+4 (and _foo is defined)
+ if ((instruction & 0xD000F800) == 0x9000F000)
+ *kind = thumb_b22;
+ else
+ *kind = thumb_bl22;
+ displacement = getDisplacementFromThumbBranch(instruction, fixupAddress);
+ targetAddress = fixupAddress + 4 + displacement;
+ if (auto ec = atomFromAddress(0, reloc.value, target, addend))
+ return ec;
+ // reloc.value is target atom's address. Instruction contains branch
+ // to atom+addend.
+ *addend += (targetAddress - reloc.value);
+ return llvm::Error::success();
+ case ARM_RELOC_BR24 | rPcRel | rExtern | rLength4:
+ // ex: bl _foo (and _foo is undefined)
+ if (((instruction & 0x0F000000) == 0x0A000000) // opcode nibble of a plain b ...
+ && ((instruction & 0xF0000000) != 0xF0000000)) // ... and not the 0xF condition field that marks blx
+ *kind = arm_b24;
+ else
+ *kind = arm_bl24;
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ // Instruction contains branch to addend.
+ displacement = getDisplacementFromArmBranch(instruction);
+ *addend = fixupAddress + 8 + displacement; // arm branch displacements are relative to fixupAddress + 8
+ return llvm::Error::success();
+ case ARM_RELOC_BR24 | rPcRel | rLength4:
+ // ex: bl _foo (and _foo is defined)
+ if (((instruction & 0x0F000000) == 0x0A000000)
+ && ((instruction & 0xF0000000) != 0xF0000000))
+ *kind = arm_b24;
+ else
+ *kind = arm_bl24;
+ displacement = getDisplacementFromArmBranch(instruction);
+ targetAddress = fixupAddress + 8 + displacement;
+ return atomFromAddress(reloc.symbol, targetAddress, target, addend);
+ case ARM_RELOC_BR24 | rScattered | rPcRel | rLength4:
+ // ex: bl _foo+4 (and _foo is defined)
+ if (((instruction & 0x0F000000) == 0x0A000000)
+ && ((instruction & 0xF0000000) != 0xF0000000))
+ *kind = arm_b24;
+ else
+ *kind = arm_bl24;
+ displacement = getDisplacementFromArmBranch(instruction);
+ targetAddress = fixupAddress + 8 + displacement;
+ if (auto ec = atomFromAddress(0, reloc.value, target, addend))
+ return ec;
+ // reloc.value is target atom's address. Instruction contains branch
+ // to atom+addend.
+ *addend += (targetAddress - reloc.value);
+ return llvm::Error::success();
+ case ARM_RELOC_VANILLA | rExtern | rLength4:
+ // ex: .long _foo (and _foo is undefined)
+ *kind = pointer32;
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ *addend = instruction; // the stored 32-bit word is taken as the addend
+ return llvm::Error::success();
+ case ARM_RELOC_VANILLA | rLength4:
+ // ex: .long _foo (and _foo is defined)
+ *kind = pointer32;
+ if (auto ec = atomFromAddress(reloc.symbol, instruction, target, addend))
+ return ec;
+ *addend = clearThumbBit((uint32_t) * addend, *target); // drop a thumb bit so it is not mistaken for an addend of 1
+ return llvm::Error::success();
+ case ARM_RELOC_VANILLA | rScattered | rLength4:
+ // ex: .long _foo+a (and _foo is defined)
+ *kind = pointer32;
+ if (auto ec = atomFromAddress(0, reloc.value, target, addend))
+ return ec;
+ *addend += (clearThumbBit(instruction, *target) - reloc.value);
+ return llvm::Error::success();
+ default:
+ return llvm::make_error<GenericError>("unsupported arm relocation type");
+ }
+ return llvm::Error::success();
+ArchHandler_arm::getPairReferenceInfo(const normalized::Relocation &reloc1, // Decodes a two-relocation sequence (reloc1 followed by its ARM_RELOC_PAIR reloc2) into *kind, *target and *addend.
+ const normalized::Relocation &reloc2,
+ const DefinedAtom *inAtom,
+ uint32_t offsetInAtom,
+ uint64_t fixupAddress, bool isBig,
+ bool scatterable,
+ FindAtomBySectionAndAddress atomFromAddr,
+ FindAtomBySymbolIndex atomFromSymbolIndex,
+ Reference::KindValue *kind,
+ const lld::Atom **target,
+ Reference::Addend *addend) {
+ bool pointerDiff = false; // set only for the 32-bit SECTDIFF / LOCAL_SECTDIFF patterns
+ bool funcRel; // true for the HALF_SECTDIFF (function-relative) patterns
+ bool top; // true for movt (upper 16 bits), false for movw
+ bool thumbReloc; // true for thumb encodings, false for arm encodings
+ switch(relocPattern(reloc1) << 16 | relocPattern(reloc2)) { // key: first reloc's pattern in the high 16 bits, second's in the low 16 bits
+ case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbLo) << 16 |
+ ARM_RELOC_PAIR | rScattered | rLenThmbLo):
+ // ex: movw r1, :lower16:(_x-L1) [thumb mode]
+ *kind = thumb_movw_funcRel;
+ funcRel = true;
+ top = false;
+ thumbReloc = true;
+ break;
+ case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbHi) << 16 |
+ ARM_RELOC_PAIR | rScattered | rLenThmbHi):
+ // ex: movt r1, :upper16:(_x-L1) [thumb mode]
+ *kind = thumb_movt_funcRel;
+ funcRel = true;
+ top = true;
+ thumbReloc = true;
+ break;
+ case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmLo) << 16 |
+ ARM_RELOC_PAIR | rScattered | rLenArmLo):
+ // ex: movw r1, :lower16:(_x-L1) [arm mode]
+ *kind = arm_movw_funcRel;
+ funcRel = true;
+ top = false;
+ thumbReloc = false;
+ break;
+ case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmHi) << 16 |
+ ARM_RELOC_PAIR | rScattered | rLenArmHi):
+ // ex: movt r1, :upper16:(_x-L1) [arm mode]
+ *kind = arm_movt_funcRel;
+ funcRel = true;
+ top = true;
+ thumbReloc = false;
+ break;
+ case ((ARM_RELOC_HALF | rLenThmbLo) << 16 |
+ ARM_RELOC_PAIR | rLenThmbLo):
+ // ex: movw r1, :lower16:_x [thumb mode]
+ *kind = thumb_movw;
+ funcRel = false;
+ top = false;
+ thumbReloc = true;
+ break;
+ case ((ARM_RELOC_HALF | rLenThmbHi) << 16 |
+ ARM_RELOC_PAIR | rLenThmbHi):
+ // ex: movt r1, :upper16:_x [thumb mode]
+ *kind = thumb_movt;
+ funcRel = false;
+ top = true;
+ thumbReloc = true;
+ break;
+ case ((ARM_RELOC_HALF | rLenArmLo) << 16 |
+ ARM_RELOC_PAIR | rLenArmLo):
+ // ex: movw r1, :lower16:_x [arm mode]
+ *kind = arm_movw;
+ funcRel = false;
+ top = false;
+ thumbReloc = false;
+ break;
+ case ((ARM_RELOC_HALF | rLenArmHi) << 16 |
+ ARM_RELOC_PAIR | rLenArmHi):
+ // ex: movt r1, :upper16:_x [arm mode]
+ *kind = arm_movt;
+ funcRel = false;
+ top = true;
+ thumbReloc = false;
+ break;
+ case ((ARM_RELOC_HALF | rScattered | rLenThmbLo) << 16 |
+ ARM_RELOC_PAIR | rLenThmbLo):
+ // ex: movw r1, :lower16:_x+a [thumb mode]
+ *kind = thumb_movw;
+ funcRel = false;
+ top = false;
+ thumbReloc = true;
+ break;
+ case ((ARM_RELOC_HALF | rScattered | rLenThmbHi) << 16 |
+ ARM_RELOC_PAIR | rLenThmbHi):
+ // ex: movt r1, :upper16:_x+a [thumb mode]
+ *kind = thumb_movt;
+ funcRel = false;
+ top = true;
+ thumbReloc = true;
+ break;
+ case ((ARM_RELOC_HALF | rScattered | rLenArmLo) << 16 |
+ ARM_RELOC_PAIR | rLenArmLo):
+ // ex: movw r1, :lower16:_x+a [arm mode]
+ *kind = arm_movw;
+ funcRel = false;
+ top = false;
+ thumbReloc = false;
+ break;
+ case ((ARM_RELOC_HALF | rScattered | rLenArmHi) << 16 |
+ ARM_RELOC_PAIR | rLenArmHi):
+ // ex: movt r1, :upper16:_x+a [arm mode]
+ *kind = arm_movt;
+ funcRel = false;
+ top = true;
+ thumbReloc = false;
+ break;
+ case ((ARM_RELOC_HALF | rExtern | rLenThmbLo) << 16 |
+ ARM_RELOC_PAIR | rLenThmbLo):
+ // ex: movw r1, :lower16:_undef [thumb mode]
+ *kind = thumb_movw;
+ funcRel = false;
+ top = false;
+ thumbReloc = true;
+ break;
+ case ((ARM_RELOC_HALF | rExtern | rLenThmbHi) << 16 |
+ ARM_RELOC_PAIR | rLenThmbHi):
+ // ex: movt r1, :upper16:_undef [thumb mode]
+ *kind = thumb_movt;
+ funcRel = false;
+ top = true;
+ thumbReloc = true;
+ break;
+ case ((ARM_RELOC_HALF | rExtern | rLenArmLo) << 16 |
+ ARM_RELOC_PAIR | rLenArmLo):
+ // ex: movw r1, :lower16:_undef [arm mode]
+ *kind = arm_movw;
+ funcRel = false;
+ top = false;
+ thumbReloc = false;
+ break;
+ case ((ARM_RELOC_HALF | rExtern | rLenArmHi) << 16 |
+ ARM_RELOC_PAIR | rLenArmHi):
+ // ex: movt r1, :upper16:_undef [arm mode]
+ *kind = arm_movt;
+ funcRel = false;
+ top = true;
+ thumbReloc = false;
+ break;
+ case ((ARM_RELOC_SECTDIFF | rScattered | rLength4) << 16 |
+ ARM_RELOC_PAIR | rScattered | rLength4):
+ case ((ARM_RELOC_LOCAL_SECTDIFF | rScattered | rLength4) << 16 |
+ ARM_RELOC_PAIR | rScattered | rLength4):
+ // ex: .long _foo - .
+ pointerDiff = true;
+ break;
+ default:
+ return llvm::make_error<GenericError>("unsupported arm relocation pair");
+ }
+ const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom];
+ uint32_t instruction = *(const ulittle32_t *)fixupContent; // always read as little-endian; isBig is not consulted
+ uint32_t value;
+ uint32_t fromAddress;
+ uint32_t toAddress;
+ uint16_t instruction16;
+ uint16_t other16;
+ const lld::Atom *fromTarget;
+ Reference::Addend offsetInTo;
+ Reference::Addend offsetInFrom;
+ if (pointerDiff) {
+ toAddress = reloc1.value;
+ fromAddress = reloc2.value;
+ if (auto ec = atomFromAddr(0, toAddress, target, &offsetInTo))
+ return ec;
+ if (auto ec = atomFromAddr(0, fromAddress, &fromTarget, &offsetInFrom))
+ return ec;
+ if (scatterable && (fromTarget != inAtom))
+ return llvm::make_error<GenericError>(
+ "SECTDIFF relocation where subtrahend label is not in atom");
+ *kind = delta32;
+ value = clearThumbBit(instruction, *target);
+ *addend = (int32_t)(value - (toAddress - fixupAddress)); // what remains of the stored word after removing (target address - fixup address)
+ } else if (funcRel) {
+ toAddress = reloc1.value;
+ fromAddress = reloc2.value;
+ if (auto ec = atomFromAddr(0, toAddress, target, &offsetInTo))
+ return ec;
+ if (auto ec = atomFromAddr(0, fromAddress, &fromTarget, &offsetInFrom))
+ return ec;
+ if (fromTarget != inAtom)
+ return llvm::make_error<GenericError>("ARM_RELOC_HALF_SECTDIFF relocation"
+ " where subtrahend label is not in atom");
+ other16 = (reloc2.offset & 0xFFFF); // the PAIR relocation's offset field carries the half not encoded in the instruction
+ if (thumbReloc) {
+ if (top) {
+ if (!isThumbMovt(instruction))
+ return llvm::make_error<GenericError>("expected movt instruction");
+ }
+ else {
+ if (!isThumbMovw(instruction))
+ return llvm::make_error<GenericError>("expected movw instruction");
+ }
+ instruction16 = getWordFromThumbMov(instruction);
+ }
+ else {
+ if (top) {
+ if (!isArmMovt(instruction))
+ return llvm::make_error<GenericError>("expected movt instruction");
+ }
+ else {
+ if (!isArmMovw(instruction))
+ return llvm::make_error<GenericError>("expected movw instruction");
+ }
+ instruction16 = getWordFromArmMov(instruction);
+ }
+ if (top)
+ value = (instruction16 << 16) | other16; // movt: the instruction holds the upper half
+ else
+ value = (other16 << 16) | instruction16; // movw: the instruction holds the lower half
+ value = clearThumbBit(value, *target);
+ int64_t ta = (int64_t) value - (toAddress - fromAddress);
+ *addend = ta - offsetInFrom;
+ return llvm::Error::success();
+ } else {
+ uint32_t sectIndex;
+ if (thumbReloc) {
+ if (top) {
+ if (!isThumbMovt(instruction))
+ return llvm::make_error<GenericError>("expected movt instruction");
+ }
+ else {
+ if (!isThumbMovw(instruction))
+ return llvm::make_error<GenericError>("expected movw instruction");
+ }
+ instruction16 = getWordFromThumbMov(instruction);
+ }
+ else {
+ if (top) {
+ if (!isArmMovt(instruction))
+ return llvm::make_error<GenericError>("expected movt instruction");
+ }
+ else {
+ if (!isArmMovw(instruction))
+ return llvm::make_error<GenericError>("expected movw instruction");
+ }
+ instruction16 = getWordFromArmMov(instruction);
+ }
+ other16 = (reloc2.offset & 0xFFFF); // the PAIR relocation's offset field carries the half not encoded in the instruction
+ if (top)
+ value = (instruction16 << 16) | other16;
+ else
+ value = (other16 << 16) | instruction16;
+ if (reloc1.isExtern) {
+ if (auto ec = atomFromSymbolIndex(reloc1.symbol, target))
+ return ec;
+ *addend = value; // external: the whole encoded value is the addend
+ } else {
+ if (reloc1.scattered) {
+ toAddress = reloc1.value; // scattered: the relocation carries the target atom's address
+ sectIndex = 0;
+ } else {
+ toAddress = value; // non-scattered: the encoded value itself is the address to look up
+ sectIndex = reloc1.symbol;
+ }
+ if (auto ec = atomFromAddr(sectIndex, toAddress, target, &offsetInTo))
+ return ec;
+ *addend = value - toAddress;
+ }
+ }
+ return llvm::Error::success();
+void ArchHandler_arm::applyFixupFinal(const Reference &ref, uint8_t *loc, // Patches the bytes at loc for one reference; generateAtomContent() calls this when not producing relocatable output.
+ uint64_t fixupAddress,
+ uint64_t targetAddress,
+ uint64_t inAtomAddress,
+ bool &thumbMode, bool targetIsThumb) {
+ if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+ return; // references from other kind namespaces are left untouched
+ assert(ref.kindArch() == Reference::KindArch::ARM);
+ ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc);
+ int32_t displacement;
+ uint16_t value16;
+ uint32_t value32;
+ switch (static_cast<ArmKind>(ref.kindValue())) {
+ case modeThumbCode:
+ thumbMode = true; // caller-owned state: later references are checked against this mode
+ break;
+ case modeArmCode:
+ thumbMode = false;
+ break;
+ case modeData:
+ break;
+ case thumb_b22:
+ case thumb_bl22:
+ assert(thumbMode);
+ displacement = (targetAddress - (fixupAddress + 4)) + ref.addend(); // thumb branches are relative to fixupAddress + 4
+ value32 = setDisplacementInThumbBranch(*loc32, fixupAddress,
+ displacement, targetIsThumb);
+ *loc32 = value32;
+ break;
+ case thumb_movw:
+ assert(thumbMode);
+ value16 = (targetAddress + ref.addend()) & 0xFFFF;
+ if (targetIsThumb)
+ value16 |= 1; // the low half carries the thumb bit for thumb targets
+ *loc32 = setWordFromThumbMov(*loc32, value16);
+ break;
+ case thumb_movt:
+ assert(thumbMode);
+ value16 = (targetAddress + ref.addend()) >> 16; // assignment to uint16_t keeps bits 16-31
+ *loc32 = setWordFromThumbMov(*loc32, value16);
+ break;
+ case thumb_movw_funcRel:
+ assert(thumbMode);
+ value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF; // distance from the containing atom's address
+ if (targetIsThumb)
+ value16 |= 1;
+ *loc32 = setWordFromThumbMov(*loc32, value16);
+ break;
+ case thumb_movt_funcRel:
+ assert(thumbMode);
+ value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16;
+ *loc32 = setWordFromThumbMov(*loc32, value16);
+ break;
+ case arm_b24:
+ case arm_bl24:
+ assert(!thumbMode);
+ displacement = (targetAddress - (fixupAddress + 8)) + ref.addend(); // arm branches are relative to fixupAddress + 8
+ value32 = setDisplacementInArmBranch(*loc32, displacement, targetIsThumb);
+ *loc32 = value32;
+ break;
+ case arm_movw:
+ assert(!thumbMode);
+ value16 = (targetAddress + ref.addend()) & 0xFFFF;
+ if (targetIsThumb)
+ value16 |= 1;
+ *loc32 = setWordFromArmMov(*loc32, value16);
+ break;
+ case arm_movt:
+ assert(!thumbMode);
+ value16 = (targetAddress + ref.addend()) >> 16;
+ *loc32 = setWordFromArmMov(*loc32, value16);
+ break;
+ case arm_movw_funcRel:
+ assert(!thumbMode);
+ value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF;
+ if (targetIsThumb)
+ value16 |= 1;
+ *loc32 = setWordFromArmMov(*loc32, value16);
+ break;
+ case arm_movt_funcRel:
+ assert(!thumbMode);
+ value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16;
+ *loc32 = setWordFromArmMov(*loc32, value16);
+ break;
+ case pointer32:
+ if (targetIsThumb)
+ *loc32 = targetAddress + ref.addend() + 1; // +1 stores the thumb bit in the pointer
+ else
+ *loc32 = targetAddress + ref.addend();
+ break;
+ case delta32:
+ if (targetIsThumb)
+ *loc32 = targetAddress - fixupAddress + ref.addend() + 1; // +1 stores the thumb bit in the delta
+ else
+ *loc32 = targetAddress - fixupAddress + ref.addend();
+ break;
+ case lazyPointer:
+ // do nothing
+ break;
+ case lazyImmediateLocation:
+ *loc32 = ref.addend(); // the addend itself is the value to store
+ break;
+ case invalid:
+ llvm_unreachable("invalid ARM Reference Kind");
+ break;
+ }
+void ArchHandler_arm::generateAtomContent(const DefinedAtom &atom,
+ bool relocatable,
+ FindAddressForAtom findAddress,
+ FindAddressForAtom findSectionAddress,
+ uint64_t imageBaseAddress,
+ llvm::MutableArrayRef<uint8_t> atomContentBuffer) {
+ // Start from the atom's unmodified bytes.
+ std::copy(atom.rawContent().begin(), atom.rawContent().end(),
+ atomContentBuffer.begin());
+ // Walk the references in order and patch each fix-up location. The
+ // arm/thumb mode is carried from one reference to the next because the
+ // mode-setting kinds update it inside the applyFixup* helpers.
+ bool inThumbMode = false;
+ for (const Reference *fixup : atom) {
+ const uint32_t fixupOffset = fixup->offsetInAtom();
+ const Atom *fixupTarget = fixup->target();
+ // Only defined targets have an address and a thumb-ness to look up.
+ uint64_t targetAddr = 0;
+ bool thumbTarget = false;
+ if (const DefinedAtom *definedTarget = dyn_cast<DefinedAtom>(fixupTarget)) {
+ targetAddr = findAddress(*fixupTarget);
+ thumbTarget = isThumbFunction(*definedTarget);
+ }
+ const uint64_t atomAddr = findAddress(atom);
+ const uint64_t fixupAddr = atomAddr + fixupOffset;
+ uint8_t *fixupLoc = &atomContentBuffer[fixupOffset];
+ if (!relocatable)
+ applyFixupFinal(*fixup, fixupLoc, fixupAddr, targetAddr, atomAddr,
+ inThumbMode, thumbTarget);
+ else
+ applyFixupRelocatable(*fixup, fixupLoc, fixupAddr, targetAddr, atomAddr,
+ inThumbMode, thumbTarget);
+ }
+bool ArchHandler_arm::useExternalRelocationTo(const Atom &target) {
+ // Undefined symbols are always referenced via external relocations.
+ if (isa<UndefinedAtom>(&target))
+ return true;
+ // Anything that is not a defined atom gets an internal relocation.
+ const DefinedAtom *defined = dyn_cast<DefinedAtom>(&target);
+ if (!defined)
+ return false;
+ const auto mergeKind = defined->merge();
+ // Tentative definitions are referenced via external relocations.
+ if (mergeKind == DefinedAtom::mergeAsTentative)
+ return true;
+ // Weak definitions are referenced externally only when they are global.
+ if (mergeKind == DefinedAtom::mergeAsWeak ||
+ mergeKind == DefinedAtom::mergeAsWeakAndAddressUsed)
+ return defined->scope() == DefinedAtom::scopeGlobal;
+ // Everything else is referenced via an internal relocation.
+ return false;
+void ArchHandler_arm::applyFixupRelocatable(const Reference &ref, uint8_t *loc, // Patches the bytes at loc for one reference; generateAtomContent() calls this when producing relocatable output.
+ uint64_t fixupAddress,
+ uint64_t targetAddress,
+ uint64_t inAtomAddress,
+ bool &thumbMode,
+ bool targetIsThumb) {
+ if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+ return; // references from other kind namespaces are left untouched
+ assert(ref.kindArch() == Reference::KindArch::ARM);
+ bool useExternalReloc = useExternalRelocationTo(*ref.target()); // external relocations encode only the addend, not the target address
+ ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc);
+ int32_t displacement;
+ uint16_t value16;
+ uint32_t value32;
+ bool targetIsUndef = isa<UndefinedAtom>(ref.target());
+ switch (static_cast<ArmKind>(ref.kindValue())) {
+ case modeThumbCode:
+ thumbMode = true; // caller-owned state: later references are checked against this mode
+ break;
+ case modeArmCode:
+ thumbMode = false;
+ break;
+ case modeData:
+ break;
+ case thumb_b22:
+ case thumb_bl22:
+ assert(thumbMode);
+ if (useExternalReloc)
+ displacement = (ref.addend() - (fixupAddress + 4)); // mirrors getReferenceInfo(): addend = fixupAddress + 4 + displacement
+ else
+ displacement = (targetAddress - (fixupAddress + 4)) + ref.addend();
+ value32 = setDisplacementInThumbBranch(*loc32, fixupAddress,
+ displacement,
+ targetIsUndef || targetIsThumb); // undefined targets are encoded as if thumb, so a bl stays a bl
+ *loc32 = value32;
+ break;
+ case thumb_movw:
+ assert(thumbMode);
+ if (useExternalReloc)
+ value16 = ref.addend() & 0xFFFF;
+ else
+ value16 = (targetAddress + ref.addend()) & 0xFFFF;
+ *loc32 = setWordFromThumbMov(*loc32, value16);
+ break;
+ case thumb_movt:
+ assert(thumbMode);
+ if (useExternalReloc)
+ value16 = ref.addend() >> 16;
+ else
+ value16 = (targetAddress + ref.addend()) >> 16;
+ *loc32 = setWordFromThumbMov(*loc32, value16);
+ break;
+ case thumb_movw_funcRel:
+ assert(thumbMode);
+ value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF; // distance from the containing atom's address
+ *loc32 = setWordFromThumbMov(*loc32, value16);
+ break;
+ case thumb_movt_funcRel:
+ assert(thumbMode);
+ value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16;
+ *loc32 = setWordFromThumbMov(*loc32, value16);
+ break;
+ case arm_b24:
+ case arm_bl24:
+ assert(!thumbMode);
+ if (useExternalReloc)
+ displacement = (ref.addend() - (fixupAddress + 8)); // mirrors getReferenceInfo(): addend = fixupAddress + 8 + displacement
+ else
+ displacement = (targetAddress - (fixupAddress + 8)) + ref.addend();
+ value32 = setDisplacementInArmBranch(*loc32, displacement,
+ targetIsThumb);
+ *loc32 = value32;
+ break;
+ case arm_movw:
+ assert(!thumbMode);
+ if (useExternalReloc)
+ value16 = ref.addend() & 0xFFFF;
+ else
+ value16 = (targetAddress + ref.addend()) & 0xFFFF;
+ *loc32 = setWordFromArmMov(*loc32, value16);
+ break;
+ case arm_movt:
+ assert(!thumbMode);
+ if (useExternalReloc)
+ value16 = ref.addend() >> 16;
+ else
+ value16 = (targetAddress + ref.addend()) >> 16;
+ *loc32 = setWordFromArmMov(*loc32, value16);
+ break;
+ case arm_movw_funcRel:
+ assert(!thumbMode);
+ value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF;
+ *loc32 = setWordFromArmMov(*loc32, value16);
+ break;
+ case arm_movt_funcRel:
+ assert(!thumbMode);
+ value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16;
+ *loc32 = setWordFromArmMov(*loc32, value16);
+ break;
+ case pointer32:
+ *loc32 = targetAddress + ref.addend(); // unlike applyFixupFinal(), no thumb bit is added here
+ break;
+ case delta32:
+ *loc32 = targetAddress - fixupAddress + ref.addend(); // unlike applyFixupFinal(), no thumb bit is added here
+ break;
+ case lazyPointer:
+ case lazyImmediateLocation:
+ // do nothing
+ break;
+ case invalid:
+ llvm_unreachable("invalid ARM Reference Kind");
+ break;
+ }
+void ArchHandler_arm::appendSectionRelocations(
+ const DefinedAtom &atom,
+ uint64_t atomSectionOffset,
+ const Reference &ref,
+ FindSymbolIndexForAtom symbolIndexForAtom,
+ FindSectionIndexForAtom sectionIndexForAtom,
+ FindAddressForAtom addressForAtom,
+ normalized::Relocations &relocs) {
+ if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+ return;
+ assert(ref.kindArch() == Reference::KindArch::ARM);
+ uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom();
+ bool useExternalReloc = useExternalRelocationTo(*ref.target());
+ uint32_t targetAtomAddress;
+ uint32_t fromAtomAddress;
+ uint16_t other16;
+ switch (static_cast<ArmKind>(ref.kindValue())) {
+ case modeThumbCode:
+ case modeArmCode:
+ case modeData:
+ // Do nothing.
+ break;
+ case thumb_b22:
+ case thumb_bl22:
+ if (useExternalReloc) {
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+ ARM_THUMB_RELOC_BR22 | rExtern | rPcRel | rLength4);
+ } else {
+ if (ref.addend() != 0)
+ appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
+ ARM_THUMB_RELOC_BR22 | rScattered | rPcRel | rLength4);
+ else
+ appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0,
+ ARM_THUMB_RELOC_BR22 | rPcRel | rLength4);
+ }
+ break;
+ case thumb_movw:
+ if (useExternalReloc) {
+ other16 = ref.addend() >> 16;
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+ ARM_RELOC_HALF | rExtern | rLenThmbLo);
+ appendReloc(relocs, other16, 0, 0,
+ ARM_RELOC_PAIR | rLenThmbLo);
+ } else {
+ targetAtomAddress = addressForAtom(*ref.target());
+ if (ref.addend() != 0) {
+ other16 = (targetAtomAddress + ref.addend()) >> 16;
+ appendReloc(relocs, sectionOffset, 0, targetAtomAddress,
+ ARM_RELOC_HALF | rScattered | rLenThmbLo);
+ appendReloc(relocs, other16, 0, 0,
+ ARM_RELOC_PAIR | rLenThmbLo);
+ } else {
+ other16 = (targetAtomAddress + ref.addend()) >> 16;
+ appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0,
+ ARM_RELOC_HALF | rLenThmbLo);
+ appendReloc(relocs, other16, 0, 0,
+ ARM_RELOC_PAIR | rLenThmbLo);
+ }
+ }
+ break;
+ case thumb_movt:
+ if (useExternalReloc) {
+ other16 = ref.addend() & 0xFFFF;
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+ ARM_RELOC_HALF | rExtern | rLenThmbHi);
+ appendReloc(relocs, other16, 0, 0,
+ ARM_RELOC_PAIR | rLenThmbHi);
+ } else {
+ targetAtomAddress = addressForAtom(*ref.target());
+ if (ref.addend() != 0) {
+ other16 = (targetAtomAddress + ref.addend()) & 0xFFFF;
+ appendReloc(relocs, sectionOffset, 0, targetAtomAddress,
+ ARM_RELOC_HALF | rScattered | rLenThmbHi);
+ appendReloc(relocs, other16, 0, 0,
+ ARM_RELOC_PAIR | rLenThmbHi);
+ } else {
+ other16 = (targetAtomAddress + ref.addend()) & 0xFFFF;
+ appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0,
+ ARM_RELOC_HALF | rLenThmbHi);
+ appendReloc(relocs, other16, 0, 0,
+ ARM_RELOC_PAIR | rLenThmbHi);
+ }
+ }
+ break;
+ case thumb_movw_funcRel:
+ fromAtomAddress = addressForAtom(atom);
+ targetAtomAddress = addressForAtom(*ref.target());
+ other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) >> 16;
+ appendReloc(relocs, sectionOffset, 0, targetAtomAddress,
+ ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbLo);
+ appendReloc(relocs, other16, 0, fromAtomAddress,
+ ARM_RELOC_PAIR | rScattered | rLenThmbLo);
+ break;
+ case thumb_movt_funcRel:
+ fromAtomAddress = addressForAtom(atom);
+ targetAtomAddress = addressForAtom(*ref.target());
+ other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) & 0xFFFF;
+ appendReloc(relocs, sectionOffset, 0, targetAtomAddress,
+ ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbHi);
+ appendReloc(relocs, other16, 0, fromAtomAddress,
+ ARM_RELOC_PAIR | rScattered | rLenThmbHi);
+ break;
+ case arm_b24:
+ case arm_bl24:
+ if (useExternalReloc) {
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+ ARM_RELOC_BR24 | rExtern | rPcRel | rLength4);
+ } else {
+ if (ref.addend() != 0)
+ appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
+ ARM_RELOC_BR24 | rScattered | rPcRel | rLength4);
+ else
+ appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0,
+ ARM_RELOC_BR24 | rPcRel | rLength4);
+ }
+ break;
+ case arm_movw:
+ if (useExternalReloc) {
+ other16 = ref.addend() >> 16;
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+ ARM_RELOC_HALF | rExtern | rLenArmLo);
+ appendReloc(relocs, other16, 0, 0,
+ ARM_RELOC_PAIR | rLenArmLo);
+ } else {
+ targetAtomAddress = addressForAtom(*ref.target());
+ if (ref.addend() != 0) {
+ other16 = (targetAtomAddress + ref.addend()) >> 16;
+ appendReloc(relocs, sectionOffset, 0, targetAtomAddress,
+ ARM_RELOC_HALF | rScattered | rLenArmLo);
+ appendReloc(relocs, other16, 0, 0,
+ ARM_RELOC_PAIR | rLenArmLo);
+ } else {
+ other16 = (targetAtomAddress + ref.addend()) >> 16;
+ appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0,
+ ARM_RELOC_HALF | rLenArmLo);
+ appendReloc(relocs, other16, 0, 0,
+ ARM_RELOC_PAIR | rLenArmLo);
+ }
+ }
+ break;
+ case arm_movt:
+ if (useExternalReloc) {
+ other16 = ref.addend() & 0xFFFF;
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+ ARM_RELOC_HALF | rExtern | rLenArmHi);
+ appendReloc(relocs, other16, 0, 0,
+ ARM_RELOC_PAIR | rLenArmHi);
+ } else {
+ targetAtomAddress = addressForAtom(*ref.target());
+ if (ref.addend() != 0) {
+ other16 = (targetAtomAddress + ref.addend()) & 0xFFFF;
+ appendReloc(relocs, sectionOffset, 0, targetAtomAddress,
+ ARM_RELOC_HALF | rScattered | rLenArmHi);
+ appendReloc(relocs, other16, 0, 0,
+ ARM_RELOC_PAIR | rLenArmHi);
+ } else {
+ other16 = (targetAtomAddress + ref.addend()) & 0xFFFF;
+ appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0,
+ ARM_RELOC_HALF | rLenArmHi);
+ appendReloc(relocs, other16, 0, 0,
+ ARM_RELOC_PAIR | rLenArmHi);
+ }
+ }
+ break;
+ case arm_movw_funcRel:
+ fromAtomAddress = addressForAtom(atom);
+ targetAtomAddress = addressForAtom(*ref.target());
+ other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) >> 16;
+ appendReloc(relocs, sectionOffset, 0, targetAtomAddress,
+ ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmLo);
+ appendReloc(relocs, other16, 0, fromAtomAddress,
+ ARM_RELOC_PAIR | rScattered | rLenArmLo);
+ break;
+ case arm_movt_funcRel:
+ fromAtomAddress = addressForAtom(atom);
+ targetAtomAddress = addressForAtom(*ref.target());
+ other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) & 0xFFFF;
+ appendReloc(relocs, sectionOffset, 0, targetAtomAddress,
+ ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmHi);
+ appendReloc(relocs, other16, 0, fromAtomAddress,
+ ARM_RELOC_PAIR | rScattered | rLenArmHi);
+ break;
+ case pointer32:
+ if (useExternalReloc) {
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+ ARM_RELOC_VANILLA | rExtern | rLength4);
+ }
+ else {
+ if (ref.addend() != 0)
+ appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
+ ARM_RELOC_VANILLA | rScattered | rLength4);
+ else
+ appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0,
+ ARM_RELOC_VANILLA | rLength4);
+ }
+ break;
+ case delta32:
+ appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
+ ARM_RELOC_SECTDIFF | rScattered | rLength4);
+ appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) +
+ ref.offsetInAtom(),
+ ARM_RELOC_PAIR | rScattered | rLength4);
+ break;
+ case lazyPointer:
+ case lazyImmediateLocation:
+ // do nothing
+ break;
+ case invalid:
+ llvm_unreachable("invalid ARM Reference Kind");
+ break;
+ }
+// When `atom` reports that it holds Thumb code, attach a mach_o/ARM
+// modeThumbCode reference to it whose target is the atom itself.
+void ArchHandler_arm::addAdditionalReferences(MachODefinedAtom &atom) {
+  const bool containsThumb = atom.isThumb();
+  if (containsThumb) {
+    atom.addReference(Reference::KindNamespace::mach_o,
+                      Reference::KindArch::ARM, modeThumbCode, 0, &atom, 0);
+  }
+// Returns true when a mach_o reference of kind modeThumbCode is found among
+// the references at offset 0 of `atom`. Scanning stops (with a false result)
+// at the first reference whose offset in the atom is non-zero.
+bool ArchHandler_arm::isThumbFunction(const DefinedAtom &atom) {
+  for (const Reference *ref : atom) {
+    if (ref->offsetInAtom() != 0)
+      break;
+    // References from other namespaces are skipped, not treated as failures.
+    if (ref->kindNamespace() == Reference::KindNamespace::mach_o) {
+      assert(ref->kindArch() == Reference::KindArch::ARM);
+      if (ref->kindValue() == modeThumbCode)
+        return true;
+    }
+  }
+  return false;
+// Code atom holding a fixed 12-byte instruction sequence plus a trailing
+// 32-bit slot. ArchHandler_arm::createShim() instantiates it when asked for a
+// thumb-to-arm shim.
+class Thumb2ToArmShimAtom : public SimpleDefinedAtom {
+ // Adds a modeThumbCode reference to this atom itself and a delta32 reference
+ // to `target` at 8 (which matches the byte offset of the trailing .long in
+ // rawContent()), then names the atom "<targetName>$shim".
+ Thumb2ToArmShimAtom(MachOFile &file, StringRef targetName,
+ const DefinedAtom &target)
+ : SimpleDefinedAtom(file) {
+ addReference(Reference::KindNamespace::mach_o, Reference::KindArch::ARM,
+ ArchHandler_arm::modeThumbCode, 0, this, 0);
+ addReference(Reference::KindNamespace::mach_o, Reference::KindArch::ARM,
+ ArchHandler_arm::delta32, 8, &target, 0);
+ std::string name = std::string(targetName) + "$shim";
+ StringRef tmp(name);
+ // `name` is a local; copy the characters into the file's allocator so that
+ // _name does not dangle once the constructor returns.
+ _name = tmp.copy(file.allocator());
+ }
+ ~Thumb2ToArmShimAtom() override = default;
+ // Name built in the constructor: "<targetName>$shim".
+ StringRef name() const override {
+ return _name;
+ }
+ // The shim is executable code.
+ ContentType contentType() const override {
+ return DefinedAtom::typeCode;
+ }
+ Alignment alignment() const override { return 4; }
+ // Must equal sizeof(bytes) in rawContent(); asserted there.
+ uint64_t size() const override {
+ return 12;
+ }
+ ContentPermissions permissions() const override {
+ return DefinedAtom::permR_X;
+ }
+ // Returns the fixed shim bytes. The last four bytes are a zero placeholder
+ // covered by the delta32 reference added in the constructor.
+ // NOTE(review): the per-line mnemonics below are carried over unverified;
+ // in particular 0x60 0x47 looks like a branch-exchange through ip rather
+ // than a load into pc - confirm against the ARM architecture manual.
+ ArrayRef<uint8_t> rawContent() const override {
+ static const uint8_t bytes[] =
+ { 0xDF, 0xF8, 0x04, 0xC0, // ldr ip, pc + 4
+ 0xFF, 0x44, // add ip, pc, ip
+ 0x60, 0x47, // ldr pc, [ip]
+ 0x00, 0x00, 0x00, 0x00 }; // .long target - this
+ assert(sizeof(bytes) == size());
+ return llvm::makeArrayRef(bytes, sizeof(bytes));
+ }
+ // Storage for the atom name; points into memory owned by file.allocator().
+ StringRef _name;
+// Code atom holding a fixed 16-byte instruction sequence whose last four
+// bytes are a 32-bit slot. ArchHandler_arm::createShim() instantiates it when
+// thumbToArm is false.
+class ArmToThumbShimAtom : public SimpleDefinedAtom {
+ // Adds a delta32 reference to `target` at 12 (which matches the byte offset
+ // of the trailing .long in rawContent()) and names the atom
+ // "<targetName>$shim". Unlike Thumb2ToArmShimAtom, no modeThumbCode
+ // reference is added.
+ ArmToThumbShimAtom(MachOFile &file, StringRef targetName,
+ const DefinedAtom &target)
+ : SimpleDefinedAtom(file) {
+ addReference(Reference::KindNamespace::mach_o, Reference::KindArch::ARM,
+ ArchHandler_arm::delta32, 12, &target, 0);
+ std::string name = std::string(targetName) + "$shim";
+ StringRef tmp(name);
+ // `name` is a local; copy the characters into the file's allocator so that
+ // _name does not dangle once the constructor returns.
+ _name = tmp.copy(file.allocator());
+ }
+ ~ArmToThumbShimAtom() override = default;
+ // Name built in the constructor: "<targetName>$shim".
+ StringRef name() const override {
+ return _name;
+ }
+ // The shim is executable code.
+ ContentType contentType() const override {
+ return DefinedAtom::typeCode;
+ }
+ Alignment alignment() const override { return 4; }
+ // Must equal sizeof(bytes) in rawContent(); asserted there.
+ uint64_t size() const override {
+ return 16;
+ }
+ ContentPermissions permissions() const override {
+ return DefinedAtom::permR_X;
+ }
+ // Returns the fixed shim bytes. The last four bytes are a zero placeholder
+ // covered by the delta32 reference added in the constructor.
+ // NOTE(review): the per-line mnemonics below are carried over unverified;
+ // 0x1C 0xFF 0x2F 0xE1 looks like a branch-exchange through ip rather than a
+ // load into pc - confirm against the ARM architecture manual.
+ ArrayRef<uint8_t> rawContent() const override {
+ static const uint8_t bytes[] =
+ { 0x04, 0xC0, 0x9F, 0xE5, // ldr ip, pc + 4
+ 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip
+ 0x1C, 0xFF, 0x2F, 0xE1, // ldr pc, [ip]
+ 0x00, 0x00, 0x00, 0x00 }; // .long target - this
+ assert(sizeof(bytes) == size());
+ return llvm::makeArrayRef(bytes, sizeof(bytes));
+ }
+ // Storage for the atom name; points into memory owned by file.allocator().
+ StringRef _name;
+// Allocates a shim atom for `target` in the allocator owned by `file`.
+// thumbToArm selects Thumb2ToArmShimAtom; otherwise an ArmToThumbShimAtom is
+// built. The shim is named after the target, or after stubName(target) when
+// the target is itself a stub.
+const DefinedAtom *ArchHandler_arm::createShim(MachOFile &file,
+                                               bool thumbToArm,
+                                               const DefinedAtom &target) {
+  StringRef targetName;
+  if (target.contentType() == DefinedAtom::typeStub)
+    targetName = stubName(target);
+  else
+    targetName = target.name();
+  if (!thumbToArm)
+    return new (file.allocator()) ArmToThumbShimAtom(file, targetName, target);
+  return new (file.allocator()) Thumb2ToArmShimAtom(file, targetName, target);
+// Factory: returns a freshly constructed ArchHandler_arm owned by a
+// unique_ptr to the ArchHandler base.
+std::unique_ptr<mach_o::ArchHandler> ArchHandler::create_arm() {
+  std::unique_ptr<mach_o::ArchHandler> handler(new ArchHandler_arm());
+  return handler;
+} // namespace mach_o
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp
new file mode 100644
index 000000000000..b9c815c5a320
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp
@@ -0,0 +1,898 @@
+//===- lib/FileFormat/MachO/ArchHandler_arm64.cpp -------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "ArchHandler.h"
+#include "Atoms.h"
+#include "MachONormalizedFileBinaryUtils.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+using namespace llvm::MachO;
+using namespace lld::mach_o::normalized;
+namespace lld {
+namespace mach_o {
+using llvm::support::ulittle32_t;
+using llvm::support::ulittle64_t;
+using llvm::support::little32_t;
+using llvm::support::little64_t;
+// ArchHandler implementation for Reference::KindArch::AArch64. It declares
+// the arm64 reference kinds (Arm64Kind), answers the queries the passes make
+// about those kinds, converts relocations to references and back, and applies
+// fixups to atom content.
+class ArchHandler_arm64 : public ArchHandler {
+ ArchHandler_arm64() = default;
+ ~ArchHandler_arm64() override = default;
+ // Table of kind-value/name pairs defined after the class.
+ const Registry::KindStrings *kindStrings() override { return _sKindStrings; }
+ Reference::KindArch kindArch() override {
+ return Reference::KindArch::AArch64;
+ }
+ /// Used by GOTPass to locate GOT References
+ /// Returns true for the mach_o kinds that go through the GOT and sets
+ /// canBypassGOT to true only for gotPage21/gotOffset12. canBypassGOT is left
+ /// untouched when the function returns false.
+ bool isGOTAccess(const Reference &ref, bool &canBypassGOT) override {
+ if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+ return false;
+ assert(ref.kindArch() == Reference::KindArch::AArch64);
+ switch (ref.kindValue()) {
+ case gotPage21:
+ case gotOffset12:
+ canBypassGOT = true;
+ return true;
+ case delta32ToGOT:
+ case unwindCIEToPersonalityFunction:
+ case imageOffsetGot:
+ canBypassGOT = false;
+ return true;
+ default:
+ return false;
+ }
+ }
+ /// Used by GOTPass to update GOT References.
+ /// Rewrites the kind of `ref` in place (through a const_cast).
+ /// NOTE(review): isGOTAccess() reports unwindCIEToPersonalityFunction as a
+ /// GOT access, but there is no case for it below, so such a reference would
+ /// reach the default llvm_unreachable - confirm whether callers can pass it.
+ void updateReferenceToGOT(const Reference *ref, bool targetNowGOT) override {
+ // If GOT slot was instantiated, transform:
+ // gotPage21/gotOffset12 -> page21/offset12scale8
+ // If GOT slot optimized away, transform:
+ // gotPage21/gotOffset12 -> page21/addOffset12
+ // Independently of targetNowGOT:
+ // delta32ToGOT -> delta32, imageOffsetGot -> imageOffset
+ assert(ref->kindNamespace() == Reference::KindNamespace::mach_o);
+ assert(ref->kindArch() == Reference::KindArch::AArch64);
+ switch (ref->kindValue()) {
+ case gotPage21:
+ const_cast<Reference *>(ref)->setKindValue(page21);
+ break;
+ case gotOffset12:
+ const_cast<Reference *>(ref)->setKindValue(targetNowGOT ?
+ offset12scale8 : addOffset12);
+ break;
+ case delta32ToGOT:
+ const_cast<Reference *>(ref)->setKindValue(delta32);
+ break;
+ case imageOffsetGot:
+ const_cast<Reference *>(ref)->setKindValue(imageOffset);
+ break;
+ default:
+ llvm_unreachable("Not a GOT reference");
+ }
+ }
+ // Stub templates and their references; defined after the class.
+ const StubInfo &stubInfo() override { return _sStubInfo; }
+ bool isCallSite(const Reference &) override;
+ // This handler never classifies a reference as a non-call branch.
+ bool isNonCallBranch(const Reference &) override {
+ return false;
+ }
+ bool isPointer(const Reference &) override;
+ bool isPairedReloc(const normalized::Relocation &) override;
+ bool needsCompactUnwind() override {
+ return true;
+ }
+ // The accessors below tell callers which Arm64Kind value plays each role.
+ Reference::KindValue imageOffsetKind() override {
+ return imageOffset;
+ }
+ Reference::KindValue imageOffsetKindIndirect() override {
+ return imageOffsetGot;
+ }
+ Reference::KindValue unwindRefToPersonalityFunctionKind() override {
+ return unwindCIEToPersonalityFunction;
+ }
+ Reference::KindValue unwindRefToCIEKind() override {
+ return negDelta32;
+ }
+ Reference::KindValue unwindRefToFunctionKind() override {
+ return unwindFDEToFunction;
+ }
+ Reference::KindValue unwindRefToEhFrameKind() override {
+ return unwindInfoToEhFrame;
+ }
+ Reference::KindValue pointerKind() override {
+ return pointer64;
+ }
+ Reference::KindValue lazyImmediateLocationKind() override {
+ return lazyImmediateLocation;
+ }
+ // Constant reported as the DWARF compact-unwind type for this architecture.
+ uint32_t dwarfCompactUnwindType() override {
+ return 0x03000000;
+ }
+ // Converts one unpaired relocation into a reference kind, target and addend.
+ llvm::Error getReferenceInfo(const normalized::Relocation &reloc,
+ const DefinedAtom *inAtom,
+ uint32_t offsetInAtom,
+ uint64_t fixupAddress, bool isBig,
+ FindAtomBySectionAndAddress atomFromAddress,
+ FindAtomBySymbolIndex atomFromSymbolIndex,
+ Reference::KindValue *kind,
+ const lld::Atom **target,
+ Reference::Addend *addend) override;
+ // Converts a pair of relocations (see isPairedReloc) into a reference kind,
+ // target and addend.
+ llvm::Error
+ getPairReferenceInfo(const normalized::Relocation &reloc1,
+ const normalized::Relocation &reloc2,
+ const DefinedAtom *inAtom,
+ uint32_t offsetInAtom,
+ uint64_t fixupAddress, bool isBig, bool scatterable,
+ FindAtomBySectionAndAddress atomFromAddress,
+ FindAtomBySymbolIndex atomFromSymbolIndex,
+ Reference::KindValue *kind,
+ const lld::Atom **target,
+ Reference::Addend *addend) override;
+ // Only C-string atoms are reported as needing a local symbol.
+ bool needsLocalSymbolInRelocatableFile(const DefinedAtom *atom) override {
+ return (atom->contentType() == DefinedAtom::typeCString);
+ }
+ // Copies the atom's raw bytes into atomContentBuffer and applies every
+ // reference's fixup to the copy.
+ void generateAtomContent(const DefinedAtom &atom, bool relocatable,
+ FindAddressForAtom findAddress,
+ FindAddressForAtom findSectionAddress,
+ uint64_t imageBaseAddress,
+ llvm::MutableArrayRef<uint8_t> atomContentBuffer) override;
+ // Appends to `relocs` the relocation entries that represent `ref`.
+ void appendSectionRelocations(const DefinedAtom &atom,
+ uint64_t atomSectionOffset,
+ const Reference &ref,
+ FindSymbolIndexForAtom symbolIndexForAtom,
+ FindSectionIndexForAtom sectionIndexForAtom,
+ FindAddressForAtom addressForAtom,
+ normalized::Relocations &relocs) override;
+ static const Registry::KindStrings _sKindStrings[];
+ static const StubInfo _sStubInfo;
+ // Reference kinds understood by this handler. Enumerator order defines the
+ // numeric kind values, so it must not be changed casually.
+ enum Arm64Kind : Reference::KindValue {
+ invalid, /// for error condition
+ // Kinds found in mach-o .o files:
+ branch26, /// ex: bl _foo
+ page21, /// ex: adrp x1, _foo@PAGE
+ offset12, /// ex: ldrb w0, [x1, _foo@PAGEOFF]
+ offset12scale2, /// ex: ldrs w0, [x1, _foo@PAGEOFF]
+ offset12scale4, /// ex: ldr w0, [x1, _foo@PAGEOFF]
+ offset12scale8, /// ex: ldr x0, [x1, _foo@PAGEOFF]
+ offset12scale16, /// ex: ldr q0, [x1, _foo@PAGEOFF]
+ gotPage21, /// ex: adrp x1, _foo@GOTPAGE
+ gotOffset12, /// ex: ldr w0, [x1, _foo@GOTPAGEOFF]
+ tlvPage21, /// ex: adrp x1, _foo@TLVPAGE
+ tlvOffset12, /// ex: ldr w0, [x1, _foo@TLVPAGEOFF]
+ pointer64, /// ex: .quad _foo
+ delta64, /// ex: .quad _foo - .
+ delta32, /// ex: .long _foo - .
+ negDelta32, /// ex: .long . - _foo
+ pointer64ToGOT, /// ex: .quad _foo@GOT
+ delta32ToGOT, /// ex: .long _foo@GOT - .
+ // Kinds introduced by Passes:
+ addOffset12, /// Location contains LDR to change into ADD.
+ lazyPointer, /// Location contains a lazy pointer.
+ lazyImmediateLocation, /// Location contains immediate value used in stub.
+ imageOffset, /// Location contains offset of atom in final image
+ imageOffsetGot, /// Location contains offset of GOT entry for atom in
+ /// final image (typically personality function).
+ unwindCIEToPersonalityFunction, /// Nearly delta32ToGOT, but cannot be
+ /// rematerialized in relocatable object
+ /// (yay for implicit contracts!).
+ unwindFDEToFunction, /// Nearly delta64, but cannot be rematerialized in
+ /// relocatable object (yay for implicit contracts!).
+ unwindInfoToEhFrame, /// Fix low 24 bits of compact unwind encoding to
+ /// refer to __eh_frame entry.
+ };
+ // Writes the final (non-relocatable) value of `ref` at `location`.
+ void applyFixupFinal(const Reference &ref, uint8_t *location,
+ uint64_t fixupAddress, uint64_t targetAddress,
+ uint64_t inAtomAddress, uint64_t imageBaseAddress,
+ FindAddressForAtom findSectionAddress);
+ // Writes the value of `ref` at `location` for relocatable (-r) output.
+ void applyFixupRelocatable(const Reference &ref, uint8_t *location,
+ uint64_t fixupAddress, uint64_t targetAddress,
+ uint64_t inAtomAddress, bool targetUnnamed);
+ // Utility functions for inspecting/updating instructions.
+ static uint32_t setDisplacementInBranch26(uint32_t instr, int32_t disp);
+ static uint32_t setDisplacementInADRP(uint32_t instr, int64_t disp);
+ static Arm64Kind offset12KindFromInstruction(uint32_t instr);
+ static uint32_t setImm12(uint32_t instr, uint32_t offset);
+// Table returned by kindStrings(): one LLD_KIND_STRING_ENTRY per Arm64Kind
+// enumerator.
+// NOTE(review): only some enumerators are listed in this excerpt, and no
+// terminating entry or closing brace is visible - confirm against the full
+// file before relying on the table's contents.
+const Registry::KindStrings ArchHandler_arm64::_sKindStrings[] = {
+ LLD_KIND_STRING_ENTRY(offset12scale2),
+ LLD_KIND_STRING_ENTRY(offset12scale4),
+ LLD_KIND_STRING_ENTRY(offset12scale8),
+ LLD_KIND_STRING_ENTRY(offset12scale16),
+ LLD_KIND_STRING_ENTRY(lazyImmediateLocation),
+ LLD_KIND_STRING_ENTRY(imageOffsetGot),
+ LLD_KIND_STRING_ENTRY(unwindCIEToPersonalityFunction),
+ LLD_KIND_STRING_ENTRY(unwindInfoToEhFrame),
+// Stub description returned by stubInfo(): the binder symbol name, the
+// references placed on lazy/GOT pointers, and for each stub flavour its size,
+// code bytes and the references (kind + offset within the code) that patch
+// those bytes. Initializers are positional, so their order must follow the
+// field order of ArchHandler::StubInfo.
+const ArchHandler::StubInfo ArchHandler_arm64::_sStubInfo = {
+ "dyld_stub_binder",
+ // Lazy pointer references
+ { Reference::KindArch::AArch64, pointer64, 0, 0 },
+ { Reference::KindArch::AArch64, lazyPointer, 0, 0 },
+ // GOT pointer to dyld_stub_binder
+ { Reference::KindArch::AArch64, pointer64, 0, 0 },
+ // arm64 code alignment 2^1
+ 1,
+ // Stub size and code
+ 12,
+ { 0x10, 0x00, 0x00, 0x90, // ADRP X16, lazy_pointer@page
+ 0x10, 0x02, 0x40, 0xF9, // LDR X16, [X16, lazy_pointer@pageoff]
+ 0x00, 0x02, 0x1F, 0xD6 }, // BR X16
+ // page21 patches the ADRP at offset 0, offset12scale8 the LDR at offset 4.
+ { Reference::KindArch::AArch64, page21, 0, 0 },
+ { true, offset12scale8, 4, 0 },
+ // Stub Helper size and code
+ 12,
+ { 0x50, 0x00, 0x00, 0x18, // LDR W16, L0
+ 0x00, 0x00, 0x00, 0x14, // B helperhelper
+ 0x00, 0x00, 0x00, 0x00 }, // L0: .long 0
+ // lazyImmediateLocation fills the .long at offset 8; branch26 patches the
+ // branch at offset 4.
+ { Reference::KindArch::AArch64, lazyImmediateLocation, 8, 0 },
+ { Reference::KindArch::AArch64, branch26, 4, 0 },
+ // Stub helper image cache content type
+ DefinedAtom::typeGOT,
+ // Stub Helper-Common size and code
+ 24,
+ // Stub helper alignment
+ 2,
+ { 0x11, 0x00, 0x00, 0x90, // ADRP X17, dyld_ImageLoaderCache@page
+ 0x31, 0x02, 0x00, 0x91, // ADD X17, X17, dyld_ImageLoaderCache@pageoff
+ 0xF0, 0x47, 0xBF, 0xA9, // STP X16/X17, [SP, #-16]!
+ 0x10, 0x00, 0x00, 0x90, // ADRP X16, _fast_lazy_bind@page
+ 0x10, 0x02, 0x40, 0xF9, // LDR X16, [X16,_fast_lazy_bind@pageoff]
+ 0x00, 0x02, 0x1F, 0xD6 }, // BR X16
+ // Offsets 0/4 patch the first ADRP/ADD pair, offsets 12/16 the second
+ // ADRP/LDR pair.
+ { Reference::KindArch::AArch64, page21, 0, 0 },
+ { true, offset12, 4, 0 },
+ { Reference::KindArch::AArch64, page21, 12, 0 },
+ { true, offset12scale8, 16, 0 }
+// A reference is a call site only when it is a mach_o reference of kind
+// branch26; references from other namespaces are never call sites.
+bool ArchHandler_arm64::isCallSite(const Reference &ref) {
+  const bool isMachO = ref.kindNamespace() == Reference::KindNamespace::mach_o;
+  if (!isMachO)
+    return false;
+  assert(ref.kindArch() == Reference::KindArch::AArch64);
+  const Reference::KindValue kind = ref.kindValue();
+  return kind == branch26;
+// A reference is a pointer only when it is a mach_o reference of kind
+// pointer64; references from other namespaces are never pointers.
+bool ArchHandler_arm64::isPointer(const Reference &ref) {
+  const bool isMachO = ref.kindNamespace() == Reference::KindNamespace::mach_o;
+  if (!isMachO)
+    return false;
+  assert(ref.kindArch() == Reference::KindArch::AArch64);
+  return ref.kindValue() == pointer64;
+// ADDEND and SUBTRACTOR relocations are the first half of a two-entry pair.
+bool ArchHandler_arm64::isPairedReloc(const Relocation &r) {
+  const bool isAddend = (r.type == ARM64_RELOC_ADDEND);
+  const bool isSubtractor = (r.type == ARM64_RELOC_SUBTRACTOR);
+  return isAddend || isSubtractor;
+// Returns `instr` with its low 26 bits replaced by displacement / 4 (the
+// displacement is a byte offset; the field stores it in units of 4 bytes).
+// The top 6 bits of `instr` are preserved.
+//
+// The 26-bit signed field covers byte displacements in
+// [-134217728, 134217727]; the lower bound is inclusive, so the assert must
+// accept -134217728 itself.
+uint32_t ArchHandler_arm64::setDisplacementInBranch26(uint32_t instr,
+                                                      int32_t displacement) {
+  assert((displacement <= 134217727) && (displacement >= (-134217728)) &&
+         "arm64 branch out of range");
+  return (instr & 0xFC000000) | ((uint32_t)(displacement >> 2) & 0x03FFFFFF);
+// Returns `instruction` (which must be an ADRP) with its page-displacement
+// fields replaced. `displacement` is a byte delta between 4 KiB pages:
+// bits 13:12 go to instruction bits 30:29 and bits 32:14 go to instruction
+// bits 23:5. All other instruction bits are preserved.
+//
+// Bit 32 of the displacement lands in the top bit of the high field, so
+// +0x100000000 would be encoded identically to -0x100000000. The valid range
+// is therefore [-0x100000000, 0x100000000): inclusive below, exclusive above.
+uint32_t ArchHandler_arm64::setDisplacementInADRP(uint32_t instruction,
+                                                  int64_t displacement) {
+  assert((displacement < 0x100000000LL) && (displacement >= (-0x100000000LL)) &&
+         "arm64 ADRP out of range");
+  assert(((instruction & 0x9F000000) == 0x90000000) &&
+         "reloc not on ADRP instruction");
+  uint32_t immhi = (displacement >> 9) & (0x00FFFFE0);
+  uint32_t immlo = (displacement << 17) & (0x60000000);
+  return (instruction & 0x9F00001F) | immlo | immhi;
+// Picks the offset12 flavour from the instruction word: when bit 27 is clear
+// the plain offset12 kind is used; otherwise bits 31:30 select the scale,
+// with value 0 meaning scale16 only when both bits of mask 0x04800000 are
+// set.
+ArchHandler_arm64::offset12KindFromInstruction(uint32_t instruction) {
+  if ((instruction & 0x08000000) == 0)
+    return offset12;
+  const uint32_t sizeField = (instruction >> 30) & 0x3;
+  if (sizeField == 3)
+    return offset12scale8;
+  if (sizeField == 2)
+    return offset12scale4;
+  if (sizeField == 1)
+    return offset12scale2;
+  // sizeField == 0: either an unscaled access or a 16-byte one.
+  const bool isScale16 = (instruction & 0x04800000) == 0x04800000;
+  return isScale16 ? offset12scale16 : offset12;
+// Returns `instruction` with bits 21:10 replaced by `offset`, which must fit
+// in 12 bits. All other instruction bits are preserved.
+uint32_t ArchHandler_arm64::setImm12(uint32_t instruction, uint32_t offset) {
+  assert(((offset & 0xFFFFF000) == 0) && "imm12 offset out of range");
+  const uint32_t cleared = instruction & 0xFFC003FF;
+  return cleared | (offset << 10);
+// Translates one unpaired relocation into a reference description.
+//
+// The relocation's pattern (type plus pc-rel/extern/length flags, as produced
+// by relocPattern()) selects the case. On success *kind, *target and *addend
+// are filled in and llvm::Error::success() is returned. A failed atom lookup
+// is returned as-is (with *kind already written), and an unrecognized pattern
+// yields a GenericError. fixupAddress and isBig are not used by this
+// implementation.
+llvm::Error ArchHandler_arm64::getReferenceInfo(
+ const Relocation &reloc, const DefinedAtom *inAtom, uint32_t offsetInAtom,
+ uint64_t fixupAddress, bool isBig,
+ FindAtomBySectionAndAddress atomFromAddress,
+ FindAtomBySymbolIndex atomFromSymbolIndex, Reference::KindValue *kind,
+ const lld::Atom **target, Reference::Addend *addend) {
+ // Bytes at the fixup location; some cases read the instruction or the
+ // embedded addend from here.
+ const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom];
+ switch (relocPattern(reloc)) {
+ case ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4:
+ // ex: bl _foo
+ *kind = branch26;
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ *addend = 0;
+ return llvm::Error::success();
+ case ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4:
+ // ex: adrp x1, _foo@PAGE
+ *kind = page21;
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ *addend = 0;
+ return llvm::Error::success();
+ case ARM64_RELOC_PAGEOFF12 | rExtern | rLength4:
+ // ex: ldr x0, [x1, _foo@PAGEOFF]
+ // The scale of the offset depends on the instruction being fixed up.
+ *kind = offset12KindFromInstruction(*(const little32_t *)fixupContent);
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ *addend = 0;
+ return llvm::Error::success();
+ case ARM64_RELOC_GOT_LOAD_PAGE21 | rPcRel | rExtern | rLength4:
+ // ex: adrp x1, _foo@GOTPAGE
+ *kind = gotPage21;
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ *addend = 0;
+ return llvm::Error::success();
+ case ARM64_RELOC_GOT_LOAD_PAGEOFF12 | rExtern | rLength4:
+ // ex: ldr x0, [x1, _foo@GOTPAGEOFF]
+ *kind = gotOffset12;
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ *addend = 0;
+ return llvm::Error::success();
+ case ARM64_RELOC_TLVP_LOAD_PAGE21 | rPcRel | rExtern | rLength4:
+ // ex: adrp x1, _foo@TLVPAGE
+ *kind = tlvPage21;
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ *addend = 0;
+ return llvm::Error::success();
+ case ARM64_RELOC_TLVP_LOAD_PAGEOFF12 | rExtern | rLength4:
+ // ex: ldr x0, [x1, _foo@TLVPAGEOFF]
+ *kind = tlvOffset12;
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ *addend = 0;
+ return llvm::Error::success();
+ case ARM64_RELOC_UNSIGNED | rExtern | rLength8:
+ // ex: .quad _foo + N
+ // The addend N is the 64-bit value stored at the fixup location.
+ *kind = pointer64;
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ *addend = *(const little64_t *)fixupContent;
+ return llvm::Error::success();
+ case ARM64_RELOC_UNSIGNED | rLength8:
+ // ex: .quad Lfoo + N
+ // Non-extern: the stored 64-bit value is handed to atomFromAddress, which
+ // fills in both target and addend.
+ *kind = pointer64;
+ return atomFromAddress(reloc.symbol, *(const little64_t *)fixupContent,
+ target, addend);
+ case ARM64_RELOC_POINTER_TO_GOT | rExtern | rLength8:
+ // ex: .quad _foo@GOT
+ *kind = pointer64ToGOT;
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ *addend = 0;
+ return llvm::Error::success();
+ case ARM64_RELOC_POINTER_TO_GOT | rPcRel | rExtern | rLength4:
+ // ex: .long _foo@GOT - .
+ // If we are in an .eh_frame section, then the kind of the relocation should
+ // not be delta32ToGOT. It may instead be unwindCIEToPersonalityFunction.
+ if (inAtom->contentType() == DefinedAtom::typeCFI)
+ *kind = unwindCIEToPersonalityFunction;
+ else
+ *kind = delta32ToGOT;
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ *addend = 0;
+ return llvm::Error::success();
+ default:
+ return llvm::make_error<GenericError>("unsupported arm64 relocation type");
+ }
+// Translates a pair of relocations (reloc1 is an ADDEND or SUBTRACTOR entry,
+// reloc2 the relocation it modifies) into a reference description.
+//
+// The two patterns are combined as (pattern1 << 16 | pattern2) to select the
+// case. For ADDEND pairs the addend is carried in reloc1.symbol; for
+// SUBTRACTOR pairs it is read from the bytes at the fixup location and
+// adjusted by offsetInAtom. On success *kind, *target and *addend are filled
+// in; an unrecognized pair yields a GenericError. fixupAddress, swap,
+// scatterable and atomFromAddress are not used by this implementation.
+// Note that the fifth-from-last boolean is named `swap` here but `isBig` in
+// the class declaration.
+llvm::Error ArchHandler_arm64::getPairReferenceInfo(
+ const normalized::Relocation &reloc1, const normalized::Relocation &reloc2,
+ const DefinedAtom *inAtom, uint32_t offsetInAtom, uint64_t fixupAddress,
+ bool swap, bool scatterable, FindAtomBySectionAndAddress atomFromAddress,
+ FindAtomBySymbolIndex atomFromSymbolIndex, Reference::KindValue *kind,
+ const lld::Atom **target, Reference::Addend *addend) {
+ const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom];
+ switch (relocPattern(reloc1) << 16 | relocPattern(reloc2)) {
+ case ((ARM64_RELOC_ADDEND | rLength4) << 16 |
+ ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4):
+ // ex: bl _foo+8
+ *kind = branch26;
+ if (auto ec = atomFromSymbolIndex(reloc2.symbol, target))
+ return ec;
+ *addend = reloc1.symbol;
+ return llvm::Error::success();
+ case ((ARM64_RELOC_ADDEND | rLength4) << 16 |
+ ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4):
+ // ex: adrp x1, _foo@PAGE
+ // (with a non-zero addend supplied by the preceding ADDEND relocation)
+ *kind = page21;
+ if (auto ec = atomFromSymbolIndex(reloc2.symbol, target))
+ return ec;
+ *addend = reloc1.symbol;
+ return llvm::Error::success();
+ case ((ARM64_RELOC_ADDEND | rLength4) << 16 |
+ ARM64_RELOC_PAGEOFF12 | rExtern | rLength4): {
+ // ex: ldr w0, [x1, _foo@PAGEOFF]
+ // (with a non-zero addend supplied by the preceding ADDEND relocation)
+ uint32_t cont32 = (int32_t)*(const little32_t *)fixupContent;
+ *kind = offset12KindFromInstruction(cont32);
+ if (auto ec = atomFromSymbolIndex(reloc2.symbol, target))
+ return ec;
+ *addend = reloc1.symbol;
+ return llvm::Error::success();
+ }
+ case ((ARM64_RELOC_SUBTRACTOR | rExtern | rLength8) << 16 |
+ ARM64_RELOC_UNSIGNED | rExtern | rLength8):
+ // ex: .quad _foo - .
+ if (auto ec = atomFromSymbolIndex(reloc2.symbol, target))
+ return ec;
+ // If we are in an .eh_frame section, then the kind of the relocation should
+ // not be delta64. It may instead be unwindFDEToFunction.
+ if (inAtom->contentType() == DefinedAtom::typeCFI)
+ *kind = unwindFDEToFunction;
+ else
+ *kind = delta64;
+ // The offsets of the 2 relocations must match
+ if (reloc1.offset != reloc2.offset)
+ return llvm::make_error<GenericError>(
+ "paired relocs must have the same offset");
+ *addend = (int64_t)*(const little64_t *)fixupContent + offsetInAtom;
+ return llvm::Error::success();
+ case ((ARM64_RELOC_SUBTRACTOR | rExtern | rLength4) << 16 |
+ ARM64_RELOC_UNSIGNED | rExtern | rLength4):
+ // ex: .long _foo - .
+ // NOTE(review): unlike the 8-byte case above, the two relocation offsets
+ // are not compared here - confirm whether that is intentional.
+ *kind = delta32;
+ if (auto ec = atomFromSymbolIndex(reloc2.symbol, target))
+ return ec;
+ *addend = (int32_t)*(const little32_t *)fixupContent + offsetInAtom;
+ return llvm::Error::success();
+ default:
+ return llvm::make_error<GenericError>("unsupported arm64 relocation pair");
+ }
+// Produces the output bytes for `atom`: copies its raw content into
+// atomContentBuffer, then patches the copy once per reference, using
+// applyFixupRelocatable() when `relocatable` is set and applyFixupFinal()
+// otherwise. The buffer is expected to be at least as large as the atom's
+// raw content; no size check is made here.
+void ArchHandler_arm64::generateAtomContent(
+ const DefinedAtom &atom, bool relocatable, FindAddressForAtom findAddress,
+ FindAddressForAtom findSectionAddress, uint64_t imageBaseAddress,
+ llvm::MutableArrayRef<uint8_t> atomContentBuffer) {
+ // Copy raw bytes.
+ std::copy(atom.rawContent().begin(), atom.rawContent().end(),
+ atomContentBuffer.begin());
+ // Apply fix-ups.
+ // Debug-only trace, emitted for atoms that have at least one reference.
+ // NOTE(review): the matching #endif is not visible in this excerpt.
+#ifndef NDEBUG
+ if (atom.begin() != atom.end()) {
+ DEBUG_WITH_TYPE("atom-content", llvm::dbgs()
+ << "Applying fixups to atom:\n"
+ << " address="
+ << llvm::format(" 0x%09lX", &atom)
+ << ", file=#"
+ << atom.file().ordinal()
+ << ", atom=#"
+ << atom.ordinal()
+ << ", name="
+ << atom.name()
+ << ", type="
+ << atom.contentType()
+ << "\n");
+ }
+ for (const Reference *ref : atom) {
+ uint32_t offset = ref->offsetInAtom();
+ const Atom *target = ref->target();
+ bool targetUnnamed = target->name().empty();
+ // Only defined atoms are looked up; for any other target the address
+ // stays 0.
+ uint64_t targetAddress = 0;
+ if (isa<DefinedAtom>(target))
+ targetAddress = findAddress(*target);
+ uint64_t atomAddress = findAddress(atom);
+ uint64_t fixupAddress = atomAddress + offset;
+ if (relocatable) {
+ applyFixupRelocatable(*ref, &atomContentBuffer[offset], fixupAddress,
+ targetAddress, atomAddress, targetUnnamed);
+ } else {
+ applyFixupFinal(*ref, &atomContentBuffer[offset], fixupAddress,
+ targetAddress, atomAddress, imageBaseAddress,
+ findSectionAddress);
+ }
+ }
+// Writes the final (fully linked) value of `ref` into the bytes at `loc`.
+//
+//   ref                - reference being resolved; non-mach_o references are
+//                        ignored.
+//   loc                - location inside the output copy of the atom's bytes.
+//   fixupAddress       - address of `loc` in the output image.
+//   targetAddress      - address of the reference's target.
+//   inAtomAddress      - address of the containing atom (unused here).
+//   imageBaseAddress   - base address used for imageOffset fixups.
+//   findSectionAddress - callback used for unwindInfoToEhFrame fixups.
+//
+// Every handled kind returns from inside the switch; falling out of it means
+// the kind is invalid (or a GOT kind that should already have been lowered).
+void ArchHandler_arm64::applyFixupFinal(const Reference &ref, uint8_t *loc,
+                                        uint64_t fixupAddress,
+                                        uint64_t targetAddress,
+                                        uint64_t inAtomAddress,
+                                        uint64_t imageBaseAddress,
+                                        FindAddressForAtom findSectionAddress) {
+  if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+    return;
+  assert(ref.kindArch() == Reference::KindArch::AArch64);
+  ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc);
+  ulittle64_t *loc64 = reinterpret_cast<ulittle64_t *>(loc);
+  int32_t displacement;
+  // ADRP page deltas span +/-4 GiB, which does not fit in 32 bits; keep them
+  // in 64 bits so setDisplacementInADRP() sees (and range-checks) the real
+  // value instead of a truncated one.
+  int64_t pageDisplacement;
+  uint32_t instruction;
+  uint32_t value32;
+  // 64-bit so that an oversized __eh_frame offset is caught by the assert
+  // below rather than being truncated before the check.
+  uint64_t value64;
+  switch (static_cast<Arm64Kind>(ref.kindValue())) {
+  case branch26:
+    displacement = (targetAddress - fixupAddress) + ref.addend();
+    *loc32 = setDisplacementInBranch26(*loc32, displacement);
+    return;
+  case page21:
+  case gotPage21:
+  case tlvPage21:
+    // Difference between the 4 KiB page of the target and that of the fixup.
+    pageDisplacement =
+        ((targetAddress + ref.addend()) & (-4096)) - (fixupAddress & (-4096));
+    *loc32 = setDisplacementInADRP(*loc32, pageDisplacement);
+    return;
+  case offset12:
+  case gotOffset12:
+  case tlvOffset12:
+    // Offset of the target within its 4 KiB page.
+    displacement = (targetAddress + ref.addend()) & 0x00000FFF;
+    *loc32 = setImm12(*loc32, displacement);
+    return;
+  case offset12scale2:
+    displacement = (targetAddress + ref.addend()) & 0x00000FFF;
+    assert(((displacement & 0x1) == 0) &&
+           "scaled imm12 not accessing 2-byte aligneds");
+    *loc32 = setImm12(*loc32, displacement >> 1);
+    return;
+  case offset12scale4:
+    displacement = (targetAddress + ref.addend()) & 0x00000FFF;
+    assert(((displacement & 0x3) == 0) &&
+           "scaled imm12 not accessing 4-byte aligned");
+    *loc32 = setImm12(*loc32, displacement >> 2);
+    return;
+  case offset12scale8:
+    displacement = (targetAddress + ref.addend()) & 0x00000FFF;
+    assert(((displacement & 0x7) == 0) &&
+           "scaled imm12 not accessing 8-byte aligned");
+    *loc32 = setImm12(*loc32, displacement >> 3);
+    return;
+  case offset12scale16:
+    displacement = (targetAddress + ref.addend()) & 0x00000FFF;
+    assert(((displacement & 0xF) == 0) &&
+           "scaled imm12 not accessing 16-byte aligned");
+    *loc32 = setImm12(*loc32, displacement >> 4);
+    return;
+  case addOffset12:
+    // The GOT slot was optimized away: rewrite the LDR into an ADD that
+    // keeps the low 10 bits (register fields) of the original instruction.
+    instruction = *loc32;
+    assert(((instruction & 0xFFC00000) == 0xF9400000) &&
+           "GOT reloc is not an LDR instruction");
+    displacement = (targetAddress + ref.addend()) & 0x00000FFF;
+    value32 = 0x91000000 | (instruction & 0x000003FF);
+    instruction = setImm12(value32, displacement);
+    *loc32 = instruction;
+    return;
+  case pointer64:
+  case pointer64ToGOT:
+    *loc64 = targetAddress + ref.addend();
+    return;
+  case delta64:
+  case unwindFDEToFunction:
+    *loc64 = (targetAddress - fixupAddress) + ref.addend();
+    return;
+  case delta32:
+  case delta32ToGOT:
+  case unwindCIEToPersonalityFunction:
+    *loc32 = (targetAddress - fixupAddress) + ref.addend();
+    return;
+  case negDelta32:
+    *loc32 = fixupAddress - targetAddress + ref.addend();
+    return;
+  case lazyPointer:
+    // Do nothing
+    return;
+  case lazyImmediateLocation:
+    *loc32 = ref.addend();
+    return;
+  case imageOffset:
+    *loc32 = (targetAddress - imageBaseAddress) + ref.addend();
+    return;
+  case imageOffsetGot:
+    llvm_unreachable("imageOffsetGot should have been changed to imageOffset");
+    break;
+  case unwindInfoToEhFrame:
+    // Offset of the target from the start of its section, stored in the low
+    // 24 bits; the top 8 bits already at the location are preserved.
+    value64 = targetAddress - findSectionAddress(*ref.target()) + ref.addend();
+    assert(value64 < 0xffffffU && "offset in __eh_frame too large");
+    *loc32 = (*loc32 & 0xff000000U) | static_cast<uint32_t>(value64);
+    return;
+  case invalid:
+    // Fall into llvm_unreachable().
+    break;
+  }
+  llvm_unreachable("invalid arm64 Reference Kind");
+// Writes the value of `ref` into the bytes at `loc` for relocatable (-r)
+// output. Instruction immediates are zeroed and data locations get only the
+// part of the value that is stored in the content; the remaining kinds are
+// ones that must not appear in this mode and are treated as unreachable.
+// Non-mach_o references are ignored.
+void ArchHandler_arm64::applyFixupRelocatable(const Reference &ref,
+ uint8_t *loc,
+ uint64_t fixupAddress,
+ uint64_t targetAddress,
+ uint64_t inAtomAddress,
+ bool targetUnnamed) {
+ if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+ return;
+ assert(ref.kindArch() == Reference::KindArch::AArch64);
+ ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc);
+ ulittle64_t *loc64 = reinterpret_cast<ulittle64_t *>(loc);
+ switch (static_cast<Arm64Kind>(ref.kindValue())) {
+ case branch26:
+ // The instruction's displacement field is cleared.
+ *loc32 = setDisplacementInBranch26(*loc32, 0);
+ return;
+ case page21:
+ case gotPage21:
+ case tlvPage21:
+ *loc32 = setDisplacementInADRP(*loc32, 0);
+ return;
+ case offset12:
+ case offset12scale2:
+ case offset12scale4:
+ case offset12scale8:
+ case offset12scale16:
+ case gotOffset12:
+ case tlvOffset12:
+ *loc32 = setImm12(*loc32, 0);
+ return;
+ case pointer64:
+ // An unnamed target has its address folded into the content; a named
+ // target leaves only the addend there.
+ if (targetUnnamed)
+ *loc64 = targetAddress + ref.addend();
+ else
+ *loc64 = ref.addend();
+ return;
+ case delta64:
+ // inAtomAddress - fixupAddress is minus the fixup's offset in its atom.
+ *loc64 = ref.addend() + inAtomAddress - fixupAddress;
+ return;
+ case unwindFDEToFunction:
+ // We don't emit unwindFDEToFunction in -r mode as they are implicitly
+ // generated from the data in the __eh_frame section. So here we need
+ // to use the targetAddress so that we can generate the full relocation
+ // when we parse again later.
+ *loc64 = targetAddress - fixupAddress;
+ return;
+ case delta32:
+ *loc32 = ref.addend() + inAtomAddress - fixupAddress;
+ return;
+ case negDelta32:
+ // We don't emit negDelta32 in -r mode as they are implicitly
+ // generated from the data in the __eh_frame section. So here we need
+ // to use the targetAddress so that we can generate the full relocation
+ // when we parse again later.
+ *loc32 = fixupAddress - targetAddress + ref.addend();
+ return;
+ case pointer64ToGOT:
+ *loc64 = 0;
+ return;
+ case delta32ToGOT:
+ *loc32 = inAtomAddress - fixupAddress;
+ return;
+ case unwindCIEToPersonalityFunction:
+ // We don't emit unwindCIEToPersonalityFunction in -r mode as they are
+ // implicitly generated from the data in the __eh_frame section. So here we
+ // need to use the targetAddress so that we can generate the full relocation
+ // when we parse again later.
+ *loc32 = targetAddress - fixupAddress;
+ return;
+ // The kinds below are introduced by passes and are treated as impossible
+ // in relocatable output.
+ case addOffset12:
+ llvm_unreachable("lazy reference kind implies GOT pass was run");
+ case lazyPointer:
+ case lazyImmediateLocation:
+ llvm_unreachable("lazy reference kind implies Stubs pass was run");
+ case imageOffset:
+ case imageOffsetGot:
+ case unwindInfoToEhFrame:
+ llvm_unreachable("fixup implies __unwind_info");
+ return;
+ case invalid:
+ // Fall into llvm_unreachable().
+ break;
+ }
+ llvm_unreachable("unknown arm64 Reference Kind");
+// Emits the section relocation entries that describe one arm64 reference.
+// For branch26, page21 and the offset12 family a non-zero addend is carried by
+// an ARM64_RELOC_ADDEND entry appended ahead of the main relocation.  Kinds
+// that are only created by later passes are rejected as unreachable, and the
+// unwind/negDelta32 kinds emit nothing.
+void ArchHandler_arm64::appendSectionRelocations(
+    const DefinedAtom &atom, uint64_t atomSectionOffset, const Reference &ref,
+    FindSymbolIndexForAtom symbolIndexForAtom,
+    FindSectionIndexForAtom sectionIndexForAtom,
+    FindAddressForAtom addressForAtom, normalized::Relocations &relocs) {
+  // References outside the mach_o namespace produce no relocations.
+  if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+    return;
+  assert(ref.kindArch() == Reference::KindArch::AArch64);
+  uint32_t relocOffset = atomSectionOffset + ref.offsetInAtom();
+  switch (static_cast<Arm64Kind>(ref.kindValue())) {
+  case branch26:
+    // Optional addend entry first, then the branch relocation itself.
+    if (ref.addend() != 0)
+      appendReloc(relocs, relocOffset, ref.addend(), 0,
+                  ARM64_RELOC_ADDEND | rLength4);
+    appendReloc(relocs, relocOffset, symbolIndexForAtom(*ref.target()), 0,
+                ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4);
+    return;
+  case page21:
+    if (ref.addend() != 0)
+      appendReloc(relocs, relocOffset, ref.addend(), 0,
+                  ARM64_RELOC_ADDEND | rLength4);
+    appendReloc(relocs, relocOffset, symbolIndexForAtom(*ref.target()), 0,
+                ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4);
+    return;
+  case offset12:
+  case offset12scale2:
+  case offset12scale4:
+  case offset12scale8:
+  case offset12scale16:
+    // Every offset12 scale variant maps onto the same PAGEOFF12 relocation.
+    if (ref.addend() != 0)
+      appendReloc(relocs, relocOffset, ref.addend(), 0,
+                  ARM64_RELOC_ADDEND | rLength4);
+    appendReloc(relocs, relocOffset, symbolIndexForAtom(*ref.target()), 0,
+                ARM64_RELOC_PAGEOFF12 | rExtern | rLength4);
+    return;
+  case gotPage21:
+    assert(ref.addend() == 0);
+    appendReloc(relocs, relocOffset, symbolIndexForAtom(*ref.target()), 0,
+                ARM64_RELOC_GOT_LOAD_PAGE21 | rPcRel | rExtern | rLength4);
+    return;
+  case gotOffset12:
+    assert(ref.addend() == 0);
+    appendReloc(relocs, relocOffset, symbolIndexForAtom(*ref.target()), 0,
+                ARM64_RELOC_GOT_LOAD_PAGEOFF12 | rExtern | rLength4);
+    return;
+  case tlvPage21:
+    assert(ref.addend() == 0);
+    appendReloc(relocs, relocOffset, symbolIndexForAtom(*ref.target()), 0,
+                ARM64_RELOC_TLVP_LOAD_PAGE21 | rPcRel | rExtern | rLength4);
+    return;
+  case tlvOffset12:
+    assert(ref.addend() == 0);
+    appendReloc(relocs, relocOffset, symbolIndexForAtom(*ref.target()), 0,
+                ARM64_RELOC_TLVP_LOAD_PAGEOFF12 | rExtern | rLength4);
+    return;
+  case pointer64: {
+    // A named target is referenced by symbol index (rExtern); an unnamed one
+    // by the index of its section.
+    const bool targetHasName = !ref.target()->name().empty();
+    if (targetHasName)
+      appendReloc(relocs, relocOffset, symbolIndexForAtom(*ref.target()), 0,
+                  ARM64_RELOC_UNSIGNED | rExtern | rLength8);
+    else
+      appendReloc(relocs, relocOffset, sectionIndexForAtom(*ref.target()), 0,
+                  ARM64_RELOC_UNSIGNED | rLength8);
+    return;
+  }
+  case delta64:
+    // SUBTRACTOR names the containing atom, UNSIGNED names the target.
+    appendReloc(relocs, relocOffset, symbolIndexForAtom(atom), 0,
+                ARM64_RELOC_SUBTRACTOR | rExtern | rLength8);
+    appendReloc(relocs, relocOffset, symbolIndexForAtom(*ref.target()), 0,
+                ARM64_RELOC_UNSIGNED | rExtern | rLength8);
+    return;
+  case delta32:
+    appendReloc(relocs, relocOffset, symbolIndexForAtom(atom), 0,
+                ARM64_RELOC_SUBTRACTOR | rExtern | rLength4);
+    appendReloc(relocs, relocOffset, symbolIndexForAtom(*ref.target()), 0,
+                ARM64_RELOC_UNSIGNED | rExtern | rLength4);
+    return;
+  case pointer64ToGOT:
+    assert(ref.addend() == 0);
+    appendReloc(relocs, relocOffset, symbolIndexForAtom(*ref.target()), 0,
+                ARM64_RELOC_POINTER_TO_GOT | rExtern | rLength8);
+    return;
+  case delta32ToGOT:
+    assert(ref.addend() == 0);
+    appendReloc(relocs, relocOffset, symbolIndexForAtom(*ref.target()), 0,
+                ARM64_RELOC_POINTER_TO_GOT | rPcRel | rExtern | rLength4);
+    return;
+  case addOffset12:
+    llvm_unreachable("lazy reference kind implies GOT pass was run");
+  case lazyPointer:
+  case lazyImmediateLocation:
+    llvm_unreachable("lazy reference kind implies Stubs pass was run");
+  case imageOffset:
+  case imageOffsetGot:
+    llvm_unreachable("deltas from mach_header can only be in final images");
+  case unwindCIEToPersonalityFunction:
+  case unwindFDEToFunction:
+  case unwindInfoToEhFrame:
+  case negDelta32:
+    // These kinds emit no relocation entry.
+    return;
+  case invalid:
+    // Handled by the llvm_unreachable() after the switch.
+    break;
+  }
+  llvm_unreachable("unknown arm64 Reference Kind");
+// Factory: hands back a newly allocated arm64 handler owned by a unique_ptr.
+std::unique_ptr<mach_o::ArchHandler> ArchHandler::create_arm64() {
+  std::unique_ptr<mach_o::ArchHandler> handler(new ArchHandler_arm64());
+  return handler;
+} // namespace mach_o
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp
new file mode 100644
index 000000000000..a2c68092724d
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp
@@ -0,0 +1,644 @@
+//===- lib/FileFormat/MachO/ArchHandler_x86.cpp ---------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "ArchHandler.h"
+#include "Atoms.h"
+#include "MachONormalizedFileBinaryUtils.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm::MachO;
+using namespace lld::mach_o::normalized;
+namespace lld {
+namespace mach_o {
+using llvm::support::ulittle16_t;
+using llvm::support::ulittle32_t;
+using llvm::support::little16_t;
+using llvm::support::little32_t;
+// Mach-O architecture handler for 32-bit x86 (kindArch() reports
+// Reference::KindArch::x86).  Declares the x86 reference kinds and the hooks
+// that convert between relocations and references.
+// NOTE(review): no access specifiers and no closing "};" are visible in this
+// rendering of the class - confirm against the upstream file.
+class ArchHandler_x86 : public ArchHandler {
+ ArchHandler_x86() = default;
+ ~ArchHandler_x86() override = default;
+ // Returns the static _sKindStrings table.
+ const Registry::KindStrings *kindStrings() override { return _sKindStrings; }
+ Reference::KindArch kindArch() override { return Reference::KindArch::x86; }
+ // Returns the static _sStubInfo description.
+ const StubInfo &stubInfo() override { return _sStubInfo; }
+ bool isCallSite(const Reference &) override;
+ // No x86 reference kind is reported as a non-call branch.
+ bool isNonCallBranch(const Reference &) override {
+ return false;
+ }
+ bool isPointer(const Reference &) override;
+ bool isPairedReloc(const normalized::Relocation &) override;
+ // This handler reports that it does not need compact unwind.
+ bool needsCompactUnwind() override {
+ return false;
+ }
+ // The following kind queries return `invalid` where this handler defines no
+ // corresponding x86 kind.
+ Reference::KindValue imageOffsetKind() override {
+ return invalid;
+ }
+ Reference::KindValue imageOffsetKindIndirect() override {
+ return invalid;
+ }
+ Reference::KindValue unwindRefToPersonalityFunctionKind() override {
+ return invalid;
+ }
+ Reference::KindValue unwindRefToCIEKind() override {
+ return negDelta32;
+ }
+ Reference::KindValue unwindRefToFunctionKind() override{
+ return delta32;
+ }
+ Reference::KindValue lazyImmediateLocationKind() override {
+ return lazyImmediateLocation;
+ }
+ Reference::KindValue unwindRefToEhFrameKind() override {
+ return invalid;
+ }
+ Reference::KindValue pointerKind() override {
+ return invalid;
+ }
+ uint32_t dwarfCompactUnwindType() override {
+ return 0x04000000U;
+ }
+ // Decodes a single relocation into *kind, *target and *addend.
+ llvm::Error getReferenceInfo(const normalized::Relocation &reloc,
+ const DefinedAtom *inAtom,
+ uint32_t offsetInAtom,
+ uint64_t fixupAddress, bool swap,
+ FindAtomBySectionAndAddress atomFromAddress,
+ FindAtomBySymbolIndex atomFromSymbolIndex,
+ Reference::KindValue *kind,
+ const lld::Atom **target,
+ Reference::Addend *addend) override;
+ // Decodes a two-entry relocation (reloc1 followed by reloc2) into *kind,
+ // *target and *addend.
+ llvm::Error
+ getPairReferenceInfo(const normalized::Relocation &reloc1,
+ const normalized::Relocation &reloc2,
+ const DefinedAtom *inAtom,
+ uint32_t offsetInAtom,
+ uint64_t fixupAddress, bool swap, bool scatterable,
+ FindAtomBySectionAndAddress atomFromAddress,
+ FindAtomBySymbolIndex atomFromSymbolIndex,
+ Reference::KindValue *kind,
+ const lld::Atom **target,
+ Reference::Addend *addend) override;
+ // Copies the atom's raw bytes into atomContentBuffer and applies its
+ // references, using the relocatable or the final fixup routine depending on
+ // `relocatable`.
+ void generateAtomContent(const DefinedAtom &atom, bool relocatable,
+ FindAddressForAtom findAddress,
+ FindAddressForAtom findSectionAddress,
+ uint64_t imageBaseAddress,
+ llvm::MutableArrayRef<uint8_t> atomContentBuffer) override;
+ // Appends to `relocs` the relocation entries describing `ref`.
+ void appendSectionRelocations(const DefinedAtom &atom,
+ uint64_t atomSectionOffset,
+ const Reference &ref,
+ FindSymbolIndexForAtom symbolIndexForAtom,
+ FindSectionIndexForAtom sectionIndexForAtom,
+ FindAddressForAtom addressForAtom,
+ normalized::Relocations &relocs) override;
+ // modeCode and modeData are the two data-in-code transition kinds.
+ bool isDataInCodeTransition(Reference::KindValue refKind) override {
+ return refKind == modeCode || refKind == modeData;
+ }
+ // A data-in-code range starts with modeData ...
+ Reference::KindValue dataInCodeTransitionStart(
+ const MachODefinedAtom &atom) override {
+ return modeData;
+ }
+ // ... and ends with modeCode.
+ Reference::KindValue dataInCodeTransitionEnd(
+ const MachODefinedAtom &atom) override {
+ return modeCode;
+ }
+ static const Registry::KindStrings _sKindStrings[];
+ static const StubInfo _sStubInfo;
+ // Reference kinds understood by this handler.
+ enum X86Kind : Reference::KindValue {
+ invalid, /// for error condition
+ modeCode, /// Content starting at this offset is code.
+ modeData, /// Content starting at this offset is data.
+ // Kinds found in mach-o .o files:
+ branch32, /// ex: call _foo
+ branch16, /// ex: callw _foo
+ abs32, /// ex: movl _foo, %eax
+ funcRel32, /// ex: movl _foo-L1(%eax), %eax
+ pointer32, /// ex: .long _foo
+ delta32, /// ex: .long _foo - .
+ negDelta32, /// ex: .long . - _foo
+ // Kinds introduced by Passes:
+ lazyPointer, /// Location contains a lazy pointer.
+ lazyImmediateLocation, /// Location contains immediate value used in stub.
+ };
+ // True when `target` must be referenced through an external relocation.
+ static bool useExternalRelocationTo(const Atom &target);
+ // Writes the fixup value for one reference when producing a final image.
+ void applyFixupFinal(const Reference &ref, uint8_t *location,
+ uint64_t fixupAddress, uint64_t targetAddress,
+ uint64_t inAtomAddress);
+ // Writes the fixup value for one reference when producing relocatable
+ // output.
+ void applyFixupRelocatable(const Reference &ref, uint8_t *location,
+ uint64_t fixupAddress,
+ uint64_t targetAddress,
+ uint64_t inAtomAddress);
+// ArchHandler_x86
+// Kind-name table returned by kindStrings(); each row is produced by
+// LLD_KIND_STRING_ENTRY from an X86Kind enumerator.
+// NOTE(review): only lazyImmediateLocation is listed here although X86Kind
+// declares twelve kinds, and no table terminator or closing "};" is visible -
+// rows appear to be missing from this rendering; verify against upstream.
+const Registry::KindStrings ArchHandler_x86::_sKindStrings[] = {
+ LLD_KIND_STRING_ENTRY(lazyImmediateLocation),
+// Stub description returned by stubInfo(): the binder symbol name, the
+// reference descriptors and the raw instruction bytes of the stub, stub helper
+// and stub-helper-common atoms.  The initialiser is positional, so the order
+// of the entries below is significant.
+// NOTE(review): the meaning of each { arch, kind, offset, addend }-shaped row
+// and of the { false, 0, 0, 0 } rows is inferred from the inline comments
+// only - confirm against the StubInfo declaration.
+const ArchHandler::StubInfo ArchHandler_x86::_sStubInfo = {
+ "dyld_stub_binder",
+ // Lazy pointer references
+ { Reference::KindArch::x86, pointer32, 0, 0 },
+ { Reference::KindArch::x86, lazyPointer, 0, 0 },
+ // GOT pointer to dyld_stub_binder
+ { Reference::KindArch::x86, pointer32, 0, 0 },
+ // x86 code alignment
+ 1,
+ // Stub size and code
+ 6,
+ { 0xff, 0x25, 0x00, 0x00, 0x00, 0x00 }, // jmp *lazyPointer
+ // abs32 reference at byte 2, i.e. the 4-byte operand after 0xff 0x25.
+ { Reference::KindArch::x86, abs32, 2, 0 },
+ { false, 0, 0, 0 },
+ // Stub Helper size and code
+ 10,
+ { 0x68, 0x00, 0x00, 0x00, 0x00, // pushl $lazy-info-offset
+ 0xE9, 0x00, 0x00, 0x00, 0x00 }, // jmp helperhelper
+ // Byte 1 is the pushl immediate; byte 6 is the jmp displacement.
+ { Reference::KindArch::x86, lazyImmediateLocation, 1, 0 },
+ { Reference::KindArch::x86, branch32, 6, 0 },
+ // Stub helper image cache content type
+ DefinedAtom::typeNonLazyPointer,
+ // Stub Helper-Common size and code
+ 12,
+ // Stub helper alignment
+ 2,
+ { 0x68, 0x00, 0x00, 0x00, 0x00, // pushl $dyld_ImageLoaderCache
+ 0xFF, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *_fast_lazy_bind
+ 0x90 }, // nop
+ // Byte 1 is the pushl immediate; byte 7 is the jmp operand.
+ { Reference::KindArch::x86, abs32, 1, 0 },
+ { false, 0, 0, 0 },
+ { Reference::KindArch::x86, abs32, 7, 0 },
+ { false, 0, 0, 0 }
+// A reference is a call site exactly when its kind is branch32.
+bool ArchHandler_x86::isCallSite(const Reference &ref) {
+  const Reference::KindValue kind = ref.kindValue();
+  return kind == branch32;
+// A reference is a pointer exactly when its kind is pointer32.
+bool ArchHandler_x86::isPointer(const Reference &ref) {
+  const Reference::KindValue kind = ref.kindValue();
+  return kind == pointer32;
+// Reports whether `reloc` is the first half of a two-entry relocation: only
+// scattered SECTDIFF and LOCAL_SECTDIFF entries qualify.
+bool ArchHandler_x86::isPairedReloc(const Relocation &reloc) {
+  const bool isSectDiff = reloc.type == GENERIC_RELOC_SECTDIFF ||
+                          reloc.type == GENERIC_RELOC_LOCAL_SECTDIFF;
+  return reloc.scattered && isSectDiff;
+// Converts one unpaired relocation into a reference: stores the reference kind
+// in *kind, the referenced atom in *target and the addend in *addend.  The
+// relocation is classified with relocPattern(reloc); a pattern without a
+// matching case yields GenericError("unsupported i386 relocation type").
+// `swap` is not read in this body.
+// NOTE(review): the return-type line is not visible here; the in-class
+// declaration gives it as llvm::Error.
+ArchHandler_x86::getReferenceInfo(const Relocation &reloc,
+ const DefinedAtom *inAtom,
+ uint32_t offsetInAtom,
+ uint64_t fixupAddress, bool swap,
+ FindAtomBySectionAndAddress atomFromAddress,
+ FindAtomBySymbolIndex atomFromSymbolIndex,
+ Reference::KindValue *kind,
+ const lld::Atom **target,
+ Reference::Addend *addend) {
+ DefinedAtom::ContentPermissions perms;
+ // Bytes currently stored at the fixup location inside the atom.
+ const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom];
+ uint64_t targetAddress;
+ switch (relocPattern(reloc)) {
+ case GENERIC_RELOC_VANILLA | rPcRel | rExtern | rLength4:
+ // ex: call _foo (and _foo undefined)
+ *kind = branch32;
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ // Addend = (fixupAddress + 4) + the signed 32-bit value stored at the fixup.
+ *addend = fixupAddress + 4 + (int32_t)*(const little32_t *)fixupContent;
+ break;
+ case GENERIC_RELOC_VANILLA | rPcRel | rLength4:
+ // ex: call _foo (and _foo defined)
+ *kind = branch32;
+ targetAddress =
+ fixupAddress + 4 + (int32_t) * (const little32_t *)fixupContent;
+ // atomFromAddress fills in both *target and *addend for this address.
+ return atomFromAddress(reloc.symbol, targetAddress, target, addend);
+ break;
+ case GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength4:
+ // ex: call _foo+n (and _foo defined)
+ *kind = branch32;
+ targetAddress =
+ fixupAddress + 4 + (int32_t) * (const little32_t *)fixupContent;
+ // The target atom is looked up from reloc.value; the addend written by the
+ // lookup is then replaced by the distance from reloc.value.
+ if (auto ec = atomFromAddress(0, reloc.value, target, addend))
+ return ec;
+ *addend = targetAddress - reloc.value;
+ break;
+ case GENERIC_RELOC_VANILLA | rPcRel | rExtern | rLength2:
+ // ex: callw _foo (and _foo undefined)
+ *kind = branch16;
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ // Same as the 32-bit case, with a 2-byte stored value.
+ *addend = fixupAddress + 2 + (int16_t)*(const little16_t *)fixupContent;
+ break;
+ case GENERIC_RELOC_VANILLA | rPcRel | rLength2:
+ // ex: callw _foo (and _foo defined)
+ *kind = branch16;
+ targetAddress =
+ fixupAddress + 2 + (int16_t) * (const little16_t *)fixupContent;
+ return atomFromAddress(reloc.symbol, targetAddress, target, addend);
+ break;
+ case GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength2:
+ // ex: callw _foo+n (and _foo defined)
+ *kind = branch16;
+ targetAddress =
+ fixupAddress + 2 + (int16_t) * (const little16_t *)fixupContent;
+ if (auto ec = atomFromAddress(0, reloc.value, target, addend))
+ return ec;
+ *addend = targetAddress - reloc.value;
+ break;
+ case GENERIC_RELOC_VANILLA | rExtern | rLength4:
+ // ex: movl _foo, %eax (and _foo undefined)
+ // ex: .long _foo (and _foo undefined)
+ // Atoms whose permissions include permR_X get abs32, all others pointer32.
+ perms = inAtom->permissions();
+ *kind =
+ ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) ? abs32
+ : pointer32;
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ // The stored 32-bit value is the addend.
+ *addend = *(const ulittle32_t *)fixupContent;
+ break;
+ case GENERIC_RELOC_VANILLA | rLength4:
+ // ex: movl _foo, %eax (and _foo defined)
+ // ex: .long _foo (and _foo defined)
+ perms = inAtom->permissions();
+ *kind =
+ ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) ? abs32
+ : pointer32;
+ // The stored 32-bit value is the target address.
+ targetAddress = *(const ulittle32_t *)fixupContent;
+ return atomFromAddress(reloc.symbol, targetAddress, target, addend);
+ break;
+ case GENERIC_RELOC_VANILLA | rScattered | rLength4:
+ // ex: .long _foo+n (and _foo defined)
+ perms = inAtom->permissions();
+ *kind =
+ ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) ? abs32
+ : pointer32;
+ if (auto ec = atomFromAddress(0, reloc.value, target, addend))
+ return ec;
+ *addend = *(const ulittle32_t *)fixupContent - reloc.value;
+ break;
+ default:
+ return llvm::make_error<GenericError>("unsupported i386 relocation type");
+ }
+ return llvm::Error::success();
+// Converts a two-entry relocation (a scattered SECTDIFF or LOCAL_SECTDIFF in
+// reloc1 followed by a scattered PAIR in reloc2) into a reference, storing the
+// result in *kind, *target and *addend.  reloc1.value is the "to" address and
+// reloc2.value the "from" address.  Any other combination yields
+// GenericError("unsupported i386 relocation type").  `swap`, `scatterable`,
+// `fixupAddress` and `atomFromSymbolIndex` are not read in this body.
+// NOTE(review): the return-type line is not visible here; the in-class
+// declaration gives it as llvm::Error.
+ArchHandler_x86::getPairReferenceInfo(const normalized::Relocation &reloc1,
+ const normalized::Relocation &reloc2,
+ const DefinedAtom *inAtom,
+ uint32_t offsetInAtom,
+ uint64_t fixupAddress, bool swap,
+ bool scatterable,
+ FindAtomBySectionAndAddress atomFromAddr,
+ FindAtomBySymbolIndex atomFromSymbolIndex,
+ Reference::KindValue *kind,
+ const lld::Atom **target,
+ Reference::Addend *addend) {
+ const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom];
+ DefinedAtom::ContentPermissions perms = inAtom->permissions();
+ uint32_t fromAddress;
+ uint32_t toAddress;
+ uint32_t value;
+ const lld::Atom *fromTarget;
+ Reference::Addend offsetInTo;
+ Reference::Addend offsetInFrom;
+ // Both 16-bit patterns are packed into one value so the pair can be matched
+ // with a single switch.
+ switch (relocPattern(reloc1) << 16 | relocPattern(reloc2)) {
+ case ((GENERIC_RELOC_SECTDIFF | rScattered | rLength4) << 16 |
+ GENERIC_RELOC_PAIR | rScattered | rLength4):
+ case ((GENERIC_RELOC_LOCAL_SECTDIFF | rScattered | rLength4) << 16 |
+ GENERIC_RELOC_PAIR | rScattered | rLength4):
+ toAddress = reloc1.value;
+ fromAddress = reloc2.value;
+ // 32-bit value currently stored at the fixup location.
+ value = *(const little32_t *)fixupContent;
+ if (auto ec = atomFromAddr(0, toAddress, target, &offsetInTo))
+ return ec;
+ if (auto ec = atomFromAddr(0, fromAddress, &fromTarget, &offsetInFrom))
+ return ec;
+ if (fromTarget != inAtom) {
+ // The "from" side lies in another atom, so the "to" side must be this
+ // atom; the reference then becomes a negDelta32 to the "from" atom.
+ if (*target != inAtom)
+ return llvm::make_error<GenericError>(
+ "SECTDIFF relocation where neither target is in atom");
+ *kind = negDelta32;
+ *addend = toAddress - value - fromAddress;
+ *target = fromTarget;
+ } else {
+ if ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) {
+ // SECTDIFF relocations are used in i386 codegen where the function
+ // prolog does a CALL to the next instruction which POPs the return
+ // address into EBX which becomes the pic-base register. The POP
+ // instruction is the label used for the subtrahend in expressions.
+ // The funcRel32 kind represents the 32-bit delta to some symbol from
+ // the start of the function (atom) containing the funcRel32.
+ *kind = funcRel32;
+ // Computed in 32 bits (uint32_t), so the intermediate wraps modulo 2^32.
+ uint32_t ta = fromAddress + value - toAddress;
+ *addend = ta - offsetInFrom;
+ } else {
+ *kind = delta32;
+ *addend = fromAddress + value - toAddress;
+ }
+ }
+ return llvm::Error::success();
+ break;
+ default:
+ return llvm::make_error<GenericError>("unsupported i386 relocation type");
+ }
+// Fills atomContentBuffer with the atom's bytes and then patches every
+// reference of the atom in place.  `relocatable` selects between the
+// relocatable and the final fixup routine.  findSectionAddress and
+// imageBaseAddress are not read here.
+void ArchHandler_x86::generateAtomContent(
+    const DefinedAtom &atom, bool relocatable, FindAddressForAtom findAddress,
+    FindAddressForAtom findSectionAddress, uint64_t imageBaseAddress,
+    llvm::MutableArrayRef<uint8_t> atomContentBuffer) {
+  // Start from an unmodified copy of the atom's content.
+  std::copy(atom.rawContent().begin(), atom.rawContent().end(),
+            atomContentBuffer.begin());
+  // Patch each reference.
+  for (const Reference *fixup : atom) {
+    const uint32_t fixupOffset = fixup->offsetInAtom();
+    const Atom *fixupTarget = fixup->target();
+    // Only defined atoms have an address; anything else is treated as 0.
+    const uint64_t targetAddr =
+        isa<DefinedAtom>(fixupTarget) ? findAddress(*fixupTarget) : 0;
+    const uint64_t atomAddr = findAddress(atom);
+    const uint64_t fixupAddr = atomAddr + fixupOffset;
+    uint8_t *fixupLocation = &atomContentBuffer[fixupOffset];
+    if (!relocatable) {
+      applyFixupFinal(*fixup, fixupLocation, fixupAddr, targetAddr, atomAddr);
+      continue;
+    }
+    applyFixupRelocatable(*fixup, fixupLocation, fixupAddr, targetAddr,
+                          atomAddr);
+  }
+// Patches the fixup location of one reference when producing a final image.
+//   ref           - reference to apply; references outside the mach_o
+//                   namespace are ignored.
+//   loc           - first byte of the fixup inside the atom's content buffer.
+//   fixupAddress  - address of the fixup location.
+//   targetAddress - address of the referenced atom.
+//   inAtomAddress - address of the atom that contains the fixup.
+void ArchHandler_x86::applyFixupFinal(const Reference &ref, uint8_t *loc,
+                                      uint64_t fixupAddress,
+                                      uint64_t targetAddress,
+                                      uint64_t inAtomAddress) {
+  if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+    return;
+  assert(ref.kindArch() == Reference::KindArch::x86);
+  ulittle16_t *loc16 = reinterpret_cast<ulittle16_t *>(loc);
+  ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc);
+  switch (static_cast<X86Kind>(ref.kindValue())) {
+  case branch32:
+    // Displacement is relative to the end of the 4-byte field.
+    *loc32 = (targetAddress - (fixupAddress + 4)) + ref.addend();
+    break;
+  case branch16:
+    // A branch16 fixup is a 2-byte displacement (it is read back as
+    // little16_t), so store exactly 16 bits.  Storing through loc32 here would
+    // also overwrite the two bytes that follow the displacement.
+    *loc16 = (targetAddress - (fixupAddress + 2)) + ref.addend();
+    break;
+  case pointer32:
+  case abs32:
+    *loc32 = targetAddress + ref.addend();
+    break;
+  case funcRel32:
+    // Delta from the start of the containing atom.
+    *loc32 = targetAddress - inAtomAddress + ref.addend();
+    break;
+  case delta32:
+    *loc32 = targetAddress - fixupAddress + ref.addend();
+    break;
+  case negDelta32:
+    *loc32 = fixupAddress - targetAddress + ref.addend();
+    break;
+  case modeCode:
+  case modeData:
+  case lazyPointer:
+    // do nothing
+    break;
+  case lazyImmediateLocation:
+    // The addend itself is the value stored at the location.
+    *loc32 = ref.addend();
+    break;
+  case invalid:
+    llvm_unreachable("invalid x86 Reference Kind");
+    break;
+  }
+// Patches the fixup location of one reference when producing relocatable
+// output.  References outside the mach_o namespace are ignored.  For the
+// branch kinds the stored value depends on whether the target is referenced
+// through an external relocation (see useExternalRelocationTo): if so, the
+// target address is left out of the stored value.
+void ArchHandler_x86::applyFixupRelocatable(const Reference &ref,
+ uint8_t *loc,
+ uint64_t fixupAddress,
+ uint64_t targetAddress,
+ uint64_t inAtomAddress) {
+ if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+ return;
+ assert(ref.kindArch() == Reference::KindArch::x86);
+ bool useExternalReloc = useExternalRelocationTo(*ref.target());
+ // Two views of the same location: 16-bit for branch16, 32-bit for the rest.
+ ulittle16_t *loc16 = reinterpret_cast<ulittle16_t *>(loc);
+ ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc);
+ switch (static_cast<X86Kind>(ref.kindValue())) {
+ case branch32:
+ if (useExternalReloc)
+ *loc32 = ref.addend() - (fixupAddress + 4);
+ else
+ *loc32 =(targetAddress - (fixupAddress+4)) + ref.addend();
+ break;
+ case branch16:
+ if (useExternalReloc)
+ *loc16 = ref.addend() - (fixupAddress + 2);
+ else
+ *loc16 = (targetAddress - (fixupAddress+2)) + ref.addend();
+ break;
+ case pointer32:
+ case abs32:
+ // NOTE(review): unlike the branch kinds, useExternalReloc is not consulted
+ // here - confirm this is intended.
+ *loc32 = targetAddress + ref.addend();
+ break;
+ case funcRel32:
+ *loc32 = targetAddress - inAtomAddress + ref.addend(); // FIXME
+ break;
+ case delta32:
+ *loc32 = targetAddress - fixupAddress + ref.addend();
+ break;
+ case negDelta32:
+ *loc32 = fixupAddress - targetAddress + ref.addend();
+ break;
+ case modeCode:
+ case modeData:
+ case lazyPointer:
+ case lazyImmediateLocation:
+ // do nothing
+ break;
+ case invalid:
+ llvm_unreachable("invalid x86 Reference Kind");
+ break;
+ }
+// Decides whether `target` has to be referenced through an external
+// relocation.  That is the case for undefined atoms, tentative definitions
+// and global weak definitions; every other target is referenced through an
+// internal relocation.
+bool ArchHandler_x86::useExternalRelocationTo(const Atom &target) {
+  if (isa<UndefinedAtom>(&target))
+    return true;
+  const DefinedAtom *definition = dyn_cast<DefinedAtom>(&target);
+  if (!definition)
+    return false;
+  const auto mergeMode = definition->merge();
+  // Tentative definitions are always external.
+  if (mergeMode == DefinedAtom::mergeAsTentative)
+    return true;
+  // Weak definitions are external only when they have global scope.
+  if (mergeMode == DefinedAtom::mergeAsWeak ||
+      mergeMode == DefinedAtom::mergeAsWeakAndAddressUsed)
+    return definition->scope() == DefinedAtom::scopeGlobal;
+  return false;
+// Appends to `relocs` the relocation entries that describe one x86 reference.
+//   atom              - atom containing the reference.
+//   atomSectionOffset - offset of that atom within its section; the
+//                       relocation offset is this plus ref.offsetInAtom().
+//   ref               - reference to describe; references outside the mach_o
+//                       namespace are ignored.
+//   symbolIndexForAtom / sectionIndexForAtom / addressForAtom - lookups used
+//                       to fill in the symbol, section or value fields.
+// Targets that need an external relocation (see useExternalRelocationTo) are
+// named by symbol index.  Other targets use a scattered relocation carrying
+// the target address when the addend is non-zero, and a plain section-index
+// relocation otherwise.
+void ArchHandler_x86::appendSectionRelocations(
+    const DefinedAtom &atom, uint64_t atomSectionOffset, const Reference &ref,
+    FindSymbolIndexForAtom symbolIndexForAtom,
+    FindSectionIndexForAtom sectionIndexForAtom,
+    FindAddressForAtom addressForAtom, normalized::Relocations &relocs) {
+  if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+    return;
+  assert(ref.kindArch() == Reference::KindArch::x86);
+  uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom();
+  bool useExternalReloc = useExternalRelocationTo(*ref.target());
+  switch (static_cast<X86Kind>(ref.kindValue())) {
+  case modeCode:
+  case modeData:
+    // Data-in-code markers emit no relocation.
+    break;
+  case branch32:
+    if (useExternalReloc) {
+      appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+                  GENERIC_RELOC_VANILLA | rExtern | rPcRel | rLength4);
+    } else {
+      if (ref.addend() != 0)
+        appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
+                    GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength4);
+      else
+        appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),
+                    0, GENERIC_RELOC_VANILLA | rPcRel | rLength4);
+    }
+    break;
+  case branch16:
+    if (useExternalReloc) {
+      appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+                  GENERIC_RELOC_VANILLA | rExtern | rPcRel | rLength2);
+    } else {
+      if (ref.addend() != 0)
+        appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
+                    GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength2);
+      else
+        appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),
+                    0, GENERIC_RELOC_VANILLA | rPcRel | rLength2);
+    }
+    break;
+  case pointer32:
+  case abs32:
+    if (useExternalReloc)
+      appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+                  GENERIC_RELOC_VANILLA | rExtern | rLength4);
+    else {
+      if (ref.addend() != 0)
+        appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
+                    GENERIC_RELOC_VANILLA | rScattered | rLength4);
+      else
+        // Non-scattered, non-extern 4-byte VANILLA: the same pattern that
+        // getReferenceInfo decodes for a defined target with no addend.
+        appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),
+                    0, GENERIC_RELOC_VANILLA | rLength4);
+    }
+    break;
+  case funcRel32:
+    // SECTDIFF names the target; its PAIR carries the subtrahend address.
+    appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
+                GENERIC_RELOC_SECTDIFF | rScattered | rLength4);
+    appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) - ref.addend(),
+                GENERIC_RELOC_PAIR | rScattered | rLength4);
+    break;
+  case delta32:
+    // target - fixup location.
+    appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
+                GENERIC_RELOC_SECTDIFF | rScattered | rLength4);
+    appendReloc(relocs, sectionOffset, 0,
+                addressForAtom(atom) + ref.offsetInAtom(),
+                GENERIC_RELOC_PAIR | rScattered | rLength4);
+    break;
+  case negDelta32:
+    // fixup location - target: same pair with the two sides swapped.
+    appendReloc(relocs, sectionOffset, 0,
+                addressForAtom(atom) + ref.offsetInAtom(),
+                GENERIC_RELOC_SECTDIFF | rScattered | rLength4);
+    appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
+                GENERIC_RELOC_PAIR | rScattered | rLength4);
+    break;
+  case lazyPointer:
+  case lazyImmediateLocation:
+    llvm_unreachable("lazy reference kind implies Stubs pass was run");
+    break;
+  case invalid:
+    llvm_unreachable("unknown x86 Reference Kind");
+    break;
+  }
+// Factory: hands back a newly allocated x86 handler owned by a unique_ptr.
+std::unique_ptr<mach_o::ArchHandler> ArchHandler::create_x86() {
+  std::unique_ptr<mach_o::ArchHandler> handler(new ArchHandler_x86());
+  return handler;
+} // namespace mach_o
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp
new file mode 100644
index 000000000000..fba3d530e484
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp
@@ -0,0 +1,866 @@
+//===- lib/FileFormat/MachO/ArchHandler_x86_64.cpp ------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "ArchHandler.h"
+#include "Atoms.h"
+#include "MachONormalizedFileBinaryUtils.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm::MachO;
+using namespace lld::mach_o::normalized;
+namespace lld {
+namespace mach_o {
+using llvm::support::ulittle32_t;
+using llvm::support::ulittle64_t;
+using llvm::support::little32_t;
+using llvm::support::little64_t;
+class ArchHandler_x86_64 : public ArchHandler {
+ ArchHandler_x86_64() = default;
+ ~ArchHandler_x86_64() override = default;
+ const Registry::KindStrings *kindStrings() override { return _sKindStrings; }
+ Reference::KindArch kindArch() override {
+ return Reference::KindArch::x86_64;
+ }
+ /// Used by GOTPass to locate GOT References
+ bool isGOTAccess(const Reference &ref, bool &canBypassGOT) override {
+ if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+ return false;
+ assert(ref.kindArch() == Reference::KindArch::x86_64);
+ switch (ref.kindValue()) {
+ case ripRel32GotLoad:
+ canBypassGOT = true;
+ return true;
+ case ripRel32Got:
+ canBypassGOT = false;
+ return true;
+ case imageOffsetGot:
+ canBypassGOT = false;
+ return true;
+ default:
+ return false;
+ }
+ }
+ bool isTLVAccess(const Reference &ref) const override {
+ assert(ref.kindNamespace() == Reference::KindNamespace::mach_o);
+ assert(ref.kindArch() == Reference::KindArch::x86_64);
+ return ref.kindValue() == ripRel32Tlv;
+ }
+ void updateReferenceToTLV(const Reference *ref) override {
+ assert(ref->kindNamespace() == Reference::KindNamespace::mach_o);
+ assert(ref->kindArch() == Reference::KindArch::x86_64);
+ assert(ref->kindValue() == ripRel32Tlv);
+ const_cast<Reference*>(ref)->setKindValue(ripRel32);
+ }
+ /// Used by GOTPass to update GOT References
+ void updateReferenceToGOT(const Reference *ref, bool targetNowGOT) override {
+ assert(ref->kindNamespace() == Reference::KindNamespace::mach_o);
+ assert(ref->kindArch() == Reference::KindArch::x86_64);
+ switch (ref->kindValue()) {
+ case ripRel32Got:
+ assert(targetNowGOT && "target must be GOT");
+ case ripRel32GotLoad:
+ const_cast<Reference *>(ref)
+ ->setKindValue(targetNowGOT ? ripRel32 : ripRel32GotLoadNowLea);
+ break;
+ case imageOffsetGot:
+ const_cast<Reference *>(ref)->setKindValue(imageOffset);
+ break;
+ default:
+ llvm_unreachable("unknown GOT reference kind");
+ }
+ }
+ bool needsCompactUnwind() override {
+ return true;
+ }
+ Reference::KindValue imageOffsetKind() override {
+ return imageOffset;
+ }
+ Reference::KindValue imageOffsetKindIndirect() override {
+ return imageOffsetGot;
+ }
+ Reference::KindValue unwindRefToPersonalityFunctionKind() override {
+ return ripRel32Got;
+ }
+ Reference::KindValue unwindRefToCIEKind() override {
+ return negDelta32;
+ }
+ Reference::KindValue unwindRefToFunctionKind() override{
+ return unwindFDEToFunction;
+ }
+ Reference::KindValue lazyImmediateLocationKind() override {
+ return lazyImmediateLocation;
+ }
+ Reference::KindValue unwindRefToEhFrameKind() override {
+ return unwindInfoToEhFrame;
+ }
+ Reference::KindValue pointerKind() override {
+ return pointer64;
+ }
+ uint32_t dwarfCompactUnwindType() override {
+ return 0x04000000U;
+ }
+ const StubInfo &stubInfo() override { return _sStubInfo; }
+ bool isNonCallBranch(const Reference &) override {
+ return false;
+ }
+ bool isCallSite(const Reference &) override;
+ bool isPointer(const Reference &) override;
+ bool isPairedReloc(const normalized::Relocation &) override;
+ llvm::Error getReferenceInfo(const normalized::Relocation &reloc,
+ const DefinedAtom *inAtom,
+ uint32_t offsetInAtom,
+ uint64_t fixupAddress, bool swap,
+ FindAtomBySectionAndAddress atomFromAddress,
+ FindAtomBySymbolIndex atomFromSymbolIndex,
+ Reference::KindValue *kind,
+ const lld::Atom **target,
+ Reference::Addend *addend) override;
+ llvm::Error
+ getPairReferenceInfo(const normalized::Relocation &reloc1,
+ const normalized::Relocation &reloc2,
+ const DefinedAtom *inAtom,
+ uint32_t offsetInAtom,
+ uint64_t fixupAddress, bool swap, bool scatterable,
+ FindAtomBySectionAndAddress atomFromAddress,
+ FindAtomBySymbolIndex atomFromSymbolIndex,
+ Reference::KindValue *kind,
+ const lld::Atom **target,
+ Reference::Addend *addend) override;
+ bool needsLocalSymbolInRelocatableFile(const DefinedAtom *atom) override {
+ return (atom->contentType() == DefinedAtom::typeCString);
+ }
+ void generateAtomContent(const DefinedAtom &atom, bool relocatable,
+ FindAddressForAtom findAddress,
+ FindAddressForAtom findSectionAddress,
+ uint64_t imageBase,
+ llvm::MutableArrayRef<uint8_t> atomContentBuffer) override;
+ void appendSectionRelocations(const DefinedAtom &atom,
+ uint64_t atomSectionOffset,
+ const Reference &ref,
+ FindSymbolIndexForAtom symbolIndexForAtom,
+ FindSectionIndexForAtom sectionIndexForAtom,
+ FindAddressForAtom addressForAtom,
+ normalized::Relocations &relocs) override;
+ static const Registry::KindStrings _sKindStrings[];
+ static const StubInfo _sStubInfo;
+ enum X86_64Kind: Reference::KindValue { // Reference kinds understood by the x86_64 handler.
+ invalid, /// for error condition
+ // Kinds found in mach-o .o files:
+ branch32, /// ex: call _foo
+ ripRel32, /// ex: movq _foo(%rip), %rax
+ ripRel32Minus1, /// ex: movb $0x12, _foo(%rip)
+ ripRel32Minus2, /// ex: movw $0x1234, _foo(%rip)
+ ripRel32Minus4, /// ex: movl $0x12345678, _foo(%rip)
+ ripRel32Anon, /// ex: movq L1(%rip), %rax
+ ripRel32Minus1Anon, /// ex: movb $0x12, L1(%rip)
+ ripRel32Minus2Anon, /// ex: movw $0x1234, L1(%rip)
+ ripRel32Minus4Anon, /// ex: movl $0x12345678, L1(%rip)
+ ripRel32GotLoad, /// ex: movq _foo@GOTPCREL(%rip), %rax
+ ripRel32Got, /// ex: pushq _foo@GOTPCREL(%rip)
+ ripRel32Tlv, /// ex: movq _foo@TLVP(%rip), %rdi
+ pointer64, /// ex: .quad _foo
+ pointer64Anon, /// ex: .quad L1
+ delta64, /// ex: .quad _foo - .
+ delta32, /// ex: .long _foo - .
+ delta64Anon, /// ex: .quad L1 - .
+ delta32Anon, /// ex: .long L1 - .
+ negDelta64, /// ex: .quad . - _foo
+ negDelta32, /// ex: .long . - _foo
+ // Kinds introduced by Passes:
+ ripRel32GotLoadNowLea, /// Target of GOT load is in linkage unit so
+ /// "movq _foo@GOTPCREL(%rip), %rax" can be changed
+ /// to "leaq _foo(%rip), %rax"
+ lazyPointer, /// Location contains a lazy pointer.
+ lazyImmediateLocation, /// Location contains immediate value used in stub.
+ imageOffset, /// Location contains offset of atom in final image
+ imageOffsetGot, /// Location contains offset of GOT entry for atom in
+ /// final image (typically personality function).
+ unwindFDEToFunction, /// Nearly delta64, but cannot be rematerialized in
+ /// relocatable object (yay for implicit contracts!).
+ unwindInfoToEhFrame, /// Fix low 24 bits of compact unwind encoding to
+ /// refer to __eh_frame entry.
+ tlvInitSectionOffset /// Location contains offset of the tlv init-value atom
+ /// within the __thread_data section.
+ };
+ Reference::KindValue kindFromReloc(const normalized::Relocation &reloc); // Maps a relocation pattern to an X86_64Kind; invalid if unrecognized.
+ void applyFixupFinal(const Reference &ref, uint8_t *location, // Patches location; used by generateAtomContent when relocatable is false.
+ uint64_t fixupAddress, uint64_t targetAddress,
+ uint64_t inAtomAddress, uint64_t imageBaseAddress,
+ FindAddressForAtom findSectionAddress);
+ void applyFixupRelocatable(const Reference &ref, uint8_t *location, // Patches location; used by generateAtomContent when relocatable is true.
+ uint64_t fixupAddress,
+ uint64_t targetAddress,
+ uint64_t inAtomAddress);
+const Registry::KindStrings ArchHandler_x86_64::_sKindStrings[] = { // Kind-name table; each entry is built from an X86_64Kind enumerator.
+ LLD_KIND_STRING_ENTRY(ripRel32Minus1Anon),
+ LLD_KIND_STRING_ENTRY(ripRel32Minus2Anon),
+ LLD_KIND_STRING_ENTRY(ripRel32Minus4Anon),
+ LLD_KIND_STRING_ENTRY(ripRel32GotLoadNowLea),
+ LLD_KIND_STRING_ENTRY(lazyImmediateLocation),
+ LLD_KIND_STRING_ENTRY(unwindInfoToEhFrame),
+ LLD_KIND_STRING_ENTRY(tlvInitSectionOffset),
+const ArchHandler::StubInfo ArchHandler_x86_64::_sStubInfo = { // Code templates for stubs and the references that patch them.
+ "dyld_stub_binder",
+ // Lazy pointer references
+ { Reference::KindArch::x86_64, pointer64, 0, 0 },
+ { Reference::KindArch::x86_64, lazyPointer, 0, 0 },
+ // GOT pointer to dyld_stub_binder
+ { Reference::KindArch::x86_64, pointer64, 0, 0 },
+ // x86_64 code alignment 2^1
+ 1,
+ // Stub size and code
+ 6,
+ { 0xff, 0x25, 0x00, 0x00, 0x00, 0x00 }, // jmp *lazyPointer
+ { Reference::KindArch::x86_64, ripRel32, 2, 0 }, // presumably 2 = offset of the zeroed displacement after ff 25 -- confirm against StubInfo
+ { false, 0, 0, 0 },
+ // Stub Helper size and code
+ 10,
+ { 0x68, 0x00, 0x00, 0x00, 0x00, // pushq $lazy-info-offset
+ 0xE9, 0x00, 0x00, 0x00, 0x00 }, // jmp helperhelper
+ { Reference::KindArch::x86_64, lazyImmediateLocation, 1, 0 }, // presumably patches the pushq immediate (byte 1) -- confirm
+ { Reference::KindArch::x86_64, branch32, 6, 0 }, // presumably patches the jmp displacement (byte 6) -- confirm
+ // Stub helper image cache content type
+ DefinedAtom::typeNonLazyPointer,
+ // Stub Helper-Common size and code
+ 16,
+ // Stub helper alignment
+ 2,
+ { 0x4C, 0x8D, 0x1D, 0x00, 0x00, 0x00, 0x00, // leaq cache(%rip),%r11
+ 0x41, 0x53, // push %r11
+ 0xFF, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *binder(%rip)
+ 0x90 }, // nop
+ { Reference::KindArch::x86_64, ripRel32, 3, 0 }, // presumably the leaq displacement (bytes 3..6) -- confirm
+ { false, 0, 0, 0 },
+ { Reference::KindArch::x86_64, ripRel32, 11, 0 }, // presumably the jmp displacement (bytes 11..14) -- confirm
+ { false, 0, 0, 0 }
+bool ArchHandler_x86_64::isCallSite(const Reference &ref) { // True only for a mach_o reference whose kind is branch32.
+ if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+ return false; // References from other namespaces are never reported as call sites.
+ assert(ref.kindArch() == Reference::KindArch::x86_64);
+ return (ref.kindValue() == branch32);
+bool ArchHandler_x86_64::isPointer(const Reference &ref) { // True for mach_o references of kind pointer64 or pointer64Anon.
+ if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+ return false; // References from other namespaces are never reported as pointers.
+ assert(ref.kindArch() == Reference::KindArch::x86_64);
+ Reference::KindValue kind = ref.kindValue();
+ return (kind == pointer64 || kind == pointer64Anon);
+bool ArchHandler_x86_64::isPairedReloc(const Relocation &reloc) { // True when reloc is an X86_64_RELOC_SUBTRACTOR, the first record of the pairs decoded by getPairReferenceInfo.
+ return (reloc.type == X86_64_RELOC_SUBTRACTOR);
+ArchHandler_x86_64::kindFromReloc(const Relocation &reloc) { // Translates relocPattern(reloc) into an X86_64Kind; returns invalid when no case matches.
+ switch(relocPattern(reloc)) {
+ case X86_64_RELOC_BRANCH | rPcRel | rExtern | rLength4:
+ return branch32;
+ case X86_64_RELOC_SIGNED | rPcRel | rExtern | rLength4:
+ return ripRel32;
+ case X86_64_RELOC_SIGNED | rPcRel | rLength4: // Same type without rExtern selects the *Anon kind.
+ return ripRel32Anon;
+ case X86_64_RELOC_SIGNED_1 | rPcRel | rExtern | rLength4:
+ return ripRel32Minus1;
+ case X86_64_RELOC_SIGNED_1 | rPcRel | rLength4:
+ return ripRel32Minus1Anon;
+ case X86_64_RELOC_SIGNED_2 | rPcRel | rExtern | rLength4:
+ return ripRel32Minus2;
+ case X86_64_RELOC_SIGNED_2 | rPcRel | rLength4:
+ return ripRel32Minus2Anon;
+ case X86_64_RELOC_SIGNED_4 | rPcRel | rExtern | rLength4:
+ return ripRel32Minus4;
+ case X86_64_RELOC_SIGNED_4 | rPcRel | rLength4:
+ return ripRel32Minus4Anon;
+ case X86_64_RELOC_GOT_LOAD | rPcRel | rExtern | rLength4:
+ return ripRel32GotLoad;
+ case X86_64_RELOC_GOT | rPcRel | rExtern | rLength4:
+ return ripRel32Got;
+ case X86_64_RELOC_TLV | rPcRel | rExtern | rLength4:
+ return ripRel32Tlv;
+ case X86_64_RELOC_UNSIGNED | rExtern | rLength8:
+ return pointer64;
+ case X86_64_RELOC_UNSIGNED | rLength8:
+ return pointer64Anon;
+ default:
+ return invalid; // Callers treat this as an unknown relocation (see getReferenceInfo).
+ }
+ArchHandler_x86_64::getReferenceInfo(const Relocation &reloc, // Decodes one unpaired relocation into *kind, *target and *addend.
+ const DefinedAtom *inAtom,
+ uint32_t offsetInAtom,
+ uint64_t fixupAddress, bool swap, // NOTE(review): swap is not referenced in the body shown here.
+ FindAtomBySectionAndAddress atomFromAddress,
+ FindAtomBySymbolIndex atomFromSymbolIndex,
+ Reference::KindValue *kind,
+ const lld::Atom **target,
+ Reference::Addend *addend) {
+ *kind = kindFromReloc(reloc);
+ if (*kind == invalid)
+ return llvm::make_error<GenericError>("unknown type");
+ const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; // Bytes at the fixup hold the encoded addend or address.
+ uint64_t targetAddress;
+ switch (*kind) {
+ case branch32:
+ case ripRel32:
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ *addend = *(const little32_t *)fixupContent; // Symbol-based kinds: the stored 32-bit value is the addend.
+ return llvm::Error::success();
+ case ripRel32Minus1:
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ *addend = (int32_t)*(const little32_t *)fixupContent + 1; // +1 mirrors the "addend - 1" written by applyFixupRelocatable.
+ return llvm::Error::success();
+ case ripRel32Minus2:
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ *addend = (int32_t)*(const little32_t *)fixupContent + 2; // +2 mirrors the "addend - 2" written by applyFixupRelocatable.
+ return llvm::Error::success();
+ case ripRel32Minus4:
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ *addend = (int32_t)*(const little32_t *)fixupContent + 4; // +4 mirrors the "addend - 4" written by applyFixupRelocatable.
+ return llvm::Error::success();
+ case ripRel32Anon:
+ targetAddress = fixupAddress + 4 + *(const little32_t *)fixupContent; // Anon kinds: rebuild the target address from the stored displacement.
+ return atomFromAddress(reloc.symbol, targetAddress, target, addend);
+ case ripRel32Minus1Anon:
+ targetAddress = fixupAddress + 5 + *(const little32_t *)fixupContent;
+ return atomFromAddress(reloc.symbol, targetAddress, target, addend);
+ case ripRel32Minus2Anon:
+ targetAddress = fixupAddress + 6 + *(const little32_t *)fixupContent;
+ return atomFromAddress(reloc.symbol, targetAddress, target, addend);
+ case ripRel32Minus4Anon:
+ targetAddress = fixupAddress + 8 + *(const little32_t *)fixupContent;
+ return atomFromAddress(reloc.symbol, targetAddress, target, addend);
+ case ripRel32GotLoad:
+ case ripRel32Got:
+ case ripRel32Tlv:
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ *addend = *(const little32_t *)fixupContent;
+ return llvm::Error::success();
+ case tlvInitSectionOffset: // kindFromReloc never returns this kind; it is only assigned a few lines below.
+ case pointer64:
+ if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
+ return ec;
+ // If this is the 3rd pointer of a tlv-thunk (i.e. the pointer to the TLV's
+ // initial value) we need to handle it specially.
+ if (inAtom->contentType() == DefinedAtom::typeThunkTLV &&
+ offsetInAtom == 16) {
+ *kind = tlvInitSectionOffset;
+ assert(*addend == 0 && "TLV-init has non-zero addend?"); // NOTE(review): *addend has not been written by this function on this path, so this checks the caller's initial value, not fixupContent -- confirm intent.
+ } else
+ *addend = *(const little64_t *)fixupContent;
+ return llvm::Error::success();
+ case pointer64Anon:
+ targetAddress = *(const little64_t *)fixupContent; // The stored 64-bit value is the target address itself.
+ return atomFromAddress(reloc.symbol, targetAddress, target, addend);
+ default:
+ llvm_unreachable("bad reloc kind");
+ }
+ArchHandler_x86_64::getPairReferenceInfo(const normalized::Relocation &reloc1, // Decodes a SUBTRACTOR (reloc1) + UNSIGNED (reloc2) pair into a delta/negDelta reference.
+ const normalized::Relocation &reloc2,
+ const DefinedAtom *inAtom,
+ uint32_t offsetInAtom,
+ uint64_t fixupAddress, bool swap, // NOTE(review): fixupAddress and swap are not referenced in the body shown here.
+ bool scatterable, // NOTE(review): not referenced in the body shown here.
+ FindAtomBySectionAndAddress atomFromAddress,
+ FindAtomBySymbolIndex atomFromSymbolIndex,
+ Reference::KindValue *kind,
+ const lld::Atom **target,
+ Reference::Addend *addend) {
+ const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom];
+ uint64_t targetAddress;
+ const lld::Atom *fromTarget; // Atom named by the SUBTRACTOR record.
+ if (auto ec = atomFromSymbolIndex(reloc1.symbol, &fromTarget))
+ return ec;
+ switch(relocPattern(reloc1) << 16 | relocPattern(reloc2)) { // Both patterns in one value: reloc1 in the upper 16 bits.
+ case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength8) << 16 |
+ X86_64_RELOC_UNSIGNED | rExtern | rLength8): {
+ if (auto ec = atomFromSymbolIndex(reloc2.symbol, target))
+ return ec;
+ uint64_t encodedAddend = (int64_t)*(const little64_t *)fixupContent;
+ if (inAtom == fromTarget) { // Subtracted atom is the containing atom: forward delta to *target.
+ if (inAtom->contentType() == DefinedAtom::typeCFI)
+ *kind = unwindFDEToFunction;
+ else
+ *kind = delta64;
+ *addend = encodedAddend + offsetInAtom;
+ } else if (inAtom == *target) { // Added atom is the containing atom: negated delta; target becomes the subtracted atom.
+ *kind = negDelta64;
+ *addend = encodedAddend - offsetInAtom;
+ *target = fromTarget;
+ } else
+ return llvm::make_error<GenericError>("Invalid pointer diff");
+ return llvm::Error::success();
+ }
+ case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength4) << 16 |
+ X86_64_RELOC_UNSIGNED | rExtern | rLength4): {
+ if (auto ec = atomFromSymbolIndex(reloc2.symbol, target))
+ return ec;
+ uint32_t encodedAddend = (int32_t)*(const little32_t *)fixupContent; // NOTE(review): unsigned 32-bit, so the sums below wrap at 32 bits before being stored in *addend -- confirm intended.
+ if (inAtom == fromTarget) {
+ *kind = delta32;
+ *addend = encodedAddend + offsetInAtom;
+ } else if (inAtom == *target) {
+ *kind = negDelta32;
+ *addend = encodedAddend - offsetInAtom;
+ *target = fromTarget;
+ } else
+ return llvm::make_error<GenericError>("Invalid pointer diff");
+ return llvm::Error::success();
+ }
+ case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength8) << 16 |
+ X86_64_RELOC_UNSIGNED | rLength8): // Second record has no rExtern: target is found by address.
+ if (fromTarget != inAtom)
+ return llvm::make_error<GenericError>("pointer diff not in base atom");
+ *kind = delta64Anon;
+ targetAddress = offsetInAtom + (int64_t)*(const little64_t *)fixupContent;
+ return atomFromAddress(reloc2.symbol, targetAddress, target, addend);
+ case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength4) << 16 |
+ X86_64_RELOC_UNSIGNED | rLength4):
+ if (fromTarget != inAtom)
+ return llvm::make_error<GenericError>("pointer diff not in base atom");
+ *kind = delta32Anon;
+ targetAddress = offsetInAtom + (int32_t)*(const little32_t *)fixupContent;
+ return atomFromAddress(reloc2.symbol, targetAddress, target, addend);
+ default:
+ return llvm::make_error<GenericError>("unknown pair");
+ }
+void ArchHandler_x86_64::generateAtomContent( // Copies the atom's raw bytes into atomContentBuffer, then patches every reference in place.
+ const DefinedAtom &atom, bool relocatable, FindAddressForAtom findAddress,
+ FindAddressForAtom findSectionAddress, uint64_t imageBaseAddress,
+ llvm::MutableArrayRef<uint8_t> atomContentBuffer) {
+ // Copy raw bytes.
+ std::copy(atom.rawContent().begin(), atom.rawContent().end(),
+ atomContentBuffer.begin());
+ // Apply fix-ups.
+ for (const Reference *ref : atom) {
+ uint32_t offset = ref->offsetInAtom();
+ const Atom *target = ref->target();
+ uint64_t targetAddress = 0; // Stays 0 when the target is not a DefinedAtom.
+ if (isa<DefinedAtom>(target))
+ targetAddress = findAddress(*target);
+ uint64_t atomAddress = findAddress(atom); // Looked up once per reference.
+ uint64_t fixupAddress = atomAddress + offset;
+ if (relocatable) {
+ applyFixupRelocatable(*ref, &atomContentBuffer[offset],
+ fixupAddress, targetAddress,
+ atomAddress);
+ } else {
+ applyFixupFinal(*ref, &atomContentBuffer[offset],
+ fixupAddress, targetAddress,
+ atomAddress, imageBaseAddress, findSectionAddress);
+ }
+ }
+void ArchHandler_x86_64::applyFixupFinal( // Writes the resolved value for one reference into loc (non-relocatable output path).
+ const Reference &ref, uint8_t *loc, uint64_t fixupAddress,
+ uint64_t targetAddress, uint64_t inAtomAddress, uint64_t imageBaseAddress, // NOTE(review): inAtomAddress is not referenced in this body.
+ FindAddressForAtom findSectionAddress) {
+ if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+ return; // References from other namespaces are left untouched.
+ assert(ref.kindArch() == Reference::KindArch::x86_64);
+ ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc); // Two views of the same location; each case picks the width it needs.
+ ulittle64_t *loc64 = reinterpret_cast<ulittle64_t *>(loc);
+ switch (static_cast<X86_64Kind>(ref.kindValue())) {
+ case branch32:
+ case ripRel32:
+ case ripRel32Anon:
+ case ripRel32Got:
+ case ripRel32GotLoad:
+ case ripRel32Tlv:
+ *loc32 = targetAddress - (fixupAddress + 4) + ref.addend(); // Displacement measured from the end of the 4-byte field.
+ return;
+ case pointer64:
+ case pointer64Anon:
+ *loc64 = targetAddress + ref.addend();
+ return;
+ case tlvInitSectionOffset:
+ *loc64 = targetAddress - findSectionAddress(*ref.target()) + ref.addend(); // Offset of the target from findSectionAddress(target).
+ return;
+ case ripRel32Minus1:
+ case ripRel32Minus1Anon:
+ *loc32 = targetAddress - (fixupAddress + 5) + ref.addend(); // Field end plus 1 further byte (cf. the movb example in X86_64Kind).
+ return;
+ case ripRel32Minus2:
+ case ripRel32Minus2Anon:
+ *loc32 = targetAddress - (fixupAddress + 6) + ref.addend(); // Field end plus 2 further bytes.
+ return;
+ case ripRel32Minus4:
+ case ripRel32Minus4Anon:
+ *loc32 = targetAddress - (fixupAddress + 8) + ref.addend(); // Field end plus 4 further bytes.
+ return;
+ case delta32:
+ case delta32Anon:
+ *loc32 = targetAddress - fixupAddress + ref.addend();
+ return;
+ case delta64:
+ case delta64Anon:
+ case unwindFDEToFunction:
+ *loc64 = targetAddress - fixupAddress + ref.addend();
+ return;
+ case ripRel32GotLoadNowLea:
+ // Change MOVQ to LEA
+ assert(loc[-2] == 0x8B); // The byte two positions before the displacement must be 0x8B.
+ loc[-2] = 0x8D;
+ *loc32 = targetAddress - (fixupAddress + 4) + ref.addend();
+ return;
+ case negDelta64:
+ *loc64 = fixupAddress - targetAddress + ref.addend();
+ return;
+ case negDelta32:
+ *loc32 = fixupAddress - targetAddress + ref.addend();
+ return;
+ case lazyPointer:
+ // Do nothing
+ return;
+ case lazyImmediateLocation:
+ *loc32 = ref.addend(); // The addend alone is stored; no address arithmetic.
+ return;
+ case imageOffset:
+ case imageOffsetGot:
+ *loc32 = (targetAddress - imageBaseAddress) + ref.addend();
+ return;
+ case unwindInfoToEhFrame: {
+ uint64_t val = targetAddress - findSectionAddress(*ref.target()) + ref.addend();
+ assert(val < 0xffffffU && "offset in __eh_frame too large");
+ *loc32 = (*loc32 & 0xff000000U) | val; // Keep the top 8 bits, replace the low 24 with the offset.
+ return;
+ }
+ case invalid:
+ // Fall into llvm_unreachable().
+ break;
+ }
+ llvm_unreachable("invalid x86_64 Reference Kind");
+void ArchHandler_x86_64::applyFixupRelocatable(const Reference &ref, // Writes the value stored at loc for one reference on the relocatable output path.
+ uint8_t *loc,
+ uint64_t fixupAddress,
+ uint64_t targetAddress,
+ uint64_t inAtomAddress) {
+ if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+ return; // References from other namespaces are left untouched.
+ assert(ref.kindArch() == Reference::KindArch::x86_64);
+ ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc);
+ ulittle64_t *loc64 = reinterpret_cast<ulittle64_t *>(loc);
+ switch (static_cast<X86_64Kind>(ref.kindValue())) {
+ case branch32:
+ case ripRel32:
+ case ripRel32Got:
+ case ripRel32GotLoad:
+ case ripRel32Tlv:
+ *loc32 = ref.addend(); // Symbol-based kinds store only the addend.
+ return;
+ case ripRel32Anon:
+ *loc32 = (targetAddress - (fixupAddress + 4)) + ref.addend(); // Anon kinds store the displacement to the target.
+ return;
+ case tlvInitSectionOffset:
+ case pointer64:
+ *loc64 = ref.addend();
+ return;
+ case pointer64Anon:
+ *loc64 = targetAddress + ref.addend();
+ return;
+ case ripRel32Minus1:
+ *loc32 = ref.addend() - 1; // getReferenceInfo adds the 1 back when reading.
+ return;
+ case ripRel32Minus1Anon:
+ *loc32 = (targetAddress - (fixupAddress + 5)) + ref.addend();
+ return;
+ case ripRel32Minus2:
+ *loc32 = ref.addend() - 2; // getReferenceInfo adds the 2 back when reading.
+ return;
+ case ripRel32Minus2Anon:
+ *loc32 = (targetAddress - (fixupAddress + 6)) + ref.addend();
+ return;
+ case ripRel32Minus4:
+ *loc32 = ref.addend() - 4; // getReferenceInfo adds the 4 back when reading.
+ return;
+ case ripRel32Minus4Anon:
+ *loc32 = (targetAddress - (fixupAddress + 8)) + ref.addend();
+ return;
+ case delta32:
+ *loc32 = ref.addend() + inAtomAddress - fixupAddress; // i.e. addend minus the fixup's offset within its atom.
+ return;
+ case delta32Anon:
+ // The value we write here should be the delta to the target
+ // after taking into account the difference from the fixup back to the
+ // last defined label
+ // ie, if we have:
+ // _base: ...
+ // Lfixup: .long Ltarget - .
+ // ...
+ // Ltarget:
+ //
+ // Then we want to encode the value (Ltarget + addend) - (LFixup - _base)
+ *loc32 = (targetAddress + ref.addend()) - (fixupAddress - inAtomAddress);
+ return;
+ case delta64:
+ *loc64 = ref.addend() + inAtomAddress - fixupAddress; // i.e. addend minus the fixup's offset within its atom.
+ return;
+ case delta64Anon:
+ // The value we write here should be the delta to the target
+ // after taking into account the difference from the fixup back to the
+ // last defined label
+ // ie, if we have:
+ // _base: ...
+ // Lfixup: .quad Ltarget - .
+ // ...
+ // Ltarget:
+ //
+ // Then we want to encode the value (Ltarget + addend) - (LFixup - _base)
+ *loc64 = (targetAddress + ref.addend()) - (fixupAddress - inAtomAddress);
+ return;
+ case negDelta64:
+ *loc64 = ref.addend() + fixupAddress - inAtomAddress; // i.e. addend plus the fixup's offset within its atom.
+ return;
+ case negDelta32:
+ *loc32 = ref.addend() + fixupAddress - inAtomAddress; // i.e. addend plus the fixup's offset within its atom.
+ return;
+ case ripRel32GotLoadNowLea:
+ llvm_unreachable("ripRel32GotLoadNowLea implies GOT pass was run");
+ return;
+ case lazyPointer:
+ case lazyImmediateLocation:
+ llvm_unreachable("lazy reference kind implies Stubs pass was run");
+ return;
+ case imageOffset:
+ case imageOffsetGot:
+ case unwindInfoToEhFrame:
+ llvm_unreachable("fixup implies __unwind_info");
+ return;
+ case unwindFDEToFunction:
+ // Do nothing for now
+ return;
+ case invalid:
+ // Fall into llvm_unreachable().
+ break;
+ }
+ llvm_unreachable("unknown x86_64 Reference Kind");
+void ArchHandler_x86_64::appendSectionRelocations( // Appends to relocs the relocation record(s) that encode ref; the inverse of kindFromReloc / getPairReferenceInfo.
+ const DefinedAtom &atom,
+ uint64_t atomSectionOffset,
+ const Reference &ref,
+ FindSymbolIndexForAtom symbolIndexForAtom,
+ FindSectionIndexForAtom sectionIndexForAtom,
+ FindAddressForAtom addressForAtom, // NOTE(review): not referenced in this body.
+ normalized::Relocations &relocs) {
+ if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+ return; // Only mach_o references produce relocations here.
+ assert(ref.kindArch() == Reference::KindArch::x86_64);
+ uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom();
+ switch (static_cast<X86_64Kind>(ref.kindValue())) {
+ case branch32:
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+ X86_64_RELOC_BRANCH | rPcRel | rExtern | rLength4);
+ return;
+ case ripRel32:
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+ X86_64_RELOC_SIGNED | rPcRel | rExtern | rLength4 );
+ return;
+ case ripRel32Anon: // Anon kinds pass a section index and omit rExtern.
+ appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0,
+ X86_64_RELOC_SIGNED | rPcRel | rLength4 );
+ return;
+ case ripRel32Got:
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+ X86_64_RELOC_GOT | rPcRel | rExtern | rLength4 );
+ return;
+ case ripRel32GotLoad:
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+ X86_64_RELOC_GOT_LOAD | rPcRel | rExtern | rLength4 );
+ return;
+ case ripRel32Tlv:
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+ X86_64_RELOC_TLV | rPcRel | rExtern | rLength4 );
+ return;
+ case tlvInitSectionOffset: // Emitted exactly like pointer64.
+ case pointer64:
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+ X86_64_RELOC_UNSIGNED | rExtern | rLength8);
+ return;
+ case pointer64Anon:
+ appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0,
+ X86_64_RELOC_UNSIGNED | rLength8);
+ return;
+ case ripRel32Minus1:
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+ X86_64_RELOC_SIGNED_1 | rPcRel | rExtern | rLength4 );
+ return;
+ case ripRel32Minus1Anon:
+ appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0,
+ X86_64_RELOC_SIGNED_1 | rPcRel | rLength4 );
+ return;
+ case ripRel32Minus2:
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+ X86_64_RELOC_SIGNED_2 | rPcRel | rExtern | rLength4 );
+ return;
+ case ripRel32Minus2Anon:
+ appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0,
+ X86_64_RELOC_SIGNED_2 | rPcRel | rLength4 );
+ return;
+ case ripRel32Minus4:
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+ X86_64_RELOC_SIGNED_4 | rPcRel | rExtern | rLength4 );
+ return;
+ case ripRel32Minus4Anon:
+ appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0,
+ X86_64_RELOC_SIGNED_4 | rPcRel | rLength4 );
+ return;
+ case delta32: // Delta kinds emit two records at the same offset: SUBTRACTOR on this atom, then UNSIGNED on the target.
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0,
+ X86_64_RELOC_SUBTRACTOR | rExtern | rLength4 );
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+ X86_64_RELOC_UNSIGNED | rExtern | rLength4 );
+ return;
+ case delta32Anon:
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0,
+ X86_64_RELOC_SUBTRACTOR | rExtern | rLength4 );
+ appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0,
+ X86_64_RELOC_UNSIGNED | rLength4 );
+ return;
+ case delta64:
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0,
+ X86_64_RELOC_SUBTRACTOR | rExtern | rLength8 );
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+ X86_64_RELOC_UNSIGNED | rExtern | rLength8 );
+ return;
+ case delta64Anon:
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0,
+ X86_64_RELOC_SUBTRACTOR | rExtern | rLength8 );
+ appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0,
+ X86_64_RELOC_UNSIGNED | rLength8 );
+ return;
+ case unwindFDEToFunction: // No relocation records are emitted for these two kinds.
+ case unwindInfoToEhFrame:
+ return;
+ case negDelta32: // Negated deltas swap the roles: SUBTRACTOR on the target, UNSIGNED on this atom.
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+ X86_64_RELOC_SUBTRACTOR | rExtern | rLength4 );
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0,
+ X86_64_RELOC_UNSIGNED | rExtern | rLength4 );
+ return;
+ case negDelta64:
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+ X86_64_RELOC_SUBTRACTOR | rExtern | rLength8 );
+ appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0,
+ X86_64_RELOC_UNSIGNED | rExtern | rLength8 );
+ return;
+ case ripRel32GotLoadNowLea:
+ llvm_unreachable("ripRel32GotLoadNowLea implies GOT pass was run");
+ return;
+ case lazyPointer:
+ case lazyImmediateLocation:
+ llvm_unreachable("lazy reference kind implies Stubs pass was run");
+ return;
+ case imageOffset:
+ case imageOffsetGot:
+ llvm_unreachable("__unwind_info references should have been resolved");
+ return;
+ case invalid:
+ // Fall into llvm_unreachable().
+ break;
+ }
+ llvm_unreachable("unknown x86_64 Reference Kind");
+std::unique_ptr<mach_o::ArchHandler> ArchHandler::create_x86_64() { // Factory: returns a newly allocated ArchHandler_x86_64 wrapped in a unique_ptr.
+ return std::unique_ptr<mach_o::ArchHandler>(new ArchHandler_x86_64());
+} // namespace mach_o
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/Atoms.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/Atoms.h
new file mode 100644
index 000000000000..573efca9f6f9
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/Atoms.h
@@ -0,0 +1,181 @@
+//===- lib/ReaderWriter/MachO/Atoms.h ---------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Core/Atom.h"
+#include "lld/Core/DefinedAtom.h"
+#include "lld/Core/SharedLibraryAtom.h"
+#include "lld/Core/Simple.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include <cstdint>
+#include <string>
+namespace lld {
+class File;
+namespace mach_o {
+class MachODefinedAtom : public SimpleDefinedAtom { // Defined atom whose attributes are fixed at construction and returned by the accessors below.
+ MachODefinedAtom(const File &f, const StringRef name, Scope scope, // Constructor for atoms that carry content bytes.
+ ContentType type, Merge merge, bool thumb, bool noDeadStrip,
+ const ArrayRef<uint8_t> content, Alignment align)
+ : SimpleDefinedAtom(f), _name(name), _content(content),
+ _align(align), _contentType(type), _scope(scope), _merge(merge),
+ _thumb(thumb), _noDeadStrip(noDeadStrip) {}
+ // Constructor for zero-fill content
+ MachODefinedAtom(const File &f, const StringRef name, Scope scope,
+ ContentType type, uint64_t size, bool noDeadStrip,
+ Alignment align)
+ : SimpleDefinedAtom(f), _name(name),
+ _content(ArrayRef<uint8_t>(nullptr, size)), _align(align), // Null data pointer with the requested size, so size() still reports it.
+ _contentType(type), _scope(scope), _merge(mergeNo), _thumb(false),
+ _noDeadStrip(noDeadStrip) {}
+ ~MachODefinedAtom() override = default;
+ uint64_t size() const override { return _content.size(); }
+ ContentType contentType() const override { return _contentType; }
+ Alignment alignment() const override { return _align; }
+ StringRef name() const override { return _name; }
+ Scope scope() const override { return _scope; }
+ Merge merge() const override { return _merge; }
+ DeadStripKind deadStrip() const override { // deadStripNever for initializer/terminator pointers or when _noDeadStrip is set; otherwise deadStripNormal.
+ if (_contentType == DefinedAtom::typeInitializerPtr)
+ return deadStripNever;
+ if (_contentType == DefinedAtom::typeTerminatorPtr)
+ return deadStripNever;
+ if (_noDeadStrip)
+ return deadStripNever;
+ return deadStripNormal;
+ }
+ ArrayRef<uint8_t> rawContent() const override {
+ // Note: Zerofill atoms have a content pointer which is null.
+ return _content;
+ }
+ bool isThumb() const { return _thumb; }
+ const StringRef _name;
+ const ArrayRef<uint8_t> _content;
+ const DefinedAtom::Alignment _align;
+ const ContentType _contentType;
+ const Scope _scope;
+ const Merge _merge;
+ const bool _thumb;
+ const bool _noDeadStrip;
+class MachODefinedCustomSectionAtom : public MachODefinedAtom { // MachODefinedAtom that additionally reports a required custom section name.
+ MachODefinedCustomSectionAtom(const File &f, const StringRef name,
+ Scope scope, ContentType type, Merge merge,
+ bool thumb, bool noDeadStrip,
+ const ArrayRef<uint8_t> content,
+ StringRef sectionName, Alignment align)
+ : MachODefinedAtom(f, name, scope, type, merge, thumb, noDeadStrip,
+ content, align),
+ _sectionName(sectionName) {}
+ ~MachODefinedCustomSectionAtom() override = default;
+ SectionChoice sectionChoice() const override { // Always sectionCustomRequired.
+ return DefinedAtom::sectionCustomRequired;
+ }
+ StringRef customSectionName() const override { // Returns the section name passed to the constructor.
+ return _sectionName;
+ }
+ StringRef _sectionName;
+class MachOTentativeDefAtom : public SimpleDefinedAtom { // Tentative definition: reports mergeAsTentative, typeZeroFill and empty raw content.
+ MachOTentativeDefAtom(const File &f, const StringRef name, Scope scope,
+ uint64_t size, DefinedAtom::Alignment align)
+ : SimpleDefinedAtom(f), _name(name), _scope(scope), _size(size),
+ _align(align) {}
+ ~MachOTentativeDefAtom() override = default;
+ uint64_t size() const override { return _size; } // Size comes from the constructor; rawContent() is empty.
+ Merge merge() const override { return DefinedAtom::mergeAsTentative; }
+ ContentType contentType() const override { return DefinedAtom::typeZeroFill; }
+ Alignment alignment() const override { return _align; }
+ StringRef name() const override { return _name; }
+ Scope scope() const override { return _scope; }
+ ArrayRef<uint8_t> rawContent() const override { return ArrayRef<uint8_t>(); }
+ const std::string _name; // Held as std::string here, whereas MachODefinedAtom keeps a StringRef.
+ const Scope _scope;
+ const uint64_t _size;
+ const DefinedAtom::Alignment _align;
+class MachOSharedLibraryAtom : public SharedLibraryAtom { // Shared-library atom that records its file, symbol name and dylib install name.
+ MachOSharedLibraryAtom(const File &file, StringRef name,
+ StringRef dylibInstallName, bool weakDef) // NOTE(review): weakDef is not used by the initializer list or the empty body.
+ : SharedLibraryAtom(), _file(file), _name(name),
+ _dylibInstallName(dylibInstallName) {}
+ ~MachOSharedLibraryAtom() override = default;
+ StringRef loadName() const override { return _dylibInstallName; }
+ bool canBeNullAtRuntime() const override {
+ // FIXME: this may actually be changeable. For now, all symbols are strongly
+ // defined though.
+ return false;
+ }
+ const File &file() const override { return _file; }
+ StringRef name() const override { return _name; }
+ Type type() const override {
+ // Unused in MachO (I think).
+ return Type::Unknown;
+ }
+ uint64_t size() const override {
+ // Unused in MachO (I think)
+ return 0;
+ }
+ const File &_file;
+ StringRef _name;
+ StringRef _dylibInstallName;
+} // end namespace mach_o
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/CMakeLists.txt b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/CMakeLists.txt
new file mode 100644
index 000000000000..37d1de432c0f
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/CMakeLists.txt
@@ -0,0 +1,35 @@
+ ArchHandler.cpp
+ ArchHandler_arm.cpp
+ ArchHandler_arm64.cpp
+ ArchHandler_x86.cpp
+ ArchHandler_x86_64.cpp
+ CompactUnwindPass.cpp
+ GOTPass.cpp
+ LayoutPass.cpp
+ MachOLinkingContext.cpp
+ MachONormalizedFileBinaryReader.cpp
+ MachONormalizedFileBinaryWriter.cpp
+ MachONormalizedFileFromAtoms.cpp
+ MachONormalizedFileToAtoms.cpp
+ MachONormalizedFileYAML.cpp
+ ObjCPass.cpp
+ ShimPass.cpp
+ StubsPass.cpp
+ TLVPass.cpp
+ WriterMachO.cpp
+ DebugInfoDWARF
+ Demangle
+ Object
+ Support
+ lldCommon
+ lldCore
+ lldYAML
+ )
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp
new file mode 100644
index 000000000000..fa0aaa103eeb
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp
@@ -0,0 +1,585 @@
+//===- lib/ReaderWriter/MachO/CompactUnwindPass.cpp -------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+/// \file A pass to convert MachO's __compact_unwind sections into the final
+/// __unwind_info format used during runtime. See
+/// mach-o/compact_unwind_encoding.h for more details on the formats involved.
+#include "ArchHandler.h"
+#include "File.h"
+#include "MachONormalizedFileBinaryUtils.h"
+#include "MachOPasses.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Core/DefinedAtom.h"
+#include "lld/Core/File.h"
+#include "lld/Core/Reference.h"
+#include "lld/Core/Simple.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#define DEBUG_TYPE "macho-compact-unwind"
+namespace lld {
+namespace mach_o {
+namespace {
+/// Unwind information for one address range, as gathered from a
+/// __compact_unwind atom or synthesised for a function that has none.
+struct CompactUnwindEntry {
+ // Atom at which the covered range starts.
+ const Atom *rangeStart;
+ // Personality routine for the range, or nullptr.
+ const Atom *personalityFunction;
+ // Language-specific data area for the range, or nullptr.
+ const Atom *lsdaLocation;
+ // __eh_frame atom to defer to, or nullptr.
+ const Atom *ehFrame;
+ // Length of the covered range.
+ uint32_t rangeLength;
+ // There are 3 types of compact unwind entry, distinguished by the encoding
+ // value: 0 indicates a function with no unwind info;
+ // _archHandler.dwarfCompactUnwindType() indicates that the entry defers to
+ // __eh_frame, and that the ehFrame entry will be valid; any other value is a
+ // real compact unwind entry -- personalityFunction will be set and
+ // lsdaLocation may be.
+ uint32_t encoding;
+ /// Entry covering exactly \p function (start = the atom, length = its
+ /// size()) with encoding 0 and no personality, LSDA or EH frame.
+ CompactUnwindEntry(const DefinedAtom *function)
+ : rangeStart(function), personalityFunction(nullptr),
+ lsdaLocation(nullptr), ehFrame(nullptr), rangeLength(function->size()),
+ encoding(0) {}
+ /// Empty entry: every pointer null, length and encoding zero.
+ CompactUnwindEntry()
+ : rangeStart(nullptr), personalityFunction(nullptr),
+ lsdaLocation(nullptr), ehFrame(nullptr), rangeLength(0), encoding(0) {}
+/// One second-level page: a view (ArrayRef) onto a run of entries that are
+/// stored elsewhere.
+struct UnwindInfoPage {
+ ArrayRef<CompactUnwindEntry> entries;
+class UnwindInfoAtom : public SimpleDefinedAtom {
+ /// Lays out and fills the whole content of the atom. The offsets computed in
+ /// the initializer list define the layout, in order: a 7-word header, the
+ /// common encodings (one word each), the personalities (one word each), the
+ /// top-level index (3 words per page plus one sentinel record), the LSDA
+ /// index (2 words per LSDA), then the second-level pages.
+ /// Each offset initializer reads the previous offset member, so this relies
+ /// on those members being declared in the same order.
+ UnwindInfoAtom(ArchHandler &archHandler, const File &file, bool isBig,
+ std::vector<const Atom *> &personalities,
+ std::vector<uint32_t> &commonEncodings,
+ std::vector<UnwindInfoPage> &pages, uint32_t numLSDAs)
+ : SimpleDefinedAtom(file), _archHandler(archHandler),
+ _commonEncodingsOffset(7 * sizeof(uint32_t)),
+ _personalityArrayOffset(_commonEncodingsOffset +
+ commonEncodings.size() * sizeof(uint32_t)),
+ _topLevelIndexOffset(_personalityArrayOffset +
+ personalities.size() * sizeof(uint32_t)),
+ _lsdaIndexOffset(_topLevelIndexOffset +
+ 3 * (pages.size() + 1) * sizeof(uint32_t)),
+ _firstPageOffset(_lsdaIndexOffset + 2 * numLSDAs * sizeof(uint32_t)),
+ _isBig(isBig) {
+ // The add* calls below each grow _contents up to the end of their own
+ // region, so they must run in section order.
+ addHeader(commonEncodings.size(), personalities.size(), pages.size());
+ addCommonEncodings(commonEncodings);
+ addPersonalityFunctions(personalities);
+ addTopLevelIndexes(pages);
+ addLSDAIndexes(pages, numLSDAs);
+ addSecondLevelPages(pages);
+ }
+ ~UnwindInfoAtom() override = default;
+ /// This atom holds processed (final-form) unwind info.
+ ContentType contentType() const override {
+ return DefinedAtom::typeProcessedUnwindInfo;
+ }
+ /// The atom is 4-aligned.
+ Alignment alignment() const override { return 4; }
+ /// Size is whatever the constructor accumulated in _contents.
+ uint64_t size() const override { return _contents.size(); }
+ /// Read-only data.
+ ContentPermissions permissions() const override {
+ return DefinedAtom::permR__;
+ }
+ /// Raw bytes of the section as built by the add* helpers.
+ ArrayRef<uint8_t> rawContent() const override { return _contents; }
+ /// Write the fixed 7-word section header at the start of _contents.
+ /// \param numCommon number of common encodings.
+ /// \param numPersonalities number of personality functions.
+ /// \param numPages number of second-level pages; the index count stored in
+ /// the header is numPages + 1 (one extra sentinel record).
+ void addHeader(uint32_t numCommon, uint32_t numPersonalities,
+                uint32_t numPages) {
+   using normalized::write32;
+   // Header words in on-disk order.
+   const uint32_t headerWords[] = {
+       1,                       // version
+       _commonEncodingsOffset,  // commonEncodingsArraySectionOffset
+       numCommon,               // commonEncodingsArrayCount
+       _personalityArrayOffset, // personalityArraySectionOffset
+       numPersonalities,        // personalityArrayCount
+       _topLevelIndexOffset,    // indexSectionOffset
+       numPages + 1,            // indexCount
+   };
+   _contents.resize(sizeof(headerWords));
+   uint8_t *cursor = _contents.data();
+   for (uint32_t word : headerWords) {
+     write32(cursor, word, _isBig);
+     cursor += sizeof(uint32_t);
+   }
+ }
+ /// Append the common-encodings array: one 32-bit compact encoding per
+ /// element of \p commonEncodings, starting at _commonEncodingsOffset. The
+ /// element count itself is stored in the header, not here.
+ void addCommonEncodings(std::vector<uint32_t> &commonEncodings) {
+   using normalized::write32;
+   const size_t count = commonEncodings.size();
+   _contents.resize(_commonEncodingsOffset + count * sizeof(uint32_t));
+   uint8_t *base = _contents.data() + _commonEncodingsOffset;
+   for (size_t i = 0; i != count; ++i)
+     write32(base + i * sizeof(uint32_t), commonEncodings[i], _isBig);
+ }
+ /// Reserve one 32-bit slot per personality function, starting at
+ /// _personalityArrayOffset, and attach an indirect image-offset reference to
+ /// each slot. The slots themselves are left zero-filled by resize().
+ /// \param personalities personality atoms, in the order their 1-based index
+ /// is encoded elsewhere. Taken by const reference; the list is only read.
+ void addPersonalityFunctions(const std::vector<const Atom *> &personalities) {
+   _contents.resize(_personalityArrayOffset +
+                    personalities.size() * sizeof(uint32_t));
+   for (unsigned i = 0; i < personalities.size(); ++i)
+     addImageReferenceIndirect(_personalityArrayOffset + i * sizeof(uint32_t),
+                               personalities[i]);
+ }
+ /// Emit the first-level index: one 3-word record per page followed by one
+ /// sentinel record. In each record, word 0 gets an image-offset reference to
+ /// the first function of the page, word 1 is the section offset of that
+ /// page's second-level page, and word 2 is the section offset of the page's
+ /// first LSDA index entry.
+ /// \param pages must contain at least one page, and every page at least one
+ /// entry (entries[0] and back() are read unconditionally).
+ void addTopLevelIndexes(std::vector<UnwindInfoPage> &pages) {
+   using normalized::write32;
+   assert(!pages.empty() && "top-level index needs at least one page");
+   const uint32_t numIndexes = pages.size() + 1;
+   _contents.resize(_topLevelIndexOffset + numIndexes * 3 * sizeof(uint32_t));
+   // Section offset of the second-level page belonging to the record being
+   // written. LSDA offsets are derived by counting the LSDAs of the pages
+   // already visited.
+   uint32_t pageLoc = _firstPageOffset;
+   uint8_t *indexData = &_contents[_topLevelIndexOffset];
+   uint32_t numLSDAs = 0;
+   for (unsigned i = 0; i < pages.size(); ++i) {
+     // functionOffset
+     addImageReference(_topLevelIndexOffset + 3 * i * sizeof(uint32_t),
+                       pages[i].entries[0].rangeStart);
+     // secondLevelPagesSectionOffset
+     write32(indexData + (3 * i + 1) * sizeof(uint32_t), pageLoc, _isBig);
+     // lsdaIndexArraySectionOffset
+     write32(indexData + (3 * i + 2) * sizeof(uint32_t),
+             _lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t), _isBig);
+     for (auto &entry : pages[i].entries)
+       if (entry.lsdaLocation)
+         ++numLSDAs;
+     // Step to the next page. This must mirror the size that
+     // addRegularSecondLevelPage() emits for a regular page: an 8-byte header
+     // (uint32 kind + two uint16 fields) plus two 32-bit words per entry.
+     // Without this step every record would point at the first page.
+     pageLoc += sizeof(uint32_t) + 2 * sizeof(uint16_t) +
+                2 * pages[i].entries.size() * sizeof(uint32_t);
+   }
+   // Finally, write out the sentinel record. Its functionOffset refers to the
+   // end of the last covered range (start atom + rangeLength as addend).
+   auto &finalEntry = pages.back().entries.back();
+   addImageReference(_topLevelIndexOffset +
+                         3 * pages.size() * sizeof(uint32_t),
+                     finalEntry.rangeStart, finalEntry.rangeLength);
+   // The sentinel's secondLevelPagesSectionOffset stays 0 (zero-filled by the
+   // resize above); only its LSDA offset, the end of the LSDA index, is set.
+   write32(indexData + (3 * pages.size() + 2) * sizeof(uint32_t),
+           _lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t), _isBig);
+ }
+ /// Emit the LSDA index: for every entry that has an LSDA, in page order, a
+ /// pair of image-offset references (function start, then LSDA location).
+ /// \param numLSDAs total number of such entries; sizes the region.
+ void addLSDAIndexes(std::vector<UnwindInfoPage> &pages, uint32_t numLSDAs) {
+   const uint32_t pairSize = 2 * sizeof(uint32_t);
+   _contents.resize(_lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t));
+   uint32_t slot = _lsdaIndexOffset;
+   for (const UnwindInfoPage &page : pages)
+     for (const CompactUnwindEntry &entry : page.entries)
+       if (entry.lsdaLocation) {
+         addImageReference(slot, entry.rangeStart);
+         addImageReference(slot + sizeof(uint32_t), entry.lsdaLocation);
+         slot += pairSize;
+       }
+ }
+ /// Append one regular second-level page per element of \p pages, in order.
+ void addSecondLevelPages(std::vector<UnwindInfoPage> &pages) {
+   for (size_t i = 0, e = pages.size(); i != e; ++i)
+     addRegularSecondLevelPage(pages[i]);
+ }
+ /// Append one "regular" second-level page to the end of _contents.
+ /// Layout: a 32-bit kind (2), a 16-bit offset of the first entry (the header
+ /// size, 8), a 16-bit entry count, then two 32-bit words per entry: an
+ /// image-offset reference to the function start and the encoding.
+ void addRegularSecondLevelPage(const UnwindInfoPage &page) {
+ uint32_t curPageOffset = _contents.size();
+ const int16_t headerSize = sizeof(uint32_t) + 2 * sizeof(uint16_t);
+ uint32_t curPageSize =
+ headerSize + 2 * page.entries.size() * sizeof(uint32_t);
+ _contents.resize(curPageOffset + curPageSize);
+ using normalized::write32;
+ using normalized::write16;
+ // 2 => regular page
+ write32(&_contents[curPageOffset], 2, _isBig);
+ // offset of 1st entry
+ write16(&_contents[curPageOffset + 4], headerSize, _isBig);
+ // number of entries in this page
+ write16(&_contents[curPageOffset + 6], page.entries.size(), _isBig);
+ uint32_t pagePos = curPageOffset + headerSize;
+ for (auto &entry : page.entries) {
+ addImageReference(pagePos, entry.rangeStart);
+ write32(_contents.data() + pagePos + sizeof(uint32_t), entry.encoding,
+ _isBig);
+ // Entries whose mode bits say "use DWARF" also get a reference from the
+ // encoding word to their __eh_frame atom.
+ // NOTE(review): entry.ehFrame is passed as-is; confirm it cannot be null
+ // here when the encoding already had the DWARF mode on input.
+ if ((entry.encoding & 0x0f000000U) ==
+ _archHandler.dwarfCompactUnwindType())
+ addEhFrameReference(pagePos + sizeof(uint32_t), entry.ehFrame);
+ pagePos += 2 * sizeof(uint32_t);
+ }
+ }
+ /// Add a mach_o reference at \p offset to \p dest using the architecture's
+ /// unwindRefToEhFrameKind().
+ void addEhFrameReference(uint32_t offset, const Atom *dest,
+ Reference::Addend addend = 0) {
+ addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(),
+ _archHandler.unwindRefToEhFrameKind(), offset, dest, addend);
+ }
+ /// Add a mach_o reference at \p offset to \p dest (plus \p addend) using the
+ /// architecture's imageOffsetKind().
+ void addImageReference(uint32_t offset, const Atom *dest,
+ Reference::Addend addend = 0) {
+ addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(),
+ _archHandler.imageOffsetKind(), offset, dest, addend);
+ }
+ /// Same as addImageReference() but with imageOffsetKindIndirect() and a
+ /// fixed addend of 0.
+ void addImageReferenceIndirect(uint32_t offset, const Atom *dest) {
+ addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(),
+ _archHandler.imageOffsetKindIndirect(), offset, dest, 0);
+ }
+ mach_o::ArchHandler &_archHandler;
+ // Bytes of the section being built.
+ std::vector<uint8_t> _contents;
+ // Section-relative byte offsets of each region. The constructor's
+ // initializer list computes each one from the previous, so keep this order.
+ uint32_t _commonEncodingsOffset;
+ uint32_t _personalityArrayOffset;
+ uint32_t _topLevelIndexOffset;
+ uint32_t _lsdaIndexOffset;
+ uint32_t _firstPageOffset;
+ // Passed to write16/write32 to select the byte order.
+ bool _isBig;
+/// Pass that converts __compact_unwind atoms into the final __unwind_info atom.
+class CompactUnwindPass : public Pass {
+ /// Creates the pass for \p context: caches its arch handler and endianness,
+ /// and creates a MachOFile (via the context's make_file) to own the atom the
+ /// pass produces, giving it the context's next ordinal.
+ CompactUnwindPass(const MachOLinkingContext &context)
+ : _ctx(context), _archHandler(_ctx.archHandler()),
+ _file(*_ctx.make_file<MachOFile>("<mach-o Compact Unwind Pass>")),
+ _isBig(MachOLinkingContext::isBigEndian(_ctx.arch())) {
+ _file.setOrdinal(_ctx.getNextOrdinalAndIncrement());
+ }
+ /// Run the pass over \p mergedFile: collect the __compact_unwind and
+ /// __eh_frame information, build one final entry per typeCode atom, drop
+ /// unused CFI atoms, split the entries into pages, add a single
+ /// UnwindInfoAtom holding the result, and remove the consumed
+ /// __compact_unwind atoms. Returns success in all cases; limits that are not
+ /// handled yet are checked with assert().
+ llvm::Error perform(SimpleFile &mergedFile) override {
+   LLVM_DEBUG(llvm::dbgs() << "MachO Compact Unwind pass\n");
+   std::map<const Atom *, CompactUnwindEntry> unwindLocs;
+   std::map<const Atom *, const Atom *> dwarfFrames;
+   std::vector<const Atom *> personalities;
+   uint32_t numLSDAs = 0;
+   // First collect all __compact_unwind and __eh_frame entries, addressable by
+   // the function referred to.
+   collectCompactUnwindEntries(mergedFile, unwindLocs, personalities,
+                               numLSDAs);
+   collectDwarfFrameEntries(mergedFile, dwarfFrames);
+   // Skip rest of pass if no unwind info.
+   if (unwindLocs.empty() && dwarfFrames.empty())
+     return llvm::Error::success();
+   // FIXME: if there are more than 3 personality functions then we need to
+   // defer to DWARF info for the ones we don't put in the list. They should
+   // also probably be sorted by frequency.
+   // The personality index is 1-based and is shifted into a 2-bit field
+   // (bits 28-29) by finalizeUnwindInfoEntryForAtom(), so only indexes 1..3
+   // are representable; a 4th would land on bit 30, the LSDA flag.
+   assert(personalities.size() <= 3 && "too many personality functions");
+   // TODO: Find common encodings for use by compressed pages.
+   std::vector<uint32_t> commonEncodings;
+   // Now sort the entries by final address and fixup the compact encoding to
+   // its final form (i.e. set personality function bits & create DWARF
+   // references where needed).
+   std::vector<CompactUnwindEntry> unwindInfos = createUnwindInfoEntries(
+       mergedFile, unwindLocs, personalities, dwarfFrames);
+   // Remove any unused eh-frame atoms.
+   pruneUnusedEHFrames(mergedFile, unwindInfos, unwindLocs, dwarfFrames);
+   // Finally, we can start creating pages based on these entries.
+   LLVM_DEBUG(llvm::dbgs() << " Splitting entries into pages\n");
+   // FIXME: we split the entries into pages naively: lots of 4k pages followed
+   // by a small one. ld64 tried to minimize space and align them to real 4k
+   // boundaries. That might be worth doing, or perhaps we could perform some
+   // minor balancing for expected number of lookups.
+   // NOTE(review): the loop below always creates at least one page and reads
+   // entries[0] of it, so it assumes unwindInfos is non-empty (i.e. that
+   // mergedFile has at least one typeCode atom) -- confirm with callers.
+   std::vector<UnwindInfoPage> pages;
+   auto remainingInfos = llvm::makeArrayRef(unwindInfos);
+   do {
+     pages.push_back(UnwindInfoPage());
+     // FIXME: we only create regular pages at the moment. These can hold up to
+     // 1021 entries according to the documentation.
+     unsigned entriesInPage = std::min(1021U, (unsigned)remainingInfos.size());
+     pages.back().entries = remainingInfos.slice(0, entriesInPage);
+     remainingInfos = remainingInfos.slice(entriesInPage);
+     LLVM_DEBUG(llvm::dbgs()
+                << " Page from "
+                << pages.back().entries[0].rangeStart->name() << " to "
+                << pages.back().entries.back().rangeStart->name() << " + "
+                << llvm::format("0x%x",
+                                pages.back().entries.back().rangeLength)
+                << " has " << entriesInPage << " entries\n");
+   } while (!remainingInfos.empty());
+   // The atom is placement-allocated in the pass's own file and then handed to
+   // the merged file.
+   auto *unwind = new (_file.allocator())
+       UnwindInfoAtom(_archHandler, _file, _isBig, personalities,
+                      commonEncodings, pages, numLSDAs);
+   mergedFile.addAtom(*unwind);
+   // Finally, remove all __compact_unwind atoms now that we've processed them.
+   mergedFile.removeDefinedAtomsIf([](const DefinedAtom *atom) {
+     return atom->contentType() == DefinedAtom::typeCompactUnwindInfo;
+   });
+   return llvm::Error::success();
+ }
+ /// Scan \p mergedFile for typeCompactUnwindInfo atoms and record what they
+ /// describe.
+ /// \param unwindLocs receives one entry per atom, keyed by the function atom
+ /// the entry starts at (an existing key is not overwritten).
+ /// \param personalities receives each distinct personality function, in
+ /// first-seen order.
+ /// \param numLSDAs incremented once per entry that has an LSDA.
+ void collectCompactUnwindEntries(
+     const SimpleFile &mergedFile,
+     std::map<const Atom *, CompactUnwindEntry> &unwindLocs,
+     std::vector<const Atom *> &personalities, uint32_t &numLSDAs) {
+   LLVM_DEBUG(llvm::dbgs() << " Collecting __compact_unwind entries\n");
+   for (const DefinedAtom *atom : mergedFile.defined()) {
+     if (atom->contentType() != DefinedAtom::typeCompactUnwindInfo)
+       continue;
+     auto unwindEntry = extractCompactUnwindEntry(atom);
+     unwindLocs.insert(std::make_pair(unwindEntry.rangeStart, unwindEntry));
+     LLVM_DEBUG(llvm::dbgs() << " Entry for "
+                             << unwindEntry.rangeStart->name() << ", encoding="
+                             << llvm::format("0x%08x", unwindEntry.encoding));
+     if (unwindEntry.personalityFunction) {
+       LLVM_DEBUG(llvm::dbgs() << ", personality="
+                               << unwindEntry.personalityFunction->name());
+       // An entry may have a personality function but no LSDA, so only print
+       // the LSDA name when there is one instead of dereferencing null.
+       if (unwindEntry.lsdaLocation)
+         LLVM_DEBUG(llvm::dbgs()
+                    << ", lsdaLoc=" << unwindEntry.lsdaLocation->name());
+     }
+     LLVM_DEBUG(llvm::dbgs() << '\n');
+     // Count number of LSDAs we see, since we need to know how big the index
+     // will be while laying out the section.
+     if (unwindEntry.lsdaLocation)
+       ++numLSDAs;
+     // Gather the personality functions now, so that they're in deterministic
+     // order (derived from the DefinedAtom order).
+     if (unwindEntry.personalityFunction) {
+       auto pFunc = std::find(personalities.begin(), personalities.end(),
+                              unwindEntry.personalityFunction);
+       if (pFunc == personalities.end())
+         personalities.push_back(unwindEntry.personalityFunction);
+     }
+   }
+ }
+ /// Decode one __compact_unwind atom into a CompactUnwindEntry.
+ /// The atom's references give the pointers, selected by their offset in the
+ /// atom: 0 = function start, 0x10 = personality function, 0x18 = LSDA.
+ /// References at any other offset are ignored. The range length and encoding
+ /// are read from the third and fourth 32-bit words of the raw content; if
+ /// the content is shorter than four words they stay 0.
+ CompactUnwindEntry extractCompactUnwindEntry(const DefinedAtom *atom) {
+ CompactUnwindEntry entry;
+ for (const Reference *ref : *atom) {
+ switch (ref->offsetInAtom()) {
+ case 0:
+ // FIXME: there could legitimately be functions with multiple encoding
+ // entries. However, nothing produces them at the moment.
+ assert(ref->addend() == 0 && "unexpected offset into function");
+ entry.rangeStart = ref->target();
+ break;
+ case 0x10:
+ assert(ref->addend() == 0 && "unexpected offset into personality fn");
+ entry.personalityFunction = ref->target();
+ break;
+ case 0x18:
+ assert(ref->addend() == 0 && "unexpected offset into LSDA atom");
+ entry.lsdaLocation = ref->target();
+ break;
+ }
+ }
+ // Too short to hold the length and encoding words: return what we have.
+ if (atom->rawContent().size() < 4 * sizeof(uint32_t))
+ return entry;
+ using normalized::read32;
+ entry.rangeLength =
+ read32(atom->rawContent().data() + 2 * sizeof(uint32_t), _isBig);
+ entry.encoding =
+ read32(atom->rawContent().data() + 3 * sizeof(uint32_t), _isBig);
+ return entry;
+ }
+ /// Fill \p dwarfFrames with a function -> CFI atom mapping for every typeCFI
+ /// atom in \p mergedFile that is not a CIE and for which the arch handler
+ /// can name a target function. A later atom for the same function replaces
+ /// an earlier one.
+ void
+ collectDwarfFrameEntries(const SimpleFile &mergedFile,
+                          std::map<const Atom *, const Atom *> &dwarfFrames) {
+   for (const DefinedAtom *candidate : mergedFile.defined()) {
+     // Only non-CIE CFI atoms are of interest; the CIE test is evaluated
+     // only for CFI atoms.
+     const bool isFDE = candidate->contentType() == DefinedAtom::typeCFI &&
+                        !ArchHandler::isDwarfCIE(_isBig, candidate);
+     if (!isFDE)
+       continue;
+     const Atom *function = _archHandler.fdeTargetFunction(candidate);
+     if (function)
+       dwarfFrames[function] = candidate;
+   }
+ }
+ /// Build the final, ordered list of unwind entries: exactly one per typeCode
+ /// atom of \p mergedFile, in the order those atoms are defined. Each entry
+ /// comes from finalizeUnwindInfoEntryForAtom(), which draws on two sources:
+ /// + the input __compact_unwind entry for the function, where one exists
+ /// (with the personality index, only known now, folded in);
+ /// + otherwise, or where the entry defers to DWARF, a synthesised
+ /// reference to the function's __eh_frame atom.
+ std::vector<CompactUnwindEntry> createUnwindInfoEntries(
+     const SimpleFile &mergedFile,
+     const std::map<const Atom *, CompactUnwindEntry> &unwindLocs,
+     const std::vector<const Atom *> &personalities,
+     const std::map<const Atom *, const Atom *> &dwarfFrames) {
+   LLVM_DEBUG(llvm::dbgs() << " Creating __unwind_info entries\n");
+   std::vector<CompactUnwindEntry> result;
+   // Entry order follows the typeCode atom order because that is the order
+   // in which the code will be laid out in the output.
+   for (const DefinedAtom *candidate : mergedFile.defined()) {
+     const bool isCode = candidate->contentType() == DefinedAtom::typeCode;
+     if (isCode) {
+       result.push_back(finalizeUnwindInfoEntryForAtom(
+           candidate, unwindLocs, personalities, dwarfFrames));
+       LLVM_DEBUG(llvm::dbgs()
+                  << " Entry for " << candidate->name() << ", final encoding="
+                  << llvm::format("0x%08x", result.back().encoding)
+                  << '\n');
+     }
+   }
+   return result;
+ }
+ /// Remove unused EH frames.
+ ///
+ /// A typeCFI atom is kept when it is the ehFrame of a final unwind entry,
+ /// when it is the frame of a function that has no __compact_unwind entry, or
+ /// when it is referenced (transitively) by a kept CFI atom. Every other
+ /// typeCFI atom is removed from \p mergedFile.
+ void pruneUnusedEHFrames(
+     SimpleFile &mergedFile,
+     const std::vector<CompactUnwindEntry> &unwindInfos,
+     const std::map<const Atom *, CompactUnwindEntry> &unwindLocs,
+     const std::map<const Atom *, const Atom *> &dwarfFrames) {
+   // Worklist of all 'used' FDEs.
+   std::vector<const DefinedAtom *> usedDwarfWorklist;
+   // We have to check two conditions when building the worklist:
+   // (1) EH frames used by compact unwind entries.
+   for (auto &entry : unwindInfos)
+     if (entry.ehFrame)
+       usedDwarfWorklist.push_back(cast<DefinedAtom>(entry.ehFrame));
+   // (2) EH frames that reference functions with no corresponding compact
+   // unwind info.
+   for (auto &entry : dwarfFrames)
+     if (!unwindLocs.count(entry.first))
+       usedDwarfWorklist.push_back(cast<DefinedAtom>(entry.second));
+   // Add all transitively referenced CFI atoms by processing the worklist.
+   std::set<const Atom *> usedDwarfFrames;
+   while (!usedDwarfWorklist.empty()) {
+     const DefinedAtom *cfiAtom = usedDwarfWorklist.back();
+     usedDwarfWorklist.pop_back();
+     // An atom reached a second time has already had its references scanned.
+     if (!usedDwarfFrames.insert(cfiAtom).second)
+       continue;
+     for (const auto *ref : *cfiAtom) {
+       // dyn_cast yields null when the target is not a DefinedAtom; such
+       // targets cannot be CFI atoms, so skip them instead of dereferencing.
+       const DefinedAtom *cfiTarget = dyn_cast<DefinedAtom>(ref->target());
+       if (cfiTarget && cfiTarget->contentType() == DefinedAtom::typeCFI)
+         usedDwarfWorklist.push_back(cfiTarget);
+     }
+   }
+   // Finally, delete all unreferenced CFI atoms.
+   mergedFile.removeDefinedAtomsIf([&](const DefinedAtom *atom) {
+     return atom->contentType() == DefinedAtom::typeCFI &&
+            !usedDwarfFrames.count(atom);
+   });
+ }
+ /// Produce the final unwind entry for \p function.
+ /// Starts from the function's __compact_unwind entry if \p unwindLocs has
+ /// one, otherwise from an empty entry covering the whole atom. If the
+ /// encoding is 0 or the DWARF type and \p dwarfFrames has a frame for the
+ /// function, the entry is switched to the DWARF type and linked to that
+ /// frame. Finally the 1-based personality index (0 = none) is OR-ed in at
+ /// bit 28 and bit 30 is set when the entry has an LSDA.
+ CompactUnwindEntry finalizeUnwindInfoEntryForAtom(
+ const DefinedAtom *function,
+ const std::map<const Atom *, CompactUnwindEntry> &unwindLocs,
+ const std::vector<const Atom *> &personalities,
+ const std::map<const Atom *, const Atom *> &dwarfFrames) {
+ auto unwindLoc = unwindLocs.find(function);
+ CompactUnwindEntry entry;
+ if (unwindLoc == unwindLocs.end()) {
+ // Default entry has correct encoding (0 => no unwind), but we need to
+ // synthesise the function.
+ entry.rangeStart = function;
+ entry.rangeLength = function->size();
+ } else
+ entry = unwindLoc->second;
+ // If there's no __compact_unwind entry, or it explicitly says to use
+ // __eh_frame, we need to try and fill in the correct DWARF atom.
+ if (entry.encoding == _archHandler.dwarfCompactUnwindType() ||
+ entry.encoding == 0) {
+ auto dwarfFrame = dwarfFrames.find(function);
+ if (dwarfFrame != dwarfFrames.end()) {
+ entry.encoding = _archHandler.dwarfCompactUnwindType();
+ entry.ehFrame = dwarfFrame->second;
+ }
+ }
+ // Position in the personalities list + 1, or 0 when the entry's
+ // personality (possibly null) is not in the list.
+ auto personality = std::find(personalities.begin(), personalities.end(),
+ entry.personalityFunction);
+ uint32_t personalityIdx = personality == personalities.end()
+ ? 0
+ : personality - personalities.begin() + 1;
+ // FIXME: We should also use DWARF when there isn't enough room for the
+ // personality function in the compact encoding.
+ // Only indexes 0..3 fit below bit 30, which is used for the LSDA flag.
+ assert(personalityIdx < 4 && "too many personality functions");
+ entry.encoding |= personalityIdx << 28;
+ if (entry.lsdaLocation)
+ entry.encoding |= 1U << 30;
+ return entry;
+ }
+ // Declaration order matters: the constructor initializes _archHandler and
+ // _file from _ctx.
+ const MachOLinkingContext &_ctx;
+ mach_o::ArchHandler &_archHandler;
+ // File that owns the UnwindInfoAtom created by perform().
+ MachOFile &_file;
+ bool _isBig;
+/// Append a CompactUnwindPass for \p ctx to \p pm. Callers are expected to
+/// check ctx.needsCompactUnwindPass() first; this only asserts it.
+void addCompactUnwindPass(PassManager &pm, const MachOLinkingContext &ctx) {
+ assert(ctx.needsCompactUnwindPass());
+ pm.add(llvm::make_unique<CompactUnwindPass>(ctx));
+} // end namespace mach_o
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/DebugInfo.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/DebugInfo.h
new file mode 100644
index 000000000000..28e41bf4263c
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/DebugInfo.h
@@ -0,0 +1,106 @@
+//===- lib/ReaderWriter/MachO/DebugInfo.h -----------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Core/Atom.h"
+#include <vector>
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+namespace lld {
+namespace mach_o {
+/// Base class for per-file debug information. The Kind tag identifies the
+/// concrete subclass and is what the subclasses' classof() tests.
+class DebugInfo {
+ enum class Kind {
+ Dwarf,
+ Stabs
+ };
+ Kind kind() const { return _kind; }
+ /// Take ownership of \p allocator; it is kept alive as long as this object.
+ void setAllocator(std::unique_ptr<llvm::BumpPtrAllocator> allocator) {
+ _allocator = std::move(allocator);
+ }
+ DebugInfo(Kind kind) : _kind(kind) {}
+ // Null until setAllocator() is called.
+ std::unique_ptr<llvm::BumpPtrAllocator> _allocator;
+ Kind _kind;
+/// Name and path of a translation unit's source, held as StringRefs (the
+/// underlying strings are not owned by this struct).
+struct TranslationUnitSource {
+ StringRef name;
+ StringRef path;
+/// DebugInfo of kind Dwarf: carries the translation unit source description.
+class DwarfDebugInfo : public DebugInfo {
+ DwarfDebugInfo(TranslationUnitSource tu)
+ : DebugInfo(Kind::Dwarf), _tu(std::move(tu)) {}
+ /// True when \p di is of kind Dwarf.
+ static inline bool classof(const DebugInfo *di) {
+ return di->kind() == Kind::Dwarf;
+ }
+ const TranslationUnitSource &translationUnitSource() const { return _tu; }
+ TranslationUnitSource _tu;
+/// One stabs debug record: the atom it is attached to plus the type, other,
+/// desc and value fields and the record's string.
+struct Stab {
+ Stab(const Atom* atom, uint8_t type, uint8_t other, uint16_t desc,
+ uint32_t value, StringRef str)
+ : atom(atom), type(type), other(other), desc(desc), value(value),
+ str(str) {}
+ const class Atom* atom;
+ uint8_t type;
+ uint8_t other;
+ uint16_t desc;
+ uint32_t value;
+ // Not owned; a StringRef into storage kept elsewhere.
+ StringRef str;
+/// Write a one-line, human-readable dump of \p s to \p os and return \p os.
+/// The stab is taken by const reference, so const objects (for example the
+/// elements of a const StabsList) and temporaries can be streamed as well.
+inline raw_ostream& operator<<(raw_ostream &os, const Stab &s) {
+  // type and other are uint8_t; widen them so they print as numbers rather
+  // than as characters.
+  os << "Stab -- atom: " << llvm::format("%p", s.atom) << ", type: " << (uint32_t)s.type
+     << ", other: " << (uint32_t)s.other << ", desc: " << s.desc << ", value: " << s.value
+     << ", str: '" << s.str << "'";
+  return os;
+/// DebugInfo of kind Stabs: owns the list of Stab records.
+class StabsDebugInfo : public DebugInfo {
+ typedef std::vector<Stab> StabsList;
+ StabsDebugInfo(StabsList stabs)
+ : DebugInfo(Kind::Stabs), _stabs(std::move(stabs)) {}
+ /// True when \p di is of kind Stabs.
+ static inline bool classof(const DebugInfo *di) {
+ return di->kind() == Kind::Stabs;
+ }
+ const StabsList& stabs() const { return _stabs; }
+ StabsList _stabs;
+} // end namespace mach_o
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ExecutableAtoms.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ExecutableAtoms.h
new file mode 100644
index 000000000000..ab14e6d8c3e7
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ExecutableAtoms.h
@@ -0,0 +1,155 @@
+//===- lib/ReaderWriter/MachO/ExecutableAtoms.h ---------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Atoms.h"
+#include "File.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "lld/Core/DefinedAtom.h"
+#include "lld/Core/File.h"
+#include "lld/Core/LinkingContext.h"
+#include "lld/Core/Reference.h"
+#include "lld/Core/Simple.h"
+#include "lld/Core/UndefinedAtom.h"
+#include "lld/ReaderWriter/MachOLinkingContext.h"
+namespace lld {
+namespace mach_o {
+// CEntryFile adds an UndefinedAtom for the entry symbol (the name returned by
+// context.entrySymbolName(), e.g. "_main") so that the Resolving phase will
+// fail if that symbol is undefined.
+class CEntryFile : public SimpleFile {
+ CEntryFile(const MachOLinkingContext &context)
+ : SimpleFile("C entry", kindCEntryObject),
+ _undefMain(*this, context.entrySymbolName()) {
+ this->addAtom(_undefMain);
+ }
+ // The undefined atom lives inside the file object itself.
+ SimpleUndefinedAtom _undefMain;
+// StubHelperFile adds an UndefinedAtom for the stub binder symbol (the name
+// returned by context.binderSymbolName(), e.g. "dyld_stub_binder") so that
+// the Resolving phase will fail if that symbol is undefined.
+class StubHelperFile : public SimpleFile {
+ StubHelperFile(const MachOLinkingContext &context)
+ : SimpleFile("stub runtime", kindStubHelperObject),
+ _undefBinder(*this, context.binderSymbolName()) {
+ this->addAtom(_undefBinder);
+ }
+ // The undefined atom lives inside the file object itself.
+ SimpleUndefinedAtom _undefBinder;
+// MachHeaderAliasFile lazily instantiates the magic symbols that mark the start
+// of the mach_header for final linked images.
+// Depending on the output file type it defines up to two atoms: a
+// typeMachHeader atom (the __mh_*_header symbol) and a typeDSOHandle atom
+// (___dso_handle).
+class MachHeaderAliasFile : public SimpleFile {
+ MachHeaderAliasFile(const MachOLinkingContext &context)
+ : SimpleFile("mach_header symbols", kindHeaderObject) {
+ // Either name may stay empty, in which case that atom is not created.
+ StringRef machHeaderSymbolName;
+ DefinedAtom::Scope symbolScope = DefinedAtom::scopeLinkageUnit;
+ StringRef dsoHandleName;
+ switch (context.outputMachOType()) {
+ case llvm::MachO::MH_OBJECT:
+ // Header symbol only; no ___dso_handle.
+ machHeaderSymbolName = "__mh_object_header";
+ break;
+ case llvm::MachO::MH_EXECUTE:
+ // The only file type whose header symbol is given global scope.
+ machHeaderSymbolName = "__mh_execute_header";
+ symbolScope = DefinedAtom::scopeGlobal;
+ dsoHandleName = "___dso_handle";
+ break;
+ case llvm::MachO::MH_FVMLIB:
+ llvm_unreachable("no mach_header symbol for file type");
+ case llvm::MachO::MH_CORE:
+ llvm_unreachable("no mach_header symbol for file type");
+ case llvm::MachO::MH_PRELOAD:
+ llvm_unreachable("no mach_header symbol for file type");
+ case llvm::MachO::MH_DYLIB:
+ machHeaderSymbolName = "__mh_dylib_header";
+ dsoHandleName = "___dso_handle";
+ break;
+ case llvm::MachO::MH_DYLINKER:
+ machHeaderSymbolName = "__mh_dylinker_header";
+ dsoHandleName = "___dso_handle";
+ break;
+ case llvm::MachO::MH_BUNDLE:
+ machHeaderSymbolName = "__mh_bundle_header";
+ dsoHandleName = "___dso_handle";
+ break;
+ case llvm::MachO::MH_DYLIB_STUB:
+ llvm_unreachable("no mach_header symbol for file type");
+ case llvm::MachO::MH_DSYM:
+ llvm_unreachable("no mach_header symbol for file type");
+ case llvm::MachO::MH_KEXT_BUNDLE:
+ // ___dso_handle only; no header symbol.
+ dsoHandleName = "___dso_handle";
+ break;
+ }
+ // Header atom: empty content, alignment value 4096, never dead-stripped.
+ if (!machHeaderSymbolName.empty())
+ _definedAtoms.push_back(new (allocator()) MachODefinedAtom(
+ *this, machHeaderSymbolName, symbolScope,
+ DefinedAtom::typeMachHeader, DefinedAtom::mergeNo, false,
+ true /* noDeadStrip */,
+ ArrayRef<uint8_t>(), DefinedAtom::Alignment(4096)));
+ // DSO handle atom: always linkage-unit scope, empty content, alignment
+ // value 1, never dead-stripped.
+ if (!dsoHandleName.empty())
+ _definedAtoms.push_back(new (allocator()) MachODefinedAtom(
+ *this, dsoHandleName, DefinedAtom::scopeLinkageUnit,
+ DefinedAtom::typeDSOHandle, DefinedAtom::mergeNo, false,
+ true /* noDeadStrip */,
+ ArrayRef<uint8_t>(), DefinedAtom::Alignment(1)));
+ }
+ /// The atoms created by the constructor.
+ const AtomRange<DefinedAtom> defined() const override {
+ return _definedAtoms;
+ }
+ // The remaining atom kinds are served from the _no*Atoms containers, which
+ // this class never adds to.
+ const AtomRange<UndefinedAtom> undefined() const override {
+ return _noUndefinedAtoms;
+ }
+ const AtomRange<SharedLibraryAtom> sharedLibrary() const override {
+ return _noSharedLibraryAtoms;
+ }
+ const AtomRange<AbsoluteAtom> absolute() const override {
+ return _noAbsoluteAtoms;
+ }
+ /// Clear all four atom containers.
+ void clearAtoms() override {
+ _definedAtoms.clear();
+ _noUndefinedAtoms.clear();
+ _noSharedLibraryAtoms.clear();
+ _noAbsoluteAtoms.clear();
+ }
+ mutable AtomVector<DefinedAtom> _definedAtoms;
+} // namespace mach_o
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/File.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/File.h
new file mode 100644
index 000000000000..2bdd6342b477
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/File.h
@@ -0,0 +1,400 @@
+//===- lib/ReaderWriter/MachO/File.h ----------------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Atoms.h"
+#include "DebugInfo.h"
+#include "MachONormalizedFile.h"
+#include "lld/Core/SharedLibraryFile.h"
+#include "lld/Core/Simple.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/Format.h"
+#include <unordered_map>
+namespace lld {
+namespace mach_o {
+using lld::mach_o::normalized::Section;
+class MachOFile : public SimpleFile {
+ /// Real file constructor - for on-disk files.
+ /// Takes ownership of \p mb; the buffer's identifier becomes the file path.
+ /// \p ctx is stored and used later by doParse() to obtain the architecture.
+ MachOFile(std::unique_ptr<MemoryBuffer> mb, MachOLinkingContext *ctx)
+ : SimpleFile(mb->getBufferIdentifier(), File::kindMachObject),
+ _mb(std::move(mb)), _ctx(ctx) {}
+ /// Dummy file constructor - for virtual files.
+ /// No memory buffer or linking context is supplied here, so doParse(),
+ /// which uses both, is not meaningful for a file built this way.
+ MachOFile(StringRef path)
+ : SimpleFile(path, File::kindMachObject) {}
+ /// Creates a MachODefinedAtom covering the bytes
+ /// [sectionOffset, sectionOffset + contentSize) of \p inSection and
+ /// registers it with this file under that section.
+ ///
+ /// When \p copyRefs is true the name and the content bytes are duplicated
+ /// into this file's allocator; otherwise the atom keeps referring to the
+ /// storage passed in by the caller.
+ void addDefinedAtom(StringRef name, Atom::Scope scope,
+                     DefinedAtom::ContentType type, DefinedAtom::Merge merge,
+                     uint64_t sectionOffset, uint64_t contentSize, bool thumb,
+                     bool noDeadStrip, bool copyRefs,
+                     const Section *inSection) {
+   assert(sectionOffset+contentSize <= inSection->content.size());
+   ArrayRef<uint8_t> atomBytes =
+       inSection->content.slice(sectionOffset, contentSize);
+   if (copyRefs) {
+     // Give the atom a name and content that are owned by this file.
+     name = name.copy(allocator());
+     atomBytes = atomBytes.copy(allocator());
+   }
+   // The atom inherits the section alignment, displaced by where it starts
+   // inside the section.
+   DefinedAtom::Alignment atomAlign(inSection->alignment,
+                                    sectionOffset % inSection->alignment);
+   MachODefinedAtom *newAtom = new (allocator())
+       MachODefinedAtom(*this, name, scope, type, merge, thumb, noDeadStrip,
+                        atomBytes, atomAlign);
+   addAtomForSection(inSection, newAtom, sectionOffset);
+ }
+ /// Same as addDefinedAtom(), but creates a MachODefinedCustomSectionAtom
+ /// that additionally records \p sectionName.
+ ///
+ /// When \p copyRefs is true the name, the content bytes and the section
+ /// name are all duplicated into this file's allocator.
+ void addDefinedAtomInCustomSection(StringRef name, Atom::Scope scope,
+                     DefinedAtom::ContentType type, DefinedAtom::Merge merge,
+                     bool thumb, bool noDeadStrip, uint64_t sectionOffset,
+                     uint64_t contentSize, StringRef sectionName,
+                     bool copyRefs, const Section *inSection) {
+   assert(sectionOffset+contentSize <= inSection->content.size());
+   ArrayRef<uint8_t> atomBytes =
+       inSection->content.slice(sectionOffset, contentSize);
+   if (copyRefs) {
+     // Give the atom strings and content that are owned by this file.
+     name = name.copy(allocator());
+     atomBytes = atomBytes.copy(allocator());
+     sectionName = sectionName.copy(allocator());
+   }
+   // The atom inherits the section alignment, displaced by where it starts
+   // inside the section.
+   DefinedAtom::Alignment atomAlign(inSection->alignment,
+                                    sectionOffset % inSection->alignment);
+   MachODefinedCustomSectionAtom *newAtom = new (allocator())
+       MachODefinedCustomSectionAtom(*this, name, scope, type, merge, thumb,
+                                     noDeadStrip, atomBytes, sectionName,
+                                     atomAlign);
+   addAtomForSection(inSection, newAtom, sectionOffset);
+ }
+ /// Creates a zero-fill MachODefinedAtom of \p size bytes at
+ /// \p sectionOffset in \p inSection and registers it with this file.
+ ///
+ /// The atom's content type is derived from the section type: S_ZEROFILL
+ /// maps to typeZeroFill and S_THREAD_LOCAL_ZEROFILL to
+ /// typeTLVInitialZeroFill; any other section type is treated as
+ /// unreachable.
+ void addZeroFillDefinedAtom(StringRef name, Atom::Scope scope,
+                             uint64_t sectionOffset, uint64_t size,
+                             bool noDeadStrip, bool copyRefs,
+                             const Section *inSection) {
+   // Only the name is copied here: no content bytes are passed in.
+   if (copyRefs)
+     name = name.copy(allocator());
+   DefinedAtom::Alignment atomAlign(inSection->alignment,
+                                    sectionOffset % inSection->alignment);
+   DefinedAtom::ContentType zeroFillType = DefinedAtom::typeUnknown;
+   switch (inSection->type) {
+   case llvm::MachO::S_ZEROFILL:
+     zeroFillType = DefinedAtom::typeZeroFill;
+     break;
+   case llvm::MachO::S_THREAD_LOCAL_ZEROFILL:
+     zeroFillType = DefinedAtom::typeTLVInitialZeroFill;
+     break;
+   default:
+     llvm_unreachable("Unrecognized zero-fill section");
+   }
+   MachODefinedAtom *newAtom = new (allocator())
+       MachODefinedAtom(*this, name, scope, zeroFillType, size, noDeadStrip,
+                        atomAlign);
+   addAtomForSection(inSection, newAtom, sectionOffset);
+ }
+ /// Creates a SimpleUndefinedAtom called \p name, adds it to this file and
+ /// records it in the name lookup table used by findUndefAtom().
+ void addUndefinedAtom(StringRef name, bool copyRefs) {
+   // Optionally make the name owned by this file.
+   if (copyRefs)
+     name = name.copy(allocator());
+   SimpleUndefinedAtom *undef =
+       new (allocator()) SimpleUndefinedAtom(*this, name);
+   addAtom(*undef);
+   _undefAtoms[name] = undef;
+ }
+ /// Creates a MachOTentativeDefAtom called \p name with the given scope,
+ /// size and alignment, and adds it to this file.
+ ///
+ /// The atom is also recorded in _undefAtoms, so findUndefAtom() returns
+ /// tentative definitions as well as undefined atoms.
+ void addTentativeDefAtom(StringRef name, Atom::Scope scope, uint64_t size,
+                          DefinedAtom::Alignment align, bool copyRefs) {
+   // Optionally make the name owned by this file.
+   if (copyRefs)
+     name = name.copy(allocator());
+   MachOTentativeDefAtom *tentative = new (allocator())
+       MachOTentativeDefAtom(*this, name, scope, size, align);
+   addAtom(*tentative);
+   _undefAtoms[name] = tentative;
+ }
+ /// Searches this file for the atom from \p section whose content ends
+ /// after \p offsetInSect, i.e. the first atom (in section order) that can
+ /// cover that offset. Returns nullptr if the section has no atoms recorded
+ /// or no atom extends past the offset.
+ ///
+ /// If \p foundOffsetAtom is non-null it receives the distance from the
+ /// start of the returned atom to \p offsetInSect.
+ MachODefinedAtom *findAtomCoveringAddress(const Section &section,
+                                           uint64_t offsetInSect,
+                                           uint32_t *foundOffsetAtom=nullptr) {
+   auto sectionEntry = _sectionAtoms.find(&section);
+   if (sectionEntry == _sectionAtoms.end())
+     return nullptr;
+   const auto &atomsInSection = sectionEntry->second;
+   assert(offsetInSect < section.content.size());
+   // True iff the atom's byte range lies entirely before the target offset.
+   auto endsAtOrBefore = [](const SectionOffsetAndAtom &entry,
+                            uint64_t target) -> bool {
+     uint64_t entryEnd = entry.offset + entry.atom->rawContent().size();
+     return entryEnd <= target;
+   };
+   // The per-section vector is already sorted, so a binary search suffices.
+   auto hit = std::lower_bound(atomsInSection.begin(), atomsInSection.end(),
+                               offsetInSect, endsAtOrBefore);
+   if (hit == atomsInSection.end())
+     return nullptr;
+   if (foundOffsetAtom)
+     *foundOffsetAtom = offsetInSect - hit->offset;
+   return hit->atom;
+ }
+ /// Looks up \p name in the table filled by addUndefinedAtom() and
+ /// addTentativeDefAtom(). Returns nullptr if no such atom is found.
+ const lld::Atom *findUndefAtom(StringRef name) {
+   auto found = _undefAtoms.find(name);
+   return found == _undefAtoms.end() ? nullptr : found->second;
+ }
+ /// Callback type invoked once per defined atom by eachDefinedAtom().
+ typedef std::function<void (MachODefinedAtom* atom)> DefinedAtomVisitor;
+ /// Calls \p visitor for every atom recorded in any section of this file.
+ void eachDefinedAtom(DefinedAtomVisitor visitor) {
+   for (auto &sectionEntry : _sectionAtoms)
+     for (auto &item : sectionEntry.second)
+       visitor(item.atom);
+ }
+ /// Callback type receiving an atom and its offset within the section.
+ typedef std::function<void(MachODefinedAtom *atom, uint64_t offset)>
+     SectionAtomVisitor;
+ /// Calls \p visitor for each atom recorded for \p section, in the order
+ /// they were added. Does nothing if the section has no atoms.
+ void eachAtomInSection(const Section &section, SectionAtomVisitor visitor) {
+   auto sectionEntry = _sectionAtoms.find(&section);
+   if (sectionEntry != _sectionAtoms.end()) {
+     // Iterate over a copy of the list; presumably so the visitor may add
+     // atoms to this file while iterating -- TODO confirm.
+     auto snapshot = sectionEntry->second;
+     for (auto &item : snapshot)
+       visitor(item.atom, item.offset);
+   }
+ }
+ // Plain accessors for per-file properties; each getter returns the value
+ // last stored by the matching setter (or the member's default).
+ /// Architecture of this object file.
+ MachOLinkingContext::Arch arch() const { return _arch; }
+ void setArch(MachOLinkingContext::Arch arch) { _arch = arch; }
+ /// Target OS of this object file.
+ MachOLinkingContext::OS OS() const { return _os; }
+ void setOS(MachOLinkingContext::OS os) { _os = os; }
+ /// Objective-C constraint recorded for this file.
+ MachOLinkingContext::ObjCConstraint objcConstraint() const {
+ return _objcConstraint;
+ }
+ void setObjcConstraint(MachOLinkingContext::ObjCConstraint v) {
+ _objcConstraint = v;
+ }
+ /// Minimum OS version recorded for this file (0 by default).
+ uint32_t minVersion() const { return _minVersion; }
+ void setMinVersion(uint32_t v) { _minVersion = v; }
+ /// Load command kind that carried the minimum version.
+ LoadCommandType minVersionLoadCommandKind() const {
+ return _minVersionLoadCommandKind;
+ }
+ void setMinVersionLoadCommandKind(LoadCommandType v) {
+ _minVersionLoadCommandKind = v;
+ }
+ /// Swift version recorded for this file (0 by default).
+ uint32_t swiftVersion() const { return _swiftVersion; }
+ void setSwiftVersion(uint32_t v) { _swiftVersion = v; }
+ /// True iff the MH_SUBSECTIONS_VIA_SYMBOLS bit is set in the file flags.
+ bool subsectionsViaSymbols() const {
+ return _flags & llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS;
+ }
+ void setFlags(normalized::FileFlags v) { _flags = v; }
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ /// a File is a MachOFile iff its kind is File::kindMachObject.
+ static inline bool classof(const File *F) {
+ return F->kind() == File::kindMachObject;
+ }
+ /// Takes ownership of \p debugInfo, replacing any previously stored value.
+ void setDebugInfo(std::unique_ptr<DebugInfo> debugInfo) {
+ _debugInfo = std::move(debugInfo);
+ }
+ /// Returns the stored debug info without transferring ownership; may be
+ /// null if none was set.
+ DebugInfo* debugInfo() const { return _debugInfo.get(); }
+ /// Transfers ownership of the stored debug info to the caller; afterwards
+ /// debugInfo() returns null.
+ std::unique_ptr<DebugInfo> takeDebugInfo() { return std::move(_debugInfo); }
+ /// Parses the owned memory buffer in two steps: binary -> normalized
+ /// mach-o, then normalized mach-o -> atoms added to this file.
+ /// Any failure is converted to a std::error_code and returned; success
+ /// returns a default-constructed std::error_code.
+ std::error_code doParse() override {
+   // Step 1: read the binary into its normalized form.
+   auto parsed = normalized::readBinary(_mb, _ctx->arch());
+   if (auto readErr = parsed.takeError())
+     return llvm::errorToErrorCode(std::move(readErr));
+   // Step 2: populate this file with atoms from the normalized form.
+   if (auto atomErr =
+           normalized::normalizedObjectToAtoms(this, **parsed, false))
+     return llvm::errorToErrorCode(std::move(atomErr));
+   return std::error_code();
+ }
+ /// An atom paired with the offset of its first byte within its section.
+ struct SectionOffsetAndAtom {
+   uint64_t offset;
+   MachODefinedAtom *atom;
+ };
+ /// Appends \p atom to the list kept for \p inSection (remembering
+ /// \p sectionOffset) and then adds it to this file.
+ void addAtomForSection(const Section *inSection, MachODefinedAtom* atom,
+                        uint64_t sectionOffset) {
+   _sectionAtoms[inSection].push_back({sectionOffset, atom});
+   addAtom(*atom);
+ }
+ /// Maps a normalized section to the atoms created from it, each paired
+ /// with its offset in that section.
+ typedef llvm::DenseMap<const normalized::Section *,
+                        std::vector<SectionOffsetAndAtom>> SectionToAtoms;
+ /// Maps a symbol name to the atom registered under it.
+ typedef llvm::StringMap<const lld::Atom *> NameToAtom;
+ // Backing buffer; only set by the on-disk constructor.
+ std::unique_ptr<MemoryBuffer> _mb;
+ // Linking context; only set by the on-disk constructor. Initialized to
+ // null so a file built from a bare path never holds an indeterminate
+ // pointer.
+ MachOLinkingContext *_ctx = nullptr;
+ SectionToAtoms _sectionAtoms;
+ NameToAtom _undefAtoms;
+ MachOLinkingContext::Arch _arch = MachOLinkingContext::arch_unknown;
+ MachOLinkingContext::OS _os = MachOLinkingContext::OS::unknown;
+ uint32_t _minVersion = 0;
+ LoadCommandType _minVersionLoadCommandKind = (LoadCommandType)0;
+ MachOLinkingContext::ObjCConstraint _objcConstraint =
+     MachOLinkingContext::objc_unknown;
+ uint32_t _swiftVersion = 0;
+ // Defaults to "subsections via symbols" until setFlags() says otherwise.
+ normalized::FileFlags _flags = llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS;
+ std::unique_ptr<DebugInfo> _debugInfo;
+class MachODylibFile : public SharedLibraryFile {
+ /// Constructor for a dylib backed by a memory buffer. Takes ownership of
+ /// \p mb (whose identifier becomes the file path) and stores \p ctx for
+ /// use by doParse().
+ MachODylibFile(std::unique_ptr<MemoryBuffer> mb, MachOLinkingContext *ctx)
+ : SharedLibraryFile(mb->getBufferIdentifier()),
+ _mb(std::move(mb)), _ctx(ctx) {}
+ /// Constructor taking only a path; no buffer or linking context is
+ /// supplied, so doParse() is not meaningful for such a file.
+ MachODylibFile(StringRef path) : SharedLibraryFile(path) {}
+ /// Returns the shared-library atom for \p name, or null if this dylib
+ /// neither exports nor re-exports it. Forwards to the two-argument
+ /// overload with this dylib's own install name.
+ OwningAtomPtr<SharedLibraryAtom> exports(StringRef name) const override {
+ // Pass down _installName so that if this requested symbol
+ // is re-exported through this dylib, the SharedLibraryAtom's loadName()
+ // is this dylib installName and not the implementation dylib's.
+ // NOTE: isData is not needed for dylibs (it matters for static libs).
+ // NOTE(review): there is no 'isData' parameter on this overload; the
+ // note above looks stale -- confirm and remove.
+ return exports(name, _installName);
+ }
+ /// Records that this dylib exports \p name, remembering whether it is a
+ /// weak definition. The corresponding SharedLibraryAtom is created lazily
+ /// by exports() (since most symbols are not used). An existing entry for
+ /// the same name is overwritten.
+ void addExportedSymbol(StringRef name, bool weakDef, bool copyRefs) {
+   // Optionally make the name owned by this file.
+   if (copyRefs)
+     name = name.copy(allocator());
+   _nameToAtom[name] = AtomAndFlags(weakDef);
+ }
+ /// Records \p dylibPath as a re-exported dylib. Its file pointer stays
+ /// null until loadReExportedDylibs() resolves it. The path is stored as a
+ /// StringRef, so the caller's string must outlive this file.
+ void addReExportedDylib(StringRef dylibPath) {
+ _reExportedDylibs.emplace_back(dylibPath);
+ }
+ // Plain accessors for the dylib's install name and version numbers; each
+ // getter returns whatever the matching setter last stored.
+ StringRef installName() const { return _installName; }
+ uint32_t currentVersion() { return _currentVersion; }
+ uint32_t compatVersion() { return _compatVersion; }
+ // The name is stored as a StringRef, i.e. not copied.
+ void setInstallName(StringRef name) { _installName = name; }
+ void setCompatVersion(uint32_t version) { _compatVersion = version; }
+ void setCurrentVersion(uint32_t version) { _currentVersion = version; }
+ /// Callback mapping a dylib path to its MachODylibFile.
+ typedef std::function<MachODylibFile *(StringRef)> FindDylib;
+ /// Resolves every recorded re-exported dylib path through \p find and
+ /// stores the result. exports() asserts that each stored file is non-null,
+ /// so \p find is expected to succeed for every path.
+ void loadReExportedDylibs(FindDylib find) {
+ for (ReExportedDylib &entry : _reExportedDylibs) {
+ entry.file = find(entry.path);
+ }
+ }
+ /// The DSO name of a dylib is its install name.
+ StringRef getDSOName() const override { return _installName; }
+ /// Parses the owned memory buffer in two steps: binary -> normalized
+ /// mach-o, then normalized mach-o -> dylib contents added to this file.
+ /// Any failure is converted to a std::error_code and returned; success
+ /// returns a default-constructed std::error_code.
+ std::error_code doParse() override {
+   // Step 1: read the binary into its normalized form.
+   auto parsed = normalized::readBinary(_mb, _ctx->arch());
+   if (auto readErr = parsed.takeError())
+     return llvm::errorToErrorCode(std::move(readErr));
+   // Step 2: populate this file from the normalized form.
+   if (auto atomErr =
+           normalized::normalizedDylibToAtoms(this, **parsed, false))
+     return llvm::errorToErrorCode(std::move(atomErr));
+   return std::error_code();
+ }
+ /// Returns a shared-library atom for \p name whose load name is
+ /// \p installName, or null if the symbol is neither exported by this
+ /// dylib nor by any of its re-exported dylibs (searched in the order they
+ /// were added).
+ OwningAtomPtr<SharedLibraryAtom> exports(StringRef name,
+                                          StringRef installName) const {
+   auto found = _nameToAtom.find(name);
+   if (found == _nameToAtom.end()) {
+     // Not implemented directly by this dylib: check whether the symbol is
+     // implemented in some re-exported dylib.
+     for (const ReExportedDylib &reExport : _reExportedDylibs) {
+       assert(reExport.file);
+       auto viaReExport = reExport.file->exports(name, installName);
+       if (viaReExport.get())
+         return viaReExport;
+     }
+     // Symbol not exported or re-exported by this dylib.
+     return nullptr;
+   }
+   // The requested symbol is directly implemented by this dylib.
+   // FIXME: Make this map a set and only used in assert builds.
+   // Note, its safe to assert here as the resolver is the only client of
+   // this API and it only requests exports for undefined symbols.
+   // If we return from here we are no longer undefined so we should never
+   // get here again.
+   AtomAndFlags &exportInfo = found->second;
+   assert(!exportInfo.atom && "Duplicate shared library export");
+   auto *created = new (allocator())
+       MachOSharedLibraryAtom(*this, name, installName, exportInfo.weakDef);
+   exportInfo.atom = created;
+   return created;
+ }
+ /// A dylib re-exported by this one: its path, plus the loaded file once
+ /// loadReExportedDylibs() has resolved it (null until then).
+ struct ReExportedDylib {
+ ReExportedDylib(StringRef p) : path(p), file(nullptr) { }
+ StringRef path;
+ MachODylibFile *file;
+ };
+ /// Per-export bookkeeping: the lazily created atom (null until exports()
+ /// creates it) and whether the export is a weak definition.
+ struct AtomAndFlags {
+ AtomAndFlags() : atom(nullptr), weakDef(false) { }
+ AtomAndFlags(bool weak) : atom(nullptr), weakDef(weak) { }
+ const SharedLibraryAtom *atom;
+ bool weakDef;
+ };
+ // Backing buffer; only set by the buffer-taking constructor.
+ std::unique_ptr<MemoryBuffer> _mb;
+ // Linking context; only set by the buffer-taking constructor. Initialized
+ // to null so a file built from a bare path never holds an indeterminate
+ // pointer.
+ MachOLinkingContext *_ctx = nullptr;
+ StringRef _installName;
+ // Versions default to 0 so currentVersion()/compatVersion() return a
+ // defined value even if the setters were never called.
+ uint32_t _currentVersion = 0;
+ uint32_t _compatVersion = 0;
+ std::vector<ReExportedDylib> _reExportedDylibs;
+ // Mutable because the const exports() fills in atoms lazily.
+ mutable std::unordered_map<StringRef, AtomAndFlags> _nameToAtom;
+} // end namespace mach_o
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/FlatNamespaceFile.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/FlatNamespaceFile.h
new file mode 100644
index 000000000000..7ccd4f19f834
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/FlatNamespaceFile.h
@@ -0,0 +1,63 @@
+//===- lib/ReaderWriter/MachO/FlatNamespaceFile.h -------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "Atoms.h"
+#include "lld/Core/SharedLibraryFile.h"
+#include "lld/ReaderWriter/MachOLinkingContext.h"
+#include "llvm/Support/Debug.h"
+namespace lld {
+namespace mach_o {
+// A FlatNamespaceFile instance may be added as a resolution source of last
+// resort, depending on how -flat_namespace and -undefined are set.
+// Its exports() never fails: it creates a shared-library atom for whatever
+// name is requested.
+class FlatNamespaceFile : public SharedLibraryFile {
+ // The context argument is not used by this constructor. Note that the
+ // file path is "flat namespace" while getDSOName() returns
+ // "flat-namespace".
+ FlatNamespaceFile(const MachOLinkingContext &context)
+ : SharedLibraryFile("flat namespace") { }
+ // Unconditionally creates a non-weak MachOSharedLibraryAtom for 'name'
+ // whose load name is this file's DSO name.
+ OwningAtomPtr<SharedLibraryAtom> exports(StringRef name) const override {
+ return new (allocator()) MachOSharedLibraryAtom(*this, name, getDSOName(),
+ false);
+ }
+ StringRef getDSOName() const override { return "flat-namespace"; }
+ // The four atom-range accessors return the _no*Atoms placeholder members
+ // (presumably always-empty ranges from the base class -- TODO confirm).
+ const AtomRange<DefinedAtom> defined() const override {
+ return _noDefinedAtoms;
+ }
+ const AtomRange<UndefinedAtom> undefined() const override {
+ return _noUndefinedAtoms;
+ }
+ const AtomRange<SharedLibraryAtom> sharedLibrary() const override {
+ return _noSharedLibraryAtoms;
+ }
+ const AtomRange<AbsoluteAtom> absolute() const override {
+ return _noAbsoluteAtoms;
+ }
+ // Clears all four placeholder atom ranges.
+ void clearAtoms() override {
+ _noDefinedAtoms.clear();
+ _noUndefinedAtoms.clear();
+ _noSharedLibraryAtoms.clear();
+ _noAbsoluteAtoms.clear();
+ }
+} // namespace mach_o
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/GOTPass.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/GOTPass.cpp
new file mode 100644
index 000000000000..49e6f88d4aa4
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/GOTPass.cpp
@@ -0,0 +1,184 @@
+//===- lib/ReaderWriter/MachO/GOTPass.cpp -----------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+/// \file
+/// This linker pass transforms all GOT kind references to real references.
+/// That is, in assembly you can write something like:
+/// movq foo@GOTPCREL(%rip), %rax
+/// which means you want to load a pointer to "foo" out of the GOT (Global
+/// Offset Table). In the object file, the Atom containing this instruction
+/// has a Reference whose target is an Atom named "foo" and the Reference
+/// kind is a GOT load. The linker needs to instantiate a pointer sized
+/// GOT entry. This is done by creating a GOT Atom to represent that pointer
+/// sized data in this pass, and altering the Atom graph so the Reference now
+/// points to the GOT Atom entry (corresponding to "foo") and changing the
+/// Reference Kind to reflect it is now pointing to a GOT entry (rather
+/// than needing a GOT entry).
+/// There is one optimization the linker can do here. If the target of the GOT
+/// is in the same linkage unit and does not need to be interposable, and
+/// the GOT use is just a load (not some other operation), this pass can
+/// transform that load into an LEA (add). This optimizes away one memory load
+/// that could stall the pipeline at runtime. This optimization only
+/// works for architectures in which a (GOT) load instruction can be changed to
+/// an LEA instruction that is the same size. The method isGOTAccess() should
+/// only return true for "canBypassGOT" if this optimization is supported.
+#include "ArchHandler.h"
+#include "File.h"
+#include "MachOPasses.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Core/DefinedAtom.h"
+#include "lld/Core/File.h"
+#include "lld/Core/Reference.h"
+#include "lld/Core/Simple.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+namespace lld {
+namespace mach_o {
+// GOT Entry Atom created by the GOT pass.
+// Represents one pointer-sized, zero-initialized GOT slot: 8 bytes when
+// constructed with is64 == true, otherwise 4 bytes.
+class GOTEntryAtom : public SimpleDefinedAtom {
+ // 'name' is kept as a StringRef (not copied) and is only reported through
+ // slotName().
+ GOTEntryAtom(const File &file, bool is64, StringRef name)
+ : SimpleDefinedAtom(file), _is64(is64), _name(name) { }
+ ~GOTEntryAtom() override = default;
+ ContentType contentType() const override {
+ return DefinedAtom::typeGOT;
+ }
+ // Alignment equals the slot size.
+ Alignment alignment() const override {
+ return _is64 ? 8 : 4;
+ }
+ uint64_t size() const override {
+ return _is64 ? 8 : 4;
+ }
+ ContentPermissions permissions() const override {
+ return DefinedAtom::permRW_;
+ }
+ // Returns the first size() bytes of a shared static all-zero buffer.
+ ArrayRef<uint8_t> rawContent() const override {
+ static const uint8_t zeros[] =
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+ return llvm::makeArrayRef(zeros, size());
+ }
+ // Name of the target this slot was created for.
+ StringRef slotName() const {
+ return _name;
+ }
+ const bool _is64;
+ StringRef _name;
+/// Pass for instantiating and optimizing GOT slots.
+class GOTPass : public Pass {
+ /// Binds the pass to \p context, caches its arch handler, and creates the
+ /// virtual MachOFile ("<mach-o GOT Pass>") that owns every GOT entry atom
+ /// this pass makes. The file is given the context's next ordinal.
+ GOTPass(const MachOLinkingContext &context)
+ : _ctx(context), _archHandler(_ctx.archHandler()),
+ _file(*_ctx.make_file<MachOFile>("<mach-o GOT Pass>")) {
+ _file.setOrdinal(_ctx.getNextOrdinalAndIncrement());
+ }
+ /// Visits every reference of every defined atom in \p mergedFile. Each
+ /// reference the arch handler classifies as a GOT access is either
+ /// retargeted to a GOT entry atom or marked as a direct access, depending
+ /// on shouldReplaceTargetWithGOTAtom(). All GOT entry atoms created are
+ /// then added to \p mergedFile sorted by slot name. Always returns
+ /// success.
+ llvm::Error perform(SimpleFile &mergedFile) override {
+   // Scan all references in all atoms.
+   for (const DefinedAtom *atom : mergedFile.defined()) {
+     for (const Reference *ref : *atom) {
+       // Only instructions accessing the GOT are of interest.
+       bool canBypassGOT;
+       if (!_archHandler.isGOTAccess(*ref, canBypassGOT))
+         continue;
+       const Atom *target = ref->target();
+       assert(target != nullptr);
+       const bool needsSlot =
+           shouldReplaceTargetWithGOTAtom(target, canBypassGOT);
+       if (needsSlot) {
+         // Replace the target with a reference to a GOT entry.
+         const DefinedAtom *slot = makeGOTEntry(target);
+         const_cast<Reference *>(ref)->setTarget(slot);
+       }
+       // Update the reference kind to reflect whether the target is now a
+       // GOT entry (true) or a direct access (false).
+       _archHandler.updateReferenceToGOT(ref, needsSlot);
+     }
+   }
+   // Sort and add all created GOT Atoms to master file
+   std::vector<const GOTEntryAtom *> slots;
+   slots.reserve(_targetToGOT.size());
+   for (auto &targetAndSlot : _targetToGOT)
+     slots.push_back(targetAndSlot.second);
+   std::sort(slots.begin(), slots.end(),
+             [](const GOTEntryAtom *lhs, const GOTEntryAtom *rhs) {
+               return lhs->slotName().compare(rhs->slotName()) < 0;
+             });
+   for (const GOTEntryAtom *slot : slots)
+     mergedFile.addAtom(*slot);
+   return llvm::Error::success();
+ }
+ /// Decides whether a GOT access to \p target really needs a GOT slot.
+ /// Returns true for shared-library atoms and for interposable defined
+ /// atoms; otherwise a slot is needed only when the instruction cannot
+ /// bypass the GOT (\p canBypassGOT is false).
+ bool shouldReplaceTargetWithGOTAtom(const Atom *target, bool canBypassGOT) {
+   // Accesses to shared library symbols must go through GOT.
+   if (isa<SharedLibraryAtom>(target))
+     return true;
+   // Accesses to interposable symbols in same linkage unit must also go
+   // through GOT.
+   if (const DefinedAtom *defTarget = dyn_cast<DefinedAtom>(target)) {
+     if (defTarget->interposable() != DefinedAtom::interposeNo) {
+       assert(defTarget->scope() != DefinedAtom::scopeTranslationUnit);
+       return true;
+     }
+   }
+   // Target does not require indirection. So, if instruction allows GOT to
+   // be by-passed, do that optimization and don't create GOT entry.
+   return !canBypassGOT;
+ }
+ /// Returns the GOT entry atom for \p target, creating and caching it on
+ /// first use. A new entry is allocated in the pass's own file and given
+ /// one reference to \p target using the arch handler's
+ /// nonLazyPointerReferenceToBinder kind.
+ const DefinedAtom *makeGOTEntry(const Atom *target) {
+   auto cached = _targetToGOT.find(target);
+   if (cached != _targetToGOT.end())
+     return cached->second;
+   auto *slot = new (_file.allocator())
+       GOTEntryAtom(_file, _ctx.is64Bit(), target->name());
+   _targetToGOT[target] = slot;
+   const ArchHandler::ReferenceInfo &nlInfo =
+       _archHandler.stubInfo().nonLazyPointerReferenceToBinder;
+   slot->addReference(Reference::KindNamespace::mach_o, nlInfo.arch,
+                      nlInfo.kind, 0, target, 0);
+   return slot;
+ }
+ // Linking context the pass was created for.
+ const MachOLinkingContext &_ctx;
+ // Arch handler obtained from _ctx; classifies and rewrites GOT references.
+ mach_o::ArchHandler &_archHandler;
+ // Virtual file whose allocator owns the GOT entry atoms.
+ MachOFile &_file;
+ // Cache: target atom -> its GOT entry, so each target gets one slot.
+ llvm::DenseMap<const Atom*, const GOTEntryAtom*> _targetToGOT;
+/// Appends a GOTPass bound to \p ctx to the pass manager \p pm. Callers are
+/// expected to check ctx.needsGOTPass() first; this is asserted.
+void addGOTPass(PassManager &pm, const MachOLinkingContext &ctx) {
+ assert(ctx.needsGOTPass());
+ pm.add(llvm::make_unique<GOTPass>(ctx));
+} // end namespace mach_o
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.cpp
new file mode 100644
index 000000000000..9058e4f562e2
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.cpp
@@ -0,0 +1,490 @@
+//===-- ReaderWriter/MachO/LayoutPass.cpp - Layout atoms ------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "LayoutPass.h"
+#include "lld/Core/Instrumentation.h"
+#include "lld/Core/PassManager.h"
+#include "lld/ReaderWriter/MachOLinkingContext.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Parallel.h"
+#include <algorithm>
+#include <set>
+#include <utility>
+using namespace lld;
+#define DEBUG_TYPE "LayoutPass"
+namespace lld {
+namespace mach_o {
+static bool compareAtoms(const LayoutPass::SortKey &,
+ const LayoutPass::SortKey &,
+ LayoutPass::SortOverride customSorter);
+#ifndef NDEBUG
+// Return "reason (leftval, rightval)"
+// Only compiled when NDEBUG is not defined, so callers must themselves be
+// debug-only.
+static std::string formatReason(StringRef reason, int leftVal, int rightVal) {
+ return (Twine(reason) + " (" + Twine(leftVal) + ", " + Twine(rightVal) + ")")
+ .str();
+// Less-than relationship of two atoms must be transitive, which is, if a < b
+// and b < c, a < c must be true. This function checks the transitivity by
+// checking the sort results: for every pair (i, j) with i before j in the
+// sorted vector, i must compare less than j and j must not compare less
+// than i.
+//
+// The outer loop runs to end() rather than stopping at "i + 1 == end()":
+// for the last element the inner loop is simply empty, and an empty vector
+// no longer advances an iterator past end().
+static void checkTransitivity(std::vector<LayoutPass::SortKey> &vec,
+                              LayoutPass::SortOverride customSorter) {
+  for (auto i = vec.begin(), e = vec.end(); i != e; ++i) {
+    for (auto j = i + 1; j != e; ++j) {
+      assert(compareAtoms(*i, *j, customSorter));
+      assert(!compareAtoms(*j, *i, customSorter));
+    }
+  }
+}
+// Helper functions to check follow-on graph.
+typedef llvm::DenseMap<const DefinedAtom *, const DefinedAtom *> AtomToAtomT;
+/// Builds a human-readable description of \p atom for debug output.
+///
+/// For a DefinedAtom the result is "<name> in <custom section name>", with
+/// "<anonymous PTR>" / "<anonymous>" standing in for an empty name or
+/// section name. Callers also pass reference targets, which need not be
+/// DefinedAtoms; for those only the name part is produced, followed by
+/// " (not a defined atom)", instead of dereferencing a null cast result.
+static std::string atomToDebugString(const Atom *atom) {
+  std::string str;
+  llvm::raw_string_ostream s(str);
+  const DefinedAtom *definedAtom = dyn_cast<DefinedAtom>(atom);
+  if (!definedAtom) {
+    // Undefined, shared-library or absolute atom: no section to report.
+    if (atom->name().empty())
+      s << "<anonymous " << atom << ">";
+    else
+      s << atom->name();
+    s << " (not a defined atom)";
+    s.flush();
+    return str;
+  }
+  if (definedAtom->name().empty())
+    s << "<anonymous " << definedAtom << ">";
+  else
+    s << definedAtom->name();
+  s << " in ";
+  if (definedAtom->customSectionName().empty())
+    s << "<anonymous>";
+  else
+    s << definedAtom->customSectionName();
+  s.flush();
+  return str;
+}
+/// Reports a cycle in a follow-on chain and aborts.
+/// Starting at \p atom, walks \p followOnNexts until it returns to the
+/// start, printing each atom and every reference it holds (kind name and
+/// target) to llvm::dbgs(), then calls llvm::report_fatal_error.
+/// \p atom is expected to lie on the cycle; otherwise the walk would not
+/// come back to it.
+static void showCycleDetectedError(const Registry &registry,
+ AtomToAtomT &followOnNexts,
+ const DefinedAtom *atom) {
+ const DefinedAtom *start = atom;
+ llvm::dbgs() << "There's a cycle in a follow-on chain!\n";
+ do {
+ llvm::dbgs() << " " << atomToDebugString(atom) << "\n";
+ for (const Reference *ref : *atom) {
+ StringRef kindValStr;
+ // Fall back to a placeholder when the registry cannot name the kind.
+ if (!registry.referenceKindToString(ref->kindNamespace(), ref->kindArch(),
+ ref->kindValue(), kindValStr)) {
+ kindValStr = "<unknown>";
+ }
+ llvm::dbgs() << " " << kindValStr
+ << ": " << atomToDebugString(ref->target()) << "\n";
+ }
+ atom = followOnNexts[atom];
+ } while (atom != start);
+ llvm::report_fatal_error("Cycle detected");
+/// Exit if there's a cycle in a followon chain reachable from the
+/// given root atom. Uses the tortoise and hare algorithm to detect a
+/// cycle.
+/// The tortoise advances one link per iteration and the hare two; the chain
+/// is acyclic once either becomes null. Lookups use operator[], so atoms
+/// without a successor (and the null key) get null entries inserted into
+/// \p followOnNexts as a side effect.
+static void checkNoCycleInFollowonChain(const Registry &registry,
+ AtomToAtomT &followOnNexts,
+ const DefinedAtom *root) {
+ const DefinedAtom *tortoise = root;
+ const DefinedAtom *hare = followOnNexts[root];
+ while (true) {
+ if (!tortoise || !hare)
+ return;
+ if (tortoise == hare)
+ showCycleDetectedError(registry, followOnNexts, tortoise);
+ tortoise = followOnNexts[tortoise];
+ hare = followOnNexts[followOnNexts[hare]];
+ }
+/// Verifies that \p atom has an entry in \p followOnRoots and that
+/// repeatedly following root links from that entry ends at an atom that is
+/// its own root. A null \p atom is accepted and ignored. Violations are
+/// reported through llvm_unreachable.
+static void checkReachabilityFromRoot(AtomToAtomT &followOnRoots,
+ const DefinedAtom *atom) {
+ if (!atom) return;
+ auto i = followOnRoots.find(atom);
+ if (i == followOnRoots.end()) {
+ llvm_unreachable(((Twine("Atom <") + atomToDebugString(atom) +
+ "> has no follow-on root!"))
+ .str()
+ .c_str());
+ }
+ const DefinedAtom *ap = i->second;
+ while (true) {
+ // operator[] yields null (and inserts an entry) for an unknown atom.
+ const DefinedAtom *next = followOnRoots[ap];
+ if (!next) {
+ llvm_unreachable((Twine("Atom <" + atomToDebugString(atom) +
+ "> is not reachable from its root!"))
+ .str()
+ .c_str());
+ }
+ // A self-rooted atom terminates the walk successfully.
+ if (next == ap)
+ return;
+ ap = next;
+ }
+/// Prints one line per atom in \p atomRange to llvm::dbgs(): owning file
+/// path, name, size, content type and ordinal.
+static void printDefinedAtoms(const File::AtomRange<DefinedAtom> &atomRange) {
+ for (const DefinedAtom *atom : atomRange) {
+ llvm::dbgs() << " file=" << atom->file().path()
+ << ", name=" << atom->name()
+ << ", size=" << atom->size()
+ << ", type=" << atom->contentType()
+ << ", ordinal=" << atom->ordinal()
+ << "\n";
+ }
+/// Verify that the followon chain is sane. Should not be called in
+/// release binary.
+/// Two checks are made: no chain reachable from any recorded root contains
+/// a cycle, and every atom appearing in _followOnNexts (as key or value)
+/// can be traced to a root. The \p range argument is not used by the
+/// checks visible here.
+void LayoutPass::checkFollowonChain(const File::AtomRange<DefinedAtom> &range) {
+ ScopedTask task(getDefaultDomain(), "LayoutPass::checkFollowonChain");
+ // Verify that there's no cycle in follow-on chain.
+ // Collect the distinct roots first so each chain is checked once.
+ std::set<const DefinedAtom *> roots;
+ for (const auto &ai : _followOnRoots)
+ roots.insert(ai.second);
+ for (const DefinedAtom *root : roots)
+ checkNoCycleInFollowonChain(_registry, _followOnNexts, root);
+ // Verify that all the atoms in followOnNexts have references to
+ // their roots.
+ for (const auto &ai : _followOnNexts) {
+ checkReachabilityFromRoot(_followOnRoots, ai.first);
+ checkReachabilityFromRoot(_followOnRoots, ai.second);
+ }
+#endif // #ifndef NDEBUG
+/// The function compares atoms by sorting atoms in the following order
+/// a) Sorts atoms by their ordinal overrides (layout-after/ingroup)
+/// b) Sorts atoms by their permissions
+/// c) Sorts atoms by their content
+/// d) Sorts atoms by custom sorter
+/// e) Sorts atoms on how they appear using File Ordinality
+/// f) Sorts atoms on how they appear within the File
+///
+/// Returns true iff \p lc must be laid out before \p rc. Steps b) to f)
+/// compare the roots of the two atoms' follow-on chains, not the atoms
+/// themselves. \p reason receives a short explanation of which rule
+/// decided; apart from the "same" case it is only filled in inside
+/// LLVM_DEBUG, because formatReason() exists only in builds without NDEBUG.
+static bool compareAtomsSub(const LayoutPass::SortKey &lc,
+                            const LayoutPass::SortKey &rc,
+                            LayoutPass::SortOverride customSorter,
+                            std::string &reason) {
+  const DefinedAtom *left = lc._atom.get();
+  const DefinedAtom *right = rc._atom.get();
+  if (left == right) {
+    reason = "same";
+    return false;
+  }
+  // Find the root of the chain if it is a part of a follow-on chain.
+  const DefinedAtom *leftRoot = lc._root;
+  const DefinedAtom *rightRoot = rc._root;
+  // Sort atoms by their ordinal overrides only if they fall in the same
+  // chain.
+  if (leftRoot == rightRoot) {
+    LLVM_DEBUG(reason = formatReason("override", lc._override, rc._override));
+    return lc._override < rc._override;
+  }
+  // Sort same permissions together.
+  DefinedAtom::ContentPermissions leftPerms = leftRoot->permissions();
+  DefinedAtom::ContentPermissions rightPerms = rightRoot->permissions();
+  if (leftPerms != rightPerms) {
+    LLVM_DEBUG(
+        reason = formatReason("contentPerms", (int)leftPerms, (int)rightPerms));
+    return leftPerms < rightPerms;
+  }
+  // Sort same content types together.
+  DefinedAtom::ContentType leftType = leftRoot->contentType();
+  DefinedAtom::ContentType rightType = rightRoot->contentType();
+  if (leftType != rightType) {
+    LLVM_DEBUG(reason =
+                   formatReason("contentType", (int)leftType, (int)rightType));
+    return leftType < rightType;
+  }
+  // Use custom sorter if supplied. It returns true only when it has an
+  // opinion about this pair; otherwise fall through to the default rules.
+  if (customSorter) {
+    bool leftBeforeRight;
+    if (customSorter(leftRoot, rightRoot, leftBeforeRight))
+      return leftBeforeRight;
+  }
+  // Sort by .o order.
+  const File *leftFile = &leftRoot->file();
+  const File *rightFile = &rightRoot->file();
+  if (leftFile != rightFile) {
+    LLVM_DEBUG(reason = formatReason(".o order", (int)leftFile->ordinal(),
+                                     (int)rightFile->ordinal()));
+    return leftFile->ordinal() < rightFile->ordinal();
+  }
+  // Sort by atom order with .o file.
+  uint64_t leftOrdinal = leftRoot->ordinal();
+  uint64_t rightOrdinal = rightRoot->ordinal();
+  if (leftOrdinal != rightOrdinal) {
+    LLVM_DEBUG(reason = formatReason("ordinal", (int)leftRoot->ordinal(),
+                                     (int)rightRoot->ordinal()));
+    return leftOrdinal < rightOrdinal;
+  }
+  // Two distinct atoms with identical keys: the ordering is undefined.
+  llvm::errs() << "Unordered: <" << left->name() << "> <"
+               << right->name() << ">\n";
+  llvm_unreachable("Atoms with Same Ordinal!");
+}
+/// Sort predicate for layout: returns true iff \p lc must be placed before
+/// \p rc. Delegates the decision to compareAtomsSub() and, in debug
+/// output only, logs the two atom names, the outcome and the deciding
+/// rule.
+static bool compareAtoms(const LayoutPass::SortKey &lc,
+                         const LayoutPass::SortKey &rc,
+                         LayoutPass::SortOverride customSorter) {
+  std::string reason;
+  bool result = compareAtomsSub(lc, rc, customSorter, reason);
+  LLVM_DEBUG({
+    StringRef comp = result ? "<" : ">=";
+    llvm::dbgs() << "Layout: '" << lc._atom.get()->name()
+                 << "' " << comp << " '"
+                 << rc._atom.get()->name() << "' (" << reason << ")\n";
+  });
+  return result;
+}
+/// Stores the registry (used to name reference kinds in diagnostics) and
+/// takes ownership of the optional custom sorter.
+LayoutPass::LayoutPass(const Registry &registry, SortOverride sorter)
+ : _registry(registry), _customSorter(std::move(sorter)) {}
+// Returns the atom immediately followed by the given atom in the followon
+// chain.
+// The walk starts at the root recorded for targetAtom. If targetAtom is
+// itself that root, the loop below does not stop at it, so callers are
+// presumably expected to pass a non-root atom -- TODO confirm.
+const DefinedAtom *LayoutPass::findAtomFollowedBy(
+ const DefinedAtom *targetAtom) {
+ // Start from the beginning of the chain and follow the chain until
+ // we find the targetAtom.
+ const DefinedAtom *atom = _followOnRoots[targetAtom];
+ while (true) {
+ const DefinedAtom *prevAtom = atom;
+ AtomToAtomT::iterator targetFollowOnAtomsIter = _followOnNexts.find(atom);
+ // The target atom must be in the chain of its root.
+ assert(targetFollowOnAtomsIter != _followOnNexts.end());
+ atom = targetFollowOnAtomsIter->second;
+ if (atom == targetAtom)
+ return prevAtom;
+ }
+// Check if all the atoms followed by the given target atom are of size zero.
+// When this method is called, an atom being added is not of size zero and
+// will be added to the head of the followon chain. All the atoms between the
+// atom and the targetAtom (specified by layout-after) need to be of size zero
+// in this case. Otherwise the desired layout is impossible.
+// Returns true if every atom from the chain's root up to (but excluding)
+// targetAtom has size zero, false as soon as a non-empty atom is met.
+bool LayoutPass::checkAllPrevAtomsZeroSize(const DefinedAtom *targetAtom) {
+ const DefinedAtom *atom = _followOnRoots[targetAtom];
+ while (true) {
+ if (atom == targetAtom)
+ return true;
+ if (atom->size() != 0)
+ // TODO: print warning that an impossible layout is being desired by the
+ // user.
+ return false;
+ AtomToAtomT::iterator targetFollowOnAtomsIter = _followOnNexts.find(atom);
+ // The target atom must be in the chain of its root.
+ assert(targetFollowOnAtomsIter != _followOnNexts.end());
+ atom = targetFollowOnAtomsIter->second;
+ }
+// Set the root of all atoms in targetAtom's chain to the given root.
+// Starts at targetAtom and follows _followOnNexts until an atom with no
+// recorded successor is reached.
+void LayoutPass::setChainRoot(const DefinedAtom *targetAtom,
+ const DefinedAtom *root) {
+ // Walk through the followon chain and override each node's root.
+ while (true) {
+ _followOnRoots[targetAtom] = root;
+ AtomToAtomT::iterator targetFollowOnAtomsIter =
+ _followOnNexts.find(targetAtom);
+ if (targetFollowOnAtomsIter == _followOnNexts.end())
+ return;
+ targetAtom = targetFollowOnAtomsIter->second;
+ }
+/// Builds the follow-on tables, which are two DenseMaps: _followOnRoots and
+/// _followOnNexts.
+/// _followOnRoots maps a DefinedAtom to the root of its follow-on chain.
+/// _followOnNexts maps a DefinedAtom to the atom that must follow it.
+/// For every kindLayoutAfter reference from an atom to a targetAtom:
+/// a) If the atom is seen for the first time, it becomes the root of its own
+///    chain.
+/// b) If the targetAtom is not in any chain yet, its root is set to the root
+///    of the current atom.
+/// c) If the targetAtom is the root of a different chain, all atoms of that
+///    chain are re-rooted to the current chain.
+/// d) If the targetAtom is in the middle of a different chain, the current
+///    atom is either spliced in just before it (zero-size atom) or prepended
+///    to that chain (only if every atom ahead of the targetAtom is of size
+///    zero).
+void LayoutPass::buildFollowOnTable(const File::AtomRange<DefinedAtom> &range) {
+ ScopedTask task(getDefaultDomain(), "LayoutPass::buildFollowOnTable");
+ // Pre-size both follow-on maps (_followOnRoots and _followOnNexts) to the
+ // number of atoms that we have.
+ _followOnRoots.reserve(range.size());
+ _followOnNexts.reserve(range.size());
+ for (const DefinedAtom *ai : range) {
+ for (const Reference *r : *ai) {
+ if (r->kindNamespace() != lld::Reference::KindNamespace::all ||
+ r->kindValue() != lld::Reference::kindLayoutAfter)
+ continue; // Only layout-after references contribute to the tables.
+ const DefinedAtom *targetAtom = dyn_cast<DefinedAtom>(r->target()); // NOTE(review): dyn_cast yields null for a non-DefinedAtom target and the result is used unchecked; confirm targets are always DefinedAtoms.
+ _followOnNexts[ai] = targetAtom;
+ // If we find a followon for the first time, let's make that atom as the
+ // root atom.
+ if (_followOnRoots.count(ai) == 0)
+ _followOnRoots[ai] = ai;
+ auto iter = _followOnRoots.find(targetAtom);
+ if (iter == _followOnRoots.end()) {
+ // If the targetAtom is not yet part of any chain, let's make the root of
+ // the targetAtom to the root of the current chain.
+ // The expression m[i] = m[j] where m is a DenseMap and i != j is not
+ // safe. m[j] returns a reference, which would be invalidated when a
+ // rehashing occurs. If rehashing occurs to make room for m[i], m[j]
+ // becomes invalid, and that invalid reference would be used as the RHS
+ // value of the expression.
+ // Copy the value to workaround.
+ const DefinedAtom *tmp = _followOnRoots[ai];
+ _followOnRoots[targetAtom] = tmp;
+ continue;
+ }
+ if (iter->second == targetAtom) {
+ // If the targetAtom is the root of a chain, the chain becomes part of
+ // the current chain. Rewrite the subchain's root to the current
+ // chain's root.
+ setChainRoot(targetAtom, _followOnRoots[ai]);
+ continue;
+ }
+ // The targetAtom is already a part of a chain. If the current atom is
+ // of size zero, we can insert it in the middle of the chain just
+ // before the target atom, while not breaking other atom's followon
+ // relationships. If it's not, we can only insert the current atom at
+ // the beginning of the chain. All the atoms followed by the target
+ // atom must be of size zero in that case to satisfy the followon
+ // relationships.
+ size_t currentAtomSize = ai->size();
+ if (currentAtomSize == 0) {
+ const DefinedAtom *targetPrevAtom = findAtomFollowedBy(targetAtom);
+ _followOnNexts[targetPrevAtom] = ai; // Splice ai in between targetPrevAtom and targetAtom.
+ const DefinedAtom *tmp = _followOnRoots[targetPrevAtom]; // Copy first; see the m[i] = m[j] note above.
+ _followOnRoots[ai] = tmp;
+ continue;
+ }
+ if (!checkAllPrevAtomsZeroSize(targetAtom))
+ break; // Impossible layout: stop processing this atom's remaining references.
+ _followOnNexts[ai] = _followOnRoots[targetAtom]; // ai now precedes the old root of the target's chain.
+ setChainRoot(_followOnRoots[targetAtom], _followOnRoots[ai]); // Both keys already exist, so neither lookup inserts.
+ }
+ }
+/// Build an ordinal override map by traversing the followon chain, and
+/// assigning ordinals to each atom. If an atom has its ordinal already
+/// assigned, skip the atom and move to the next. This is the
+/// main map that is used to sort the atoms while comparing two atoms together.
+LayoutPass::buildOrdinalOverrideMap(const File::AtomRange<DefinedAtom> &range) {
+ ScopedTask task(getDefaultDomain(), "LayoutPass::buildOrdinalOverrideMap");
+ uint64_t index = 0; // Next ordinal to hand out; shared across all chains.
+ for (const DefinedAtom *ai : range) {
+ const DefinedAtom *atom = ai;
+ if (_ordinalOverrideMap.find(atom) != _ordinalOverrideMap.end())
+ continue; // Already numbered while walking an earlier chain.
+ AtomToAtomT::iterator start = _followOnRoots.find(atom);
+ if (start == _followOnRoots.end())
+ continue; // Atom is not part of any follow-on chain.
+ for (const DefinedAtom *nextAtom = start->second; nextAtom; // Walk the whole chain starting at its root.
+ nextAtom = _followOnNexts[nextAtom]) { // operator[] default-inserts a null entry for an atom with no successor, which ends the walk.
+ AtomToOrdinalT::iterator pos = _ordinalOverrideMap.find(nextAtom);
+ if (pos == _ordinalOverrideMap.end())
+ _ordinalOverrideMap[nextAtom] = index++;
+ }
+ }
+LayoutPass::decorate(File::AtomRange<DefinedAtom> &atomRange) const {
+  // Pairs every atom with the root of its follow-on chain and its ordinal
+  // override, so the sort comparator does not have to consult the maps.
+  // Ownership of each atom is moved out of atomRange into its SortKey.
+  std::vector<SortKey> ret;
+  // One key is produced per atom; allocate once instead of growing the vector
+  // repeatedly.
+  ret.reserve(atomRange.size());
+  for (OwningAtomPtr<DefinedAtom> &atom : atomRange.owning_ptrs()) {
+    auto ri = _followOnRoots.find(atom.get());
+    auto oi = _ordinalOverrideMap.find(atom.get());
+    // An atom outside every chain acts as its own root with override 0.
+    const auto *root = (ri == _followOnRoots.end()) ? atom.get() : ri->second;
+    uint64_t override = (oi == _ordinalOverrideMap.end()) ? 0 : oi->second;
+    ret.push_back(SortKey(std::move(atom), root, override));
+  }
+  return ret;
+void LayoutPass::undecorate(File::AtomRange<DefinedAtom> &atomRange,
+                            std::vector<SortKey> &keys) const {
+  // Move each atom out of its key and back into the range, in key order.
+  for (size_t slot = 0, count = keys.size(); slot != count; ++slot)
+    atomRange[slot] = std::move(keys[slot]._atom);
+/// Perform the actual pass: build the follow-on tables and ordinal overrides, sort the defined atoms of mergedFile, and write them back into the range. Always returns success.
+llvm::Error LayoutPass::perform(SimpleFile &mergedFile) {
+ LLVM_DEBUG(llvm::dbgs() << "******** Laying out atoms:\n");
+ // sort the atoms
+ ScopedTask task(getDefaultDomain(), "LayoutPass");
+ File::AtomRange<DefinedAtom> atomRange = mergedFile.defined();
+ // Build follow on tables
+ buildFollowOnTable(atomRange);
+ // Check the structure of followon graph if running in debug mode.
+ LLVM_DEBUG(checkFollowonChain(atomRange));
+ // Build override maps
+ buildOrdinalOverrideMap(atomRange);
+ llvm::dbgs() << "unsorted atoms:\n";
+ printDefinedAtoms(atomRange);
+ });
+ std::vector<LayoutPass::SortKey> vec = decorate(atomRange); // Moves the atoms out of atomRange into sort keys.
+ sort(llvm::parallel::par, vec.begin(), vec.end(),
+ [&](const LayoutPass::SortKey &l, const LayoutPass::SortKey &r) -> bool {
+ return compareAtoms(l, r, _customSorter);
+ });
+ LLVM_DEBUG(checkTransitivity(vec, _customSorter));
+ undecorate(atomRange, vec); // Moves the atoms back into atomRange in sorted order.
+ llvm::dbgs() << "sorted atoms:\n";
+ printDefinedAtoms(atomRange);
+ });
+ LLVM_DEBUG(llvm::dbgs() << "******** Finished laying out atoms\n");
+ return llvm::Error::success();
+void addLayoutPass(PassManager &pm, const MachOLinkingContext &ctx) { // Registers a LayoutPass whose custom sorter forwards to ctx.customAtomOrderer().
+ pm.add(llvm::make_unique<LayoutPass>(
+ ctx.registry(), [&](const DefinedAtom * left, const DefinedAtom * right, // The lambda captures ctx by reference, so ctx must outlive the pass.
+ bool & leftBeforeRight) ->bool {
+ return ctx.customAtomOrderer(left, right, leftBeforeRight);
+ }));
+} // namespace mach_o
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.h
new file mode 100644
index 000000000000..c18777eded0a
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.h
@@ -0,0 +1,119 @@
+//===------ lib/ReaderWriter/MachO/LayoutPass.h - Handles Layout of atoms -===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Core/File.h"
+#include "lld/Core/Pass.h"
+#include "lld/Core/Reader.h"
+#include "lld/Core/Simple.h"
+#include "llvm/ADT/DenseMap.h"
+#include <map>
+#include <string>
+#include <vector>
+namespace lld {
+class DefinedAtom;
+class SimpleFile;
+namespace mach_o {
+/// This linker pass does the layout of the atoms. By default, atoms are laid
+/// out in the order their .o files were found on the command line, then by
+/// order of the atoms (address) in the .o file. But some atoms have a
+/// preferred location in their section (such as pinned to the start or end of
+/// the section), so the sort must take that into account too.
+class LayoutPass : public Pass {
+ struct SortKey { // Move-only sort record: an owned atom plus the data the comparator needs.
+ SortKey(OwningAtomPtr<DefinedAtom> &&atom,
+ const DefinedAtom *root, uint64_t override)
+ : _atom(std::move(atom)), _root(root), _override(override) {}
+ OwningAtomPtr<DefinedAtom> _atom; // The atom being sorted; owned by the key while sorting.
+ const DefinedAtom *_root; // Root of the atom's follow-on chain, or the atom itself if it has none.
+ uint64_t _override; // Ordinal from _ordinalOverrideMap, or 0 if the atom has none.
+ // Note, these are only here to appease MSVC bots which didn't like
+ // the same methods being implemented/deleted in OwningAtomPtr.
+ SortKey(SortKey &&key) : _atom(std::move(key._atom)), _root(key._root),
+ _override(key._override) {
+ key._root = nullptr; // Leave the moved-from key without a root.
+ }
+ SortKey &operator=(SortKey &&key) {
+ _atom = std::move(key._atom);
+ _root = key._root;
+ key._root = nullptr;
+ _override = key._override;
+ return *this;
+ }
+ private:
+ SortKey(const SortKey &) = delete;
+ void operator=(const SortKey&) = delete;
+ };
+ typedef std::function<bool (const DefinedAtom *left, const DefinedAtom *right, // Custom ordering hook; result is reported through leftBeforeRight.
+ bool &leftBeforeRight)> SortOverride;
+ LayoutPass(const Registry &registry, SortOverride sorter);
+ /// Sorts atoms in mergedFile by content type then by command line order.
+ llvm::Error perform(SimpleFile &mergedFile) override;
+ ~LayoutPass() override = default;
+ // Build the followOn atoms chain as specified by the kindLayoutAfter
+ // reference type
+ void buildFollowOnTable(const File::AtomRange<DefinedAtom> &range);
+ // Build a map of Atoms to ordinals for sorting the atoms
+ void buildOrdinalOverrideMap(const File::AtomRange<DefinedAtom> &range);
+ const Registry &_registry;
+ SortOverride _customSorter;
+ typedef llvm::DenseMap<const DefinedAtom *, const DefinedAtom *> AtomToAtomT;
+ typedef llvm::DenseMap<const DefinedAtom *, uint64_t> AtomToOrdinalT;
+ // A map to be used to sort atoms. It represents the order of atoms in the
+ // result; if Atom X is mapped to atom Y in this map, X will be located
+ // immediately before Y in the output file. Y might be mapped to another
+ // atom, constructing a follow-on chain. An atom cannot be mapped to more
+ // than one atom unless all but one atom are of size zero.
+ AtomToAtomT _followOnNexts;
+ // A map to be used to sort atoms. It's a map from an atom to its root of
+ // follow-on chain. A root atom is mapped to itself. If an atom is not in
+ // _followOnNexts, the atom is not in this map, and vice versa.
+ AtomToAtomT _followOnRoots;
+ AtomToOrdinalT _ordinalOverrideMap; // Ordinals assigned along the follow-on chains.
+ // Helper methods for buildFollowOnTable().
+ const DefinedAtom *findAtomFollowedBy(const DefinedAtom *targetAtom);
+ bool checkAllPrevAtomsZeroSize(const DefinedAtom *targetAtom);
+ void setChainRoot(const DefinedAtom *targetAtom, const DefinedAtom *root);
+ std::vector<SortKey> decorate(File::AtomRange<DefinedAtom> &atomRange) const; // Moves atoms out of the range into sort keys.
+ void undecorate(File::AtomRange<DefinedAtom> &atomRange, // Moves atoms from the keys back into the range.
+ std::vector<SortKey> &keys) const;
+ // Check if the follow-on graph is a correct structure. For debugging only.
+ void checkFollowonChain(const File::AtomRange<DefinedAtom> &range);
+} // namespace mach_o
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp
new file mode 100644
index 000000000000..61583963ddd7
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp
@@ -0,0 +1,1101 @@
+//===- lib/ReaderWriter/MachO/MachOLinkingContext.cpp ---------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Common/ErrorHandler.h"
+#include "lld/ReaderWriter/MachOLinkingContext.h"
+#include "ArchHandler.h"
+#include "File.h"
+#include "FlatNamespaceFile.h"
+#include "MachONormalizedFile.h"
+#include "MachOPasses.h"
+#include "SectCreateFile.h"
+#include "lld/Common/Driver.h"
+#include "lld/Core/ArchiveLibraryFile.h"
+#include "lld/Core/PassManager.h"
+#include "lld/Core/Reader.h"
+#include "lld/Core/Writer.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Demangle/Demangle.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/Path.h"
+#include <algorithm>
+using lld::mach_o::ArchHandler;
+using lld::mach_o::MachOFile;
+using lld::mach_o::MachODylibFile;
+using namespace llvm::MachO;
+namespace lld {
+bool MachOLinkingContext::parsePackedVersion(StringRef str, uint32_t &result) {
+  // Parses "X[.Y[.Z]]" into the packed 32-bit form: X in bits 16-31 (at most
+  // 65535), Y in bits 8-15 and Z in bits 0-7 (at most 255 each).
+  // Returns true on error and false on success. An empty string succeeds and
+  // leaves result at 0. Components after the third are ignored.
+  result = 0;
+  if (str.empty())
+    return false;
+  SmallVector<StringRef, 3> parts;
+  llvm::SplitString(str, parts, ".");
+  // SplitString drops empty tokens, so a string made only of dots yields no
+  // parts at all. Report that as an error instead of reading parts[0].
+  if (parts.empty())
+    return true;
+  unsigned long long num;
+  if (llvm::getAsUnsignedInteger(parts[0], 10, num))
+    return true;
+  if (num > 65535)
+    return true;
+  result = num << 16;
+  if (parts.size() > 1) {
+    if (llvm::getAsUnsignedInteger(parts[1], 10, num))
+      return true;
+    if (num > 255)
+      return true;
+    result |= (num << 8);
+  }
+  if (parts.size() > 2) {
+    if (llvm::getAsUnsignedInteger(parts[2], 10, num))
+      return true;
+    if (num > 255)
+      return true;
+    result |= num;
+  }
+  return false;
+bool MachOLinkingContext::parsePackedVersion(StringRef str, uint64_t &result) {
+  // Parses "A[.B[.C[.D[.E]]]]" into the packed 64-bit form: A in bits 40-63
+  // (at most 0xFFFFFF), then B, C, D and E as 10-bit fields at bit offsets
+  // 30, 20, 10 and 0 (at most 0x3FF each).
+  // Returns true on error and false on success. An empty string succeeds and
+  // leaves result at 0.
+  result = 0;
+  if (str.empty())
+    return false;
+  SmallVector<StringRef, 5> parts;
+  llvm::SplitString(str, parts, ".");
+  // SplitString drops empty tokens, so a string made only of dots yields no
+  // parts at all. Report that as an error instead of reading parts[0].
+  if (parts.empty())
+    return true;
+  // Only five fields fit. A sixth component would wrap the unsigned shift
+  // count below zero and make the shift undefined.
+  if (parts.size() > 5)
+    return true;
+  unsigned long long num;
+  if (llvm::getAsUnsignedInteger(parts[0], 10, num))
+    return true;
+  if (num > 0xFFFFFF)
+    return true;
+  result = num << 40;
+  unsigned Shift = 30;
+  for (StringRef part : llvm::makeArrayRef(parts).slice(1)) {
+    if (llvm::getAsUnsignedInteger(part, 10, num))
+      return true;
+    if (num > 0x3FF)
+      return true;
+    result |= (num << Shift);
+    Shift -= 10;
+  }
+  return false;
+MachOLinkingContext::ArchInfo MachOLinkingContext::_s_archInfos[] = { // Columns appear to be: archName, arch, littleEndian, cputype, cpusubtype.
+ { "x86_64", arch_x86_64, true, CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_ALL },
+ { "i386", arch_x86, true, CPU_TYPE_I386, CPU_SUBTYPE_X86_ALL },
+ { "ppc", arch_ppc, false, CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_ALL },
+ { "armv6", arch_armv6, true, CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V6 },
+ { "armv7", arch_armv7, true, CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7 },
+ { "armv7s", arch_armv7s, true, CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7S },
+ { "arm64", arch_arm64, true, CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_ALL },
+ { "", arch_unknown,false, 0, 0 } // Sentinel: the lookup loops stop at the first empty archName.
+MachOLinkingContext::archFromCpuType(uint32_t cputype, uint32_t cpusubtype) {
+  // Scan the table up to its empty-name sentinel for an exact
+  // (cputype, cpusubtype) match.
+  ArchInfo *entry = _s_archInfos;
+  while (!entry->archName.empty()) {
+    if (entry->cputype == cputype && entry->cpusubtype == cpusubtype)
+      return entry->arch;
+    ++entry;
+  }
+  return arch_unknown;
+MachOLinkingContext::archFromName(StringRef archName) {
+  // Scan the table up to its empty-name sentinel for a matching name.
+  ArchInfo *entry = _s_archInfos;
+  while (!entry->archName.empty()) {
+    if (entry->archName.equals(archName))
+      return entry->arch;
+    ++entry;
+  }
+  return arch_unknown;
+StringRef MachOLinkingContext::nameFromArch(Arch arch) {
+  // Scan the table up to its empty-name sentinel; fall back to a placeholder
+  // name when the arch is not listed.
+  ArchInfo *entry = _s_archInfos;
+  while (!entry->archName.empty()) {
+    if (entry->arch == arch)
+      return entry->archName;
+    ++entry;
+  }
+  return "<unknown>";
+uint32_t MachOLinkingContext::cpuTypeFromArch(Arch arch) {
+  // The caller must pass a known arch; an arch missing from the table is
+  // treated as unreachable.
+  assert(arch != arch_unknown);
+  ArchInfo *entry = _s_archInfos;
+  while (!entry->archName.empty()) {
+    if (entry->arch == arch)
+      return entry->cputype;
+    ++entry;
+  }
+  llvm_unreachable("Unknown arch type");
+uint32_t MachOLinkingContext::cpuSubtypeFromArch(Arch arch) {
+  // The caller must pass a known arch; an arch missing from the table is
+  // treated as unreachable.
+  assert(arch != arch_unknown);
+  ArchInfo *entry = _s_archInfos;
+  while (!entry->archName.empty()) {
+    if (entry->arch == arch)
+      return entry->cpusubtype;
+    ++entry;
+  }
+  llvm_unreachable("Unknown arch type");
+bool MachOLinkingContext::isThinObjectFile(StringRef path, Arch &arch) { // Thin forwarder to mach_o::normalized::isThinObjectFile().
+ return mach_o::normalized::isThinObjectFile(path, arch);
+bool MachOLinkingContext::sliceFromFatFile(MemoryBufferRef mb, uint32_t &offset, // Forwards to mach_o::normalized::sliceFromFatFile() for the configured _arch.
+ uint32_t &size) {
+ return mach_o::normalized::sliceFromFatFile(mb, _arch, offset, size);
+MachOLinkingContext::MachOLinkingContext() {} // Empty body; settings are applied later through configure().
+MachOLinkingContext::~MachOLinkingContext() {
+ // Atoms are allocated on BumpPtrAllocator's on File's.
+ // As we transfer atoms from one file to another, we need to clear all of the
+ // atoms before we remove any of the BumpPtrAllocator's.
+ auto &nodes = getNodes();
+ for (unsigned i = 0, e = nodes.size(); i != e; ++i) {
+ FileNode *node = dyn_cast<FileNode>(nodes[i].get());
+ if (!node)
+ continue; // Skip nodes that are not FileNodes.
+ File *file = node->getFile();
+ file->clearAtoms();
+ }
+void MachOLinkingContext::configure(HeaderFileType type, Arch arch, OS os, // Records the output type, arch, OS and min OS version, then derives the defaults that depend on them.
+ uint32_t minOSVersion,
+ bool exportDynamicSymbols) {
+ _outputMachOType = type;
+ _arch = arch;
+ _os = os;
+ _osMinVersion = minOSVersion;
+ // If min OS not specified on command line, use reasonable defaults.
+ // Note that we only do sensible defaults when emitting something other than
+ // object and preload.
+ if (_outputMachOType != llvm::MachO::MH_OBJECT &&
+ _outputMachOType != llvm::MachO::MH_PRELOAD) {
+ if (minOSVersion == 0) {
+ switch (_arch) {
+ case arch_x86_64:
+ case arch_x86:
+ parsePackedVersion("10.8", _osMinVersion);
+ _os = MachOLinkingContext::OS::macOSX;
+ break;
+ case arch_armv6:
+ case arch_armv7:
+ case arch_armv7s:
+ case arch_arm64:
+ parsePackedVersion("7.0", _osMinVersion);
+ _os = MachOLinkingContext::OS::iOS;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ switch (_outputMachOType) {
+ case llvm::MachO::MH_EXECUTE:
+ // If targeting newer OS, use _main
+ if (minOS("10.8", "6.0")) {
+ _entrySymbolName = "_main";
+ } else {
+ // If targeting older OS, use start (in crt1.o)
+ _entrySymbolName = "start";
+ }
+ // __PAGEZERO defaults to 4GB on 64-bit (except for PPC64 which lld does not
+ // support) and 4KB on 32-bit.
+ if (is64Bit(_arch)) {
+ _pageZeroSize = 0x100000000;
+ } else {
+ _pageZeroSize = 0x1000;
+ }
+ // Initial base address is __PAGEZERO size.
+ _baseAddress = _pageZeroSize;
+ // Make PIE by default when targeting newer OSs.
+ switch (os) { // NOTE(review): reads the os/minOSVersion arguments, not the _os/_osMinVersion members that may have been defaulted above; confirm this is intended.
+ case OS::macOSX:
+ if (minOSVersion >= 0x000A0700) // MacOSX 10.7
+ _pie = true;
+ break;
+ case OS::iOS:
+ if (minOSVersion >= 0x00040300) // iOS 4.3
+ _pie = true;
+ break;
+ case OS::iOS_simulator:
+ _pie = true;
+ break;
+ case OS::unknown:
+ break;
+ }
+ setGlobalsAreDeadStripRoots(exportDynamicSymbols);
+ break;
+ case llvm::MachO::MH_DYLIB:
+ setGlobalsAreDeadStripRoots(exportDynamicSymbols);
+ break;
+ case llvm::MachO::MH_BUNDLE:
+ break;
+ case llvm::MachO::MH_OBJECT:
+ _printRemainingUndefines = false;
+ _allowRemainingUndefines = true;
+ break;
+ default:
+ break;
+ }
+ // Set default segment page sizes based on arch.
+ if (arch == arch_arm64)
+ _pageSize = 4*4096; // 16KB for arm64; other archs keep the existing _pageSize value.
+uint32_t MachOLinkingContext::getCPUType() const { // CPU type of the configured _arch, via cpuTypeFromArch().
+ return cpuTypeFromArch(_arch);
+uint32_t MachOLinkingContext::getCPUSubType() const { // CPU subtype of the configured _arch, via cpuSubtypeFromArch().
+ return cpuSubtypeFromArch(_arch);
+bool MachOLinkingContext::is64Bit(Arch arch) {
+  // An arch is 64-bit when its table entry has the CPU_ARCH_ABI64 bit set in
+  // cputype.
+  ArchInfo *entry = _s_archInfos;
+  while (!entry->archName.empty()) {
+    if (entry->arch == arch)
+      return (entry->cputype & CPU_ARCH_ABI64);
+    ++entry;
+  }
+  // unknown archs are not 64-bit.
+  return false;
+bool MachOLinkingContext::isHostEndian(Arch arch) {
+  // True when the arch's endianness in the table matches the host's.
+  assert(arch != arch_unknown);
+  ArchInfo *entry = _s_archInfos;
+  while (!entry->archName.empty()) {
+    if (entry->arch == arch)
+      return (entry->littleEndian == llvm::sys::IsLittleEndianHost);
+    ++entry;
+  }
+  llvm_unreachable("Unknown arch type");
+bool MachOLinkingContext::isBigEndian(Arch arch) {
+  // True when the arch's table entry is not marked little-endian.
+  assert(arch != arch_unknown);
+  ArchInfo *entry = _s_archInfos;
+  while (!entry->archName.empty()) {
+    if (entry->arch == arch)
+      return ! entry->littleEndian;
+    ++entry;
+  }
+  llvm_unreachable("Unknown arch type");
+bool MachOLinkingContext::is64Bit() const { // Instance form: asks the static is64Bit(Arch) about the configured _arch.
+ return is64Bit(_arch);
+bool MachOLinkingContext::outputTypeHasEntry() const {
+  // Only executables and preload images have an entry point.
+  return _outputMachOType == MH_EXECUTE || _outputMachOType == MH_PRELOAD;
+bool MachOLinkingContext::needsStubsPass() const {
+  // Executables need stubs unless they are static; dylibs and bundles always
+  // need them; no other output type does.
+  if (_outputMachOType == MH_EXECUTE)
+    return !_outputMachOTypeStatic;
+  return _outputMachOType == MH_DYLIB || _outputMachOType == MH_BUNDLE;
+bool MachOLinkingContext::needsGOTPass() const {
+  // GOT pass not used in -r mode.
+  const bool relocatableOutput = (_outputMachOType == MH_OBJECT);
+  // Only some arches use GOT pass.
+  const bool archUsesGOT = (_arch == arch_x86_64 || _arch == arch_arm64);
+  return !relocatableOutput && archUsesGOT;
+bool MachOLinkingContext::needsCompactUnwindPass() const {
+  // Only executables, dylibs and bundles are candidates; for those the arch
+  // handler decides. The handler is not consulted for other output types.
+  const bool linkedImage = _outputMachOType == MH_EXECUTE ||
+                           _outputMachOType == MH_DYLIB ||
+                           _outputMachOType == MH_BUNDLE;
+  if (!linkedImage)
+    return false;
+  return archHandler().needsCompactUnwind();
+bool MachOLinkingContext::needsObjCPass() const { // True once _objcConstraint is anything other than objc_unknown.
+ // ObjC pass is only needed if any of the inputs were ObjC.
+ return _objcConstraint != objc_unknown;
+bool MachOLinkingContext::needsShimPass() const {
+  // Shim pass only used in final executables.
+  if (_outputMachOType == MH_OBJECT)
+    return false;
+  // Only 32-bit arm arches use Shim pass.
+  return _arch == arch_armv6 || _arch == arch_armv7 || _arch == arch_armv7s;
+bool MachOLinkingContext::needsTLVPass() const {
+  // The TLV pass runs for bundles, executables and dylibs only.
+  return _outputMachOType == MH_BUNDLE || _outputMachOType == MH_EXECUTE ||
+         _outputMachOType == MH_DYLIB;
+StringRef MachOLinkingContext::binderSymbolName() const { // Binder symbol name taken from the arch handler's stub info.
+ return archHandler().stubInfo().binderSymbolName;
+bool MachOLinkingContext::minOS(StringRef mac, StringRef iOS) const { // True when _osMinVersion is at least the given version for the current _os (mac for macOSX, iOS for iOS and the simulator).
+ uint32_t parsedVersion;
+ switch (_os) {
+ case OS::macOSX:
+ if (parsePackedVersion(mac, parsedVersion))
+ return false; // An unparsable version string counts as "not met".
+ return _osMinVersion >= parsedVersion;
+ case OS::iOS:
+ case OS::iOS_simulator:
+ if (parsePackedVersion(iOS, parsedVersion))
+ return false; // An unparsable version string counts as "not met".
+ return _osMinVersion >= parsedVersion;
+ case OS::unknown:
+ // If we don't know the target, then assume that we don't meet the min OS.
+ // This matches the ld64 behaviour
+ return false;
+ }
+ llvm_unreachable("invalid OS enum");
+bool MachOLinkingContext::addEntryPointLoadCommand() const {
+  // Only non-static executables that target a new enough OS qualify.
+  const bool dynamicExecutable =
+      (_outputMachOType == MH_EXECUTE) && !_outputMachOTypeStatic;
+  if (!dynamicExecutable)
+    return false;
+  return minOS("10.8", "6.0");
+bool MachOLinkingContext::addUnixThreadLoadCommand() const {
+  // Preload images always qualify.
+  if (_outputMachOType == MH_PRELOAD)
+    return true;
+  // Apart from preload images, only executables can qualify.
+  if (_outputMachOType != MH_EXECUTE)
+    return false;
+  // Static executables always qualify; dynamic ones only when the target OS
+  // is older than 10.8 / 6.0.
+  if (_outputMachOTypeStatic)
+    return true;
+  return !minOS("10.8", "6.0");
+bool MachOLinkingContext::pathExists(StringRef path) const {
+  if (_testingFileUsage) {
+    // In test mode only files explicitly provided on the command-line exist.
+    // Backslashes are turned into forward slashes before the lookup.
+    std::string normalized = path.str();
+    std::replace(normalized.begin(), normalized.end(), '\\', '/');
+    return _existingPaths.find(normalized) != _existingPaths.end();
+  }
+  // Normal mode: ask the file system.
+  return llvm::sys::fs::exists(path.str());
+bool MachOLinkingContext::fileExists(StringRef path) const {
+  const bool present = pathExists(path);
+  if (!present) {
+    // Log search misses.
+    addInputFileNotFound(path);
+  } else if (_testingFileUsage) {
+    // When testing, file is never opened, so logging is done here.
+    addInputFileDependency(path);
+  }
+  return present;
+void MachOLinkingContext::setSysLibRoots(const StringRefVector &paths) { // Replaces the whole list of -syslibroot paths.
+ _syslibRoots = paths;
+void MachOLinkingContext::addRpath(StringRef rpath) { // Appends rpath to _rpaths.
+ _rpaths.push_back(rpath);
+void MachOLinkingContext::addModifiedSearchDir(StringRef libPath, // Adds libPath to _searchDirs, prefixed by each -syslibroot under which it exists.
+ bool isSystemPath) {
+ bool addedModifiedPath = false;
+ // -syslibroot only applies to absolute paths.
+ if (libPath.startswith("/")) {
+ for (auto syslibRoot : _syslibRoots) {
+ SmallString<256> path(syslibRoot);
+ llvm::sys::path::append(path, libPath);
+ if (pathExists(path)) {
+ _searchDirs.push_back(path.str().copy(_allocator)); // Copy into _allocator because path is a local buffer.
+ addedModifiedPath = true;
+ }
+ }
+ }
+ if (addedModifiedPath)
+ return;
+ // Finally, if only one -syslibroot is given, system paths which aren't in it
+ // get suppressed.
+ if (_syslibRoots.size() != 1 || !isSystemPath) {
+ if (pathExists(libPath)) {
+ _searchDirs.push_back(libPath); // Stored without copying; presumably the caller keeps libPath's storage alive — verify.
+ }
+ }
+void MachOLinkingContext::addFrameworkSearchDir(StringRef fwPath, // Adds fwPath to _frameworkDirs, prefixed by each -syslibroot under which it exists.
+ bool isSystemPath) {
+ bool pathAdded = false;
+ // -syslibroot is only used with absolute framework search paths.
+ if (fwPath.startswith("/")) {
+ for (auto syslibRoot : _syslibRoots) {
+ SmallString<256> path(syslibRoot);
+ llvm::sys::path::append(path, fwPath);
+ if (pathExists(path)) {
+ _frameworkDirs.push_back(path.str().copy(_allocator)); // Copy into _allocator because path is a local buffer.
+ pathAdded = true;
+ }
+ }
+ }
+ // If fwPath found in any -syslibroot, then done.
+ if (pathAdded)
+ return;
+ // If only one -syslibroot, system paths not in that SDK are suppressed.
+ if (isSystemPath && (_syslibRoots.size() == 1))
+ return;
+ // Only use raw fwPath if that directory exists.
+ if (pathExists(fwPath))
+ _frameworkDirs.push_back(fwPath);
+MachOLinkingContext::searchDirForLibrary(StringRef path,
+                                         StringRef libName) const {
+  SmallString<256> fullPath;
+  // Rebuilds fullPath as path/leaf and reports whether that file exists.
+  auto probe = [&](const Twine &leaf) {
+    fullPath.assign(path);
+    llvm::sys::path::append(fullPath, leaf);
+    return fileExists(fullPath);
+  };
+  // A request ending in .o is special: just search for the file directly.
+  if (libName.endswith(".o")) {
+    if (probe(libName))
+      return fullPath.str().copy(_allocator);
+    return llvm::None;
+  }
+  // Search for dynamic library
+  if (probe(Twine("lib") + libName + ".dylib"))
+    return fullPath.str().copy(_allocator);
+  // If not, try for a static library
+  if (probe(Twine("lib") + libName + ".a"))
+    return fullPath.str().copy(_allocator);
+  return llvm::None;
+MachOLinkingContext::searchLibrary(StringRef libName) const {
+  // Probe each search directory in order and return the first hit, or
+  // llvm::None when libName is found in none of them.
+  for (StringRef dir : searchDirs()) {
+    llvm::Optional<StringRef> searchDir = searchDirForLibrary(dir, libName);
+    if (searchDir)
+      return searchDir;
+  }
+  return llvm::None;
+MachOLinkingContext::findPathForFramework(StringRef fwName) const{ // Returns the first existing dir/fwName.framework/fwName among the framework dirs, or llvm::None.
+ SmallString<256> fullPath;
+ for (StringRef dir : frameworkDirs()) {
+ fullPath.assign(dir);
+ llvm::sys::path::append(fullPath, Twine(fwName) + ".framework", fwName);
+ if (fileExists(fullPath))
+ return fullPath.str().copy(_allocator); // Copy into _allocator because fullPath is a local buffer.
+ }
+ return llvm::None;
+bool MachOLinkingContext::validateImpl() { // Rejects option combinations that do not fit the output type, then seeds undefined symbols and dead-strip roots. Returns false after reporting an error.
+ // TODO: if -arch not specified, look at arch of first .o file.
+ if (_currentVersion && _outputMachOType != MH_DYLIB) {
+ error("-current_version can only be used with dylibs");
+ return false;
+ }
+ if (_compatibilityVersion && _outputMachOType != MH_DYLIB) {
+ error("-compatibility_version can only be used with dylibs");
+ return false;
+ }
+ if (_deadStrippableDylib && _outputMachOType != MH_DYLIB) {
+ error("-mark_dead_strippable_dylib can only be used with dylibs");
+ return false;
+ }
+ if (!_bundleLoader.empty() && outputMachOType() != MH_BUNDLE) {
+ error("-bundle_loader can only be used with Mach-O bundles");
+ return false;
+ }
+ // If -exported_symbols_list used, all exported symbols must be defined.
+ if (_exportMode == ExportMode::whiteList) {
+ for (const auto &symbol : _exportedSymbols)
+ addInitialUndefinedSymbol(symbol.getKey());
+ }
+ // If -dead_strip, set up initial live symbols.
+ if (deadStrip()) {
+ // Entry point is live.
+ if (outputTypeHasEntry())
+ addDeadStripRoot(entrySymbolName());
+ // Lazy binding helper is live.
+ if (needsStubsPass())
+ addDeadStripRoot(binderSymbolName());
+ // If using -exported_symbols_list, make all exported symbols live.
+ if (_exportMode == ExportMode::whiteList) {
+ setGlobalsAreDeadStripRoots(false); // Only the listed symbols are roots, not every global.
+ for (const auto &symbol : _exportedSymbols)
+ addDeadStripRoot(symbol.getKey());
+ }
+ }
+ addOutputFileDependency(outputPath());
+ return true;
+void MachOLinkingContext::addPasses(PassManager &pm) { // Registers the Mach-O passes in a fixed order; the layout pass is always added, the others only when their needs*Pass() check is true.
+ // objc pass should be before layout pass. Otherwise test cases may contain
+ // no atoms which confuses the layout pass.
+ if (needsObjCPass())
+ mach_o::addObjCPass(pm, *this);
+ mach_o::addLayoutPass(pm, *this);
+ if (needsStubsPass())
+ mach_o::addStubsPass(pm, *this);
+ if (needsCompactUnwindPass())
+ mach_o::addCompactUnwindPass(pm, *this);
+ if (needsGOTPass())
+ mach_o::addGOTPass(pm, *this);
+ if (needsTLVPass())
+ mach_o::addTLVPass(pm, *this);
+ if (needsShimPass())
+ mach_o::addShimPass(pm, *this); // Shim pass must run after stubs pass.
+Writer &MachOLinkingContext::writer() const { // Creates the Mach-O writer on first use and returns the cached instance afterwards.
+ if (!_writer)
+ _writer = createWriterMachO(*this);
+ return *_writer;
+MachOLinkingContext::getMemoryBuffer(StringRef path) { // Loads path (or stdin) into a buffer and records it as an input dependency; for a fat file only the slice for the configured arch is returned.
+ addInputFileDependency(path);
+ ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr =
+ MemoryBuffer::getFileOrSTDIN(path);
+ if (std::error_code ec = mbOrErr.getError())
+ return ec;
+ std::unique_ptr<MemoryBuffer> mb = std::move(mbOrErr.get());
+ // If buffer contains a fat file, find required arch in fat buffer
+ // and switch buffer to point to just that required slice.
+ uint32_t offset;
+ uint32_t size;
+ if (sliceFromFatFile(mb->getMemBufferRef(), offset, size))
+ return MemoryBuffer::getFileSlice(path, size, offset); // Re-reads the slice from path; the full buffer mb is dropped.
+ return std::move(mb);
+MachODylibFile* MachOLinkingContext::loadIndirectDylib(StringRef path) { // Loads and parses the file at path and keeps it alive in _indirectDylibs; returns nullptr if reading or loading fails.
+ ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = getMemoryBuffer(path);
+ if (mbOrErr.getError())
+ return nullptr;
+ ErrorOr<std::unique_ptr<File>> fileOrErr =
+ registry().loadFile(std::move(mbOrErr.get()));
+ if (!fileOrErr)
+ return nullptr;
+ std::unique_ptr<File> &file = fileOrErr.get();
+ file->parse(); // NOTE(review): any result of parse() is discarded here; confirm failures are reported elsewhere.
+ MachODylibFile *result = reinterpret_cast<MachODylibFile *>(file.get()); // NOTE(review): unchecked cast; assumes the loaded File really is a MachODylibFile — confirm.
+ // Node object now owned by _indirectDylibs vector.
+ _indirectDylibs.push_back(std::move(file));
+ return result;
+MachODylibFile* MachOLinkingContext::findIndirectDylib(StringRef path) { // Resolves a dylib path: already-registered map first, then -L search, then each syslibroot, then the raw path; nullptr if none works.
+ // See if already loaded.
+ auto pos = _pathToDylibMap.find(path);
+ if (pos != _pathToDylibMap.end())
+ return pos->second;
+ // Search -L paths if of the form "libXXX.dylib"
+ std::pair<StringRef, StringRef> split = path.rsplit('/');
+ StringRef leafName = split.second; // NOTE(review): rsplit leaves .second empty when path contains no '/', so a bare leaf name skips the -L search — confirm intended.
+ if (leafName.startswith("lib") && leafName.endswith(".dylib")) {
+ // FIXME: Need to enhance searchLibrary() to only look for .dylib
+ auto libPath = searchLibrary(leafName);
+ if (libPath)
+ return loadIndirectDylib(libPath.getValue());
+ }
+ // Try full path with sysroot.
+ for (StringRef sysPath : _syslibRoots) {
+ SmallString<256> fullPath;
+ fullPath.assign(sysPath);
+ llvm::sys::path::append(fullPath, path);
+ if (pathExists(fullPath))
+ return loadIndirectDylib(fullPath);
+ }
+ // Try full path.
+ if (pathExists(path)) {
+ return loadIndirectDylib(path);
+ }
+ return nullptr;
+uint32_t MachOLinkingContext::dylibCurrentVersion(StringRef installName) const {
+  // Dylibs that were never registered report the default version.
+  auto entry = _pathToDylibMap.find(installName);
+  if (entry == _pathToDylibMap.end())
+    return 0x10000; // 1.0
+  return entry->second->currentVersion();
+uint32_t MachOLinkingContext::dylibCompatVersion(StringRef installName) const {
+  // Dylibs that were never registered report the default version.
+  auto entry = _pathToDylibMap.find(installName);
+  if (entry == _pathToDylibMap.end())
+    return 0x10000; // 1.0
+  return entry->second->compatVersion();
+void MachOLinkingContext::createImplicitFiles( // Loads re-exported dylibs until no new ones appear, then appends the writer's implicit files and, if needed, a FlatNamespaceFile to result.
+ std::vector<std::unique_ptr<File> > &result) {
+ // Add indirect dylibs by asking each linked dylib to add its indirects.
+ // Iterate until no more dylibs get loaded.
+ size_t dylibCount = 0;
+ while (dylibCount != _allDylibs.size()) {
+ dylibCount = _allDylibs.size();
+ for (MachODylibFile *dylib : _allDylibs) { // NOTE(review): if loading an indirect dylib appends to _allDylibs during this loop, the iteration would be invalidated — confirm it cannot.
+ dylib->loadReExportedDylibs([this] (StringRef path) -> MachODylibFile* {
+ return findIndirectDylib(path); });
+ }
+ }
+ // Let writer add output type specific extras.
+ writer().createImplicitFiles(result);
+ // If undefinedMode is != error, add a FlatNamespaceFile instance. This will
+ // provide a SharedLibraryAtom for symbols that aren't defined elsewhere.
+ if (undefinedMode() != UndefinedMode::error) {
+ result.emplace_back(new mach_o::FlatNamespaceFile(*this));
+ _flatNamespaceFile = result.back().get(); // Non-owning pointer; result owns the file.
+ }
+void MachOLinkingContext::registerDylib(MachODylibFile *dylib, // Records dylib in _allDylibs (once) and in _pathToDylibMap under its install name and path; holds _dylibsMutex throughout.
+ bool upward) const {
+ std::lock_guard<std::mutex> lock(_dylibsMutex);
+ if (std::find(_allDylibs.begin(),
+ _allDylibs.end(), dylib) == _allDylibs.end())
+ _allDylibs.push_back(dylib);
+ _pathToDylibMap[dylib->installName()] = dylib;
+ // If path is different than install name, register path too.
+ if (!dylib->path().equals(dylib->installName()))
+ _pathToDylibMap[dylib->path()] = dylib;
+ if (upward)
+ _upwardDylibs.insert(dylib);
+bool MachOLinkingContext::isUpwardDylib(StringRef installName) const {
+  // True when any dylib registered as upward has this install name.
+  return std::any_of(_upwardDylibs.begin(), _upwardDylibs.end(),
+                     [&](MachODylibFile *candidate) {
+                       return candidate->installName().equals(installName);
+                     });
+// Returns the ArchHandler for this context, creating it with
+// ArchHandler::create(_arch) on first use and reusing it afterwards.
+ArchHandler &MachOLinkingContext::archHandler() const {
+  if (_archHandler)
+    return *_archHandler;
+  _archHandler = ArchHandler::create(_arch);
+  return *_archHandler;
+// Appends a (segment, section, alignment) record to _sectAligns; the records
+// are consulted later by sectionAligned().
+void MachOLinkingContext::addSectionAlignment(StringRef seg, StringRef sect,
+                                              uint16_t align) {
+  _sectAligns.push_back(SectionAlign{ seg, sect, align });
+// Adds a section with the given segment/section names and \p content to the
+// context's SectCreateFile. The file is created on the first call, wrapped in
+// a FileNode and appended to getNodes(); _sectCreateFile keeps a non-owning
+// pointer to it for later calls.
+void MachOLinkingContext::addSectCreateSection(
+    StringRef seg, StringRef sect,
+    std::unique_ptr<MemoryBuffer> content) {
+  if (!_sectCreateFile) {
+    auto newFile = llvm::make_unique<mach_o::SectCreateFile>();
+    // Grab the raw pointer before ownership moves into the node.
+    _sectCreateFile = newFile.get();
+    auto node = llvm::make_unique<FileNode>(std::move(newFile));
+    getNodes().push_back(std::move(node));
+  }
+  assert(_sectCreateFile && "sectcreate file does not exist.");
+  _sectCreateFile->addSection(seg, sect, std::move(content));
+// Looks up the first record in _sectAligns matching segment \p seg and section
+// \p sect. On a match stores its alignment in \p align and returns true;
+// otherwise returns false and leaves \p align untouched.
+bool MachOLinkingContext::sectionAligned(StringRef seg, StringRef sect,
+                                         uint16_t &align) const {
+  const auto match =
+      std::find_if(_sectAligns.begin(), _sectAligns.end(),
+                   [&](const SectionAlign &entry) {
+                     return seg.equals(entry.segmentName) &&
+                            sect.equals(entry.sectionName);
+                   });
+  if (match == _sectAligns.end())
+    return false;
+  align = match->align;
+  return true;
+// Adds \p sym to the set of exported symbols.
+//
+// Entries ending in ".eh" or starting with ".objc_category_name_" are dropped
+// with a warning. Unless targeting i386 MacOSX, an old-ABI
+// ".objc_class_name_<X>" entry is replaced by the two new-ABI names
+// "_OBJC_CLASS_$_<X>" and "_OBJC_METACLASS_$_<X>".
+void MachOLinkingContext::addExportSymbol(StringRef sym) {
+  // Support old crufty export lists with bogus entries.
+  if (sym.endswith(".eh") || sym.startswith(".objc_category_name_")) {
+    llvm::errs() << "warning: ignoring " << sym << " in export list\n";
+    return;
+  }
+  // Only i386 MacOSX uses old ABI, so don't change those.
+  const bool usesOldObjCABI = (_os == OS::macOSX) && (_arch == arch_x86);
+  // ObjC has two different ABIs. Be nice and allow one export list to work
+  // for both ABIs by renaming symbols.
+  if (!usesOldObjCABI && sym.startswith(".objc_class_name_")) {
+    // 17 == strlen(".objc_class_name_"); what follows is the class name.
+    const StringRef className = sym.substr(17);
+    for (const char *prefix : {"_OBJC_CLASS_$_", "_OBJC_METACLASS_$_"}) {
+      std::string renamed(prefix);
+      renamed += className;
+      _exportedSymbols.insert(copy(renamed));
+    }
+    return;
+  }
+  // FIXME: Support wildcards.
+  _exportedSymbols.insert(sym);
+// Decides whether \p sym is exported under the current export mode: in
+// whiteList mode a symbol is exported iff it is in _exportedSymbols, in
+// blackList mode iff it is not. Must not be called in globals mode.
+bool MachOLinkingContext::exportSymbolNamed(StringRef sym) const {
+  if (_exportMode == ExportMode::globals)
+    llvm_unreachable("exportSymbolNamed() should not be called in this mode");
+  const bool listed = _exportedSymbols.count(sym) != 0;
+  if (_exportMode == ExportMode::whiteList)
+    return listed;
+  if (_exportMode == ExportMode::blackList)
+    return !listed;
+  llvm_unreachable("_exportMode unknown enum value");
+// Returns the demangled form of \p symbolName, or \p symbolName itself when
+// demangling is disabled, the name does not start with "__Z", or the
+// demangler yields no result.
+std::string MachOLinkingContext::demangle(StringRef symbolName) const {
+  // Only try when -demangle was given on the command line, and only for
+  // symbols that look like C++ symbols.
+  if (!demangleSymbols() || !symbolName.startswith("__Z"))
+    return symbolName;
+  SmallString<256> storage;
+  const StringRef terminated =
+      Twine(symbolName).toNullTerminatedStringRef(storage);
+  // Mach-O has extra leading underscore that needs to be removed.
+  const char *mangled = terminated.data() + 1;
+  int status;
+  char *demangled = llvm::itaniumDemangle(mangled, nullptr, nullptr, &status);
+  if (!demangled)
+    return symbolName;
+  std::string result(demangled);
+  // The demangler hands back a malloc'ed buffer that the caller must free.
+  free(demangled);
+  return result;
+// Writes one dependency-info record to \p DepInfo: the single byte \p Opcode,
+// then \p Path, then a terminating NUL. Does nothing if \p DepInfo is null.
+static void addDependencyInfoHelper(llvm::raw_fd_ostream *DepInfo,
+                                    char Opcode, StringRef Path) {
+  if (DepInfo == nullptr)
+    return;
+  *DepInfo << Opcode << Path << '\0';
+// Opens \p path as the dependency-info output stream and writes the initial
+// record (opcode 0x00, "lld"). On failure the stream is discarded and the
+// open error is returned; on success returns a default std::error_code.
+std::error_code MachOLinkingContext::createDependencyFile(StringRef path) {
+  std::error_code ec;
+  _dependencyInfo.reset(
+      new llvm::raw_fd_ostream(path, ec, llvm::sys::fs::F_None));
+  if (!ec) {
+    addDependencyInfoHelper(_dependencyInfo.get(), 0x00, "lld" /*FIXME*/);
+    return std::error_code();
+  }
+  _dependencyInfo.reset();
+  return ec;
+// Writes a dependency-info record with opcode 0x10 for \p path. Does nothing
+// when _dependencyInfo is null (the helper ignores a null stream).
+void MachOLinkingContext::addInputFileDependency(StringRef path) const {
+ addDependencyInfoHelper(_dependencyInfo.get(), 0x10, path);
+// Writes a dependency-info record with opcode 0x11 for \p path. Does nothing
+// when _dependencyInfo is null.
+void MachOLinkingContext::addInputFileNotFound(StringRef path) const {
+ addDependencyInfoHelper(_dependencyInfo.get(), 0x11, path);
+// Writes a dependency-info record with opcode 0x40 for \p path. Does nothing
+// when _dependencyInfo is null.
+void MachOLinkingContext::addOutputFileDependency(StringRef path) const {
+ addDependencyInfoHelper(_dependencyInfo.get(), 0x40, path);
+// Records one order-file entry for \p symbol, optionally restricted to the
+// object file named \p filename. Each entry receives the next ordinal from
+// _orderFileEntries.
+void MachOLinkingContext::appendOrderedSymbol(StringRef symbol,
+                                              StringRef filename) {
+  // To support sorting static functions which may have the same name in
+  // multiple .o files, _orderFiles maps the symbol name to a vector
+  // of OrderFileNode each of which can specify a file prefix.
+  OrderFileNode node;
+  node.order = _orderFileEntries++;
+  if (!filename.empty())
+    node.fileFilter = copy(filename);
+  _orderFiles[symbol].push_back(node);
+// Searches \p nodes, in order, for the first entry that applies to \p atom:
+// either an entry with no file filter, or one whose filter equals the leaf
+// name of the atom's file path. On a match stores the entry's order in
+// \p ordinal and returns true; otherwise returns false.
+MachOLinkingContext::findOrderOrdinal(const std::vector<OrderFileNode> &nodes,
+                                      const DefinedAtom *atom,
+                                      unsigned &ordinal) {
+  const File *objFile = &atom->file();
+  assert(objFile);
+  // Compare against the last path component when there is one.
+  StringRef objName = objFile->path();
+  const StringRef leaf = objName.rsplit('/').second;
+  if (!leaf.empty())
+    objName = leaf;
+  for (const OrderFileNode &candidate : nodes) {
+    // An unprefixed entry matches any file; a prefixed entry must name the
+    // atom's file.
+    const bool matches = candidate.fileFilter.empty() ||
+                         candidate.fileFilter.equals(objName);
+    if (!matches)
+      continue;
+    ordinal = candidate.order;
+    return true;
+  }
+  return false;
+// Order-file based comparison of two atoms. Returns false when the order file
+// has nothing to say about this pair (no entries at all, an unnamed atom, or
+// neither atom matched). Otherwise returns true with \p leftBeforeRight set:
+// a matched atom precedes an unmatched one, and two matched atoms are ordered
+// by their order-file ordinals.
+bool MachOLinkingContext::customAtomOrderer(const DefinedAtom *left,
+                                            const DefinedAtom *right,
+                                            bool &leftBeforeRight) const {
+  // No custom sorting if no order file entries.
+  if (!_orderFileEntries)
+    return false;
+  // Order files can only order named atoms.
+  const StringRef leftName = left->name();
+  const StringRef rightName = right->name();
+  if (leftName.empty() || rightName.empty())
+    return false;
+  // If neither is in order file list, no custom sorter.
+  const auto leftPos = _orderFiles.find(leftName);
+  const auto rightPos = _orderFiles.find(rightName);
+  const bool leftListed = (leftPos != _orderFiles.end());
+  const bool rightListed = (rightPos != _orderFiles.end());
+  if (!(leftListed || rightListed))
+    return false;
+  // There could be multiple symbols with same name but different file
+  // prefixes, so a listed name still has to match on file.
+  unsigned leftOrder;
+  unsigned rightOrder;
+  const bool foundLeft =
+      leftListed && findOrderOrdinal(leftPos->getValue(), left, leftOrder);
+  const bool foundRight =
+      rightListed && findOrderOrdinal(rightPos->getValue(), right, rightOrder);
+  if (!(foundLeft || foundRight))
+    return false;
+  // If only one is in order file list, ordered one goes first. The ordinals
+  // are only read when both were found.
+  leftBeforeRight =
+      (foundLeft != foundRight) ? foundLeft : (leftOrder < rightOrder);
+  return true;
+// Returns true if \p elem is a FileNode whose file is a shared library or an
+// archive library.
+static bool isLibrary(const std::unique_ptr<Node> &elem) {
+  auto *fileNode = dyn_cast<FileNode>(const_cast<Node *>(elem.get()));
+  if (!fileNode)
+    return false;
+  File *file = fileNode->getFile();
+  return isa<SharedLibraryFile>(file) || isa<ArchiveLibraryFile>(file);
+// The darwin linker processes input files in two phases. The first phase
+// links in all object (.o) files in command line order. The second phase
+// links in libraries in command line order.
+// To reproduce that, this function moves every non-library node in front of
+// the library nodes while keeping the relative order inside each of the two
+// groups, then appends a GroupEnd covering the libraries so that the Resolver
+// will reiterate over them as long as new undefines come from libraries.
+void MachOLinkingContext::finalizeInputFiles() {
+  std::vector<std::unique_ptr<Node>> &elements = getNodes();
+  const auto firstLibrary = std::stable_partition(
+      elements.begin(), elements.end(),
+      [](const std::unique_ptr<Node> &node) { return !isLibrary(node); });
+  // Everything from firstLibrary to the end is a library.
+  size_t numLibs = static_cast<size_t>(elements.end() - firstLibrary);
+  elements.push_back(llvm::make_unique<GroupEnd>(numLibs));
+// Validates a newly loaded file against the link context and lets the file
+// fill in context settings that are still unknown.
+//
+// Files that are not MachOFile are accepted unchanged. For a MachOFile the
+// arch, OS, ObjC constraint and Swift version are checked in that order; each
+// unset context value is adopted from the file, and a mismatch with an
+// already-set value yields a GenericError. Returns success otherwise.
+llvm::Error MachOLinkingContext::handleLoadedFile(File &file) {
+ auto *machoFile = dyn_cast<MachOFile>(&file);
+ if (!machoFile)
+ return llvm::Error::success();
+ // Check that the arch of the context matches that of the file.
+ // Also set the arch of the context if it didn't have one.
+ if (_arch == arch_unknown) {
+ _arch = machoFile->arch();
+ } else if (machoFile->arch() != arch_unknown && machoFile->arch() != _arch) {
+ // Archs are different.
+ return llvm::make_error<GenericError>(file.path() +
+ Twine(" cannot be linked due to incompatible architecture"));
+ }
+ // Check that the OS of the context matches that of the file.
+ // Also set the OS of the context if it didn't have one.
+ if (_os == OS::unknown) {
+ _os = machoFile->OS();
+ } else if (machoFile->OS() != OS::unknown && machoFile->OS() != _os) {
+ // OSes are different.
+ return llvm::make_error<GenericError>(file.path() +
+ Twine(" cannot be linked due to incompatible operating systems"));
+ }
+ // Check that if the objc info exists, that it is compatible with the target
+ // OS.
+ switch (machoFile->objcConstraint()) {
+ case objc_unknown:
+ // The file is not compiled with objc, so skip the checks.
+ break;
+ case objc_gc_only:
+ case objc_supports_gc:
+ // These values are expected to have been rejected before this point.
+ llvm_unreachable("GC support should already have thrown an error");
+ case objc_retainReleaseForSimulator:
+ // The file is built with simulator objc, so make sure that the context
+ // is also building with simulator support.
+ if (_os != OS::iOS_simulator)
+ return llvm::make_error<GenericError>(file.path() +
+ Twine(" cannot be linked. It contains ObjC built for the simulator"
+ " while we are linking a non-simulator target"));
+ assert((_objcConstraint == objc_unknown ||
+ _objcConstraint == objc_retainReleaseForSimulator) &&
+ "Must be linking with retain/release for the simulator");
+ _objcConstraint = objc_retainReleaseForSimulator;
+ break;
+ case objc_retainRelease:
+ // The file is built without simulator objc, so make sure that the
+ // context is also building without simulator support.
+ if (_os == OS::iOS_simulator)
+ return llvm::make_error<GenericError>(file.path() +
+ Twine(" cannot be linked. It contains ObjC built for a non-simulator"
+ " target while we are linking a simulator target"));
+ assert((_objcConstraint == objc_unknown ||
+ _objcConstraint == objc_retainRelease) &&
+ "Must be linking with retain/release for a non-simulator target");
+ _objcConstraint = objc_retainRelease;
+ break;
+ }
+ // Check that the swift version of the context matches that of the file.
+ // Also set the swift version of the context if it didn't have one.
+ // A zero swift version on either side is treated as "not set".
+ if (!_swiftVersion) {
+ _swiftVersion = machoFile->swiftVersion();
+ } else if (machoFile->swiftVersion() &&
+ machoFile->swiftVersion() != _swiftVersion) {
+ // Swift versions are different.
+ return llvm::make_error<GenericError>("different swift versions");
+ }
+ return llvm::Error::success();
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h
new file mode 100644
index 000000000000..7eeb8adbd84f
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h
@@ -0,0 +1,336 @@
+//===- lib/ReaderWriter/MachO/MachONormalizedFile.h -----------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+/// \file These data structures comprise the "normalized" view of
+/// mach-o object files. The normalized view is an in-memory only data structure
+/// which is always in native endianness and pointer size.
+/// The normalized view easily converts to and from YAML using YAML I/O.
+/// The normalized view converts to and from binary mach-o object files using
+/// the writeBinary() and readBinary() functions.
+/// The normalized view converts to and from lld::Atoms using the
+/// normalizedToAtoms() and normalizedFromAtoms().
+/// Overall, the conversion paths available look like:
+/// +---------------+
+/// | binary mach-o |
+/// +---------------+
+/// ^
+/// |
+/// v
+/// +------------+ +------+
+/// | normalized | <-> | yaml |
+/// +------------+ +------+
+/// ^
+/// |
+/// v
+/// +-------+
+/// | Atoms |
+/// +-------+
+#include "DebugInfo.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Core/Error.h"
+#include "lld/ReaderWriter/MachOLinkingContext.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/YAMLTraits.h"
+using llvm::BumpPtrAllocator;
+using llvm::yaml::Hex64;
+using llvm::yaml::Hex32;
+using llvm::yaml::Hex16;
+using llvm::yaml::Hex8;
+using llvm::yaml::SequenceTraits;
+using llvm::MachO::HeaderFileType;
+using llvm::MachO::BindType;
+using llvm::MachO::RebaseType;
+using llvm::MachO::NListType;
+using llvm::MachO::RelocationInfoType;
+using llvm::MachO::SectionType;
+using llvm::MachO::LoadCommandType;
+using llvm::MachO::ExportSymbolKind;
+using llvm::MachO::DataRegionType;
+namespace lld {
+namespace mach_o {
+namespace normalized {
+/// The real mach-o relocation record is 8-bytes on disk and is
+/// encoded in one of two different bit-field patterns. This
+/// normalized form has the union of all possible fields.
+struct Relocation {
+ /// Creates a non-scattered GENERIC_RELOC_VANILLA relocation with every
+ /// numeric field zero and every flag false.
+ Relocation() : offset(0), scattered(false),
+ type(llvm::MachO::GENERIC_RELOC_VANILLA),
+ length(0), pcRel(false), isExtern(false), value(0),
+ symbol(0) { }
+ Hex32 offset;
+ // Selects which of the two on-disk encodings this record uses.
+ bool scattered;
+ RelocationInfoType type;
+ uint8_t length;
+ bool pcRel;
+ bool isExtern;
+ // NOTE(review): presumably only meaningful for one of the two encodings
+ // (value for scattered, symbol/isExtern for plain) - confirm against the
+ // pack/unpack helpers.
+ Hex32 value;
+ uint32_t symbol;
+/// A typedef so that YAML I/O can treat this vector as a sequence.
+typedef std::vector<Relocation> Relocations;
+/// A typedef so that YAML I/O can process the raw bytes in a section.
+typedef std::vector<Hex8> ContentBytes;
+/// A typedef so that YAML I/O can treat indirect symbols as a flow sequence.
+typedef std::vector<uint32_t> IndirectSymbols;
+/// A typedef so that YAML I/O can encode/decode section attributes.
+LLVM_YAML_STRONG_TYPEDEF(uint32_t, SectionAttr)
+/// A typedef so that YAML I/O can encode/decode section alignment.
+LLVM_YAML_STRONG_TYPEDEF(uint16_t, SectionAlignment)
+/// Mach-O has a 32-bit and 64-bit section record. This normalized form
+/// can support either kind.
+struct Section {
+ /// Creates an S_REGULAR section with no attributes, alignment 1 and
+ /// address 0; names, content and the vectors start empty.
+ Section() : type(llvm::MachO::S_REGULAR),
+ attributes(0), alignment(1), address(0) { }
+ StringRef segmentName;
+ StringRef sectionName;
+ SectionType type;
+ SectionAttr attributes;
+ SectionAlignment alignment;
+ Hex64 address;
+ // Non-owning view of the section bytes; the storage must outlive this
+ // Section.
+ ArrayRef<uint8_t> content;
+ Relocations relocations;
+ IndirectSymbols indirectSymbols;
+/// A typedef so that YAML I/O can encode/decode the scope bits of an nlist.
+LLVM_YAML_STRONG_TYPEDEF(uint8_t, SymbolScope)
+/// A typedef so that YAML I/O can encode/decode the desc bits of an nlist.
+LLVM_YAML_STRONG_TYPEDEF(uint16_t, SymbolDesc)
+/// Mach-O has a 32-bit and 64-bit symbol table entry (nlist), and the symbol
+/// type and scope are mixed in the same n_type field. This normalized form
+/// works for any pointer size and separates out the type and scope.
+struct Symbol {
+ /// Creates an N_UNDF symbol with scope, sect, desc and value all zero.
+ Symbol() : type(llvm::MachO::N_UNDF), scope(0), sect(0), desc(0), value(0) { }
+ StringRef name;
+ NListType type;
+ SymbolScope scope;
+ // NOTE(review): presumably the nlist section ordinal - confirm.
+ uint8_t sect;
+ SymbolDesc desc;
+ Hex64 value;
+/// Check whether the given section type indicates a zero-filled section.
+// FIXME: Utility functions of this kind should probably be moved into
+// llvm/Support.
+inline bool isZeroFillSection(SectionType T) {
+ return (T == llvm::MachO::S_ZEROFILL ||
+/// A typedef so that YAML I/O can (de/en)code the protection bits of a segment.
+/// A typedef to hold versions X.Y.Z packed into 32-bit xxxx.yy.zz
+LLVM_YAML_STRONG_TYPEDEF(uint32_t, PackedVersion)
+/// Segments are only used in normalized final linked images (not in relocatable
+/// object files). They specify how a range of the file is loaded.
+struct Segment {
+ StringRef name;
+ Hex64 address;
+ Hex64 size;
+ // NOTE(review): presumably the initial and maximum VM protection of the
+ // segment - confirm against the binary writer.
+ VMProtect init_access;
+ VMProtect max_access;
+/// Only used in normalized final linked images to specify on which dylibs
+/// it depends.
+struct DependentDylib {
+ StringRef path;
+ // Load command type used to record this dependency.
+ LoadCommandType kind;
+ // Versions in the packed 32-bit form of PackedVersion.
+ PackedVersion compatVersion;
+ PackedVersion currentVersion;
+/// A normalized rebasing entry. Only used in normalized final linked images.
+struct RebaseLocation {
+ // NOTE(review): presumably an offset within the segment selected by
+ // segIndex - confirm.
+ Hex32 segOffset;
+ uint8_t segIndex;
+ RebaseType kind;
+/// A normalized binding entry. Only used in normalized final linked images.
+struct BindLocation {
+ // NOTE(review): presumably an offset within the segment selected by
+ // segIndex - confirm.
+ Hex32 segOffset;
+ uint8_t segIndex;
+ BindType kind;
+ bool canBeNull;
+ // NOTE(review): presumably identifies the dylib the symbol is bound from -
+ // confirm.
+ int ordinal;
+ StringRef symbolName;
+ Hex64 addend;
+/// A typedef so that YAML I/O can encode/decode export flags.
+LLVM_YAML_STRONG_TYPEDEF(uint32_t, ExportFlags)
+/// A normalized export entry. Only used in normalized final linked images.
+struct Export {
+ StringRef name;
+ Hex64 offset;
+ ExportSymbolKind kind;
+ ExportFlags flags;
+ // NOTE(review): when otherOffset / otherName apply is not visible here -
+ // confirm against the export-trie reader and writer.
+ Hex32 otherOffset;
+ StringRef otherName;
+/// A normalized data-in-code entry.
+struct DataInCode {
+ // Start and extent of the region; NOTE(review): what the offset is relative
+ // to is not visible here - confirm.
+ Hex32 offset;
+ Hex16 length;
+ DataRegionType kind;
+/// A typedef so that YAML I/O can encode/decode mach_header.flags.
+LLVM_YAML_STRONG_TYPEDEF(uint32_t, FileFlags)
+/// The in-memory normalized view of one mach-o file: header values, sections,
+/// symbols grouped by kind, and, for final linked images only, segments,
+/// dylib dependencies and the LINKEDIT-derived tables. Defaults describe an
+/// empty MH_OBJECT file of unknown arch and OS.
+struct NormalizedFile {
+ MachOLinkingContext::Arch arch = MachOLinkingContext::arch_unknown;
+ HeaderFileType fileType = llvm::MachO::MH_OBJECT;
+ FileFlags flags = 0;
+ std::vector<Segment> segments; // Not used in object files.
+ std::vector<Section> sections;
+ // Symbols sorted by kind.
+ std::vector<Symbol> localSymbols;
+ std::vector<Symbol> globalSymbols;
+ std::vector<Symbol> undefinedSymbols;
+ std::vector<Symbol> stabsSymbols;
+ // Maps to load commands with no LINKEDIT content (final linked images only).
+ std::vector<DependentDylib> dependentDylibs;
+ StringRef installName; // dylibs only
+ PackedVersion compatVersion = 0; // dylibs only
+ PackedVersion currentVersion = 0; // dylibs only
+ bool hasUUID = false;
+ bool hasMinVersionLoadCommand = false;
+ bool generateDataInCodeLoadCommand = false;
+ std::vector<StringRef> rpaths;
+ Hex64 entryAddress = 0;
+ Hex64 stackSize = 0;
+ MachOLinkingContext::OS os = MachOLinkingContext::OS::unknown;
+ Hex64 sourceVersion = 0;
+ // NOTE(review): the member name is spelled "minOSverson"; renaming it would
+ // touch every user of this struct, so it is left as is.
+ PackedVersion minOSverson = 0;
+ PackedVersion sdkVersion = 0;
+ LoadCommandType minOSVersionKind = (LoadCommandType)0;
+ // Maps to load commands with LINKEDIT content (final linked images only).
+ Hex32 pageSize = 0;
+ std::vector<RebaseLocation> rebasingInfo;
+ std::vector<BindLocation> bindingInfo;
+ std::vector<BindLocation> weakBindingInfo;
+ std::vector<BindLocation> lazyBindingInfo;
+ std::vector<Export> exportInfo;
+ std::vector<uint8_t> functionStarts;
+ std::vector<DataInCode> dataInCode;
+ // TODO:
+ // code-signature
+ // split-seg-info
+ // function-starts
+ // NOTE(review): "function-starts" is still listed above although a
+ // functionStarts member exists - confirm whether the TODO is stale.
+ // For any allocations in this struct which need to be owned by this struct.
+ BumpPtrAllocator ownedAllocations;
+/// Tests if a file is a non-fat mach-o object file.
+bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch);
+/// If the buffer is a fat file containing the requested arch, then this
+/// function returns true with 'offset' and 'size' set to the location of the
+/// arch slice within the buffer. Otherwise returns false.
+bool sliceFromFatFile(MemoryBufferRef mb, MachOLinkingContext::Arch arch,
+ uint32_t &offset, uint32_t &size);
+/// Reads a mach-o file and produces an in-memory normalized view.
+readBinary(std::unique_ptr<MemoryBuffer> &mb,
+ const MachOLinkingContext::Arch arch);
+/// Takes in-memory normalized view and writes a mach-o object file.
+llvm::Error writeBinary(const NormalizedFile &file, StringRef path);
+size_t headerAndLoadCommandsSize(const NormalizedFile &file);
+/// Parses a yaml encoded mach-o file to produce an in-memory normalized view.
+readYaml(std::unique_ptr<MemoryBuffer> &mb);
+/// Writes a yaml encoded mach-o file given an in-memory normalized view.
+std::error_code writeYaml(const NormalizedFile &file, raw_ostream &out);
+normalizedObjectToAtoms(MachOFile *file,
+ const NormalizedFile &normalizedFile,
+ bool copyRefs);
+normalizedDylibToAtoms(MachODylibFile *file,
+ const NormalizedFile &normalizedFile,
+ bool copyRefs);
+/// Takes in-memory normalized dylib or object and parses it into lld::File
+normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
+ bool copyRefs);
+/// Takes atoms and generates a normalized mach-o view.
+normalizedFromAtoms(const lld::File &atomFile, const MachOLinkingContext &ctxt);
+} // namespace normalized
+/// Class for interfacing mach-o yaml files into generic yaml parsing
+class MachOYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler {
+ /// Remembers \p arch for later use in handledDocTag().
+ MachOYamlIOTaggedDocumentHandler(MachOLinkingContext::Arch arch)
+ : _arch(arch) { }
+ /// Overrides the base-class hook. NOTE(review): presumably returns true and
+ /// sets \p file when the document tag is a mach-o one - confirm against the
+ /// definition.
+ bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override;
+ // Architecture given at construction; never changes afterwards.
+ const MachOLinkingContext::Arch _arch;
+} // namespace mach_o
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp
new file mode 100644
index 000000000000..7c2e833c090f
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp
@@ -0,0 +1,594 @@
+//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp ---------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+/// \file For mach-o object files, this implementation converts from
+/// mach-o on-disk binary format to in-memory normalized mach-o.
+/// +---------------+
+/// | binary mach-o |
+/// +---------------+
+/// |
+/// |
+/// v
+/// +------------+
+/// | normalized |
+/// +------------+
+#include "ArchHandler.h"
+#include "MachONormalizedFile.h"
+#include "MachONormalizedFileBinaryUtils.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Core/Error.h"
+#include "lld/Core/SharedLibraryFile.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <functional>
+#include <system_error>
+using namespace llvm::MachO;
+using llvm::object::ExportEntry;
+using llvm::file_magic;
+using llvm::object::MachOObjectFile;
+namespace lld {
+namespace mach_o {
+namespace normalized {
+// Utility to call a lambda expression on each load command.
+//
+// Walks up to \p lcCount load commands stored back to back in \p lcRange.
+// For each one, \p func receives the command type and size in host byte
+// order plus a pointer to the raw (unswapped) command. A true return from
+// \p func stops the walk early; that still counts as success.
+//
+// Returns an error if a command header does not fit in what is left of
+// \p lcRange, if its cmdsize is smaller than the header itself, or if the
+// command would extend past the end of \p lcRange.
+static llvm::Error forEachLoadCommand(
+    StringRef lcRange, unsigned lcCount, bool isBig, bool is64,
+    std::function<bool(uint32_t cmd, uint32_t size, const char *lc)> func) {
+  const char* p = lcRange.begin();
+  for (unsigned i=0; i < lcCount; ++i) {
+    // Compare sizes rather than pointers: "p + cmdsize" can overflow for a
+    // hostile cmdsize, and the header must be in range before it is read.
+    const size_t remaining = static_cast<size_t>(lcRange.end() - p);
+    if (remaining < sizeof(load_command))
+      return llvm::make_error<GenericError>("Load command exceeds range");
+    // Work on a copy so the header can be byte-swapped, and so the read does
+    // not depend on the alignment of p.
+    load_command slc;
+    memcpy(&slc, p, sizeof(load_command));
+    if (isBig != llvm::sys::IsBigEndianHost)
+      swapStruct(slc);
+    // A command smaller than its own header is malformed; with cmdsize == 0
+    // the walk would otherwise never advance.
+    if (slc.cmdsize < sizeof(load_command))
+      return llvm::make_error<GenericError>("Load command size is too small");
+    if (slc.cmdsize > remaining)
+      return llvm::make_error<GenericError>("Load command exceeds range");
+    if (func(slc.cmd, slc.cmdsize, p))
+      return llvm::Error::success();
+    p += slc.cmdsize;
+  }
+  return llvm::Error::success();
+// Appends \p nreloc relocation records, read from \p buffer starting at byte
+// offset \p reloff, to \p relocs (unpacked with the given endianness).
+//
+// Returns executable_format_error, leaving \p relocs untouched, when the
+// relocation array does not lie entirely inside \p buffer.
+static std::error_code appendRelocations(Relocations &relocs, StringRef buffer,
+                                         bool bigEndian,
+                                         uint32_t reloff, uint32_t nreloc) {
+  // Each on-disk record is 8 bytes. Do the bounds arithmetic in 64 bits:
+  // in 32 bits reloff + nreloc*8 can wrap around and slip past the check.
+  const uint64_t relocsEnd = uint64_t(reloff) + uint64_t(nreloc) * 8;
+  if (relocsEnd > buffer.size())
+    return make_error_code(llvm::errc::executable_format_error);
+  const any_relocation_info* relocsArray =
+      reinterpret_cast<const any_relocation_info*>(buffer.begin()+reloff);
+  relocs.reserve(relocs.size() + nreloc);
+  for(uint32_t i=0; i < nreloc; ++i) {
+    relocs.push_back(unpackRelocation(relocsArray[i], bigEndian));
+  }
+  return std::error_code();
+// Appends \p count entries of the indirect symbol table, starting with entry
+// \p startIndex, to \p isyms. The table holds \p istCount 32-bit entries and
+// begins at byte offset \p istOffset inside \p buffer.
+//
+// Returns executable_format_error, leaving \p isyms untouched, when the
+// table does not fit in \p buffer or the requested entries are not all
+// inside the table.
+static std::error_code
+appendIndirectSymbols(IndirectSymbols &isyms, StringRef buffer, bool isBig,
+                      uint32_t istOffset, uint32_t istCount,
+                      uint32_t startIndex, uint32_t count) {
+  // 64-bit arithmetic so that neither bounds check can wrap around.
+  const uint64_t tableEnd = uint64_t(istOffset) + uint64_t(istCount) * 4;
+  if (tableEnd > buffer.size())
+    return make_error_code(llvm::errc::executable_format_error);
+  if (uint64_t(startIndex) + count > istCount)
+    return make_error_code(llvm::errc::executable_format_error);
+  // The table starts istOffset bytes into the buffer, not at its beginning.
+  const uint8_t *indirectSymbolArray =
+      (const uint8_t *)buffer.data() + istOffset;
+  isyms.reserve(isyms.size() + count);
+  for(uint32_t i=0; i < count; ++i) {
+    isyms.push_back(read32(
+        indirectSymbolArray + (startIndex + i) * sizeof(uint32_t), isBig));
+  }
+  return std::error_code();
+// Converts a value that was stored big-endian to host byte order: returned
+// unchanged on big-endian hosts, byte-swapped on little-endian hosts.
+template <typename T> static T readBigEndian(T t) {
+  if (!llvm::sys::IsLittleEndianHost)
+    return t;
+  llvm::sys::swapByteOrder(t);
+  return t;
+// Checks whether \p mh starts with one of the four mach-o magic values.
+// On success returns true and reports the pointer size in \p is64 and the
+// byte order in \p isBig; otherwise returns false and leaves both untouched.
+static bool isMachOHeader(const mach_header *mh, bool &is64, bool &isBig) {
+  const auto magic = read32(&mh->magic, false);
+  const bool plain32 = (magic == llvm::MachO::MH_MAGIC);
+  const bool plain64 = (magic == llvm::MachO::MH_MAGIC_64);
+  const bool swapped32 = (magic == llvm::MachO::MH_CIGAM);
+  const bool swapped64 = (magic == llvm::MachO::MH_CIGAM_64);
+  if (!(plain32 || plain64 || swapped32 || swapped64))
+    return false;
+  is64 = plain64 || swapped64;
+  // The magic was read as little-endian, so seeing the byte-swapped
+  // constant means the file is big-endian.
+  isBig = swapped32 || swapped64;
+  return true;
+// Returns true if the file at \p path can be read, is at least 32 bytes long,
+// starts with a mach-o header and has file type MH_OBJECT. In that case
+// \p arch is set from the header's cpu type/subtype; otherwise \p arch is
+// left untouched.
+bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch) {
+  // Try opening and mapping file at path.
+  ErrorOr<std::unique_ptr<MemoryBuffer>> mapped =
+      MemoryBuffer::getFileOrSTDIN(path);
+  if (mapped.getError())
+    return false;
+  // If file length < 32 it is too small to be mach-o object file.
+  const StringRef contents = mapped->get()->getBuffer();
+  if (contents.size() < 32)
+    return false;
+  // If file buffer does not start with MH_MAGIC (and variants), not obj file.
+  const auto *header = reinterpret_cast<const mach_header *>(contents.begin());
+  bool is64, isBig;
+  if (!isMachOHeader(header, is64, isBig))
+    return false;
+  // If not MH_OBJECT, not object file.
+  if (read32(&header->filetype, isBig) != MH_OBJECT)
+    return false;
+  // Look up arch from cpu/subtype pair.
+  const auto cpuType = read32(&header->cputype, isBig);
+  const auto cpuSubtype = read32(&header->cpusubtype, isBig);
+  arch = MachOLinkingContext::archFromCpuType(cpuType, cpuSubtype);
+  return true;
+// If \p mb is a fat file containing a slice for \p arch, sets \p offset and
+// \p size to the location of that slice within the buffer and returns true.
+// Returns false if the buffer is not a fat file, has no slice for \p arch,
+// or the matching slice does not fit inside the buffer (in that last case
+// \p offset and \p size have already been overwritten).
+bool sliceFromFatFile(MemoryBufferRef mb, MachOLinkingContext::Arch arch,
+                      uint32_t &offset, uint32_t &size) {
+  const char *start = mb.getBufferStart();
+  const size_t bufSize = mb.getBufferSize();
+  // A buffer too small to hold a fat header cannot be a fat file; bail out
+  // before reading the magic.
+  if (bufSize < sizeof(fat_header))
+    return false;
+  const llvm::MachO::fat_header *fh =
+      reinterpret_cast<const llvm::MachO::fat_header *>(start);
+  if (readBigEndian(fh->magic) != llvm::MachO::FAT_MAGIC)
+    return false;
+  uint32_t nfat_arch = readBigEndian(fh->nfat_arch);
+  // Only look at arch entries that actually lie inside the buffer, whatever
+  // count the header claims.
+  const size_t maxArchs = (bufSize - sizeof(fat_header)) / sizeof(fat_arch);
+  if (nfat_arch > maxArchs)
+    nfat_arch = static_cast<uint32_t>(maxArchs);
+  const fat_arch *fstart =
+      reinterpret_cast<const fat_arch *>(start + sizeof(fat_header));
+  const fat_arch *fend = fstart + nfat_arch;
+  const uint32_t reqCpuType = MachOLinkingContext::cpuTypeFromArch(arch);
+  const uint32_t reqCpuSubtype = MachOLinkingContext::cpuSubtypeFromArch(arch);
+  for (const fat_arch *fa = fstart; fa < fend; ++fa) {
+    if ((readBigEndian(fa->cputype) == reqCpuType) &&
+        (readBigEndian(fa->cpusubtype) == reqCpuSubtype)) {
+      offset = readBigEndian(fa->offset);
+      size = readBigEndian(fa->size);
+      // 64-bit sum: offset + size can wrap in 32 bits and pass the check.
+      if ((uint64_t(offset) + size) > bufSize)
+        return false;
+      return true;
+    }
+  }
+  return false;
+/// Reads a mach-o file and produces an in-memory normalized view.
+readBinary(std::unique_ptr<MemoryBuffer> &mb,
+ const MachOLinkingContext::Arch arch) {
+ // Make empty NormalizedFile.
+ std::unique_ptr<NormalizedFile> f(new NormalizedFile());
+ const char *start = mb->getBufferStart();
+ size_t objSize = mb->getBufferSize();
+ const mach_header *mh = reinterpret_cast<const mach_header *>(start);
+ uint32_t sliceOffset;
+ uint32_t sliceSize;
+ if (sliceFromFatFile(mb->getMemBufferRef(), arch, sliceOffset, sliceSize)) {
+ start = &start[sliceOffset];
+ objSize = sliceSize;
+ mh = reinterpret_cast<const mach_header *>(start);
+ }
+ // Determine endianness and pointer size for mach-o file.
+ bool is64, isBig;
+ if (!isMachOHeader(mh, is64, isBig))
+ return llvm::make_error<GenericError>("File is not a mach-o");
+ // Endian swap header, if needed.
+ mach_header headerCopy;
+ const mach_header *smh = mh;
+ if (isBig != llvm::sys::IsBigEndianHost) {
+ memcpy(&headerCopy, mh, sizeof(mach_header));
+ swapStruct(headerCopy);
+ smh = &headerCopy;
+ }
+ // Validate head and load commands fit in buffer.
+ const uint32_t lcCount = smh->ncmds;
+ const char *lcStart =
+ start + (is64 ? sizeof(mach_header_64) : sizeof(mach_header));
+ StringRef lcRange(lcStart, smh->sizeofcmds);
+ if (lcRange.end() > (start + objSize))
+ return llvm::make_error<GenericError>("Load commands exceed file size");
+ // Get architecture from mach_header.
+ f->arch = MachOLinkingContext::archFromCpuType(smh->cputype, smh->cpusubtype);
+ if (f->arch != arch) {
+ return llvm::make_error<GenericError>(
+ Twine("file is wrong architecture. Expected "
+ "(" + MachOLinkingContext::nameFromArch(arch)
+ + ") found ("
+ + MachOLinkingContext::nameFromArch(f->arch)
+ + ")" ));
+ }
+ // Copy file type and flags
+ f->fileType = HeaderFileType(smh->filetype);
+ f->flags = smh->flags;
+ // Pre-scan load commands looking for indirect symbol table.
+ uint32_t indirectSymbolTableOffset = 0;
+ uint32_t indirectSymbolTableCount = 0;
+ auto ec = forEachLoadCommand(lcRange, lcCount, isBig, is64,
+ [&](uint32_t cmd, uint32_t size,
+ const char *lc) -> bool {
+ if (cmd == LC_DYSYMTAB) {
+ const dysymtab_command *d = reinterpret_cast<const dysymtab_command*>(lc);
+ indirectSymbolTableOffset = read32(&d->indirectsymoff, isBig);
+ indirectSymbolTableCount = read32(&d->nindirectsyms, isBig);
+ return true;
+ }
+ return false;
+ });
+ if (ec)
+ return std::move(ec);
+ // Walk load commands looking for segments/sections and the symbol table.
+ const data_in_code_entry *dataInCode = nullptr;
+ const dyld_info_command *dyldInfo = nullptr;
+ uint32_t dataInCodeSize = 0;
+ ec = forEachLoadCommand(lcRange, lcCount, isBig, is64,
+ [&] (uint32_t cmd, uint32_t size, const char* lc) -> bool {
+ switch(cmd) {
+ case LC_SEGMENT_64:
+ if (is64) {
+ const segment_command_64 *seg =
+ reinterpret_cast<const segment_command_64*>(lc);
+ const unsigned sectionCount = read32(&seg->nsects, isBig);
+ const section_64 *sects = reinterpret_cast<const section_64*>
+ (lc + sizeof(segment_command_64));
+ const unsigned lcSize = sizeof(segment_command_64)
+ + sectionCount*sizeof(section_64);
+ // Verify sections don't extend beyond end of segment load command.
+ if (lcSize > size)
+ return true;
+ for (unsigned i=0; i < sectionCount; ++i) {
+ const section_64 *sect = &sects[i];
+ Section section;
+ section.segmentName = getString16(sect->segname);
+ section.sectionName = getString16(sect->sectname);
+ section.type = (SectionType)(read32(&sect->flags, isBig) &
+ section.attributes = read32(&sect->flags, isBig) & SECTION_ATTRIBUTES;
+ section.alignment = 1 << read32(&sect->align, isBig);
+ section.address = read64(&sect->addr, isBig);
+ const uint8_t *content =
+ (const uint8_t *)start + read32(&sect->offset, isBig);
+ size_t contentSize = read64(&sect->size, isBig);
+ // Note: this assign() is copying the content bytes. Ideally,
+ // we can use a custom allocator for vector to avoid the copy.
+ section.content = llvm::makeArrayRef(content, contentSize);
+ appendRelocations(section.relocations, mb->getBuffer(), isBig,
+ read32(&sect->reloff, isBig),
+ read32(&sect->nreloc, isBig));
+ if (section.type == S_NON_LAZY_SYMBOL_POINTERS) {
+ appendIndirectSymbols(section.indirectSymbols, mb->getBuffer(),
+ isBig,
+ indirectSymbolTableOffset,
+ indirectSymbolTableCount,
+ read32(&sect->reserved1, isBig),
+ contentSize/4);
+ }
+ f->sections.push_back(section);
+ }
+ }
+ break;
+ case LC_SEGMENT:
+ if (!is64) {
+ const segment_command *seg =
+ reinterpret_cast<const segment_command*>(lc);
+ const unsigned sectionCount = read32(&seg->nsects, isBig);
+ const section *sects = reinterpret_cast<const section*>
+ (lc + sizeof(segment_command));
+ const unsigned lcSize = sizeof(segment_command)
+ + sectionCount*sizeof(section);
+ // Verify sections don't extend beyond end of segment load command.
+ if (lcSize > size)
+ return true;
+ for (unsigned i=0; i < sectionCount; ++i) {
+ const section *sect = &sects[i];
+ Section section;
+ section.segmentName = getString16(sect->segname);
+ section.sectionName = getString16(sect->sectname);
+ section.type = (SectionType)(read32(&sect->flags, isBig) &
+ section.attributes =
+ read32((const uint8_t *)&sect->flags, isBig) & SECTION_ATTRIBUTES;
+ section.alignment = 1 << read32(&sect->align, isBig);
+ section.address = read32(&sect->addr, isBig);
+ const uint8_t *content =
+ (const uint8_t *)start + read32(&sect->offset, isBig);
+ size_t contentSize = read32(&sect->size, isBig);
+ // Note: this assign() is copying the content bytes. Ideally,
+ // we can use a custom allocator for vector to avoid the copy.
+ section.content = llvm::makeArrayRef(content, contentSize);
+ appendRelocations(section.relocations, mb->getBuffer(), isBig,
+ read32(&sect->reloff, isBig),
+ read32(&sect->nreloc, isBig));
+ if (section.type == S_NON_LAZY_SYMBOL_POINTERS) {
+ appendIndirectSymbols(
+ section.indirectSymbols, mb->getBuffer(), isBig,
+ indirectSymbolTableOffset, indirectSymbolTableCount,
+ read32(&sect->reserved1, isBig), contentSize / 4);
+ }
+ f->sections.push_back(section);
+ }
+ }
+ break;
+ case LC_SYMTAB: {
+ const symtab_command *st = reinterpret_cast<const symtab_command*>(lc);
+ const char *strings = start + read32(&st->stroff, isBig);
+ const uint32_t strSize = read32(&st->strsize, isBig);
+ // Validate string pool and symbol table all in buffer.
+ if (read32((const uint8_t *)&st->stroff, isBig) +
+ read32((const uint8_t *)&st->strsize, isBig) >
+ objSize)
+ return true;
+ if (is64) {
+ const uint32_t symOffset = read32(&st->symoff, isBig);
+ const uint32_t symCount = read32(&st->nsyms, isBig);
+ if ( symOffset+(symCount*sizeof(nlist_64)) > objSize)
+ return true;
+ const nlist_64 *symbols =
+ reinterpret_cast<const nlist_64 *>(start + symOffset);
+ // Convert each nlist_64 to a lld::mach_o::normalized::Symbol.
+ for(uint32_t i=0; i < symCount; ++i) {
+ nlist_64 tempSym;
+ memcpy(&tempSym, &symbols[i], sizeof(nlist_64));
+ const nlist_64 *sin = &tempSym;
+ if (isBig != llvm::sys::IsBigEndianHost)
+ swapStruct(tempSym);
+ Symbol sout;
+ if (sin->n_strx > strSize)
+ return true;
+ sout.name = &strings[sin->n_strx];
+ sout.type = static_cast<NListType>(sin->n_type & (N_STAB|N_TYPE));
+ sout.scope = (sin->n_type & (N_PEXT|N_EXT));
+ sout.sect = sin->n_sect;
+ sout.desc = sin->n_desc;
+ sout.value = sin->n_value;
+ if (sin->n_type & N_STAB)
+ f->stabsSymbols.push_back(sout);
+ else if (sout.type == N_UNDF)
+ f->undefinedSymbols.push_back(sout);
+ else if (sin->n_type & N_EXT)
+ f->globalSymbols.push_back(sout);
+ else
+ f->localSymbols.push_back(sout);
+ }
+ } else {
+ const uint32_t symOffset = read32(&st->symoff, isBig);
+ const uint32_t symCount = read32(&st->nsyms, isBig);
+ if ( symOffset+(symCount*sizeof(nlist)) > objSize)
+ return true;
+ const nlist *symbols =
+ reinterpret_cast<const nlist *>(start + symOffset);
+ // Convert each nlist to a lld::mach_o::normalized::Symbol.
+ for(uint32_t i=0; i < symCount; ++i) {
+ const nlist *sin = &symbols[i];
+ nlist tempSym;
+ if (isBig != llvm::sys::IsBigEndianHost) {
+ tempSym = *sin; swapStruct(tempSym); sin = &tempSym;
+ }
+ Symbol sout;
+ if (sin->n_strx > strSize)
+ return true;
+ sout.name = &strings[sin->n_strx];
+ sout.type = (NListType)(sin->n_type & N_TYPE);
+ sout.scope = (sin->n_type & (N_PEXT|N_EXT));
+ sout.sect = sin->n_sect;
+ sout.desc = sin->n_desc;
+ sout.value = sin->n_value;
+ if (sout.type == N_UNDF)
+ f->undefinedSymbols.push_back(sout);
+ else if (sout.scope == (SymbolScope)N_EXT)
+ f->globalSymbols.push_back(sout);
+ else if (sin->n_type & N_STAB)
+ f->stabsSymbols.push_back(sout);
+ else
+ f->localSymbols.push_back(sout);
+ }
+ }
+ }
+ break;
+ case LC_ID_DYLIB: {
+ const dylib_command *dl = reinterpret_cast<const dylib_command*>(lc);
+ f->installName = lc + read32(&dl->dylib.name, isBig);
+ f->currentVersion = read32(&dl->dylib.current_version, isBig);
+ f->compatVersion = read32(&dl->dylib.compatibility_version, isBig);
+ }
+ break;
+ case LC_DATA_IN_CODE: {
+ const linkedit_data_command *ldc =
+ reinterpret_cast<const linkedit_data_command*>(lc);
+ dataInCode = reinterpret_cast<const data_in_code_entry *>(
+ start + read32(&ldc->dataoff, isBig));
+ dataInCodeSize = read32(&ldc->datasize, isBig);
+ }
+ break;
+ const dylib_command *dl = reinterpret_cast<const dylib_command*>(lc);
+ DependentDylib entry;
+ entry.path = lc + read32(&dl->dylib.name, isBig);
+ entry.kind = LoadCommandType(cmd);
+ entry.compatVersion = read32(&dl->dylib.compatibility_version, isBig);
+ entry.currentVersion = read32(&dl->dylib.current_version, isBig);
+ f->dependentDylibs.push_back(entry);
+ }
+ break;
+ case LC_RPATH: {
+ const rpath_command *rpc = reinterpret_cast<const rpath_command *>(lc);
+ f->rpaths.push_back(lc + read32(&rpc->path, isBig));
+ }
+ break;
+ case LC_DYLD_INFO:
+ dyldInfo = reinterpret_cast<const dyld_info_command*>(lc);
+ break;
+ // If we are emitting an object file, then we may take the load command
+ // kind from these commands and pass it on to the output
+ // file.
+ f->minOSVersionKind = (LoadCommandType)cmd;
+ break;
+ }
+ return false;
+ });
+ if (ec)
+ return std::move(ec);
+ if (dataInCode) {
+ // Convert on-disk data_in_code_entry array to DataInCode vector.
+ for (unsigned i=0; i < dataInCodeSize/sizeof(data_in_code_entry); ++i) {
+ DataInCode entry;
+ entry.offset = read32(&dataInCode[i].offset, isBig);
+ entry.length = read16(&dataInCode[i].length, isBig);
+ entry.kind =
+ (DataRegionType)read16((const uint8_t *)&dataInCode[i].kind, isBig);
+ f->dataInCode.push_back(entry);
+ }
+ }
+ if (dyldInfo) {
+ // If any exports, extract and add to normalized exportInfo vector.
+ if (dyldInfo->export_size) {
+ const uint8_t *trieStart = reinterpret_cast<const uint8_t *>(
+ start + read32(&dyldInfo->export_off, isBig));
+ ArrayRef<uint8_t> trie(trieStart, read32(&dyldInfo->export_size, isBig));
+ Error Err = Error::success();
+ for (const ExportEntry &trieExport : MachOObjectFile::exports(Err, trie)) {
+ Export normExport;
+ normExport.name = trieExport.name().copy(f->ownedAllocations);
+ normExport.offset = trieExport.address();
+ normExport.kind = ExportSymbolKind(trieExport.flags() & EXPORT_SYMBOL_FLAGS_KIND_MASK);
+ normExport.flags = trieExport.flags() & ~EXPORT_SYMBOL_FLAGS_KIND_MASK;
+ normExport.otherOffset = trieExport.other();
+ if (!trieExport.otherName().empty())
+ normExport.otherName = trieExport.otherName().copy(f->ownedAllocations);
+ f->exportInfo.push_back(normExport);
+ }
+ if (Err)
+ return std::move(Err);
+ }
+ }
+ return std::move(f);
+class MachOObjectReader : public Reader { // Reader for mach-o object files; each accepted buffer becomes a MachOFile.
+ MachOObjectReader(MachOLinkingContext &ctx) : _ctx(ctx) {}
+ bool canParse(file_magic magic, MemoryBufferRef mb) const override {
+ return (magic == file_magic::macho_object && mb.getBufferSize() > 32); // Buffers of 32 bytes or fewer are rejected.
+ }
+ ErrorOr<std::unique_ptr<File>>
+ loadFile(std::unique_ptr<MemoryBuffer> mb,
+ const Registry &registry) const override { // Ownership of mb moves into the new MachOFile; registry is not used here.
+ std::unique_ptr<File> ret =
+ llvm::make_unique<MachOFile>(std::move(mb), &_ctx);
+ return std::move(ret);
+ }
+ MachOLinkingContext &_ctx; // Context handed to every MachOFile created by loadFile().
+class MachODylibReader : public Reader { // Reader for mach-o dylibs and dylib stubs; each accepted buffer becomes a MachODylibFile.
+ MachODylibReader(MachOLinkingContext &ctx) : _ctx(ctx) {}
+ bool canParse(file_magic magic, MemoryBufferRef mb) const override {
+ switch (magic) {
+ case file_magic::macho_dynamically_linked_shared_lib:
+ case file_magic::macho_dynamically_linked_shared_lib_stub:
+ return mb.getBufferSize() > 32; // Buffers of 32 bytes or fewer are rejected.
+ default:
+ return false; // Any other magic is not handled by this reader.
+ }
+ }
+ ErrorOr<std::unique_ptr<File>>
+ loadFile(std::unique_ptr<MemoryBuffer> mb,
+ const Registry &registry) const override { // Ownership of mb moves into the new MachODylibFile; registry is not used here.
+ std::unique_ptr<File> ret =
+ llvm::make_unique<MachODylibFile>(std::move(mb), &_ctx);
+ return std::move(ret);
+ }
+ MachOLinkingContext &_ctx; // Context handed to every MachODylibFile created by loadFile().
+} // namespace normalized
+} // namespace mach_o
+void Registry::addSupportMachOObjects(MachOLinkingContext &ctx) { // Registers the mach-o readers, reference-kind table and YAML document handler for ctx.
+ MachOLinkingContext::Arch arch = ctx.arch();
+ add(std::unique_ptr<Reader>(new mach_o::normalized::MachOObjectReader(ctx)));
+ add(std::unique_ptr<Reader>(new mach_o::normalized::MachODylibReader(ctx)));
+ addKindTable(Reference::KindNamespace::mach_o, ctx.archHandler().kindArch(),
+ ctx.archHandler().kindStrings()); // Kind arch and kind names both come from the context's arch handler.
+ add(std::unique_ptr<YamlIOTaggedDocumentHandler>(
+ new mach_o::MachOYamlIOTaggedDocumentHandler(arch)));
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h
new file mode 100644
index 000000000000..ee9e174b82e0
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h
@@ -0,0 +1,214 @@
+//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h ------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "MachONormalizedFile.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Core/Error.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/LEB128.h"
+#include <system_error>
+namespace lld {
+namespace mach_o {
+namespace normalized {
+class ByteBuffer { // Append-only byte buffer: all writes go through _ostream into the _bytes vector.
+ ByteBuffer() : _ostream(_bytes) { }
+ void append_byte(uint8_t b) { // Appends one byte.
+ _ostream << b;
+ }
+ void append_uleb128(uint64_t value) { // Appends value ULEB128-encoded.
+ llvm::encodeULEB128(value, _ostream);
+ }
+ void append_uleb128Fixed(uint64_t value, unsigned byteCount) { // ULEB128 with padding; asserts the minimal encoding fits in byteCount.
+ unsigned min = llvm::getULEB128Size(value);
+ assert(min <= byteCount);
+ unsigned pad = byteCount - min; // Extra bytes beyond the minimal encoding, passed on as the padding argument.
+ llvm::encodeULEB128(value, _ostream, pad);
+ }
+ void append_sleb128(int64_t value) { // Appends value SLEB128-encoded.
+ llvm::encodeSLEB128(value, _ostream);
+ }
+ void append_string(StringRef str) { // Appends the characters of str followed by a zero byte.
+ _ostream << str;
+ append_byte(0);
+ }
+ void align(unsigned alignment) { // Appends zero bytes until the size is a multiple of alignment.
+ while ( (_ostream.tell() % alignment) != 0 )
+ append_byte(0);
+ }
+ size_t size() { // Number of bytes written so far (the stream position).
+ return _ostream.tell();
+ }
+ const uint8_t *bytes() { // Pointer to the start of the data written so far.
+ return reinterpret_cast<const uint8_t*>(_ostream.str().data());
+ }
+ SmallVector<char, 128> _bytes;
+ // Stream ivar must be after SmallVector ivar to construct properly.
+ llvm::raw_svector_ostream _ostream;
+using namespace llvm::support::endian;
+using llvm::sys::getSwappedBytes;
+template<typename T>
+static inline uint16_t read16(const T *loc, bool isBig) { // Reads a 16-bit value at loc, big-endian when isBig, otherwise little-endian.
+ assert((uint64_t)loc % alignof(T) == 0 && "invalid pointer alignment"); // loc must be aligned for T.
+ return isBig ? read16be(loc) : read16le(loc);
+template<typename T>
+static inline uint32_t read32(const T *loc, bool isBig) { // Reads a 32-bit value at loc, big-endian when isBig, otherwise little-endian.
+ assert((uint64_t)loc % alignof(T) == 0 && "invalid pointer alignment"); // loc must be aligned for T.
+ return isBig ? read32be(loc) : read32le(loc);
+template<typename T>
+static inline uint64_t read64(const T *loc, bool isBig) { // Reads a 64-bit value at loc, big-endian when isBig, otherwise little-endian.
+ assert((uint64_t)loc % alignof(T) == 0 && "invalid pointer alignment"); // loc must be aligned for T.
+ return isBig ? read64be(loc) : read64le(loc);
+inline void write16(uint8_t *loc, uint16_t value, bool isBig) { // Stores value at loc as 16 bits, big-endian when isBig, otherwise little-endian.
+ if (isBig)
+ write16be(loc, value);
+ else
+ write16le(loc, value);
+inline void write32(uint8_t *loc, uint32_t value, bool isBig) { // Stores value at loc as 32 bits, big-endian when isBig, otherwise little-endian.
+ if (isBig)
+ write32be(loc, value);
+ else
+ write32le(loc, value);
+inline void write64(uint8_t *loc, uint64_t value, bool isBig) { // Stores value at loc as 64 bits, big-endian when isBig, otherwise little-endian.
+ if (isBig)
+ write64be(loc, value);
+ else
+ write64le(loc, value);
+// Extracts a bitCount-wide field from value.
+// When isBigEndianBigField is true, firstBit counts from the most significant
+// bit (the field is shifted down by 32-firstBit-bitCount); otherwise firstBit
+// is the index of the field's least significant bit.
+// Returns the field right-justified in a uint32_t.
+inline uint32_t
+bitFieldExtract(uint32_t value, bool isBigEndianBigField, uint8_t firstBit,
+ uint8_t bitCount) {
+ // Build the mask in unsigned arithmetic: a signed (1<<bitCount) overflows at
+ // 31 bits and shifting by the full width of the type is undefined.
+ const uint32_t mask =
+ (bitCount >= 32) ? ~uint32_t(0) : ((uint32_t(1) << bitCount) - 1);
+ const uint8_t shift = isBigEndianBigField ? (32-firstBit-bitCount) : firstBit;
+ return (value >> shift) & mask;
+}
+// Stores newBits into a bitCount-wide field of bits, replacing whatever the
+// field held before. Field position follows the same convention as
+// bitFieldExtract(): with isBigEndianBigField, firstBit counts from the most
+// significant bit; otherwise it is the index of the least significant bit.
+// Asserts that newBits fits in bitCount bits.
+inline void
+bitFieldSet(uint32_t &bits, bool isBigEndianBigField, uint32_t newBits,
+ uint8_t firstBit, uint8_t bitCount) {
+ // Build the mask in unsigned arithmetic: a signed (1<<bitCount) overflows at
+ // 31 bits and shifting by the full width of the type is undefined.
+ const uint32_t mask =
+ (bitCount >= 32) ? ~uint32_t(0) : ((uint32_t(1) << bitCount) - 1);
+ assert((newBits & mask) == newBits);
+ const uint8_t shift = isBigEndianBigField ? (32-firstBit-bitCount) : firstBit;
+ bits &= ~(mask << shift);
+ bits |= (newBits << shift);
+}
+inline Relocation unpackRelocation(const llvm::MachO::any_relocation_info &r,
+ bool isBigEndian) { // Decodes the two 32-bit words of an on-disk relocation into a normalized Relocation.
+ uint32_t r0 = read32(&r.r_word0, isBigEndian);
+ uint32_t r1 = read32(&r.r_word1, isBigEndian);
+ Relocation result;
+ if (r0 & llvm::MachO::R_SCATTERED) { // The R_SCATTERED bit in word 0 selects the scattered layout.
+ // scattered relocation record always laid out like big endian bit field
+ result.offset = bitFieldExtract(r0, true, 8, 24);
+ result.scattered = true;
+ result.type = (RelocationInfoType)
+ bitFieldExtract(r0, true, 4, 4);
+ result.length = bitFieldExtract(r0, true, 2, 2);
+ result.pcRel = bitFieldExtract(r0, true, 1, 1);
+ result.isExtern = false;
+ result.value = r1; // In the scattered form word 1 is taken as the value; no symbol index is stored.
+ result.symbol = 0;
+ } else {
+ result.offset = r0; // In the plain form word 0 is the offset and word 1 holds the bit fields.
+ result.scattered = false;
+ result.type = (RelocationInfoType)
+ bitFieldExtract(r1, isBigEndian, 28, 4);
+ result.length = bitFieldExtract(r1, isBigEndian, 25, 2);
+ result.pcRel = bitFieldExtract(r1, isBigEndian, 24, 1);
+ result.isExtern = bitFieldExtract(r1, isBigEndian, 27, 1);
+ result.value = 0;
+ result.symbol = bitFieldExtract(r1, isBigEndian, 0, 24);
+ }
+ return result;
+inline llvm::MachO::any_relocation_info
+packRelocation(const Relocation &r, bool swap, bool isBigEndian) { // Encodes a normalized Relocation into two 32-bit words, using the same field positions unpackRelocation() reads.
+ uint32_t r0 = 0;
+ uint32_t r1 = 0;
+ if (r.scattered) {
+ r1 = r.value;
+ bitFieldSet(r0, true, r.offset, 8, 24); // Scattered fields always use the big-endian bit-field layout.
+ bitFieldSet(r0, true, r.type, 4, 4);
+ bitFieldSet(r0, true, r.length, 2, 2);
+ bitFieldSet(r0, true, r.pcRel, 1, 1);
+ bitFieldSet(r0, true, r.scattered, 0, 1); // R_SCATTERED
+ } else {
+ r0 = r.offset;
+ bitFieldSet(r1, isBigEndian, r.type, 28, 4);
+ bitFieldSet(r1, isBigEndian, r.isExtern, 27, 1);
+ bitFieldSet(r1, isBigEndian, r.length, 25, 2);
+ bitFieldSet(r1, isBigEndian, r.pcRel, 24, 1);
+ bitFieldSet(r1, isBigEndian, r.symbol, 0, 24);
+ }
+ llvm::MachO::any_relocation_info result;
+ result.r_word0 = swap ? getSwappedBytes(r0) : r0; // Both words are byte-swapped only when the caller asks for it.
+ result.r_word1 = swap ? getSwappedBytes(r1) : r1;
+ return result;
+inline StringRef getString16(const char s[16]) { // Returns the name stored in a 16-byte field, which may lack a null terminator.
+ // The StringRef(const char *) constructor passes the const char * to
+ // strlen(), so we can't use this constructor here, because if there is no
+ // null terminator in s, then strlen() will read past the end of the array.
+ return StringRef(s, strnlen(s, 16)); // Length is capped at 16 characters.
+inline void setString16(StringRef str, char s[16]) { // Writes str into a 16-byte field, zero-padded.
+ memset(s, 0, 16);
+ memcpy(s, str.begin(), (str.size() > 16) ? 16: str.size()); // Copies at most 16 bytes; a 16-byte or longer str leaves no null terminator.
+// Implemented in normalizedToAtoms() and used by normalizedFromAtoms() so
+// that the same table can be used to map mach-o sections to and from
+// DefinedAtom::ContentType.
+// NOTE(review): every parameter after atomType is a non-const reference,
+// presumably filled in by the callee - confirm against the implementation.
+void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType,
+ StringRef &segmentName,
+ StringRef &sectionName,
+ SectionType &sectionType,
+ SectionAttr &sectionAttrs,
+ bool &relocsToDefinedCanBeImplicit);
+} // namespace normalized
+} // namespace mach_o
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp
new file mode 100644
index 000000000000..7ef0237e8c36
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp
@@ -0,0 +1,1552 @@
+//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp ---------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+/// \file For mach-o object files, this implementation converts normalized
+/// mach-o in memory to mach-o binary on disk.
+/// +---------------+
+/// | binary mach-o |
+/// +---------------+
+/// ^
+/// |
+/// |
+/// +------------+
+/// | normalized |
+/// +------------+
+#include "MachONormalizedFile.h"
+#include "MachONormalizedFileBinaryUtils.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Core/Error.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <functional>
+#include <list>
+#include <map>
+#include <system_error>
+using namespace llvm::MachO;
+namespace lld {
+namespace mach_o {
+namespace normalized {
+struct TrieNode; // Forward declaration.
+struct TrieEdge : public llvm::ilist_node<TrieEdge> { // Labelled edge to a child TrieNode; stored in an intrusive list.
+ TrieEdge(StringRef s, TrieNode *node) : _subString(s), _child(node) {}
+ StringRef _subString; // Label carried by this edge.
+ struct TrieNode *_child; // Node this edge leads to.
+} // namespace normalized
+} // namespace mach_o
+} // namespace lld
+namespace llvm {
+using lld::mach_o::normalized::TrieEdge;
+template <>
+struct ilist_alloc_traits<TrieEdge> : ilist_noalloc_traits<TrieEdge> {}; // NOTE(review): presumably makes ilist<TrieEdge> leave node allocation and deletion to its user - confirm in llvm/ADT/ilist.h.
+} // namespace llvm
+namespace lld {
+namespace mach_o {
+namespace normalized {
+struct TrieNode { // One node of the trie built from Export entries; outgoing edges live in _children.
+ typedef llvm::ilist<TrieEdge> TrieEdgeList;
+ TrieNode(StringRef s)
+ : _cummulativeString(s), _address(0), _flags(0), _other(0),
+ _trieOffset(0), _hasExportInfo(false) {} // Starts with zeroed fields and no export info.
+ ~TrieNode() = default;
+ void addSymbol(const Export &entry, BumpPtrAllocator &allocator,
+ std::vector<TrieNode *> &allNodes);
+ void addOrderedNodes(const Export &entry,
+ std::vector<TrieNode *> &allNodes);
+ bool updateOffset(uint32_t &offset);
+ void appendToByteBuffer(ByteBuffer &out);
+ StringRef _cummulativeString; // String passed to the constructor (spelling of the member name kept as is).
+ TrieEdgeList _children;
+ uint64_t _address;
+ uint64_t _flags;
+ uint64_t _other;
+ StringRef _importedName;
+ uint32_t _trieOffset;
+ bool _hasExportInfo;
+ bool _ordered = false;
+/// Utility class for writing a mach-o binary file given an in-memory
+/// normalized file.
+class MachOFileLayout {
+ /// All layout computation is done in the constructor.
+ MachOFileLayout(const NormalizedFile &file);
+ /// Returns the final file size as computed in the constructor.
+ size_t size() const;
+ // Returns size of the mach_header and load commands.
+ size_t headerAndLoadCommandsSize() const;
+ /// Writes the normalized file as a binary mach-o file to the specified
+ /// path. This does not have a stream interface because the generated
+ /// file may need the 'x' bit set.
+ llvm::Error writeBinary(StringRef path);
+ uint32_t loadCommandsSize(uint32_t &count); // Returns the byte size of the load commands and stores their number in count.
+ void buildFileOffsets();
+ void writeMachHeader();
+ llvm::Error writeLoadCommands();
+ void writeSectionContent();
+ void writeRelocations();
+ void writeSymbolTable();
+ void writeRebaseInfo();
+ void writeBindingInfo();
+ void writeLazyBindingInfo();
+ void writeExportInfo();
+ void writeFunctionStartsInfo();
+ void writeDataInCodeInfo();
+ void writeLinkEditContent();
+ void buildLinkEditInfo();
+ void buildRebaseInfo();
+ void buildBindInfo();
+ void buildLazyBindInfo();
+ void buildExportTrie();
+ void computeFunctionStartsSize();
+ void computeDataInCodeSize();
+ void computeSymbolTableSizes();
+ void buildSectionRelocations();
+ void appendSymbols(const std::vector<Symbol> &symbols,
+ uint32_t &symOffset, uint32_t &strOffset);
+ uint32_t indirectSymbolIndex(const Section &sect, uint32_t &index);
+ uint32_t indirectSymbolElementSize(const Section &sect);
+ // For use as template parameter to load command methods.
+ struct MachO64Trait {
+ typedef llvm::MachO::segment_command_64 command;
+ typedef llvm::MachO::section_64 section;
+ enum { LC = llvm::MachO::LC_SEGMENT_64 };
+ };
+ // For use as template parameter to load command methods.
+ struct MachO32Trait {
+ typedef llvm::MachO::segment_command command;
+ typedef llvm::MachO::section section;
+ enum { LC = llvm::MachO::LC_SEGMENT };
+ };
+ template <typename T>
+ llvm::Error writeSingleSegmentLoadCommand(uint8_t *&lc);
+ template <typename T> llvm::Error writeSegmentLoadCommands(uint8_t *&lc);
+ uint32_t pointerAlign(uint32_t value); // Rounds value up to 8 (64-bit) or 4 (32-bit).
+ static StringRef dyldPath();
+ struct SegExtraInfo { // Per-segment layout data, keyed by Segment pointer in _segInfo.
+ uint32_t fileOffset;
+ uint32_t fileSize;
+ std::vector<const Section*> sections;
+ };
+ typedef std::map<const Segment*, SegExtraInfo> SegMap;
+ struct SectionExtraInfo { // Per-section layout data, keyed by Section pointer in _sectInfo.
+ uint32_t fileOffset;
+ };
+ typedef std::map<const Section*, SectionExtraInfo> SectionMap;
+ const NormalizedFile &_file;
+ std::error_code _ec; // Set by buildFileOffsets() when the segment/section layout is invalid.
+ uint8_t *_buffer;
+ const bool _is64;
+ const bool _swap; // True when the file's arch is not host-endian.
+ const bool _bigEndianArch;
+ uint64_t _seg1addr;
+ uint32_t _startOfLoadCommands;
+ uint32_t _countOfLoadCommands;
+ uint32_t _endOfLoadCommands;
+ uint32_t _startOfRelocations;
+ uint32_t _startOfFunctionStarts;
+ uint32_t _startOfDataInCode;
+ uint32_t _startOfSymbols;
+ uint32_t _startOfIndirectSymbols;
+ uint32_t _startOfSymbolStrings;
+ uint32_t _endOfSymbolStrings;
+ uint32_t _symbolTableLocalsStartIndex;
+ uint32_t _symbolTableGlobalsStartIndex;
+ uint32_t _symbolTableUndefinesStartIndex;
+ uint32_t _symbolStringPoolSize;
+ uint32_t _symbolTableSize;
+ uint32_t _functionStartsSize;
+ uint32_t _dataInCodeSize;
+ uint32_t _indirectSymbolTableCount;
+ // Used in object file creation only
+ uint32_t _startOfSectionsContent;
+ uint32_t _endOfSectionsContent;
+ // Used in final linked image only
+ uint32_t _startOfLinkEdit;
+ uint32_t _startOfRebaseInfo;
+ uint32_t _endOfRebaseInfo;
+ uint32_t _startOfBindingInfo;
+ uint32_t _endOfBindingInfo;
+ uint32_t _startOfLazyBindingInfo;
+ uint32_t _endOfLazyBindingInfo;
+ uint32_t _startOfExportTrie;
+ uint32_t _endOfExportTrie;
+ uint32_t _endOfLinkEdit;
+ uint64_t _addressOfLinkEdit;
+ SegMap _segInfo;
+ SectionMap _sectInfo;
+ ByteBuffer _rebaseInfo;
+ ByteBuffer _bindingInfo;
+ ByteBuffer _lazyBindingInfo;
+ ByteBuffer _weakBindingInfo;
+ ByteBuffer _exportTrie;
+size_t headerAndLoadCommandsSize(const NormalizedFile &file) { // Runs a full layout of file and returns the size of its mach_header plus load commands.
+ MachOFileLayout layout(file); // The whole layout is computed by the constructor.
+ return layout.headerAndLoadCommandsSize();
+StringRef MachOFileLayout::dyldPath() { // Path string whose length is counted when sizing the dylinker load command.
+ return "/usr/lib/dyld";
+uint32_t MachOFileLayout::pointerAlign(uint32_t value) { // Rounds value up to a multiple of the pointer size.
+ return llvm::alignTo(value, _is64 ? 8 : 4); // 8 bytes for 64-bit files, 4 otherwise.
+size_t MachOFileLayout::headerAndLoadCommandsSize() const { // File offset at which the load commands end.
+ return _endOfLoadCommands;
+MachOFileLayout::MachOFileLayout(const NormalizedFile &file)
+ : _file(file),
+ _is64(MachOLinkingContext::is64Bit(file.arch)),
+ _swap(!MachOLinkingContext::isHostEndian(file.arch)),
+ _bigEndianArch(MachOLinkingContext::isBigEndian(file.arch)),
+ _seg1addr(INT64_MAX) { // INT64_MAX marks "not yet set"; buildFileOffsets() tests for it.
+ _startOfLoadCommands = _is64 ? sizeof(mach_header_64) : sizeof(mach_header); // Load commands follow the header.
+ const size_t segCommandBaseSize =
+ (_is64 ? sizeof(segment_command_64) : sizeof(segment_command));
+ const size_t sectsSize = (_is64 ? sizeof(section_64) : sizeof(section));
+ if (file.fileType == llvm::MachO::MH_OBJECT) {
+ // object files have just one segment load command containing all sections
+ _endOfLoadCommands = _startOfLoadCommands
+ + segCommandBaseSize
+ + file.sections.size() * sectsSize
+ + sizeof(symtab_command);
+ _countOfLoadCommands = 2; // The segment command and the symtab command.
+ if (file.hasMinVersionLoadCommand) {
+ _endOfLoadCommands += sizeof(version_min_command);
+ _countOfLoadCommands++;
+ }
+ if (!_file.functionStarts.empty()) {
+ _endOfLoadCommands += sizeof(linkedit_data_command);
+ _countOfLoadCommands++;
+ }
+ if (_file.generateDataInCodeLoadCommand) {
+ _endOfLoadCommands += sizeof(linkedit_data_command);
+ _countOfLoadCommands++;
+ }
+ // Assign file offsets to each section.
+ _startOfSectionsContent = _endOfLoadCommands;
+ unsigned relocCount = 0;
+ uint64_t offset = _startOfSectionsContent;
+ for (const Section &sect : file.sections) {
+ if (isZeroFillSection(sect.type))
+ _sectInfo[&sect].fileOffset = 0; // Zero-fill sections are given file offset 0 and do not advance offset.
+ else {
+ offset = llvm::alignTo(offset, sect.alignment);
+ _sectInfo[&sect].fileOffset = offset;
+ offset += sect.content.size();
+ }
+ relocCount += sect.relocations.size();
+ }
+ _endOfSectionsContent = offset;
+ computeSymbolTableSizes();
+ computeFunctionStartsSize();
+ computeDataInCodeSize();
+ // Align start of relocations.
+ _startOfRelocations = pointerAlign(_endOfSectionsContent);
+ _startOfFunctionStarts = _startOfRelocations + relocCount * 8; // 8 bytes are reserved per relocation.
+ _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize;
+ _startOfSymbols = _startOfDataInCode + _dataInCodeSize;
+ // Add Indirect symbol table.
+ _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize;
+ // Align start of symbol table and symbol strings.
+ _startOfSymbolStrings = _startOfIndirectSymbols
+ + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t));
+ _endOfSymbolStrings = _startOfSymbolStrings
+ + pointerAlign(_symbolStringPoolSize);
+ _endOfLinkEdit = _endOfSymbolStrings;
+ DEBUG_WITH_TYPE("MachOFileLayout",
+ llvm::dbgs() << "MachOFileLayout()\n"
+ << " startOfLoadCommands=" << _startOfLoadCommands << "\n"
+ << " countOfLoadCommands=" << _countOfLoadCommands << "\n"
+ << " endOfLoadCommands=" << _endOfLoadCommands << "\n"
+ << " startOfRelocations=" << _startOfRelocations << "\n"
+ << " startOfSymbols=" << _startOfSymbols << "\n"
+ << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n"
+ << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n"
+ << " startOfSectionsContent=" << _startOfSectionsContent << "\n"
+ << " endOfSectionsContent=" << _endOfSectionsContent << "\n");
+ } else {
+ // Final linked images have one load command per segment.
+ _endOfLoadCommands = _startOfLoadCommands
+ + loadCommandsSize(_countOfLoadCommands);
+ // Assign section file offsets.
+ buildFileOffsets();
+ buildLinkEditInfo();
+ // LINKEDIT of final linked images is laid out below in this order:
+ // rebase info, binding info, lazy binding info, export trie,
+ // function starts, data-in-code, symbol table, indirect symbol table,
+ // symbol table strings.
+ _startOfRebaseInfo = _startOfLinkEdit;
+ _endOfRebaseInfo = _startOfRebaseInfo + _rebaseInfo.size();
+ _startOfBindingInfo = _endOfRebaseInfo;
+ _endOfBindingInfo = _startOfBindingInfo + _bindingInfo.size();
+ _startOfLazyBindingInfo = _endOfBindingInfo;
+ _endOfLazyBindingInfo = _startOfLazyBindingInfo + _lazyBindingInfo.size();
+ _startOfExportTrie = _endOfLazyBindingInfo;
+ _endOfExportTrie = _startOfExportTrie + _exportTrie.size();
+ _startOfFunctionStarts = _endOfExportTrie;
+ _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize;
+ _startOfSymbols = _startOfDataInCode + _dataInCodeSize;
+ _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize;
+ _startOfSymbolStrings = _startOfIndirectSymbols
+ + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t));
+ _endOfSymbolStrings = _startOfSymbolStrings
+ + pointerAlign(_symbolStringPoolSize);
+ _endOfLinkEdit = _endOfSymbolStrings;
+ DEBUG_WITH_TYPE("MachOFileLayout",
+ llvm::dbgs() << "MachOFileLayout()\n"
+ << " startOfLoadCommands=" << _startOfLoadCommands << "\n"
+ << " countOfLoadCommands=" << _countOfLoadCommands << "\n"
+ << " endOfLoadCommands=" << _endOfLoadCommands << "\n"
+ << " startOfLinkEdit=" << _startOfLinkEdit << "\n"
+ << " startOfRebaseInfo=" << _startOfRebaseInfo << "\n"
+ << " endOfRebaseInfo=" << _endOfRebaseInfo << "\n"
+ << " startOfBindingInfo=" << _startOfBindingInfo << "\n"
+ << " endOfBindingInfo=" << _endOfBindingInfo << "\n"
+ << " startOfLazyBindingInfo=" << _startOfLazyBindingInfo << "\n"
+ << " endOfLazyBindingInfo=" << _endOfLazyBindingInfo << "\n"
+ << " startOfExportTrie=" << _startOfExportTrie << "\n"
+ << " endOfExportTrie=" << _endOfExportTrie << "\n"
+ << " startOfFunctionStarts=" << _startOfFunctionStarts << "\n"
+ << " startOfDataInCode=" << _startOfDataInCode << "\n"
+ << " startOfSymbols=" << _startOfSymbols << "\n"
+ << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n"
+ << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n"
+ << " addressOfLinkEdit=" << _addressOfLinkEdit << "\n");
+ }
+uint32_t MachOFileLayout::loadCommandsSize(uint32_t &count) { // Returns the total byte size of the load commands of a linked image; count receives how many there are.
+ uint32_t size = 0;
+ count = 0;
+ const size_t segCommandSize =
+ (_is64 ? sizeof(segment_command_64) : sizeof(segment_command));
+ const size_t sectionSize = (_is64 ? sizeof(section_64) : sizeof(section));
+ // Add LC_SEGMENT for each segment.
+ size += _file.segments.size() * segCommandSize;
+ count += _file.segments.size();
+ // Add section record for each section.
+ size += _file.sections.size() * sectionSize; // Section records add bytes but are not counted as commands.
+ // If creating a dylib, add LC_ID_DYLIB.
+ if (_file.fileType == llvm::MachO::MH_DYLIB) {
+ size += sizeof(dylib_command) + pointerAlign(_file.installName.size() + 1);
+ ++count;
+ }
+ size += sizeof(dyld_info_command); // A dyld_info_command is always included.
+ ++count;
+ // Add LC_SYMTAB
+ size += sizeof(symtab_command);
+ ++count;
+ if (_file.fileType != llvm::MachO::MH_PRELOAD) { // A dysymtab_command is included for everything except MH_PRELOAD.
+ size += sizeof(dysymtab_command);
+ ++count;
+ }
+ // If main executable add LC_LOAD_DYLINKER
+ if (_file.fileType == llvm::MachO::MH_EXECUTE) {
+ size += pointerAlign(sizeof(dylinker_command) + dyldPath().size()+1);
+ ++count;
+ }
+ if (_file.hasMinVersionLoadCommand) { // Optional version_min_command.
+ size += sizeof(version_min_command);
+ ++count;
+ }
+ size += sizeof(source_version_command); // A source_version_command is always included.
+ ++count;
+ // If main executable add LC_MAIN
+ if (_file.fileType == llvm::MachO::MH_EXECUTE) {
+ size += sizeof(entry_point_command);
+ ++count;
+ }
+ // Add LC_LOAD_DYLIB for each dependent dylib.
+ for (const DependentDylib &dep : _file.dependentDylibs) {
+ size += sizeof(dylib_command) + pointerAlign(dep.path.size()+1);
+ ++count;
+ }
+ // Add LC_RPATH
+ for (const StringRef &path : _file.rpaths) {
+ size += pointerAlign(sizeof(rpath_command) + path.size() + 1);
+ ++count;
+ }
+ // Add LC_FUNCTION_STARTS if needed
+ if (!_file.functionStarts.empty()) {
+ size += sizeof(linkedit_data_command);
+ ++count;
+ }
+ // Add LC_DATA_IN_CODE if requested. Note, we do encode zero length entries.
+ // FIXME: Zero length entries are only emitted to match ld64. Should we change this?
+ if (_file.generateDataInCodeLoadCommand) {
+ size += sizeof(linkedit_data_command);
+ ++count;
+ }
+ return size;
+static bool overlaps(const Segment &s1, const Segment &s2) { // True when the address ranges [address, address+size) of the two segments intersect.
+ if (s2.address >= s1.address+s1.size)
+ return false; // s2 starts at or after the end of s1.
+ if (s1.address >= s2.address+s2.size)
+ return false; // s1 starts at or after the end of s2.
+ return true;
+static bool overlaps(const Section &s1, const Section &s2) { // True when the ranges [address, address+content.size()) of the two sections intersect.
+ if (s2.address >= s1.address+s1.content.size())
+ return false; // s2 starts at or after the end of s1.
+ if (s1.address >= s2.address+s2.content.size())
+ return false; // s1 starts at or after the end of s2.
+ return true;
+// Assigns file offsets to the segments and sections of a final linked image.
+// Validates that no two segments and no two sections overlap, assigns each
+// section to the same-named segment whose address range contains it, then
+// walks the segments in order handing out page-aligned file offsets.
+// On a validation failure _ec is set to executable_format_error and the
+// function returns early. On success _startOfLinkEdit and _addressOfLinkEdit
+// are set (and _seg1addr, once a segment with init_access is seen).
+void MachOFileLayout::buildFileOffsets() {
+ // Verify no segments overlap
+ for (const Segment &sg1 : _file.segments) {
+ for (const Segment &sg2 : _file.segments) {
+ if (&sg1 == &sg2)
+ continue;
+ if (overlaps(sg1,sg2)) {
+ _ec = make_error_code(llvm::errc::executable_format_error);
+ return;
+ }
+ }
+ }
+ // Verify no sections overlap
+ for (const Section &s1 : _file.sections) {
+ for (const Section &s2 : _file.sections) {
+ if (&s1 == &s2)
+ continue;
+ if (overlaps(s1,s2)) {
+ _ec = make_error_code(llvm::errc::executable_format_error);
+ return;
+ }
+ }
+ }
+ // Build side table of extra info about segments and sections.
+ SegExtraInfo t;
+ t.fileOffset = 0;
+ // fileSize is not assigned below for __LINKEDIT, so give it a defined value
+ // instead of copying an uninitialized member into the map.
+ t.fileSize = 0;
+ for (const Segment &sg : _file.segments) {
+ _segInfo[&sg] = t;
+ }
+ SectionExtraInfo t2;
+ t2.fileOffset = 0;
+ // Assign sections to segments.
+ for (const Section &s : _file.sections) {
+ _sectInfo[&s] = t2;
+ bool foundSegment = false;
+ for (const Segment &sg : _file.segments) {
+ if (sg.name.equals(s.segmentName)) {
+ if ((s.address >= sg.address)
+ && (s.address+s.content.size() <= sg.address+sg.size)) {
+ _segInfo[&sg].sections.push_back(&s);
+ foundSegment = true;
+ break;
+ }
+ }
+ }
+ if (!foundSegment) {
+ _ec = make_error_code(llvm::errc::executable_format_error);
+ return;
+ }
+ }
+ // Assign file offsets.
+ uint32_t fileOffset = 0;
+ DEBUG_WITH_TYPE("MachOFileLayout",
+ llvm::dbgs() << "buildFileOffsets()\n");
+ for (const Segment &sg : _file.segments) {
+ _segInfo[&sg].fileOffset = fileOffset;
+ // Remember the address of the first segment that has init_access set.
+ if ((_seg1addr == INT64_MAX) && sg.init_access)
+ _seg1addr = sg.address;
+ DEBUG_WITH_TYPE("MachOFileLayout",
+ llvm::dbgs() << " segment=" << sg.name
+ << ", fileOffset=" << _segInfo[&sg].fileOffset << "\n");
+ uint32_t segFileSize = 0;
+ // A segment that is not zero-fill must use at least one page of disk space.
+ if (sg.init_access)
+ segFileSize = _file.pageSize;
+ for (const Section *s : _segInfo[&sg].sections) {
+ uint32_t sectOffset = s->address - sg.address;
+ uint32_t sectFileSize =
+ isZeroFillSection(s->type) ? 0 : s->content.size();
+ segFileSize = std::max(segFileSize, sectOffset + sectFileSize);
+ _sectInfo[s].fileOffset = _segInfo[&sg].fileOffset + sectOffset;
+ // Report the offset just assigned to this section, not the segment's.
+ DEBUG_WITH_TYPE("MachOFileLayout",
+ llvm::dbgs() << " section=" << s->sectionName
+ << ", fileOffset=" << _sectInfo[s].fileOffset << "\n");
+ }
+ // round up all segments to page aligned, except __LINKEDIT
+ if (!sg.name.equals("__LINKEDIT")) {
+ _segInfo[&sg].fileSize = llvm::alignTo(segFileSize, _file.pageSize);
+ fileOffset = llvm::alignTo(fileOffset + segFileSize, _file.pageSize);
+ }
+ // Overwritten on every iteration, so it ends as the end address of the
+ // last segment in _file.segments.
+ _addressOfLinkEdit = sg.address + sg.size;
+ }
+ _startOfLinkEdit = fileOffset;
+}
+/// Size of the output file in bytes: the offset at which the symbol string
+/// pool ends.  writeBinary() passes this value to FileOutputBuffer::create().
+size_t MachOFileLayout::size() const {
+  return _endOfSymbolStrings;
+}
+/// Fills in the mach_header located at the very start of the output buffer,
+/// byte-swapping it afterwards when _swap is set.
+void MachOFileLayout::writeMachHeader() {
+  auto subtype = MachOLinkingContext::cpuSubtypeFromArch(_file.arch);
+  // dynamic x86 executables on newer OS version should also set the
+  // CPU_SUBTYPE_LIB64 mask in the CPU subtype.
+  // FIXME: Check that this is a dynamic executable, not a static one.
+  const bool isMacOSXx86_64Executable =
+      _file.fileType == llvm::MachO::MH_EXECUTE &&
+      subtype == CPU_SUBTYPE_X86_64_ALL &&
+      _file.os == MachOLinkingContext::OS::macOSX;
+  if (isMacOSXx86_64Executable) {
+    uint32_t lib64MinVersion;
+    // parsePackedVersion() returns true on failure.
+    const bool parseFailed =
+        MachOLinkingContext::parsePackedVersion("10.5", lib64MinVersion);
+    if (!parseFailed && _file.minOSverson >= lib64MinVersion)
+      subtype |= CPU_SUBTYPE_LIB64;
+  }
+  mach_header *header = reinterpret_cast<mach_header*>(_buffer);
+  header->magic = _is64 ? llvm::MachO::MH_MAGIC_64 : llvm::MachO::MH_MAGIC;
+  header->cputype = MachOLinkingContext::cpuTypeFromArch(_file.arch);
+  header->cpusubtype = subtype;
+  header->filetype = _file.fileType;
+  header->ncmds = _countOfLoadCommands;
+  header->sizeofcmds = _endOfLoadCommands - _startOfLoadCommands;
+  header->flags = _file.flags;
+  // Swap only after every field has been written in host order.
+  if (_swap)
+    swapStruct(*header);
+}
+/// Returns the running index at which \p sect's indirect symbols start and
+/// advances \p index past them.  Sections without indirect symbols yield 0
+/// and leave \p index untouched.
+uint32_t MachOFileLayout::indirectSymbolIndex(const Section &sect,
+                                              uint32_t &index) {
+  if (!sect.indirectSymbols.empty()) {
+    const uint32_t firstIndex = index;
+    index += sect.indirectSymbols.size();
+    return firstIndex;
+  }
+  return 0;
+}
+/// Returns the per-entry size of an S_SYMBOL_STUBS section (content size
+/// divided by the number of indirect symbols).  Returns 0 for any other
+/// section type or when the section has no indirect symbols.
+uint32_t MachOFileLayout::indirectSymbolElementSize(const Section &sect) {
+  const bool isStubSection = (sect.type == S_SYMBOL_STUBS);
+  if (sect.indirectSymbols.empty() || !isStubSection)
+    return 0;
+  return sect.content.size() / sect.indirectSymbols.size();
+}
+/// Writes the single unnamed segment load command used by object files,
+/// followed by one section header per entry of _file.sections.
+///
+/// \param lc  in/out cursor into the load-command area; on return it points
+///            just past the command that was written.
+/// \returns llvm::Error::success() (no failure path exists in this function).
+template <typename T>
+llvm::Error MachOFileLayout::writeSingleSegmentLoadCommand(uint8_t *&lc) {
+  typename T::command* seg = reinterpret_cast<typename T::command*>(lc);
+  seg->cmd = T::LC;
+  seg->cmdsize = sizeof(typename T::command)
+                 + _file.sections.size() * sizeof(typename T::section);
+  uint8_t *next = lc + seg->cmdsize;
+  memset(seg->segname, 0, 16);
+  seg->vmaddr = 0;
+  seg->fileoff = _endOfLoadCommands;
+  if (_file.sections.empty()) {
+    // front()/back() on an empty container is undefined; an object with no
+    // sections simply gets an empty segment.
+    seg->vmsize = 0;
+    seg->filesize = 0;
+  } else {
+    const Section &firstSect = _file.sections.front();
+    const Section &lastSect = _file.sections.back();
+    seg->vmsize = lastSect.address + lastSect.content.size();
+    seg->filesize = _sectInfo[&lastSect].fileOffset +
+                    lastSect.content.size() -
+                    _sectInfo[&firstSect].fileOffset;
+  }
+  // The protection fields must be initialised like every other field of the
+  // command; the lone segment of an object file allows all access.
+  seg->maxprot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
+  seg->initprot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
+  seg->nsects = _file.sections.size();
+  seg->flags = 0;
+  if (_swap)
+    swapStruct(*seg);
+  // Section headers follow the segment command directly.
+  typename T::section *sout = reinterpret_cast<typename T::section*>
+                              (lc+sizeof(typename T::command));
+  uint32_t relOffset = _startOfRelocations;
+  uint32_t indirectSymRunningIndex = 0;
+  for (const Section &sin : _file.sections) {
+    setString16(sin.sectionName, sout->sectname);
+    setString16(sin.segmentName, sout->segname);
+    sout->addr = sin.address;
+    sout->size = sin.content.size();
+    sout->offset = _sectInfo[&sin].fileOffset;
+    sout->align = llvm::Log2_32(sin.alignment);
+    sout->reloff = sin.relocations.empty() ? 0 : relOffset;
+    sout->nreloc = sin.relocations.size();
+    sout->flags = sin.type | sin.attributes;
+    sout->reserved1 = indirectSymbolIndex(sin, indirectSymRunningIndex);
+    sout->reserved2 = indirectSymbolElementSize(sin);
+    // Relocations of consecutive sections are stored back to back.
+    relOffset += sin.relocations.size() * sizeof(any_relocation_info);
+    if (_swap)
+      swapStruct(*sout);
+    ++sout;
+  }
+  lc = next;
+  return llvm::Error::success();
+}
+/// Writes one segment load command per entry of _file.segments.  Ordinary
+/// segments are followed by a section header for each section assigned to
+/// them; __LINKEDIT is written without sections, using the link-edit
+/// address, offset and size tracked by this layout.
+///
+/// \param lc  in/out cursor into the load-command area; advanced past every
+///            command written.
+/// \returns llvm::Error::success().
+template <typename T>
+llvm::Error MachOFileLayout::writeSegmentLoadCommands(uint8_t *&lc) {
+  typedef typename T::command SegmentCommand;
+  typedef typename T::section SectionHeader;
+  uint32_t indirectSymRunningIndex = 0;
+  for (const Segment &seg : _file.segments) {
+    SegExtraInfo &segInfo = _segInfo[&seg];
+    SegmentCommand *cmd = reinterpret_cast<SegmentCommand*>(lc);
+    // Link edit has no sections and a custom range of address, so handle it
+    // specially.
+    if (seg.name.equals("__LINKEDIT")) {
+      const size_t linkeditSize = _endOfLinkEdit - _startOfLinkEdit;
+      cmd->cmd = T::LC;
+      cmd->cmdsize = sizeof(SegmentCommand);
+      uint8_t *afterLinkEdit = lc + cmd->cmdsize;
+      setString16("__LINKEDIT", cmd->segname);
+      cmd->vmaddr = _addressOfLinkEdit;
+      cmd->vmsize = llvm::alignTo(linkeditSize, _file.pageSize);
+      cmd->fileoff = _startOfLinkEdit;
+      cmd->filesize = linkeditSize;
+      cmd->initprot = seg.init_access;
+      cmd->maxprot = seg.max_access;
+      cmd->nsects = 0;
+      cmd->flags = 0;
+      if (_swap)
+        swapStruct(*cmd);
+      lc = afterLinkEdit;
+      continue;
+    }
+    // Write segment command with trailing sections.
+    cmd->cmd = T::LC;
+    cmd->cmdsize = sizeof(SegmentCommand)
+                   + segInfo.sections.size() * sizeof(SectionHeader);
+    uint8_t *afterSegment = lc + cmd->cmdsize;
+    setString16(seg.name, cmd->segname);
+    cmd->vmaddr = seg.address;
+    cmd->vmsize = seg.size;
+    cmd->fileoff = segInfo.fileOffset;
+    cmd->filesize = segInfo.fileSize;
+    cmd->initprot = seg.init_access;
+    cmd->maxprot = seg.max_access;
+    cmd->nsects = segInfo.sections.size();
+    cmd->flags = 0;
+    if (_swap)
+      swapStruct(*cmd);
+    SectionHeader *out = reinterpret_cast<SectionHeader*>
+                         (lc+sizeof(SegmentCommand));
+    for (const Section *section : segInfo.sections) {
+      setString16(section->sectionName, out->sectname);
+      setString16(section->segmentName, out->segname);
+      out->addr = section->address;
+      out->size = section->content.size();
+      // Zero-fill sections occupy no file space, so their offset is 0.
+      out->offset = isZeroFillSection(section->type)
+                        ? 0
+                        : section->address - seg.address + segInfo.fileOffset;
+      out->align = llvm::Log2_32(section->alignment);
+      out->reloff = 0;
+      out->nreloc = 0;
+      out->flags = section->type | section->attributes;
+      out->reserved1 = indirectSymbolIndex(*section, indirectSymRunningIndex);
+      out->reserved2 = indirectSymbolElementSize(*section);
+      if (_swap)
+        swapStruct(*out);
+      ++out;
+    }
+    lc = afterSegment;
+  }
+  return llvm::Error::success();
+}
+/// Writes a version_min_command at \p lc and advances \p lc past it.  Does
+/// nothing when the file has no min-version load command.
+///
+/// \param _file  normalized file supplying OS, minimum OS and SDK versions.
+/// \param _swap  byte-swap the command after filling it in.
+/// \param lc     in/out cursor into the load-command area.
+static void writeVersionMinLoadCommand(const NormalizedFile &_file,
+                                       bool _swap,
+                                       uint8_t *&lc) {
+  if (!_file.hasMinVersionLoadCommand)
+    return;
+  version_min_command *vm = reinterpret_cast<version_min_command*>(lc);
+  // Every case must set vm->cmd: the loader identifies the command by it.
+  switch (_file.os) {
+  case MachOLinkingContext::OS::unknown:
+    // No known OS: use the command kind recorded in the normalized file.
+    vm->cmd = _file.minOSVersionKind;
+    vm->cmdsize = sizeof(version_min_command);
+    vm->version = _file.minOSverson;
+    vm->sdk = 0;
+    break;
+  case MachOLinkingContext::OS::macOSX:
+    vm->cmd = LC_VERSION_MIN_MACOSX;
+    vm->cmdsize = sizeof(version_min_command);
+    vm->version = _file.minOSverson;
+    vm->sdk = _file.sdkVersion;
+    break;
+  case MachOLinkingContext::OS::iOS:
+  case MachOLinkingContext::OS::iOS_simulator:
+    vm->cmd = LC_VERSION_MIN_IPHONEOS;
+    vm->cmdsize = sizeof(version_min_command);
+    vm->version = _file.minOSverson;
+    vm->sdk = _file.sdkVersion;
+    break;
+  }
+  if (_swap)
+    swapStruct(*vm);
+  lc += sizeof(version_min_command);
+}
+/// Writes every load command into the output buffer, starting at
+/// _startOfLoadCommands.
+///
+/// Object files (MH_OBJECT) get one unnamed segment, LC_SYMTAB, the optional
+/// min-version command, and optional LC_FUNCTION_STARTS / LC_DATA_IN_CODE.
+/// All other file types get per-segment commands followed by the dyld info,
+/// symbol table, dylinker, version, entry point, dylib and rpath commands
+/// that apply to the file type.
+///
+/// \returns the error reported by the segment writer, or success.
+llvm::Error MachOFileLayout::writeLoadCommands() {
+  uint8_t *lc = &_buffer[_startOfLoadCommands];
+  if (_file.fileType == llvm::MachO::MH_OBJECT) {
+    // Object files have one unnamed segment which holds all sections.
+    if (_is64) {
+      if (auto ec = writeSingleSegmentLoadCommand<MachO64Trait>(lc))
+        return ec;
+    } else {
+      if (auto ec = writeSingleSegmentLoadCommand<MachO32Trait>(lc))
+        return ec;
+    }
+    // Add LC_SYMTAB with symbol table info
+    symtab_command* st = reinterpret_cast<symtab_command*>(lc);
+    st->cmd = LC_SYMTAB;
+    st->cmdsize = sizeof(symtab_command);
+    st->symoff = _startOfSymbols;
+    st->nsyms = _file.stabsSymbols.size() + _file.localSymbols.size() +
+                _file.globalSymbols.size() + _file.undefinedSymbols.size();
+    st->stroff = _startOfSymbolStrings;
+    st->strsize = _endOfSymbolStrings - _startOfSymbolStrings;
+    if (_swap)
+      swapStruct(*st);
+    lc += sizeof(symtab_command);
+    // Add the min-version command when the file requests one.
+    writeVersionMinLoadCommand(_file, _swap, lc);
+    // Add LC_FUNCTION_STARTS if needed.
+    if (_functionStartsSize != 0) {
+      linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
+      dl->cmd = LC_FUNCTION_STARTS;
+      dl->cmdsize = sizeof(linkedit_data_command);
+      dl->dataoff = _startOfFunctionStarts;
+      dl->datasize = _functionStartsSize;
+      if (_swap)
+        swapStruct(*dl);
+      lc += sizeof(linkedit_data_command);
+    }
+    // Add LC_DATA_IN_CODE if requested.
+    if (_file.generateDataInCodeLoadCommand) {
+      linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
+      dl->cmd = LC_DATA_IN_CODE;
+      dl->cmdsize = sizeof(linkedit_data_command);
+      dl->dataoff = _startOfDataInCode;
+      dl->datasize = _dataInCodeSize;
+      if (_swap)
+        swapStruct(*dl);
+      lc += sizeof(linkedit_data_command);
+    }
+  } else {
+    // Final linked images have sections under segments.
+    if (_is64) {
+      if (auto ec = writeSegmentLoadCommands<MachO64Trait>(lc))
+        return ec;
+    } else {
+      if (auto ec = writeSegmentLoadCommands<MachO32Trait>(lc))
+        return ec;
+    }
+    // Add LC_ID_DYLIB command for dynamic libraries.
+    if (_file.fileType == llvm::MachO::MH_DYLIB) {
+      dylib_command *dc = reinterpret_cast<dylib_command*>(lc);
+      StringRef path = _file.installName;
+      uint32_t size = sizeof(dylib_command) + pointerAlign(path.size() + 1);
+      dc->cmd = LC_ID_DYLIB;
+      dc->cmdsize = size;
+      dc->dylib.name = sizeof(dylib_command); // offset
+      // needs to be some constant value different than the one in LC_LOAD_DYLIB
+      dc->dylib.timestamp = 1;
+      dc->dylib.current_version = _file.currentVersion;
+      dc->dylib.compatibility_version = _file.compatVersion;
+      if (_swap)
+        swapStruct(*dc);
+      // The install name follows the fixed-size command, NUL terminated.
+      memcpy(lc + sizeof(dylib_command), path.begin(), path.size());
+      lc[sizeof(dylib_command) + path.size()] = '\0';
+      lc += size;
+    }
+    // Add LC_DYLD_INFO_ONLY.  A table with no content gets offset 0.
+    dyld_info_command* di = reinterpret_cast<dyld_info_command*>(lc);
+    di->cmd = LC_DYLD_INFO_ONLY;
+    di->cmdsize = sizeof(dyld_info_command);
+    di->rebase_off = _rebaseInfo.size() ? _startOfRebaseInfo : 0;
+    di->rebase_size = _rebaseInfo.size();
+    di->bind_off = _bindingInfo.size() ? _startOfBindingInfo : 0;
+    di->bind_size = _bindingInfo.size();
+    di->weak_bind_off = 0;
+    di->weak_bind_size = 0;
+    di->lazy_bind_off = _lazyBindingInfo.size() ? _startOfLazyBindingInfo : 0;
+    di->lazy_bind_size = _lazyBindingInfo.size();
+    di->export_off = _exportTrie.size() ? _startOfExportTrie : 0;
+    di->export_size = _exportTrie.size();
+    if (_swap)
+      swapStruct(*di);
+    lc += sizeof(dyld_info_command);
+    // Add LC_SYMTAB with symbol table info.
+    symtab_command* st = reinterpret_cast<symtab_command*>(lc);
+    st->cmd = LC_SYMTAB;
+    st->cmdsize = sizeof(symtab_command);
+    st->symoff = _startOfSymbols;
+    st->nsyms = _file.stabsSymbols.size() + _file.localSymbols.size() +
+                _file.globalSymbols.size() + _file.undefinedSymbols.size();
+    st->stroff = _startOfSymbolStrings;
+    st->strsize = _endOfSymbolStrings - _startOfSymbolStrings;
+    if (_swap)
+      swapStruct(*st);
+    lc += sizeof(symtab_command);
+    // Add LC_DYSYMTAB (not emitted for MH_PRELOAD images).
+    if (_file.fileType != llvm::MachO::MH_PRELOAD) {
+      dysymtab_command* dst = reinterpret_cast<dysymtab_command*>(lc);
+      dst->cmd = LC_DYSYMTAB;
+      dst->cmdsize = sizeof(dysymtab_command);
+      dst->ilocalsym = _symbolTableLocalsStartIndex;
+      dst->nlocalsym = _file.stabsSymbols.size() +
+                       _file.localSymbols.size();
+      dst->iextdefsym = _symbolTableGlobalsStartIndex;
+      dst->nextdefsym = _file.globalSymbols.size();
+      dst->iundefsym = _symbolTableUndefinesStartIndex;
+      dst->nundefsym = _file.undefinedSymbols.size();
+      dst->tocoff = 0;
+      dst->ntoc = 0;
+      dst->modtaboff = 0;
+      dst->nmodtab = 0;
+      dst->extrefsymoff = 0;
+      dst->nextrefsyms = 0;
+      dst->indirectsymoff = _startOfIndirectSymbols;
+      dst->nindirectsyms = _indirectSymbolTableCount;
+      dst->extreloff = 0;
+      dst->nextrel = 0;
+      dst->locreloff = 0;
+      dst->nlocrel = 0;
+      if (_swap)
+        swapStruct(*dst);
+      lc += sizeof(dysymtab_command);
+    }
+    // If main executable, add LC_LOAD_DYLINKER
+    if (_file.fileType == llvm::MachO::MH_EXECUTE) {
+      // Build LC_LOAD_DYLINKER load command.
+      uint32_t size=pointerAlign(sizeof(dylinker_command)+dyldPath().size()+1);
+      dylinker_command* dl = reinterpret_cast<dylinker_command*>(lc);
+      dl->cmd = LC_LOAD_DYLINKER;
+      dl->cmdsize = size;
+      dl->name = sizeof(dylinker_command); // offset
+      if (_swap)
+        swapStruct(*dl);
+      memcpy(lc+sizeof(dylinker_command), dyldPath().data(), dyldPath().size());
+      lc[sizeof(dylinker_command)+dyldPath().size()] = '\0';
+      lc += size;
+    }
+    // Add the min-version command when the file requests one.
+    writeVersionMinLoadCommand(_file, _swap, lc);
+    // Add LC_SOURCE_VERSION.
+    {
+      // Note, using a temporary here to appease UB as we may not be aligned
+      // enough for a struct containing a uint64_t when emitting a 32-bit binary
+      source_version_command sv;
+      sv.cmd = LC_SOURCE_VERSION;
+      sv.cmdsize = sizeof(source_version_command);
+      sv.version = _file.sourceVersion;
+      if (_swap)
+        swapStruct(sv);
+      memcpy(lc, &sv, sizeof(source_version_command));
+      lc += sizeof(source_version_command);
+    }
+    // If main executable, add LC_MAIN.
+    if (_file.fileType == llvm::MachO::MH_EXECUTE) {
+      // Build LC_MAIN load command.
+      // Note, using a temporary here to appease UB as we may not be aligned
+      // enough for a struct containing a uint64_t when emitting a 32-bit binary
+      entry_point_command ep;
+      ep.cmd = LC_MAIN;
+      ep.cmdsize = sizeof(entry_point_command);
+      ep.entryoff = _file.entryAddress - _seg1addr;
+      ep.stacksize = _file.stackSize;
+      if (_swap)
+        swapStruct(ep);
+      memcpy(lc, &ep, sizeof(entry_point_command));
+      lc += sizeof(entry_point_command);
+    }
+    // Add LC_LOAD_DYLIB commands
+    for (const DependentDylib &dep : _file.dependentDylibs) {
+      dylib_command* dc = reinterpret_cast<dylib_command*>(lc);
+      uint32_t size = sizeof(dylib_command) + pointerAlign(dep.path.size()+1);
+      dc->cmd = dep.kind;
+      dc->cmdsize = size;
+      dc->dylib.name = sizeof(dylib_command); // offset
+      // needs to be some constant value different than the one in LC_ID_DYLIB
+      dc->dylib.timestamp = 2;
+      dc->dylib.current_version = dep.currentVersion;
+      dc->dylib.compatibility_version = dep.compatVersion;
+      if (_swap)
+        swapStruct(*dc);
+      memcpy(lc+sizeof(dylib_command), dep.path.begin(), dep.path.size());
+      lc[sizeof(dylib_command)+dep.path.size()] = '\0';
+      lc += size;
+    }
+    // Add LC_RPATH
+    for (const StringRef &path : _file.rpaths) {
+      rpath_command *rpc = reinterpret_cast<rpath_command *>(lc);
+      uint32_t size = pointerAlign(sizeof(rpath_command) + path.size() + 1);
+      rpc->cmd = LC_RPATH;
+      rpc->cmdsize = size;
+      rpc->path = sizeof(rpath_command); // offset
+      if (_swap)
+        swapStruct(*rpc);
+      memcpy(lc+sizeof(rpath_command), path.begin(), path.size());
+      lc[sizeof(rpath_command)+path.size()] = '\0';
+      lc += size;
+    }
+    // Add LC_FUNCTION_STARTS if needed.
+    if (_functionStartsSize != 0) {
+      linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
+      dl->cmd = LC_FUNCTION_STARTS;
+      dl->cmdsize = sizeof(linkedit_data_command);
+      dl->dataoff = _startOfFunctionStarts;
+      dl->datasize = _functionStartsSize;
+      if (_swap)
+        swapStruct(*dl);
+      lc += sizeof(linkedit_data_command);
+    }
+    // Add LC_DATA_IN_CODE if requested.
+    if (_file.generateDataInCodeLoadCommand) {
+      linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
+      dl->cmd = LC_DATA_IN_CODE;
+      dl->cmdsize = sizeof(linkedit_data_command);
+      dl->dataoff = _startOfDataInCode;
+      dl->datasize = _dataInCodeSize;
+      if (_swap)
+        swapStruct(*dl);
+      lc += sizeof(linkedit_data_command);
+    }
+  }
+  return llvm::Error::success();
+}
+/// Copies the bytes of every section into the output buffer at the file
+/// offset recorded for it in _sectInfo.  Zero-fill sections and sections
+/// with no content are skipped.
+void MachOFileLayout::writeSectionContent() {
+  for (const Section &s : _file.sections) {
+    // Copy all section content to output buffer.
+    if (isZeroFillSection(s.type))
+      continue;
+    if (s.content.empty())
+      continue;
+    uint32_t offset = _sectInfo[&s].fileOffset;
+    memcpy(&_buffer[offset], s.content.data(), s.content.size());
+  }
+}
+/// Packs the relocations of every section, in section order, into the
+/// output buffer starting at _startOfRelocations.
+void MachOFileLayout::writeRelocations() {
+  uint32_t relOffset = _startOfRelocations;
+  // Iterate by const reference: copying a Section would also copy its
+  // relocation and indirect-symbol vectors on every iteration.
+  for (const Section &sect : _file.sections) {
+    for (const Relocation &r : sect.relocations) {
+      any_relocation_info* rb = reinterpret_cast<any_relocation_info*>(
+          &_buffer[relOffset]);
+      *rb = packRelocation(r, _swap, _bigEndianArch);
+      relOffset += sizeof(any_relocation_info);
+    }
+  }
+}
+/// Appends \p symbols to the symbol table and their names to the string
+/// pool.
+///
+/// \param symbols    symbols to emit, in order.
+/// \param symOffset  in/out file offset of the next nlist entry.
+/// \param strOffset  in/out file offset of the next free string-pool byte.
+void MachOFileLayout::appendSymbols(const std::vector<Symbol> &symbols,
+                                    uint32_t &symOffset, uint32_t &strOffset) {
+  for (const Symbol &sym : symbols) {
+    // n_strx is relative to the start of the string pool.
+    if (!_is64) {
+      nlist* entry = reinterpret_cast<nlist*>(&_buffer[symOffset]);
+      entry->n_strx = strOffset - _startOfSymbolStrings;
+      entry->n_type = sym.type | sym.scope;
+      entry->n_sect = sym.sect;
+      entry->n_desc = sym.desc;
+      entry->n_value = sym.value;
+      if (_swap)
+        swapStruct(*entry);
+      symOffset += sizeof(nlist);
+    } else {
+      nlist_64* entry = reinterpret_cast<nlist_64*>(&_buffer[symOffset]);
+      entry->n_strx = strOffset - _startOfSymbolStrings;
+      entry->n_type = sym.type | sym.scope;
+      entry->n_sect = sym.sect;
+      entry->n_desc = sym.desc;
+      entry->n_value = sym.value;
+      if (_swap)
+        swapStruct(*entry);
+      symOffset += sizeof(nlist_64);
+    }
+    const size_t nameLength = sym.name.size();
+    memcpy(&_buffer[strOffset], sym.name.begin(), nameLength);
+    strOffset += nameLength;
+    _buffer[strOffset++] ='\0'; // Strings in table have nul terminator.
+  }
+}
+/// Copies _file.functionStarts into the output buffer at
+/// _startOfFunctionStarts; a no-op when there is nothing to write.
+void MachOFileLayout::writeFunctionStartsInfo() {
+  if (_functionStartsSize == 0)
+    return;
+  uint8_t *dest = &_buffer[_startOfFunctionStarts];
+  memcpy(dest, _file.functionStarts.data(), _functionStartsSize);
+}
+/// Writes one data_in_code_entry per element of _file.dataInCode, back to
+/// back, starting at _startOfDataInCode.
+void MachOFileLayout::writeDataInCodeInfo() {
+  uint32_t writeOffset = _startOfDataInCode;
+  for (const DataInCode &entry : _file.dataInCode) {
+    uint8_t *slot = &_buffer[writeOffset];
+    data_in_code_entry *out = reinterpret_cast<data_in_code_entry*>(slot);
+    out->offset = entry.offset;
+    out->length = entry.length;
+    out->kind = entry.kind;
+    if (_swap)
+      swapStruct(*out);
+    writeOffset += sizeof(data_in_code_entry);
+  }
+}
+/// Writes the symbol table, its string pool and the indirect symbol table.
+///
+/// Symbols are emitted in the order stabs, locals, globals, undefines.  The
+/// indirect symbol table follows section order for object files and
+/// segment-then-section order for every other file type.
+void MachOFileLayout::writeSymbolTable() {
+  // Write symbol table and symbol strings in parallel.
+  uint32_t symOffset = _startOfSymbols;
+  uint32_t strOffset = _startOfSymbolStrings;
+  // Reserve n_strx offset of zero to mean no name.
+  _buffer[strOffset++] = ' ';
+  _buffer[strOffset++] = '\0';
+  appendSymbols(_file.stabsSymbols, symOffset, strOffset);
+  appendSymbols(_file.localSymbols, symOffset, strOffset);
+  appendSymbols(_file.globalSymbols, symOffset, strOffset);
+  appendSymbols(_file.undefinedSymbols, symOffset, strOffset);
+  // Write indirect symbol table array.
+  uint32_t *indirects = reinterpret_cast<uint32_t*>
+                        (&_buffer[_startOfIndirectSymbols]);
+  // Emits the indirect symbol indexes of one section, swapped if required.
+  auto emitIndirects = [&](const Section &section) {
+    for (uint32_t index : section.indirectSymbols)
+      *indirects++ = _swap ? llvm::sys::getSwappedBytes(index) : index;
+  };
+  if (_file.fileType == llvm::MachO::MH_OBJECT) {
+    // Object files have sections in same order as input normalized file.
+    for (const Section &section : _file.sections)
+      emitIndirects(section);
+    return;
+  }
+  // Final linked images must sort sections from normalized file.
+  for (const Segment &seg : _file.segments) {
+    SegExtraInfo &segInfo = _segInfo[&seg];
+    for (const Section *section : segInfo.sections)
+      emitIndirects(*section);
+  }
+}
+/// Copies the encoded rebase info into the output buffer at
+/// _startOfRebaseInfo.
+void MachOFileLayout::writeRebaseInfo() {
+  uint8_t *dest = &_buffer[_startOfRebaseInfo];
+  memcpy(dest, _rebaseInfo.bytes(), _rebaseInfo.size());
+}
+/// Copies the encoded binding info into the output buffer at
+/// _startOfBindingInfo.
+void MachOFileLayout::writeBindingInfo() {
+  uint8_t *dest = &_buffer[_startOfBindingInfo];
+  memcpy(dest, _bindingInfo.bytes(), _bindingInfo.size());
+}
+/// Copies the encoded lazy binding info into the output buffer at
+/// _startOfLazyBindingInfo.
+void MachOFileLayout::writeLazyBindingInfo() {
+  uint8_t *dest = &_buffer[_startOfLazyBindingInfo];
+  memcpy(dest, _lazyBindingInfo.bytes(), _lazyBindingInfo.size());
+}
+/// Copies the serialized export trie into the output buffer at
+/// _startOfExportTrie.
+void MachOFileLayout::writeExportInfo() {
+  uint8_t *dest = &_buffer[_startOfExportTrie];
+  memcpy(dest, _exportTrie.bytes(), _exportTrie.size());
+}
+void MachOFileLayout::buildLinkEditInfo() { // Builds each link-edit table and size in the fixed order below.
+ buildRebaseInfo(); // encodes _file.rebasingInfo into _rebaseInfo
+ buildBindInfo(); // encodes _file.bindingInfo into _bindingInfo
+ buildLazyBindInfo(); // encodes _file.lazyBindingInfo into _lazyBindingInfo
+ buildExportTrie(); // serializes _file.exportInfo into _exportTrie
+ computeSymbolTableSizes(); // symbol table size, string pool size, range start indexes
+ computeFunctionStartsSize(); // sets _functionStartsSize
+ computeDataInCodeSize(); // sets _dataInCodeSize
+void MachOFileLayout::buildSectionRelocations() { // NOTE(review): no statements are visible for this body here; presumably an intentional no-op — confirm.
+/// Encodes _file.rebasingInfo into _rebaseInfo as a rebase opcode stream.
+///
+/// Each location is emitted uncompressed as: set type, set segment and
+/// offset, rebase once.  The stream ends with REBASE_OPCODE_DONE and is
+/// padded to 8 bytes (64-bit) or 4 bytes (32-bit).
+void MachOFileLayout::buildRebaseInfo() {
+  // TODO: compress rebasing info.
+  for (const RebaseLocation& entry : _file.rebasingInfo) {
+    _rebaseInfo.append_byte(REBASE_OPCODE_SET_TYPE_IMM | entry.kind);
+    // The segment index rides in the opcode's immediate bits; the offset
+    // within that segment follows as a uleb128.
+    _rebaseInfo.append_byte(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
+                            | entry.segIndex);
+    _rebaseInfo.append_uleb128(entry.segOffset);
+    _rebaseInfo.append_uleb128(REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1);
+  }
+  _rebaseInfo.append_byte(REBASE_OPCODE_DONE);
+  _rebaseInfo.align(_is64 ? 8 : 4);
+}
+/// Encodes _file.bindingInfo into _bindingInfo as a bind opcode stream.
+///
+/// An opcode for ordinal, symbol name, type, segment/offset or addend is
+/// emitted only when that value differs from the previous entry; every
+/// entry ends with BIND_OPCODE_DO_BIND.  The stream ends with
+/// BIND_OPCODE_DONE and is padded to 8 bytes (64-bit) or 4 bytes (32-bit).
+void MachOFileLayout::buildBindInfo() {
+  // TODO: compress bind info.
+  // "last" values start out as sentinels so the first entry emits everything.
+  uint64_t lastAddend = 0;
+  int lastOrdinal = 0x80000000;
+  StringRef lastSymbolName;
+  BindType lastType = (BindType)0;
+  Hex32 lastSegOffset = ~0U;
+  uint8_t lastSegIndex = (uint8_t)~0U;
+  for (const BindLocation& entry : _file.bindingInfo) {
+    if (entry.ordinal != lastOrdinal) {
+      if (entry.ordinal <= 0)
+        _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM |
+                                 (entry.ordinal & BIND_IMMEDIATE_MASK));
+      else if (entry.ordinal <= BIND_IMMEDIATE_MASK)
+        _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
+                                 entry.ordinal);
+      else {
+        _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
+        _bindingInfo.append_uleb128(entry.ordinal);
+      }
+      lastOrdinal = entry.ordinal;
+    }
+    if (lastSymbolName != entry.symbolName) {
+      // The name must be introduced by its opcode, exactly as the lazy-bind
+      // encoder does; a bare string would be decoded as opcodes.
+      _bindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM);
+      _bindingInfo.append_string(entry.symbolName);
+      lastSymbolName = entry.symbolName;
+    }
+    if (lastType != entry.kind) {
+      _bindingInfo.append_byte(BIND_OPCODE_SET_TYPE_IMM | entry.kind);
+      lastType = entry.kind;
+    }
+    if (lastSegIndex != entry.segIndex || lastSegOffset != entry.segOffset) {
+      _bindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
+                               | entry.segIndex);
+      _bindingInfo.append_uleb128(entry.segOffset);
+      lastSegIndex = entry.segIndex;
+      lastSegOffset = entry.segOffset;
+    }
+    if (entry.addend != lastAddend) {
+      _bindingInfo.append_byte(BIND_OPCODE_SET_ADDEND_SLEB);
+      _bindingInfo.append_sleb128(entry.addend);
+      lastAddend = entry.addend;
+    }
+    _bindingInfo.append_byte(BIND_OPCODE_DO_BIND);
+  }
+  _bindingInfo.append_byte(BIND_OPCODE_DONE);
+  _bindingInfo.align(_is64 ? 8 : 4);
+}
+/// Encodes _file.lazyBindingInfo into _lazyBindingInfo.
+///
+/// Every entry is written as a self-contained sequence (segment/offset,
+/// dylib ordinal, symbol name, DO_BIND, DONE).  The buffer is finally padded
+/// to 8 bytes (64-bit) or 4 bytes (32-bit).
+void MachOFileLayout::buildLazyBindInfo() {
+  for (const BindLocation& entry : _file.lazyBindingInfo) {
+    _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
+                                 | entry.segIndex);
+    _lazyBindingInfo.append_uleb128(entry.segOffset);
+    // Pick the shortest encoding of the dylib ordinal.
+    const bool isSpecialOrdinal = (entry.ordinal <= 0);
+    const bool fitsInImmediate = (entry.ordinal <= BIND_IMMEDIATE_MASK);
+    if (isSpecialOrdinal) {
+      _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM |
+                                   (entry.ordinal & BIND_IMMEDIATE_MASK));
+    } else if (fitsInImmediate) {
+      _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
+                                   entry.ordinal);
+    } else {
+      _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
+      _lazyBindingInfo.append_uleb128(entry.ordinal);
+    }
+    // FIXME: We need to | the opcode here with flags.
+    _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM);
+    _lazyBindingInfo.append_string(entry.symbolName);
+    _lazyBindingInfo.append_byte(BIND_OPCODE_DO_BIND);
+    _lazyBindingInfo.append_byte(BIND_OPCODE_DONE);
+  }
+  _lazyBindingInfo.align(_is64 ? 8 : 4);
+}
+/// Inserts \p entry into the trie rooted at this node.
+///
+/// Descends along an edge that is a prefix of the remaining name, splits an
+/// edge that shares only part of its string with the name, or otherwise
+/// adds a new leaf edge carrying the export's data.
+///
+/// \param entry      export to add.
+/// \param allocator  owns every node, edge and string created here.
+/// \param allNodes   receives each newly created node.
+void TrieNode::addSymbol(const Export& entry,
+                         BumpPtrAllocator &allocator,
+                         std::vector<TrieNode*> &allNodes) {
+  // Part of the name not yet consumed on the path to this node.
+  StringRef partialStr = entry.name.drop_front(_cummulativeString.size());
+  for (TrieEdge &edge : _children) {
+    StringRef edgeStr = edge._subString;
+    if (partialStr.startswith(edgeStr)) {
+      // Already have matching edge, go down that path.
+      edge._child->addSymbol(entry, allocator, allNodes);
+      return;
+    }
+    // See if string has common prefix with existing edge.
+    for (int n=edgeStr.size()-1; n > 0; --n) {
+      if (partialStr.substr(0, n).equals(edgeStr.substr(0, n))) {
+        // Splice in new node: was A -> C, now A -> B -> C
+        StringRef bNodeStr = edge._child->_cummulativeString;
+        bNodeStr = bNodeStr.drop_back(edgeStr.size()-n).copy(allocator);
+        auto *bNode = new (allocator) TrieNode(bNodeStr);
+        allNodes.push_back(bNode);
+        TrieNode* cNode = edge._child;
+        StringRef abEdgeStr = edgeStr.substr(0,n).copy(allocator);
+        StringRef bcEdgeStr = edgeStr.substr(n).copy(allocator);
+        DEBUG_WITH_TYPE("trie-builder", llvm::dbgs()
+                        << "splice in TrieNode('" << bNodeStr
+                        << "') between edge '"
+                        << abEdgeStr << "' and edge='"
+                        << bcEdgeStr<< "'\n");
+        TrieEdge& abEdge = edge;
+        abEdge._subString = abEdgeStr;
+        abEdge._child = bNode;
+        auto *bcEdge = new (allocator) TrieEdge(bcEdgeStr, cNode);
+        bNode->_children.insert(bNode->_children.end(), bcEdge);
+        bNode->addSymbol(entry, allocator, allNodes);
+        return;
+      }
+    }
+  }
+  // Both flag kinds carry their extra datum in otherOffset, so it must be
+  // non-zero for either of them.
+  if (entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
+    assert(entry.otherOffset != 0);
+  }
+  if (entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
+    assert(entry.otherOffset != 0);
+  }
+  // No commonality with any existing child, make a new edge.
+  auto *newNode = new (allocator) TrieNode(entry.name.copy(allocator));
+  auto *newEdge = new (allocator) TrieEdge(partialStr, newNode);
+  _children.insert(_children.end(), newEdge);
+  DEBUG_WITH_TYPE("trie-builder", llvm::dbgs()
+                  << "new TrieNode('" << entry.name << "') with edge '"
+                  << partialStr << "' from node='"
+                  << _cummulativeString << "'\n");
+  newNode->_address = entry.offset;
+  newNode->_flags = entry.flags | entry.kind;
+  newNode->_other = entry.otherOffset;
+  if ((entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && !entry.otherName.empty())
+    newNode->_importedName = entry.otherName.copy(allocator);
+  newNode->_hasExportInfo = true;
+  allNodes.push_back(newNode);
+}
+/// Appends this node, and then every node on the path towards \p entry's
+/// name, to \p orderedNodes.  A node already marked _ordered is not added
+/// again.
+void TrieNode::addOrderedNodes(const Export& entry,
+                               std::vector<TrieNode*> &orderedNodes) {
+  if (!_ordered) {
+    _ordered = true;
+    orderedNodes.push_back(this);
+  }
+  const StringRef remaining =
+      entry.name.drop_front(_cummulativeString.size());
+  for (TrieEdge &edge : _children) {
+    if (!remaining.startswith(edge._subString))
+      continue;
+    // Already have matching edge, go down that path.
+    edge._child->addOrderedNodes(entry, orderedNodes);
+    return;
+  }
+}
+/// Computes this node's serialized size, places the node at \p offset, and
+/// advances \p offset past it.
+///
+/// The size is computed per node kind exactly as appendToByteBuffer()
+/// serializes it.
+///
+/// \param offset  in: preferred location of this node; out: location of the
+///                next node.
+/// \returns true when the node's _trieOffset changed.
+bool TrieNode::updateOffset(uint32_t& offset) {
+  uint32_t nodeSize = 1; // Length when no export info
+  if (_hasExportInfo) {
+    if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
+      nodeSize = llvm::getULEB128Size(_flags);
+      nodeSize += llvm::getULEB128Size(_other); // Other contains ordinal.
+      nodeSize += _importedName.size();
+      ++nodeSize; // Trailing zero in imported name.
+    } else {
+      nodeSize = llvm::getULEB128Size(_flags) + llvm::getULEB128Size(_address);
+      // Only stub-and-resolver nodes serialize _other after the address.
+      if (_flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER)
+        nodeSize += llvm::getULEB128Size(_other);
+    }
+    // Overall node size so far is uleb128 of export info + actual export info.
+    nodeSize += llvm::getULEB128Size(nodeSize);
+  }
+  // Compute size of all child edges.
+  ++nodeSize; // Byte for number of children.
+  for (TrieEdge &edge : _children) {
+    nodeSize += edge._subString.size() + 1 // String length.
+              + llvm::getULEB128Size(edge._child->_trieOffset); // Offset len.
+  }
+  // On input, 'offset' is new preferred location for this node.
+  bool result = (_trieOffset != offset);
+  // Store new location in node object for use by parents.
+  _trieOffset = offset;
+  // Update offset for next iteration.
+  offset += nodeSize;
+  // Return true if _trieOffset was changed.
+  return result;
+}
+/// Serializes this node into \p out: the terminal size, the export info (if
+/// any), the child count, and each child edge's string and node offset.
+///
+/// The terminal size is written as a uleb128, which is how updateOffset()
+/// sizes it; for sizes below 128 this is the same single byte as before.
+void TrieNode::appendToByteBuffer(ByteBuffer &out) {
+  if (_hasExportInfo) {
+    if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
+      if (!_importedName.empty()) {
+        // nodes with re-export info: size, flags, ordinal, import-name
+        uint32_t nodeSize = llvm::getULEB128Size(_flags)
+                          + llvm::getULEB128Size(_other)
+                          + _importedName.size() + 1;
+        out.append_uleb128(nodeSize);
+        out.append_uleb128(_flags);
+        out.append_uleb128(_other);
+        out.append_string(_importedName);
+      } else {
+        // nodes without re-export info: size, flags, ordinal, empty-string
+        uint32_t nodeSize = llvm::getULEB128Size(_flags)
+                          + llvm::getULEB128Size(_other) + 1;
+        out.append_uleb128(nodeSize);
+        out.append_uleb128(_flags);
+        out.append_uleb128(_other);
+        out.append_byte(0);
+      }
+    } else if ( _flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) {
+      // Nodes with export info: size, flags, address, other
+      uint32_t nodeSize = llvm::getULEB128Size(_flags)
+                        + llvm::getULEB128Size(_address)
+                        + llvm::getULEB128Size(_other);
+      out.append_uleb128(nodeSize);
+      out.append_uleb128(_flags);
+      out.append_uleb128(_address);
+      out.append_uleb128(_other);
+    } else {
+      // Nodes with export info: size, flags, address
+      uint32_t nodeSize = llvm::getULEB128Size(_flags)
+                        + llvm::getULEB128Size(_address);
+      out.append_uleb128(nodeSize);
+      out.append_uleb128(_flags);
+      out.append_uleb128(_address);
+    }
+  } else {
+    // Node with no export info.
+    uint32_t nodeSize = 0;
+    out.append_byte(nodeSize);
+  }
+  // Add number of children.
+  assert(_children.size() < 256);
+  out.append_byte(_children.size());
+  // Append each child edge substring and node offset.
+  for (TrieEdge &edge : _children) {
+    out.append_string(edge._subString);
+    out.append_uleb128(edge._child->_trieOffset);
+  }
+}
+/// Builds the export trie for _file.exportInfo and serializes it into
+/// _exportTrie, padded to 8 bytes (64-bit) or 4 bytes (32-bit).  Leaves
+/// _exportTrie untouched when there are no exports.
+void MachOFileLayout::buildExportTrie() {
+  if (_file.exportInfo.empty())
+    return;
+  // For all temporary strings and objects used building trie.
+  BumpPtrAllocator allocator;
+  // Build trie of all exported symbols.
+  auto *root = new (allocator) TrieNode(StringRef());
+  std::vector<TrieNode*> allNodes;
+  allNodes.reserve(_file.exportInfo.size()*2);
+  allNodes.push_back(root);
+  for (const Export& entry : _file.exportInfo)
+    root->addSymbol(entry, allocator, allNodes);
+  // Collect the nodes in the order in which they are serialized.
+  std::vector<TrieNode*> orderedNodes;
+  orderedNodes.reserve(allNodes.size());
+  for (const Export& entry : _file.exportInfo)
+    root->addOrderedNodes(entry, orderedNodes);
+  // Assign each node in the vector an offset in the trie stream, iterating
+  // until all uleb128 sizes have stabilized.
+  bool anyMoved = true;
+  while (anyMoved) {
+    anyMoved = false;
+    uint32_t offset = 0;
+    for (TrieNode* node : orderedNodes)
+      anyMoved |= node->updateOffset(offset);
+  }
+  // Serialize trie to ByteBuffer.
+  for (TrieNode* node : orderedNodes)
+    node->appendToByteBuffer(_exportTrie);
+  _exportTrie.align(_is64 ? 8 : 4);
+}
+/// Computes the symbol table size, the string pool size, the start index of
+/// each symbol range, and the number of indirect symbol table entries.
+void MachOFileLayout::computeSymbolTableSizes() {
+  // MachO symbol tables have three ranges: locals, globals, and undefines
+  const size_t nlistSize = (_is64 ? sizeof(nlist_64) : sizeof(nlist));
+  _symbolTableSize = nlistSize * (_file.stabsSymbols.size()
+                                + _file.localSymbols.size()
+                                + _file.globalSymbols.size()
+                                + _file.undefinedSymbols.size());
+  // Always reserve 1-byte for the empty string and 1-byte for its terminator.
+  _symbolStringPoolSize = 2;
+  // Every name is stored with a trailing nul.
+  auto reserveNames = [this](const std::vector<Symbol> &symbols) {
+    for (const Symbol &sym : symbols)
+      _symbolStringPoolSize += (sym.name.size()+1);
+  };
+  reserveNames(_file.stabsSymbols);
+  reserveNames(_file.localSymbols);
+  reserveNames(_file.globalSymbols);
+  reserveNames(_file.undefinedSymbols);
+  // Stabs and locals together form the first range.
+  _symbolTableLocalsStartIndex = 0;
+  _symbolTableGlobalsStartIndex = _file.stabsSymbols.size() +
+                                  _file.localSymbols.size();
+  _symbolTableUndefinesStartIndex = _symbolTableGlobalsStartIndex
+                                  + _file.globalSymbols.size();
+  _indirectSymbolTableCount = 0;
+  for (const Section &sect : _file.sections)
+    _indirectSymbolTableCount += sect.indirectSymbols.size();
+}
+/// Records the byte size of the function-starts data, taken from
+/// _file.functionStarts.
+void MachOFileLayout::computeFunctionStartsSize() {
+  _functionStartsSize = _file.functionStarts.size();
+}
+/// Records the byte size of the data-in-code table: one data_in_code_entry
+/// per element of _file.dataInCode.
+void MachOFileLayout::computeDataInCodeSize() {
+  _dataInCodeSize = _file.dataInCode.size() * sizeof(data_in_code_entry);
+}
+/// Writes all link-edit content.  Object files start with their
+/// relocations; every other file type starts with the rebase, bind, lazy
+/// bind and export tables.  Both then write function starts, data-in-code
+/// info and the symbol table.
+void MachOFileLayout::writeLinkEditContent() {
+  const bool isObjectFile = (_file.fileType == llvm::MachO::MH_OBJECT);
+  if (isObjectFile) {
+    writeRelocations();
+  } else {
+    writeRebaseInfo();
+    writeBindingInfo();
+    writeLazyBindingInfo();
+    // TODO: add weak binding info
+    writeExportInfo();
+  }
+  // Tail shared by both kinds of output, in the same order as before.
+  writeFunctionStartsInfo();
+  writeDataInCodeInfo();
+  writeSymbolTable();
+}
+/// Writes the laid-out file to \p path.
+///
+/// \returns any error left pending in _ec, an error from creating or
+///          committing the output buffer, an error from writing the load
+///          commands, or success.
+llvm::Error MachOFileLayout::writeBinary(StringRef path) {
+  // Check for pending error from constructor.
+  if (_ec)
+    return llvm::errorCodeToError(_ec);
+  // Create FileOutputBuffer with calculated size.  Everything except an
+  // object file is created with the executable flag.
+  const bool isObjectFile = (_file.fileType == llvm::MachO::MH_OBJECT);
+  unsigned flags = 0;
+  if (!isObjectFile)
+    flags = llvm::FileOutputBuffer::F_executable;
+  Expected<std::unique_ptr<llvm::FileOutputBuffer>> outputOrErr =
+      llvm::FileOutputBuffer::create(path, size(), flags);
+  if (Error createError = outputOrErr.takeError())
+    return createError;
+  std::unique_ptr<llvm::FileOutputBuffer> &output = *outputOrErr;
+  // Write content.
+  _buffer = output->getBufferStart();
+  writeMachHeader();
+  if (auto loadCommandError = writeLoadCommands())
+    return loadCommandError;
+  writeSectionContent();
+  writeLinkEditContent();
+  if (Error commitError = output->commit())
+    return commitError;
+  return llvm::Error::success();
+}
+/// Takes in-memory normalized view and writes a mach-o object file.
+///
+/// \param file  normalized file to lay out.
+/// \param path  destination path of the output file.
+/// \returns the result of MachOFileLayout::writeBinary() for that layout.
+llvm::Error writeBinary(const NormalizedFile &file, StringRef path) {
+  MachOFileLayout fileLayout(file);
+  llvm::Error result = fileLayout.writeBinary(path);
+  return result;
+}
+} // namespace normalized
+} // namespace mach_o
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp
new file mode 100644
index 000000000000..e93ca86c3164
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp
@@ -0,0 +1,1657 @@
+//===- lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp ------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+/// \file Converts from in-memory Atoms to in-memory normalized mach-o.
+/// +------------+
+/// | normalized |
+/// +------------+
+/// ^
+/// |
+/// |
+/// +-------+
+/// | Atoms |
+/// +-------+
+#include "ArchHandler.h"
+#include "DebugInfo.h"
+#include "MachONormalizedFile.h"
+#include "MachONormalizedFileBinaryUtils.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Core/Error.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include <map>
+#include <system_error>
+#include <unordered_set>
+using llvm::StringRef;
+using llvm::isa;
+using namespace llvm::MachO;
+using namespace lld::mach_o::normalized;
+using namespace lld;
+namespace {
+// Pairs an atom with the byte offset at which it was placed inside the
+// section that holds it.
+struct AtomInfo {
+ const DefinedAtom *atom;
+ uint64_t offsetInSection;
+// Bookkeeping for one output section while atoms are being collected and
+// laid out.
+struct SectionInfo {
+ SectionInfo(StringRef seg, StringRef sect, SectionType type,
+ const MachOLinkingContext &ctxt, uint32_t attr,
+ bool relocsToDefinedCanBeImplicit);
+ StringRef segmentName;
+ StringRef sectionName;
+ SectionType type;
+ uint32_t attributes;
+ // Address assigned by the segment layout routines; 0 until then.
+ uint64_t address;
+ // Running size in bytes; appendAtom() grows it as atoms are added.
+ uint64_t size;
+ // Initialized by the constructor; appendAtom() raises it when an atom
+ // requires a larger alignment.
+ uint16_t alignment;
+ /// If this is set, then any relocs in this section which point to defined
+ /// addresses can be implicitly generated. This is the case for the
+ /// __eh_frame section where references to the function can be implicit if the
+ /// function is defined.
+ bool relocsToDefinedCanBeImplicit;
+ // Atoms placed in this section together with their offsets.
+ std::vector<AtomInfo> atomsAndOffsets;
+ // Index of the matching entry in NormalizedFile::sections (see
+ // appendSection()).
+ uint32_t normalizedSectionIndex;
+ // 1-based index assigned by organizeSections() after sorting.
+ uint32_t finalSectionIndex;
+/// Builds a SectionInfo with address and size 0, alignment 1 and both section
+/// indexes cleared. If the linking context's sectionAligned() returns true for
+/// this segment/section pair, the alignment it supplies replaces the default.
+SectionInfo::SectionInfo(StringRef sg, StringRef sct, SectionType t,
+                         const MachOLinkingContext &ctxt, uint32_t attrs,
+                         bool relocsToDefinedCanBeImplicit)
+    : segmentName(sg), sectionName(sct), type(t), attributes(attrs),
+      address(0), size(0), alignment(1),
+      relocsToDefinedCanBeImplicit(relocsToDefinedCanBeImplicit),
+      normalizedSectionIndex(0), finalSectionIndex(0) {
+  uint16_t contextAlign = 1;
+  const bool hasContextAlign =
+      ctxt.sectionAligned(segmentName, sectionName, contextAlign);
+  if (hasContextAlign)
+    alignment = contextAlign;
+}
+// Bookkeeping for one output segment: its name, placement, VM protections and
+// the sections grouped into it.
+struct SegmentInfo {
+ SegmentInfo(StringRef name);
+ StringRef name;
+ uint64_t address;
+ uint64_t size;
+ // Initial and maximum protections; filled in by Util::segmentForName().
+ uint32_t init_access;
+ uint32_t max_access;
+ std::vector<SectionInfo*> sections;
+ // Position of this segment after sorting (see Util::organizeSections()).
+ uint32_t normalizedSegmentIndex;
+// Creates a segment record called n with every numeric field zeroed.
+SegmentInfo::SegmentInfo(StringRef n)
+ : name(n), address(0), size(0), init_access(0), max_access(0),
+ normalizedSegmentIndex(0) {
+// Helper that converts a file of atoms into the normalized mach-o
+// representation: it groups atoms into sections and segments, assigns
+// addresses, and fills in the tables of a NormalizedFile.
+// NOTE(review): no access specifiers are visible in this listing; which
+// members are public and which are private cannot be told from here.
+class Util {
+ Util(const MachOLinkingContext &ctxt)
+ : _ctx(ctxt), _archHandler(ctxt.archHandler()), _entryAtom(nullptr),
+ _hasTLVDescriptors(false), _subsectionsViaSymbols(true) {}
+ ~Util();
+ // Atom collection and section/segment organization.
+ void processDefinedAtoms(const lld::File &atomFile);
+ void processAtomAttributes(const DefinedAtom *atom);
+ void assignAtomToSection(const DefinedAtom *atom);
+ void organizeSections();
+ void assignAddressesToSections(const NormalizedFile &file);
+ uint32_t fileFlags();
+ // Steps that copy collected information into the NormalizedFile.
+ void copySegmentInfo(NormalizedFile &file);
+ void copySectionInfo(NormalizedFile &file);
+ void updateSectionInfo(NormalizedFile &file);
+ void buildAtomToAddressMap();
+ llvm::Error synthesizeDebugNotes(NormalizedFile &file);
+ llvm::Error addSymbols(const lld::File &atomFile, NormalizedFile &file);
+ void addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file);
+ void addRebaseAndBindingInfo(const lld::File &, NormalizedFile &file);
+ void addExportInfo(const lld::File &, NormalizedFile &file);
+ void addSectionRelocs(const lld::File &, NormalizedFile &file);
+ void addFunctionStarts(const lld::File &, NormalizedFile &file);
+ void buildDataInCodeArray(const lld::File &, NormalizedFile &file);
+ void addDependentDylibs(const lld::File &, NormalizedFile &file);
+ void copyEntryPointAddress(NormalizedFile &file);
+ void copySectionContent(NormalizedFile &file);
+ // Accessors for the min-version state gathered by processAtomAttributes().
+ bool allSourceFilesHaveMinVersions() const {
+ return _allSourceFilesHaveMinVersions;
+ }
+ uint32_t minVersion() const {
+ return _minVersion;
+ }
+ LoadCommandType minVersionCommandType() const {
+ return _minVersionCommandType;
+ }
+ typedef std::map<DefinedAtom::ContentType, SectionInfo*> TypeToSection;
+ typedef llvm::DenseMap<const Atom*, uint64_t> AtomToAddress;
+ struct DylibInfo { int ordinal; bool hasWeak; bool hasNonWeak; };
+ typedef llvm::StringMap<DylibInfo> DylibPathToInfo;
+ // Section lookup/creation helpers.
+ SectionInfo *sectionForAtom(const DefinedAtom*);
+ SectionInfo *getRelocatableSection(DefinedAtom::ContentType type);
+ SectionInfo *getFinalSection(DefinedAtom::ContentType type);
+ void appendAtom(SectionInfo *sect, const DefinedAtom *atom);
+ SegmentInfo *segmentForName(StringRef segName);
+ void layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr);
+ void layoutSectionsInTextSegment(size_t, SegmentInfo *, uint64_t &);
+ void copySectionContent(SectionInfo *si, ContentBytes &content);
+ uint16_t descBits(const DefinedAtom* atom);
+ int dylibOrdinal(const SharedLibraryAtom *sa);
+ void segIndexForSection(const SectionInfo *sect,
+ uint8_t &segmentIndex, uint64_t &segmentStartAddr);
+ const Atom *targetOfLazyPointer(const DefinedAtom *lpAtom);
+ const Atom *targetOfStub(const DefinedAtom *stubAtom);
+ llvm::Error getSymbolTableRegion(const DefinedAtom* atom,
+ bool &inGlobalsRegion,
+ SymbolScope &symbolScope);
+ void appendSection(SectionInfo *si, NormalizedFile &file);
+ uint32_t sectionIndexForAtom(const Atom *atom);
+ void fixLazyReferenceImm(const DefinedAtom *atom, uint32_t offset,
+ NormalizedFile &file);
+ typedef llvm::DenseMap<const Atom*, uint32_t> AtomToIndex;
+ struct AtomAndIndex { const Atom *atom; uint32_t index; SymbolScope scope; };
+ // Orders AtomAndIndex records by atom name.
+ struct AtomSorter {
+ bool operator()(const AtomAndIndex &left, const AtomAndIndex &right);
+ };
+ // Orders segments by a per-name weight.
+ struct SegmentSorter {
+ bool operator()(const SegmentInfo *left, const SegmentInfo *right);
+ static unsigned weight(const SegmentInfo *);
+ };
+ // Orders sections by a per-name weight; organizeSections() applies it to
+ // the __TEXT segment only.
+ struct TextSectionSorter {
+ bool operator()(const SectionInfo *left, const SectionInfo *right);
+ static unsigned weight(const SectionInfo *);
+ };
+ const MachOLinkingContext &_ctx;
+ mach_o::ArchHandler &_archHandler;
+ // Backing storage for the SectionInfo and SegmentInfo objects.
+ llvm::BumpPtrAllocator _allocator;
+ std::vector<SectionInfo*> _sectionInfos;
+ std::vector<SegmentInfo*> _segmentInfos;
+ // Content type -> section cache used by sectionForAtom().
+ TypeToSection _sectionMap;
+ std::vector<SectionInfo*> _customSections;
+ AtomToAddress _atomToAddress;
+ DylibPathToInfo _dylibInfo;
+ // Set by buildAtomToAddressMap() when the entry symbol is found.
+ const DefinedAtom *_entryAtom;
+ AtomToIndex _atomToSymbolIndex;
+ // Atoms that buildAtomToAddressMap() places at the context's base address.
+ std::vector<const Atom *> _machHeaderAliasAtoms;
+ bool _hasTLVDescriptors;
+ bool _subsectionsViaSymbols;
+ bool _allSourceFilesHaveMinVersions = true;
+ LoadCommandType _minVersionCommandType = (LoadCommandType)0;
+ uint32_t _minVersion = 0;
+ // Debug-note entries accumulated by synthesizeDebugNotes().
+ std::vector<lld::mach_o::Stab> _stabs;
+/// The SectionInfo and SegmentInfo structs are BumpPtr allocated, but the
+/// buffers owned by their vector members still have to be released here.
+Util::~Util() {
+  // Swapping with an empty temporary deallocates the vector buffer; clear()
+  // would only destroy the elements.
+  for (SectionInfo *section : _sectionInfos)
+    std::vector<AtomInfo>().swap(section->atomsAndOffsets);
+  for (SegmentInfo *segment : _segmentInfos)
+    std::vector<SectionInfo *>().swap(segment->sections);
+}
+/// Returns the SectionInfo that holds atoms of content type \p type in
+/// relocatable output, creating and registering it on first use.
+SectionInfo *Util::getRelocatableSection(DefinedAtom::ContentType type) {
+  // Consult the same table that is used when parsing .o files.
+  StringRef segName;
+  StringRef sectName;
+  SectionType sectType;
+  SectionAttr sectAttrs;
+  bool implicitRelocs;
+  relocatableSectionInfoForContentType(type, segName, sectName, sectType,
+                                       sectAttrs, implicitRelocs);
+
+  // Two content types can map to the same mach-o section; when a SectionInfo
+  // with this segment/section name is already registered, share it.
+  for (const auto &entry : _sectionMap) {
+    SectionInfo *existing = entry.second;
+    if (existing->sectionName.equals(sectName) &&
+        existing->segmentName.equals(segName))
+      return existing;
+  }
+
+  // Nothing suitable yet: allocate a new SectionInfo and record it.
+  auto *created = new (_allocator) SectionInfo(
+      segName, sectName, sectType, _ctx, sectAttrs, implicitRelocs);
+  _sectionInfos.push_back(created);
+  _sectionMap[type] = created;
+  return created;
+}
+// Builds one MachOFinalSectionFromAtomType initializer; the atom type is
+// written without its DefinedAtom:: qualifier.
+#define ENTRY(seg, sect, type, atomType) \
+  {seg, sect, type, DefinedAtom::atomType }
+
+/// One row of the table that maps an atom content type to the segment,
+/// section name and section type it is placed in for final linked output.
+struct MachOFinalSectionFromAtomType {
+  StringRef                 segmentName;
+  StringRef                 sectionName;
+  SectionType               sectionType;
+  DefinedAtom::ContentType  atomType;
+};
+
+// Util::getFinalSection() scans this table linearly and uses the first row
+// whose atomType matches. Several rows intentionally share a section name.
+const MachOFinalSectionFromAtomType sectsToAtomType[] = {
+  ENTRY("__TEXT", "__text", S_REGULAR, typeCode),
+  ENTRY("__TEXT", "__text", S_REGULAR, typeMachHeader),
+  ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString),
+  ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String),
+  ENTRY("__TEXT", "__const", S_REGULAR, typeConstant),
+  ENTRY("__TEXT", "__const", S_4BYTE_LITERALS, typeLiteral4),
+  ENTRY("__TEXT", "__const", S_8BYTE_LITERALS, typeLiteral8),
+  ENTRY("__TEXT", "__const", S_16BYTE_LITERALS, typeLiteral16),
+  ENTRY("__TEXT", "__stubs", S_SYMBOL_STUBS, typeStub),
+  ENTRY("__TEXT", "__stub_helper", S_REGULAR, typeStubHelper),
+  ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA),
+  ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI),
+  ENTRY("__TEXT", "__unwind_info", S_REGULAR, typeProcessedUnwindInfo),
+  ENTRY("__DATA", "__data", S_REGULAR, typeData),
+  ENTRY("__DATA", "__const", S_REGULAR, typeConstData),
+  ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString),
+  ENTRY("__DATA", "__la_symbol_ptr", S_LAZY_SYMBOL_POINTERS,
+        typeLazyPointer),
+  ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS,
+        typeInitializerPtr),
+  ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS,
+        typeTerminatorPtr),
+  // Restored row: the listing kept only the dangling "typeGOT)," tail.
+  ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS,
+        typeGOT),
+  ENTRY("__DATA", "__nl_symbol_ptr", S_NON_LAZY_SYMBOL_POINTERS,
+        typeNonLazyPointer),
+  ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES,
+        typeThunkTLV),
+  ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR,
+        typeTLVInitialData),
+  // Restored row: the listing kept only the dangling
+  // "typeTLVInitializerPtr)," tail.
+  ENTRY("__DATA", "__thread_ptrs", S_THREAD_LOCAL_VARIABLE_POINTERS,
+        typeTLVInitializerPtr),
+  ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL,
+        typeTLVInitialZeroFill),
+  ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill),
+  ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples),
+};
+#undef ENTRY
+/// Returns the SectionInfo that holds atoms of content type \p atomType in
+/// final (non-relocatable) output, creating and registering it on first use.
+///
+/// The segment, section name and section type come from the first matching
+/// row of sectsToAtomType. Hits llvm_unreachable if the table has no row for
+/// \p atomType.
+SectionInfo *Util::getFinalSection(DefinedAtom::ContentType atomType) {
+  for (auto &p : sectsToAtomType) {
+    if (p.atomType != atomType)
+      continue;
+    SectionAttr sectionAttrs = 0;
+    switch (atomType) {
+    case DefinedAtom::typeMachHeader:
+    case DefinedAtom::typeCode:
+    case DefinedAtom::typeStub:
+    case DefinedAtom::typeStubHelper:
+      // Sections that hold machine code carry the instruction attributes.
+      // Without this assignment these cases were indistinguishable from the
+      // default case and code sections were emitted with attributes 0.
+      sectionAttrs = S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS;
+      break;
+    case DefinedAtom::typeThunkTLV:
+      // Remember that the output contains thread-local variable descriptors.
+      _hasTLVDescriptors = true;
+      break;
+    default:
+      break;
+    }
+    // If we already have a SectionInfo with this name, re-use it.
+    // This can happen if two ContentType map to the same mach-o section.
+    for (const auto &sect : _sectionMap) {
+      if (sect.second->sectionName.equals(p.sectionName) &&
+          sect.second->segmentName.equals(p.segmentName)) {
+        return sect.second;
+      }
+    }
+    // Otherwise allocate new SectionInfo object.
+    auto *sect = new (_allocator) SectionInfo(
+        p.segmentName, p.sectionName, p.sectionType, _ctx, sectionAttrs,
+        /* relocsToDefinedCanBeImplicit */ false);
+    _sectionInfos.push_back(sect);
+    _sectionMap[atomType] = sect;
+    return sect;
+  }
+  llvm_unreachable("content type not yet supported");
+}
+/// Finds, or creates, the SectionInfo that \p atom belongs to.
+///
+/// Atoms whose section is based on content go through the content-type map,
+/// falling back to the relocatable or final table depending on whether the
+/// output type is MH_OBJECT. All other atoms use a custom section whose name
+/// has the form "segment/section".
+SectionInfo *Util::sectionForAtom(const DefinedAtom *atom) {
+  if (atom->sectionChoice() == DefinedAtom::sectionBasedOnContent) {
+    // Section is derived from the content type; reuse a cached mapping.
+    const DefinedAtom::ContentType contentType = atom->contentType();
+    auto cached = _sectionMap.find(contentType);
+    if (cached != _sectionMap.end())
+      return cached->second;
+    if (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT)
+      return getRelocatableSection(contentType);
+    return getFinalSection(contentType);
+  }
+
+  // The atom asks for a custom section. A custom section is recognized by
+  // the custom section name of the first atom stored in it.
+  StringRef customName = atom->customSectionName();
+  for (SectionInfo *candidate : _customSections) {
+    const DefinedAtom *firstAtom = candidate->atomsAndOffsets.front().atom;
+    if (firstAtom->customSectionName().equals(customName))
+      return candidate;
+  }
+
+  // No match: split the name at '/' and create the section.
+  size_t separatorIndex = customName.find('/');
+  assert(separatorIndex != StringRef::npos);
+  StringRef segName = customName.slice(0, separatorIndex);
+  StringRef sectName = customName.drop_front(separatorIndex + 1);
+  auto *created =
+      new (_allocator) SectionInfo(segName, sectName, S_REGULAR, _ctx,
+                                   0, /* relocsToDefinedCanBeImplicit */ false);
+  _customSections.push_back(created);
+  _sectionInfos.push_back(created);
+  return created;
+}
+/// Places \p atom at the end of \p sect, honoring the atom's alignment value
+/// and modulus, and grows the section's size and alignment accordingly.
+void Util::appendAtom(SectionInfo *sect, const DefinedAtom *atom) {
+  const DefinedAtom::Alignment atomAlign = atom->alignment();
+  const uint64_t alignValue = atomAlign.value;
+  const uint64_t wantedModulus = atomAlign.modulus;
+
+  // Starting at the current end of the section, advance to the next offset
+  // whose remainder modulo the alignment equals the requested modulus.
+  uint64_t placement = sect->size;
+  const uint64_t actualModulus = placement % alignValue;
+  if (actualModulus < wantedModulus)
+    placement += wantedModulus - actualModulus;
+  else if (actualModulus > wantedModulus)
+    placement += alignValue + wantedModulus - actualModulus;
+
+  // The section keeps the largest alignment of any atom it contains.
+  if (alignValue > sect->alignment)
+    sect->alignment = atomAlign.value;
+
+  // Record the atom at its offset and extend the section to cover it.
+  sect->atomsAndOffsets.push_back(AtomInfo{atom, placement});
+  sect->size = placement + atom->size();
+}
+// Runs every defined atom of atomFile through attribute processing and then
+// section assignment, in the order atomFile.defined() yields them.
+void Util::processDefinedAtoms(const lld::File &atomFile) {
+ for (const DefinedAtom *atom : atomFile.defined()) {
+ processAtomAttributes(atom);
+ assignAtomToSection(atom);
+ }
+/// Folds per-file properties of the file that owns \p atom into the
+/// output-wide state. Atoms whose file is not a MachOFile are ignored.
+void Util::processAtomAttributes(const DefinedAtom *atom) {
+  auto *machoFile = dyn_cast<mach_o::MachOFile>(&atom->file());
+  if (!machoFile)
+    return;
+
+  // If the file doesn't use subsections via symbols, then make sure we don't
+  // add that flag to the final output file if we have a relocatable file.
+  if (!machoFile->subsectionsViaSymbols())
+    _subsectionsViaSymbols = false;
+
+  // All the source files must have min versions for us to output an object
+  // file with a min version; track the largest one seen.
+  auto fileMinVersion = machoFile->minVersion();
+  if (fileMinVersion)
+    _minVersion = std::max(_minVersion, fileMinVersion);
+  else
+    _allSourceFilesHaveMinVersions = false;
+
+  // If we don't have a platform load command, but one of the source files
+  // does, then take the one from the file.
+  if (!_minVersionCommandType) {
+    auto fileCommandKind = machoFile->minVersionLoadCommandKind();
+    if (fileCommandKind)
+      _minVersionCommandType = fileCommandKind;
+  }
+}
+/// Routes \p atom to its section. Mach header atoms are recorded as header
+/// aliases and stored at offset 0 of their section without growing it;
+/// DSO-handle atoms are only recorded as header aliases; every other atom is
+/// appended normally.
+void Util::assignAtomToSection(const DefinedAtom *atom) {
+  switch (atom->contentType()) {
+  case DefinedAtom::typeMachHeader:
+    _machHeaderAliasAtoms.push_back(atom);
+    // Assign atom to this section with offset 0.
+    sectionForAtom(atom)->atomsAndOffsets.push_back(AtomInfo{atom, 0});
+    break;
+  case DefinedAtom::typeDSOHandle:
+    _machHeaderAliasAtoms.push_back(atom);
+    break;
+  default:
+    appendAtom(sectionForAtom(atom), atom);
+    break;
+  }
+}
+/// Returns the SegmentInfo named \p segName, creating and registering it with
+/// its initial and maximum VM protections if it does not exist yet.
+SegmentInfo *Util::segmentForName(StringRef segName) {
+  for (SegmentInfo *existing : _segmentInfos)
+    if (existing->name.equals(segName))
+      return existing;
+
+  auto *created = new (_allocator) SegmentInfo(segName);
+
+  // Initial protection: a few well-known segments are special-cased, all
+  // others default to read-write.
+  created->init_access =
+      llvm::StringSwitch<uint32_t>(segName)
+          .Case("__TEXT", VM_PROT_READ | VM_PROT_EXECUTE)
+          .Case("__PAGEZERO", 0)
+          .Case("__LINKEDIT", VM_PROT_READ)
+          .Default(VM_PROT_READ | VM_PROT_WRITE);
+
+  // Maximum protection. A switch is overkill here, but switch coverage
+  // makes it much easier to catch newly added OS values.
+  switch (_ctx.os()) {
+  case lld::MachOLinkingContext::OS::unknown:
+  case lld::MachOLinkingContext::OS::macOSX:
+  case lld::MachOLinkingContext::OS::iOS_simulator:
+    // __PAGEZERO gets no access at all; everything else may use all three.
+    created->max_access =
+        segName.equals("__PAGEZERO")
+            ? 0
+            : (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE);
+    break;
+  case lld::MachOLinkingContext::OS::iOS:
+    // iPhoneOS always uses same protection for max and initial
+    created->max_access = created->init_access;
+    break;
+  }
+
+  _segmentInfos.push_back(created);
+  return created;
+}
+// Sort key for segments: __PAGEZERO, __TEXT and __DATA get 1, 2 and 3; every
+// other segment name gets 100.
+unsigned Util::SegmentSorter::weight(const SegmentInfo *seg) {
+ return llvm::StringSwitch<unsigned>(seg->name)
+ .Case("__PAGEZERO", 1)
+ .Case("__TEXT", 2)
+ .Case("__DATA", 3)
+ .Default(100);
+// Strict ordering of segments by weight(); segments of equal weight compare
+// as equivalent.
+bool Util::SegmentSorter::operator()(const SegmentInfo *left,
+ const SegmentInfo *right) {
+ return (weight(left) < weight(right));
+// Sort key for sections: __text, __stubs, __stub_helper, __const and
+// __cstring get 1 through 5, __unwind_info and __eh_frame get 98 and 99, and
+// every other section name gets 10.
+unsigned Util::TextSectionSorter::weight(const SectionInfo *sect) {
+ return llvm::StringSwitch<unsigned>(sect->sectionName)
+ .Case("__text", 1)
+ .Case("__stubs", 2)
+ .Case("__stub_helper", 3)
+ .Case("__const", 4)
+ .Case("__cstring", 5)
+ .Case("__unwind_info", 98)
+ .Case("__eh_frame", 99)
+ .Default(10);
+// Strict ordering of sections by weight(); sections of equal weight compare
+// as equivalent.
+bool Util::TextSectionSorter::operator()(const SectionInfo *left,
+ const SectionInfo *right) {
+ return (weight(left) < weight(right));
+// Creates the segments required by the output type, groups every section
+// into its segment, sorts segments (and the sections of __TEXT), and then
+// records each segment's index and each section's final index.
+void Util::organizeSections() {
+ // NOTE!: Keep this in sync with assignAddressesToSections.
+ switch (_ctx.outputMachOType()) {
+ case llvm::MachO::MH_EXECUTE:
+ // Main executables, need a zero-page segment
+ segmentForName("__PAGEZERO");
+ // Fall into next case.
+ case llvm::MachO::MH_DYLIB:
+ case llvm::MachO::MH_BUNDLE:
+ // All dynamic code needs TEXT segment to hold the load commands.
+ segmentForName("__TEXT");
+ break;
+ default:
+ break;
+ }
+ // __LINKEDIT is created for every output type.
+ segmentForName("__LINKEDIT");
+ // Group sections into segments.
+ for (SectionInfo *si : _sectionInfos) {
+ SegmentInfo *seg = segmentForName(si->segmentName);
+ seg->sections.push_back(si);
+ }
+ // Sort segments.
+ // std::sort is not stable, so segments with equal weight are not
+ // guaranteed to keep their creation order.
+ std::sort(_segmentInfos.begin(), _segmentInfos.end(), SegmentSorter());
+ // Sort sections within segments.
+ for (SegmentInfo *seg : _segmentInfos) {
+ if (seg->name.equals("__TEXT")) {
+ std::sort(seg->sections.begin(), seg->sections.end(),
+ TextSectionSorter());
+ }
+ }
+ // Record final section indexes.
+ // Segment indexes start at 0, section indexes start at 1.
+ uint32_t segmentIndex = 0;
+ uint32_t sectionIndex = 1;
+ for (SegmentInfo *seg : _segmentInfos) {
+ seg->normalizedSegmentIndex = segmentIndex++;
+ for (SectionInfo *sect : seg->sections)
+ sect->finalSectionIndex = sectionIndex++;
+ }
+// Lays the sections of seg out front to back starting at addr. On return
+// addr is the end address of the last section, and seg->size is the occupied
+// span rounded up to the context's page size.
+void Util::layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr) {
+ seg->address = addr;
+ for (SectionInfo *sect : seg->sections) {
+ // Each section starts at the next address that satisfies its alignment.
+ sect->address = llvm::alignTo(addr, sect->alignment);
+ addr = sect->address + sect->size;
+ }
+ seg->size = llvm::alignTo(addr - seg->address, _ctx.pageSize());
+// __TEXT segment lays out backwards so padding is at front after load commands.
+// hlcSize is the size reserved at the start of the segment (the caller passes
+// the header-and-load-commands size). On return addr is the end address of
+// the last section.
+void Util::layoutSectionsInTextSegment(size_t hlcSize, SegmentInfo *seg,
+ uint64_t &addr) {
+ seg->address = addr;
+ // Walks sections starting at end to calculate padding for start.
+ // taddr goes negative: it is the start offset of each section measured
+ // back from the end of the segment contents.
+ int64_t taddr = 0;
+ for (auto it = seg->sections.rbegin(); it != seg->sections.rend(); ++it) {
+ SectionInfo *sect = *it;
+ taddr -= sect->size;
+ // Clears the low bits so taddr becomes a multiple of the alignment
+ // (presumably alignments are powers of two -- TODO confirm).
+ taddr = taddr & (0 - sect->alignment);
+ }
+ // Add whole pages until the padding is non-negative.
+ int64_t padding = taddr - hlcSize;
+ while (padding < 0)
+ padding += _ctx.pageSize();
+ // Start assigning section address starting at padded offset.
+ addr += (padding + hlcSize);
+ for (SectionInfo *sect : seg->sections) {
+ sect->address = llvm::alignTo(addr, sect->alignment);
+ addr = sect->address + sect->size;
+ }
+ seg->size = llvm::alignTo(addr - seg->address, _ctx.pageSize());
+/// Assigns addresses and sizes to every segment, and to the sections inside
+/// it, walking _segmentInfos in their current order.
+///
+/// __PAGEZERO only receives its size from the context. __TEXT is laid out
+/// with the header and load commands (sized from \p file) at the front,
+/// starting at the context's base address when that is non-zero. All other
+/// segments are laid out front to back. The running address is rounded up to
+/// the page size after each segment.
+void Util::assignAddressesToSections(const NormalizedFile &file) {
+  // NOTE!: Keep this in sync with organizeSections.
+  size_t hlcSize = headerAndLoadCommandsSize(file);
+  uint64_t address = 0;
+  for (SegmentInfo *seg : _segmentInfos) {
+    if (seg->name.equals("__PAGEZERO")) {
+      seg->size = _ctx.pageZeroSize();
+      address += seg->size;
+    }
+    else if (seg->name.equals("__TEXT")) {
+      // _ctx.baseAddress()  == 0 implies it was either unspecified or
+      // pageZeroSize is also 0. In either case resetting address is safe.
+      address = _ctx.baseAddress() ? _ctx.baseAddress() : address;
+      layoutSectionsInTextSegment(hlcSize, seg, address);
+    } else
+      layoutSectionsInSegment(seg, address);
+    address = llvm::alignTo(address, _ctx.pageSize());
+  }
+  DEBUG_WITH_TYPE("WriterMachO-norm",
+    llvm::dbgs() << "assignAddressesToSections()\n";
+    for (SegmentInfo *sgi : _segmentInfos) {
+      llvm::dbgs() << " address=" << llvm::format("0x%08llX", sgi->address)
+                   << ", size=" << llvm::format("0x%08llX", sgi->size)
+                   << ", segment-name='" << sgi->name
+                   << "'\n";
+      for (SectionInfo *si : sgi->sections) {
+        // The closing quote after the section name was missing here; it now
+        // matches the segment line above.
+        llvm::dbgs()<< " addr=" << llvm::format("0x%08llX", si->address)
+                    << ", size=" << llvm::format("0x%08llX", si->size)
+                    << ", section-name='" << si->sectionName
+                    << "'\n";
+      }
+    }
+  );
+}
+/// Appends one normalized Segment to \p file for every SegmentInfo, copying
+/// its name, address, size and protections, in _segmentInfos order.
+void Util::copySegmentInfo(NormalizedFile &file) {
+  for (const SegmentInfo *info : _segmentInfos) {
+    Segment normSeg;
+    normSeg.name = info->name;
+    normSeg.address = info->address;
+    normSeg.size = info->size;
+    normSeg.init_access = info->init_access;
+    normSeg.max_access = info->max_access;
+    file.segments.push_back(normSeg);
+  }
+}
+/// Appends a normalized Section describing \p si to \p file and stores the
+/// new element's index back into \p si. Section content is not copied here.
+void Util::appendSection(SectionInfo *si, NormalizedFile &file) {
+  // The new element lands at the current end of file.sections.
+  const uint32_t newIndex = file.sections.size();
+  file.sections.push_back(Section());
+  Section &normSect = file.sections.back();
+
+  // Copy fields to normalized section.
+  normSect.segmentName = si->segmentName;
+  normSect.sectionName = si->sectionName;
+  normSect.type = si->type;
+  normSect.attributes = si->attributes;
+  normSect.address = si->address;
+  normSect.alignment = si->alignment;
+
+  // Record where normalized section is.
+  si->normalizedSectionIndex = newIndex;
+}
+// Fills in the content of every normalized section in file. Zero-fill
+// sections get a content range with a null data pointer and the section's
+// size; all other sections get a buffer allocated from file.ownedAllocations
+// into which the arch handler generates each atom's bytes.
+void Util::copySectionContent(NormalizedFile &file) {
+ // True when producing an MH_OBJECT; passed through to the arch handler.
+ const bool r = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT);
+ // Utility function for ArchHandler to find address of atom in output file.
+ auto addrForAtom = [&] (const Atom &atom) -> uint64_t {
+ auto pos = _atomToAddress.find(&atom);
+ assert(pos != _atomToAddress.end());
+ return pos->second;
+ };
+ // Returns the address of the section containing atom, found by a linear
+ // scan over every atom of every section.
+ auto sectionAddrForAtom = [&] (const Atom &atom) -> uint64_t {
+ for (const SectionInfo *sectInfo : _sectionInfos)
+ for (const AtomInfo &atomInfo : sectInfo->atomsAndOffsets)
+ if (atomInfo.atom == &atom)
+ return sectInfo->address;
+ llvm_unreachable("atom not assigned to section");
+ };
+ for (SectionInfo *si : _sectionInfos) {
+ Section *normSect = &file.sections[si->normalizedSectionIndex];
+ if (isZeroFillSection(si->type)) {
+ // No bytes are stored for zero-fill sections; only the length is kept.
+ const uint8_t *empty = nullptr;
+ normSect->content = llvm::makeArrayRef(empty, si->size);
+ continue;
+ }
+ // Copy content from atoms to content buffer for section.
+ llvm::MutableArrayRef<uint8_t> sectionContent;
+ if (si->size) {
+ uint8_t *sectContent = file.ownedAllocations.Allocate<uint8_t>(si->size);
+ sectionContent = llvm::MutableArrayRef<uint8_t>(sectContent, si->size);
+ normSect->content = sectionContent;
+ }
+ for (AtomInfo &ai : si->atomsAndOffsets) {
+ // Zero-sized atoms contribute no bytes and must not carry references.
+ if (!ai.atom->size()) {
+ assert(ai.atom->begin() == ai.atom->end() &&
+ "Cannot have references without content");
+ continue;
+ }
+ // The arch handler writes into this atom's slice of the section buffer.
+ auto atomContent = sectionContent.slice(ai.offsetInSection,
+ ai.atom->size());
+ _archHandler.generateAtomContent(*ai.atom, r, addrForAtom,
+ sectionAddrForAtom, _ctx.baseAddress(),
+ atomContent);
+ }
+ }
+// Appends a normalized section to file for every SectionInfo, visiting the
+// segments in order and the sections of each segment in order.
+void Util::copySectionInfo(NormalizedFile &file) {
+ file.sections.reserve(_sectionInfos.size());
+ // Write sections grouped by segment.
+ for (SegmentInfo *sgi : _segmentInfos) {
+ for (SectionInfo *si : sgi->sections) {
+ appendSection(si, file);
+ }
+ }
+// Copies the current segment addresses/sizes and section addresses into the
+// already-existing entries of file.segments and file.sections, located
+// through the indexes stored in each SegmentInfo and SectionInfo.
+void Util::updateSectionInfo(NormalizedFile &file) {
+ file.sections.reserve(_sectionInfos.size());
+ // sections grouped by segment.
+ for (SegmentInfo *sgi : _segmentInfos) {
+ Segment *normSeg = &file.segments[sgi->normalizedSegmentIndex];
+ normSeg->address = sgi->address;
+ normSeg->size = sgi->size;
+ for (SectionInfo *si : sgi->sections) {
+ Section *normSect = &file.sections[si->normalizedSectionIndex];
+ normSect->address = si->address;
+ }
+ }
+/// Stores the entry point address in \p nFile. Without an entry atom the
+/// address is set to 0. With one, the address is only written when the output
+/// type has an entry point; a Thumb entry function gets its low bit set.
+void Util::copyEntryPointAddress(NormalizedFile &nFile) {
+  if (_entryAtom == nullptr) {
+    nFile.entryAddress = 0;
+    return;
+  }
+  if (!_ctx.outputTypeHasEntry())
+    return;
+
+  uint64_t entry = _atomToAddress[_entryAtom];
+  if (_archHandler.isThumbFunction(*_entryAtom))
+    entry |= 1;
+  nFile.entryAddress = entry;
+}
+/// Records the output address of every atom in _atomToAddress.
+///
+/// Atoms stored in sections get their section's address plus their offset;
+/// the mach header alias atoms get the context's base address. When the
+/// output type has an entry point, the non-empty code atom whose name equals
+/// the entry symbol name is remembered in _entryAtom.
+void Util::buildAtomToAddressMap() {
+  DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
+                   << "assign atom addresses:\n");
+  const bool lookForEntry = _ctx.outputTypeHasEntry();
+  for (SectionInfo *sect : _sectionInfos) {
+    for (const AtomInfo &info : sect->atomsAndOffsets) {
+      _atomToAddress[info.atom] = sect->address + info.offsetInSection;
+      if (lookForEntry && (info.atom->contentType() == DefinedAtom::typeCode) &&
+          (info.atom->size() != 0) &&
+          info.atom->name() == _ctx.entrySymbolName()) {
+        _entryAtom = info.atom;
+      }
+      // The address is 64 bits wide and the atom is a pointer, so both are
+      // converted to unsigned long long and printed with %llX; plain %X / %lX
+      // does not match the argument types.
+      DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
+                      << " address="
+                      << llvm::format("0x%016llX",
+                             static_cast<unsigned long long>(
+                                 _atomToAddress[info.atom]))
+                      << llvm::format(" 0x%09llX",
+                             static_cast<unsigned long long>(
+                                 reinterpret_cast<uintptr_t>(info.atom)))
+                      << ", file=#"
+                      << info.atom->file().ordinal()
+                      << ", atom=#"
+                      << info.atom->ordinal()
+                      << ", name="
+                      << info.atom->name()
+                      << ", type="
+                      << info.atom->contentType()
+                      << "\n");
+    }
+  }
+  DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
+                  << "assign header alias atom addresses:\n");
+  for (const Atom *atom : _machHeaderAliasAtoms) {
+    _atomToAddress[atom] = _ctx.baseAddress();
+#ifndef NDEBUG
+    if (auto *definedAtom = dyn_cast<DefinedAtom>(atom)) {
+      DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
+                      << " address="
+                      << llvm::format("0x%016llX",
+                             static_cast<unsigned long long>(
+                                 _atomToAddress[atom]))
+                      << llvm::format(" 0x%09llX",
+                             static_cast<unsigned long long>(
+                                 reinterpret_cast<uintptr_t>(atom)))
+                      << ", file=#"
+                      << definedAtom->file().ordinal()
+                      << ", atom=#"
+                      << definedAtom->ordinal()
+                      << ", name="
+                      << definedAtom->name()
+                      << ", type="
+                      << definedAtom->contentType()
+                      << "\n");
+    } else {
+      DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
+                      << " address="
+                      << llvm::format("0x%016llX",
+                             static_cast<unsigned long long>(
+                                 _atomToAddress[atom]))
+                      << " atom=" << atom
+                      << " name=" << atom->name() << "\n");
+    }
+#endif
+  }
+}
+/// Builds the debug-map entries (stabs) for the output and appends them to
+/// _stabs.
+///
+/// Atoms that come from object files carrying DWARF get synthesized SO / OSO
+/// / BNSYM / FUN / ENSYM / STSYM / GSYM entries, grouped by translation unit.
+/// Stabs already present in input files are copied through unchanged.
+/// Strings that must outlive this call are allocated from
+/// \p file.ownedAllocations.
+///
+/// Returns success immediately when the context asks for no debug map, and
+/// an error if an object file path cannot be made absolute.
+llvm::Error Util::synthesizeDebugNotes(NormalizedFile &file) {
+  // Bail out early if we don't need to generate a debug map.
+  if (_ctx.debugInfoMode() == MachOLinkingContext::DebugInfoMode::noDebugMap)
+    return llvm::Error::success();
+  std::vector<const DefinedAtom*> atomsNeedingDebugNotes;
+  std::set<const mach_o::MachOFile*> filesWithStabs;
+  bool objFileHasDwarf = false;
+  const File *objFile = nullptr;
+  for (SectionInfo *sect : _sectionInfos) {
+    for (const AtomInfo &info : sect->atomsAndOffsets) {
+      if (const DefinedAtom *atom = dyn_cast<DefinedAtom>(info.atom)) {
+        // FIXME: No stabs/debug-notes for symbols that wouldn't be in the
+        //        symbol table.
+        // FIXME: No stabs/debug-notes for kernel dtrace probes.
+        if (atom->contentType() == DefinedAtom::typeCFI ||
+            atom->contentType() == DefinedAtom::typeCString)
+          continue;
+        // Whenever we encounter a new file, update the 'objfileHasDwarf' flag.
+        if (&info.atom->file() != objFile) {
+          // Remember the file so the flag is only recomputed when the owning
+          // file actually changes.
+          objFile = &info.atom->file();
+          objFileHasDwarf = false;
+          if (const mach_o::MachOFile *atomFile =
+              dyn_cast<mach_o::MachOFile>(&info.atom->file())) {
+            if (atomFile->debugInfo()) {
+              if (isa<mach_o::DwarfDebugInfo>(atomFile->debugInfo()))
+                objFileHasDwarf = true;
+              else if (isa<mach_o::StabsDebugInfo>(atomFile->debugInfo()))
+                filesWithStabs.insert(atomFile);
+            }
+          }
+        }
+        // If this atom is from a file that needs dwarf, add it to the list.
+        if (objFileHasDwarf)
+          atomsNeedingDebugNotes.push_back(info.atom);
+      }
+    }
+  }
+  // Sort atoms needing debug notes by file ordinal, then atom ordinal.
+  std::sort(atomsNeedingDebugNotes.begin(), atomsNeedingDebugNotes.end(),
+            [](const DefinedAtom *lhs, const DefinedAtom *rhs) {
+              if (lhs->file().ordinal() != rhs->file().ordinal())
+                return (lhs->file().ordinal() < rhs->file().ordinal());
+              return (lhs->ordinal() < rhs->ordinal());
+            });
+  // FIXME: Handle <rdar://problem/17689030>: Add -add_ast_path option to
+  //        linker which add N_AST stab entry to output
+  // See OutputFile::synthesizeDebugNotes in ObjectFile.cpp in ld64.
+  StringRef oldFileName = "";
+  StringRef oldDirPath = "";
+  bool wroteStartSO = false;
+  for (const DefinedAtom *atom : atomsNeedingDebugNotes) {
+    const auto &atomFile = cast<mach_o::MachOFile>(atom->file());
+    assert(dyn_cast_or_null<lld::mach_o::DwarfDebugInfo>(atomFile.debugInfo())
+           && "file for atom needing debug notes does not contain dwarf");
+    auto &dwarf = cast<lld::mach_o::DwarfDebugInfo>(*atomFile.debugInfo());
+    auto &tu = dwarf.translationUnitSource();
+    StringRef newFileName = tu.name;
+    StringRef newDirPath = tu.path;
+    // Add an SO whenever the TU source file changes.
+    if (newFileName != oldFileName || newDirPath != oldDirPath) {
+      // Translation unit change, emit ending SO
+      if (oldFileName != "")
+        _stabs.push_back(mach_o::Stab(nullptr, N_SO, 1, 0, 0, ""));
+      oldFileName = newFileName;
+      oldDirPath = newDirPath;
+      // If newDirPath doesn't end with a '/' we need to add one. An empty
+      // path is left alone: calling back() on an empty StringRef is invalid.
+      if (!newDirPath.empty() && newDirPath.back() != '/') {
+        char *p =
+            file.ownedAllocations.Allocate<char>(newDirPath.size() + 2);
+        memcpy(p, newDirPath.data(), newDirPath.size());
+        p[newDirPath.size()] = '/';
+        p[newDirPath.size() + 1] = '\0';
+        newDirPath = p;
+      }
+      // New translation unit, emit start SOs:
+      _stabs.push_back(mach_o::Stab(nullptr, N_SO, 0, 0, 0, newDirPath));
+      _stabs.push_back(mach_o::Stab(nullptr, N_SO, 0, 0, 0, newFileName));
+      // Synthesize OSO for start of file.
+      char *fullPath = nullptr;
+      {
+        SmallString<1024> pathBuf(atomFile.path());
+        if (auto EC = llvm::sys::fs::make_absolute(pathBuf))
+          return llvm::errorCodeToError(EC);
+        fullPath = file.ownedAllocations.Allocate<char>(pathBuf.size() + 1);
+        memcpy(fullPath, pathBuf.c_str(), pathBuf.size() + 1);
+      }
+      // Get mod time. It stays 0 when the file cannot be stat'ed.
+      uint32_t modTime = 0;
+      llvm::sys::fs::file_status stat;
+      if (!llvm::sys::fs::status(fullPath, stat))
+        if (llvm::sys::fs::exists(stat))
+          modTime = llvm::sys::toTimeT(stat.getLastModificationTime());
+      _stabs.push_back(mach_o::Stab(nullptr, N_OSO, _ctx.getCPUSubType(), 1,
+                                    modTime, fullPath));
+      // <rdar://problem/6337329> linker should put cpusubtype in n_sect field
+      // of nlist entry for N_OSO debug note entries.
+      wroteStartSO = true;
+    }
+    if (atom->contentType() == DefinedAtom::typeCode) {
+      // Synthesize BNSYM and start FUN stabs.
+      _stabs.push_back(mach_o::Stab(atom, N_BNSYM, 1, 0, 0, ""));
+      _stabs.push_back(mach_o::Stab(atom, N_FUN, 1, 0, 0, atom->name()));
+      // Synthesize any SOL stabs needed
+      // FIXME: add SOL stabs.
+      _stabs.push_back(mach_o::Stab(nullptr, N_FUN, 0, 0,
+                                    atom->rawContent().size(), ""));
+      _stabs.push_back(mach_o::Stab(nullptr, N_ENSYM, 1, 0,
+                                    atom->rawContent().size(), ""));
+    } else {
+      if (atom->scope() == Atom::scopeTranslationUnit)
+        _stabs.push_back(mach_o::Stab(atom, N_STSYM, 1, 0, 0, atom->name()));
+      else
+        _stabs.push_back(mach_o::Stab(nullptr, N_GSYM, 1, 0, 0, atom->name()));
+    }
+  }
+  // Emit ending SO if necessary.
+  if (wroteStartSO)
+    _stabs.push_back(mach_o::Stab(nullptr, N_SO, 1, 0, 0, ""));
+  // Copy any stabs from .o file.
+  for (const auto *stabsFile : filesWithStabs) {
+    const auto &stabsList =
+      cast<mach_o::StabsDebugInfo>(stabsFile->debugInfo())->stabs();
+    for (auto &stab : stabsList) {
+      // FIXME: Drop stabs whose atoms have been dead-stripped.
+      _stabs.push_back(stab);
+    }
+  }
+  return llvm::Error::success();
+}
+/// Computes the n_desc flag bits for the symbol table entry of \p atom.
+///
+/// Weak-definition merge modes set N_WEAK_DEF; resolver atoms set
+/// N_SYMBOL_RESOLVER; mach header atoms set REFERENCED_DYNAMICALLY; Thumb
+/// functions set N_ARM_THUMB_DEF; and, for MH_OBJECT output only, atoms that
+/// must never be dead stripped set N_NO_DEAD_STRIP unless they are
+/// initializer or terminator pointers. Merge modes not supported by mach-o
+/// hit llvm_unreachable.
+uint16_t Util::descBits(const DefinedAtom* atom) {
+  uint16_t desc = 0;
+  switch (atom->merge()) {
+  case lld::DefinedAtom::mergeNo:
+  case lld::DefinedAtom::mergeAsTentative:
+    break;
+  case lld::DefinedAtom::mergeAsWeak:
+  case lld::DefinedAtom::mergeAsWeakAndAddressUsed:
+    desc |= N_WEAK_DEF;
+    break;
+  case lld::DefinedAtom::mergeSameNameAndSize:
+  case lld::DefinedAtom::mergeByLargestSection:
+  case lld::DefinedAtom::mergeByContent:
+    llvm_unreachable("Unsupported DefinedAtom::merge()");
+    break;
+  }
+  // Each of the next three checks is independent. Without its own statement
+  // each `if` would swallow the following one, and the Thumb bit could only
+  // be set for an atom that is both a resolver and a mach header.
+  if (atom->contentType() == lld::DefinedAtom::typeResolver)
+    desc |= N_SYMBOL_RESOLVER;
+  if (atom->contentType() == lld::DefinedAtom::typeMachHeader)
+    desc |= REFERENCED_DYNAMICALLY;
+  if (_archHandler.isThumbFunction(*atom))
+    desc |= N_ARM_THUMB_DEF;
+  if (atom->deadStrip() == DefinedAtom::deadStripNever &&
+      _ctx.outputMachOType() == llvm::MachO::MH_OBJECT) {
+    if ((atom->contentType() != DefinedAtom::typeInitializerPtr)
+     && (atom->contentType() != DefinedAtom::typeTerminatorPtr))
+      desc |= N_NO_DEAD_STRIP;
+  }
+  return desc;
+}
+/// Strict-weak ordering used to sort symbols alphabetically by atom name.
+bool Util::AtomSorter::operator()(const AtomAndIndex &left,
+                                  const AtomAndIndex &right) {
+  return (left.atom->name().compare(right.atom->name()) < 0);
+}
+/// Decides which symbol table region \p atom belongs to and which nlist
+/// scope bits it gets.
+///
+/// \param atom            the defined atom being classified.
+/// \param inGlobalsRegion set to true when the symbol goes in the globals
+///                        region, false when it goes in the locals region.
+/// \param scope           set to the N_EXT / N_PEXT bits for the symbol.
+/// \returns an error if a linkage-unit (hidden) symbol is named in an export
+///          white list; success otherwise. The out-parameters are not
+///          written on the error path.
+llvm::Error Util::getSymbolTableRegion(const DefinedAtom* atom,
+                                       bool &inGlobalsRegion,
+                                       SymbolScope &scope) {
+  bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT);
+  switch (atom->scope()) {
+  case Atom::scopeTranslationUnit:
+    scope = 0;
+    inGlobalsRegion = false;
+    return llvm::Error::success();
+  case Atom::scopeLinkageUnit:
+    if ((_ctx.exportMode() == MachOLinkingContext::ExportMode::whiteList) &&
+        _ctx.exportSymbolNamed(atom->name())) {
+      return llvm::make_error<GenericError>(
+          Twine("cannot export hidden symbol ") + atom->name());
+    }
+    if (rMode && _ctx.keepPrivateExterns()) {
+      // -keep_private_externs means keep in globals region as N_PEXT.
+      scope = N_PEXT | N_EXT;
+      inGlobalsRegion = true;
+      return llvm::Error::success();
+    }
+    // scopeLinkageUnit symbols are no longer global once linked.
+    scope = N_PEXT;
+    inGlobalsRegion = false;
+    return llvm::Error::success();
+  case Atom::scopeGlobal:
+    // Under an export restriction, a global that is not exported is demoted
+    // to a private extern in the locals region.
+    if (_ctx.exportRestrictMode() && !_ctx.exportSymbolNamed(atom->name())) {
+      scope = N_PEXT;
+      inGlobalsRegion = false;
+      return llvm::Error::success();
+    }
+    scope = N_EXT;
+    inGlobalsRegion = true;
+    return llvm::Error::success();
+  }
+  llvm_unreachable("atom->scope() unknown enum value");
+}
+llvm::Error Util::addSymbols(const lld::File &atomFile,
+ NormalizedFile &file) {
+ bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT);
+ // Mach-O symbol table has four regions: stabs, locals, globals, undefs.
+ // Add all stabs.
+ for (auto &stab : _stabs) {
+ Symbol sym;
+ sym.type = static_cast<NListType>(stab.type);
+ sym.scope = 0;
+ sym.sect = stab.other;
+ sym.desc = stab.desc;
+ if (stab.atom)
+ sym.value = _atomToAddress[stab.atom];
+ else
+ sym.value = stab.value;
+ sym.name = stab.str;
+ file.stabsSymbols.push_back(sym);
+ }
+ // Add all local (non-global) symbols in address order
+ std::vector<AtomAndIndex> globals;
+ globals.reserve(512);
+ for (SectionInfo *sect : _sectionInfos) {
+ for (const AtomInfo &info : sect->atomsAndOffsets) {
+ const DefinedAtom *atom = info.atom;
+ if (!atom->name().empty()) {
+ SymbolScope symbolScope;
+ bool inGlobalsRegion;
+ if (auto ec = getSymbolTableRegion(atom, inGlobalsRegion, symbolScope)){
+ return ec;
+ }
+ if (inGlobalsRegion) {
+ AtomAndIndex ai = { atom, sect->finalSectionIndex, symbolScope };
+ globals.push_back(ai);
+ } else {
+ Symbol sym;
+ sym.name = atom->name();
+ sym.type = N_SECT;
+ sym.scope = symbolScope;
+ sym.sect = sect->finalSectionIndex;
+ sym.desc = descBits(atom);
+ sym.value = _atomToAddress[atom];
+ _atomToSymbolIndex[atom] = file.localSymbols.size();
+ file.localSymbols.push_back(sym);
+ }
+ } else if (rMode && _archHandler.needsLocalSymbolInRelocatableFile(atom)){
+ // Create 'Lxxx' labels for anonymous atoms if archHandler says so.
+ static unsigned tempNum = 1;
+ char tmpName[16];
+ sprintf(tmpName, "L%04u", tempNum++);
+ StringRef tempRef(tmpName);
+ Symbol sym;
+ sym.name = tempRef.copy(file.ownedAllocations);
+ sym.type = N_SECT;
+ sym.scope = 0;
+ sym.sect = sect->finalSectionIndex;
+ sym.desc = 0;
+ sym.value = _atomToAddress[atom];
+ _atomToSymbolIndex[atom] = file.localSymbols.size();
+ file.localSymbols.push_back(sym);
+ }
+ }
+ }
+ // Sort global symbol alphabetically, then add to symbol table.
+ std::sort(globals.begin(), globals.end(), AtomSorter());
+ const uint32_t globalStartIndex = file.localSymbols.size();
+ for (AtomAndIndex &ai : globals) {
+ Symbol sym;
+ sym.name = ai.atom->name();
+ sym.type = N_SECT;
+ sym.scope = ai.scope;
+ sym.sect = ai.index;
+ sym.desc = descBits(static_cast<const DefinedAtom*>(ai.atom));
+ sym.value = _atomToAddress[ai.atom];
+ _atomToSymbolIndex[ai.atom] = globalStartIndex + file.globalSymbols.size();
+ file.globalSymbols.push_back(sym);
+ }
+ // Sort undefined symbol alphabetically, then add to symbol table.
+ std::vector<AtomAndIndex> undefs;
+ undefs.reserve(128);
+ for (const UndefinedAtom *atom : atomFile.undefined()) {
+ AtomAndIndex ai = { atom, 0, N_EXT };
+ undefs.push_back(ai);
+ }
+ for (const SharedLibraryAtom *atom : atomFile.sharedLibrary()) {
+ AtomAndIndex ai = { atom, 0, N_EXT };
+ undefs.push_back(ai);
+ }
+ std::sort(undefs.begin(), undefs.end(), AtomSorter());
+ const uint32_t start = file.globalSymbols.size() + file.localSymbols.size();
+ for (AtomAndIndex &ai : undefs) {
+ Symbol sym;
+ uint16_t desc = 0;
+ if (!rMode) {
+ uint8_t ordinal = 0;
+ if (!_ctx.useFlatNamespace())
+ ordinal = dylibOrdinal(dyn_cast<SharedLibraryAtom>(ai.atom));
+ llvm::MachO::SET_LIBRARY_ORDINAL(desc, ordinal);
+ }
+ sym.name = ai.atom->name();
+ sym.type = N_UNDF;
+ sym.scope = ai.scope;
+ sym.sect = 0;
+ sym.desc = desc;
+ sym.value = 0;
+ _atomToSymbolIndex[ai.atom] = file.undefinedSymbols.size() + start;
+ file.undefinedSymbols.push_back(sym);
+ }
+ return llvm::Error::success();
+/// Returns the target of the first reference in \p lpAtom that the arch
+/// handler classifies as a lazy pointer, or nullptr if there is none.
+const Atom *Util::targetOfLazyPointer(const DefinedAtom *lpAtom) {
+  for (const Reference *ref : *lpAtom) {
+    if (_archHandler.isLazyPointer(*ref)) {
+      return ref->target();
+    }
+  }
+  return nullptr;
+}
+/// Finds what a stub ultimately refers to: for each DefinedAtom the stub
+/// references, asks targetOfLazyPointer() and returns the first non-null
+/// answer. Returns nullptr when no reference leads to a lazy-pointer target.
+const Atom *Util::targetOfStub(const DefinedAtom *stubAtom) {
+  for (const Reference *ref : *stubAtom) {
+    if (const Atom *ta = ref->target()) {
+      if (const DefinedAtom *lpAtom = dyn_cast<DefinedAtom>(ta)) {
+        const Atom *target = targetOfLazyPointer(lpAtom);
+        if (target)
+          return target;
+      }
+    }
+  }
+  return nullptr;
+}
+/// Builds the indirect symbol list of every non-lazy-pointer, lazy-pointer
+/// and stub section in \p file.
+///
+/// Relies on _atomToSymbolIndex having been populated (see addSymbols()).
+void Util::addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file) {
+  for (SectionInfo *si : _sectionInfos) {
+    Section &normSect = file.sections[si->normalizedSectionIndex];
+    switch (si->type) {
+    case llvm::MachO::S_NON_LAZY_SYMBOL_POINTERS:
+      for (const AtomInfo &info : si->atomsAndOffsets) {
+        bool foundTarget = false;
+        for (const Reference *ref : *info.atom) {
+          const Atom *target = ref->target();
+          if (target) {
+            if (isa<const SharedLibraryAtom>(target)) {
+              uint32_t index = _atomToSymbolIndex[target];
+              normSect.indirectSymbols.push_back(index);
+              foundTarget = true;
+            } else {
+              // Pointer to something defined in this image.
+              normSect.indirectSymbols.push_back(
+                  llvm::MachO::INDIRECT_SYMBOL_LOCAL);
+            }
+          }
+        }
+        // NOTE(review): foundTarget is only set on the shared-library path,
+        // so an atom that pushed INDIRECT_SYMBOL_LOCAL also gets this entry.
+        if (!foundTarget) {
+          normSect.indirectSymbols.push_back(
+              llvm::MachO::INDIRECT_SYMBOL_ABS);
+        }
+      }
+      break;
+    case llvm::MachO::S_LAZY_SYMBOL_POINTERS:
+      for (const AtomInfo &info : si->atomsAndOffsets) {
+        const Atom *target = targetOfLazyPointer(info.atom);
+        if (target) {
+          uint32_t index = _atomToSymbolIndex[target];
+          normSect.indirectSymbols.push_back(index);
+        }
+      }
+      break;
+    case llvm::MachO::S_SYMBOL_STUBS:
+      for (const AtomInfo &info : si->atomsAndOffsets) {
+        const Atom *target = targetOfStub(info.atom);
+        if (target) {
+          uint32_t index = _atomToSymbolIndex[target];
+          normSect.indirectSymbols.push_back(index);
+        }
+      }
+      break;
+    default:
+      break;
+    }
+  }
+}
+/// Assigns an ordinal to every dylib known to the linking context and
+/// records the load commands needed for them in \p nFile.
+///
+/// \param atomFile unused here.
+/// \param nFile    normalized file whose dependentDylibs list is extended.
+void Util::addDependentDylibs(const lld::File &atomFile,
+                              NormalizedFile &nFile) {
+  // Scan all imported symbols and build up list of dylibs they are from.
+  int ordinal = 1;
+  for (const auto *dylib : _ctx.allDylibs()) {
+    DylibPathToInfo::iterator pos = _dylibInfo.find(dylib->installName());
+    if (pos == _dylibInfo.end()) {
+      DylibInfo info;
+      bool flatNamespaceAtom = dylib == _ctx.flatNamespaceFile();
+
+      // If we're in -flat_namespace mode (or this atom came from the flat
+      // namespace file under -undefined dynamic_lookup) then use the flat
+      // lookup ordinal.
+      if (flatNamespaceAtom || _ctx.useFlatNamespace())
+        info.ordinal = BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
+      else
+        info.ordinal = ordinal++;
+      info.hasWeak = false;
+      info.hasNonWeak = !info.hasWeak;
+      _dylibInfo[dylib->installName()] = info;
+
+      // Unless this was a flat_namespace atom, record the source dylib.
+      if (!flatNamespaceAtom) {
+        DependentDylib depInfo;
+        depInfo.path = dylib->installName();
+        depInfo.kind = llvm::MachO::LC_LOAD_DYLIB;
+        depInfo.currentVersion = _ctx.dylibCurrentVersion(dylib->path());
+        depInfo.compatVersion = _ctx.dylibCompatVersion(dylib->path());
+        nFile.dependentDylibs.push_back(depInfo);
+      }
+    } else {
+      // Install name already seen: just refresh the weak bookkeeping.
+      pos->second.hasWeak = false;
+      pos->second.hasNonWeak = !pos->second.hasWeak;
+    }
+  }
+  // Automatically weak link dylib in which all symbols are weak (canBeNull).
+  for (DependentDylib &dep : nFile.dependentDylibs) {
+    DylibInfo &info = _dylibInfo[dep.path];
+    if (info.hasWeak && !info.hasNonWeak)
+      dep.kind = llvm::MachO::LC_LOAD_WEAK_DYLIB;
+    else if (_ctx.isUpwardDylib(dep.path))
+      dep.kind = llvm::MachO::LC_LOAD_UPWARD_DYLIB;
+  }
+}
+/// Returns the ordinal recorded in _dylibInfo for the dylib that \p sa is
+/// loaded from. \p sa must not be null.
+int Util::dylibOrdinal(const SharedLibraryAtom *sa) {
+  return _dylibInfo[sa->loadName()].ordinal;
+}
+/// Finds the segment whose address range fully contains \p sect.
+///
+/// \param sect             section to locate.
+/// \param segmentIndex     set to the position of the segment in
+///                         _segmentInfos.
+/// \param segmentStartAddr set to that segment's start address.
+///
+/// It is a fatal error (llvm_unreachable) if no segment contains the section.
+void Util::segIndexForSection(const SectionInfo *sect, uint8_t &segmentIndex,
+                              uint64_t &segmentStartAddr) {
+  segmentIndex = 0;
+  for (const SegmentInfo *seg : _segmentInfos) {
+    if ((seg->address <= sect->address)
+        && (seg->address+seg->size >= sect->address+sect->size)) {
+      segmentStartAddr = seg->address;
+      return;
+    }
+    ++segmentIndex;
+  }
+  llvm_unreachable("section not in any segment");
+}
+/// Returns the final section index of the section whose half-open address
+/// range [address, address+size) contains the address assigned to \p atom.
+///
+/// It is a fatal error (llvm_unreachable) if no section contains it.
+uint32_t Util::sectionIndexForAtom(const Atom *atom) {
+  uint64_t address = _atomToAddress[atom];
+  for (const SectionInfo *si : _sectionInfos) {
+    if ((si->address <= address) && (address < si->address+si->size))
+      return si->finalSectionIndex;
+  }
+  llvm_unreachable("atom not in any section");
+}
+/// Emits section relocations into \p file; only done for relocatable
+/// (MH_OBJECT) output.
+///
+/// The arch handler does the encoding. It is handed three callbacks so it
+/// can map an atom to its symbol index, section index and output address.
+void Util::addSectionRelocs(const lld::File &, NormalizedFile &file) {
+  if (_ctx.outputMachOType() != llvm::MachO::MH_OBJECT)
+    return;
+
+  // Utility function for ArchHandler to find symbol index for an atom.
+  auto symIndexForAtom = [&] (const Atom &atom) -> uint32_t {
+    auto pos = _atomToSymbolIndex.find(&atom);
+    assert(pos != _atomToSymbolIndex.end());
+    return pos->second;
+  };
+
+  // Utility function for ArchHandler to find section index for an atom.
+  auto sectIndexForAtom = [&] (const Atom &atom) -> uint32_t {
+    return sectionIndexForAtom(&atom);
+  };
+
+  // Utility function for ArchHandler to find address of atom in output file.
+  auto addressForAtom = [&] (const Atom &atom) -> uint64_t {
+    auto pos = _atomToAddress.find(&atom);
+    assert(pos != _atomToAddress.end());
+    return pos->second;
+  };
+
+  for (SectionInfo *si : _sectionInfos) {
+    Section &normSect = file.sections[si->normalizedSectionIndex];
+    for (const AtomInfo &info : si->atomsAndOffsets) {
+      const DefinedAtom *atom = info.atom;
+      for (const Reference *ref : *atom) {
+        // Skip emitting relocs for sections which are always able to be
+        // implicitly regenerated and where the relocation targets an address
+        // which is defined.
+        if (si->relocsToDefinedCanBeImplicit && isa<DefinedAtom>(ref->target()))
+          continue;
+        _archHandler.appendSectionRelocations(*atom, info.offsetInSection, *ref,
+                                              symIndexForAtom,
+                                              sectIndexForAtom,
+                                              addressForAtom,
+                                              normSect.relocations);
+      }
+    }
+  }
+}
+/// Builds the function-starts payload in \p file.functionStarts: a list of
+/// ULEB128-encoded address deltas between consecutive code atoms, starting
+/// from the mach header atom's address, then zero-terminated and padded.
+///
+/// Does nothing unless the context asks for the function starts load command.
+void Util::addFunctionStarts(const lld::File &, NormalizedFile &file) {
+  if (!_ctx.generateFunctionStartsLoadCommand())
+    return;
+  file.functionStarts.reserve(8192);
+  // Delta compress function starts, starting with the mach header symbol.
+  const uint64_t badAddress = ~0ULL;
+  uint64_t addr = badAddress;
+  for (SectionInfo *si : _sectionInfos) {
+    for (const AtomInfo &info : si->atomsAndOffsets) {
+      auto type = info.atom->contentType();
+      if (type == DefinedAtom::typeMachHeader) {
+        // The mach header provides the base address for the first delta.
+        addr = _atomToAddress[info.atom];
+        continue;
+      }
+      if (type != DefinedAtom::typeCode)
+        continue;
+      assert(addr != badAddress && "Missing mach header symbol");
+      // Skip atoms which have 0 size. This is so that LC_FUNCTION_STARTS
+      // can't spill in to the next section.
+      if (!info.atom->size())
+        continue;
+      uint64_t nextAddr = _atomToAddress[info.atom];
+      // Thumb functions are recorded with the low address bit set.
+      if (_archHandler.isThumbFunction(*info.atom))
+        nextAddr |= 1;
+      uint64_t delta = nextAddr - addr;
+      if (delta) {
+        ByteBuffer buffer;
+        buffer.append_uleb128(delta);
+        file.functionStarts.insert(file.functionStarts.end(), buffer.bytes(),
+                                   buffer.bytes() + buffer.size());
+      }
+      addr = nextAddr;
+    }
+  }
+
+  // Null terminate, and pad to pointer size for this arch.
+  file.functionStarts.push_back(0);
+
+  auto size = file.functionStarts.size();
+  for (unsigned i = size, e = llvm::alignTo(size, _ctx.is64Bit() ? 8 : 4);
+       i != e; ++i)
+    file.functionStarts.push_back(0);
+}
+/// Converts per-atom data-in-code "transition" references into the list of
+/// data-in-code ranges stored in \p file.dataInCode.
+///
+/// Does nothing unless the context asks for the data-in-code load command.
+void Util::buildDataInCodeArray(const lld::File &, NormalizedFile &file) {
+  if (!_ctx.generateDataInCodeLoadCommand())
+    return;
+  for (SectionInfo *si : _sectionInfos) {
+    for (const AtomInfo &info : si->atomsAndOffsets) {
+      // Atoms that contain data-in-code have "transition" references
+      // which mark a point where the embedded data starts or ends.
+      // This needs to be converted to the mach-o format which is an array
+      // of data-in-code ranges.
+      uint32_t startOffset = 0;
+      // Mode 0 means "currently in code"; any other value is the kind of
+      // data region that is open.
+      DataRegionType mode = DataRegionType(0);
+      for (const Reference *ref : *info.atom) {
+        if (ref->kindNamespace() != Reference::KindNamespace::mach_o)
+          continue;
+        if (_archHandler.isDataInCodeTransition(ref->kindValue())) {
+          // The addend of a transition reference carries the new mode.
+          DataRegionType nextMode = (DataRegionType)ref->addend();
+          if (mode != nextMode) {
+            if (mode != 0) {
+              // Found end data range, so make range entry.
+              DataInCode entry;
+              entry.offset = si->address + info.offsetInSection + startOffset;
+              entry.length = ref->offsetInAtom() - startOffset;
+              entry.kind = mode;
+              file.dataInCode.push_back(entry);
+            }
+          }
+          mode = nextMode;
+          startOffset = ref->offsetInAtom();
+        }
+      }
+      if (mode != 0) {
+        // Function ends with data (no end transition).
+        DataInCode entry;
+        entry.offset = si->address + info.offsetInSection + startOffset;
+        entry.length = info.atom->size() - startOffset;
+        entry.kind = mode;
+        file.dataInCode.push_back(entry);
+      }
+    }
+  }
+}
+/// Collects rebase, bind and lazy-bind locations for every pointer and lazy
+/// pointer reference, appending them to \p nFile. Skipped for relocatable
+/// (MH_OBJECT) output.
+///
+/// For each lazy pointer it also patches the helper code with the running
+/// offset of that entry in the lazy binding info (see fixLazyReferenceImm()).
+void Util::addRebaseAndBindingInfo(const lld::File &atomFile,
+                                   NormalizedFile &nFile) {
+  if (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT)
+    return;
+
+  // Both are filled in by segIndexForSection() for each section.
+  uint8_t segmentIndex;
+  uint64_t segmentStartAddr;
+  // Running byte offset of the next entry in the lazy binding info.
+  uint32_t offsetInBindInfo = 0;
+
+  for (SectionInfo *sect : _sectionInfos) {
+    segIndexForSection(sect, segmentIndex, segmentStartAddr);
+    for (const AtomInfo &info : sect->atomsAndOffsets) {
+      const DefinedAtom *atom = info.atom;
+      for (const Reference *ref : *atom) {
+        uint64_t segmentOffset = _atomToAddress[atom] + ref->offsetInAtom()
+                               - segmentStartAddr;
+        const Atom* targ = ref->target();
+        if (_archHandler.isPointer(*ref)) {
+          // A pointer to a DefinedAtom requires rebasing.
+          if (isa<DefinedAtom>(targ)) {
+            RebaseLocation rebase;
+            rebase.segIndex = segmentIndex;
+            rebase.segOffset = segmentOffset;
+            rebase.kind = llvm::MachO::REBASE_TYPE_POINTER;
+            nFile.rebasingInfo.push_back(rebase);
+          }
+          // A pointer to an SharedLibraryAtom requires binding.
+          if (const SharedLibraryAtom *sa = dyn_cast<SharedLibraryAtom>(targ)) {
+            BindLocation bind;
+            bind.segIndex = segmentIndex;
+            bind.segOffset = segmentOffset;
+            bind.kind = llvm::MachO::BIND_TYPE_POINTER;
+            bind.canBeNull = sa->canBeNullAtRuntime();
+            bind.ordinal = dylibOrdinal(sa);
+            bind.symbolName = targ->name();
+            bind.addend = ref->addend();
+            nFile.bindingInfo.push_back(bind);
+          }
+        }
+        else if (_archHandler.isLazyPointer(*ref)) {
+          BindLocation bind;
+          if (const SharedLibraryAtom *sa = dyn_cast<SharedLibraryAtom>(targ)) {
+            bind.ordinal = dylibOrdinal(sa);
+          } else {
+            bind.ordinal = llvm::MachO::BIND_SPECIAL_DYLIB_SELF;
+          }
+          bind.segIndex = segmentIndex;
+          bind.segOffset = segmentOffset;
+          bind.kind = llvm::MachO::BIND_TYPE_POINTER;
+          bind.canBeNull = false; //sa->canBeNullAtRuntime();
+          bind.symbolName = targ->name();
+          bind.addend = ref->addend();
+          nFile.lazyBindingInfo.push_back(bind);
+
+          // Now that we know the segmentOffset and the ordinal attribute,
+          // we can fix the helper's code
+
+          fixLazyReferenceImm(atom, offsetInBindInfo, nFile);
+
+          // 5 bytes for opcodes + variable sizes (target name + \0 and offset
+          // encode's size); the constant 6 is those 5 bytes plus the \0.
+          offsetInBindInfo +=
+              6 + targ->name().size() + llvm::getULEB128Size(bind.segOffset);
+          // An ordinal too large for the immediate field is encoded as an
+          // extra ULEB128.
+          if (bind.ordinal > BIND_IMMEDIATE_MASK)
+            offsetInBindInfo += llvm::getULEB128Size(bind.ordinal);
+        }
+      }
+    }
+  }
+}
+/// Patches the 32-bit little-endian immediate inside a lazy-binding helper
+/// so that it holds \p offset.
+///
+/// \param atom   atom whose references lead to the helper(s).
+/// \param offset value to store in the helper's immediate.
+/// \param file   normalized file whose section content is replaced.
+///
+/// Section content is not modified in place: the whole section is copied
+/// into storage owned by \p file, patched, and swapped in.
+void Util::fixLazyReferenceImm(const DefinedAtom *atom, uint32_t offset,
+                               NormalizedFile &file) {
+  for (const auto &ref : *atom) {
+    const DefinedAtom *da = dyn_cast<DefinedAtom>(ref->target());
+    // NOTE(review): this stops at the first reference whose target is not a
+    // DefinedAtom, skipping any later references - confirm that is intended
+    // rather than `continue`.
+    if (da == nullptr)
+      return;
+
+    // Look in the target for the reference that marks the immediate.
+    const Reference *helperRef = nullptr;
+    for (const Reference *hr : *da) {
+      if (hr->kindValue() == _archHandler.lazyImmediateLocationKind()) {
+        helperRef = hr;
+        break;
+      }
+    }
+    if (helperRef == nullptr)
+      continue;
+
+    // TODO: maybe get the fixed atom content from _archHandler ?
+    for (SectionInfo *sectInfo : _sectionInfos) {
+      for (const AtomInfo &atomInfo : sectInfo->atomsAndOffsets) {
+        if (atomInfo.atom == helperRef->target()) {
+          auto sectionContent =
+              file.sections[sectInfo->normalizedSectionIndex].content;
+          uint8_t *rawb =
+              file.ownedAllocations.Allocate<uint8_t>(sectionContent.size());
+          llvm::MutableArrayRef<uint8_t> newContent{rawb,
+                                                    sectionContent.size()};
+          std::copy(sectionContent.begin(), sectionContent.end(),
+                    newContent.begin());
+          llvm::support::ulittle32_t *loc =
+              reinterpret_cast<llvm::support::ulittle32_t *>(
+                  &newContent[atomInfo.offsetInSection +
+                              helperRef->offsetInAtom()]);
+          *loc = offset;
+          file.sections[sectInfo->normalizedSectionIndex].content = newContent;
+        }
+      }
+    }
+  }
+}
+/// Appends an Export record to \p nFile.exportInfo for every global atom
+/// that is exported. Skipped for relocatable (MH_OBJECT) output.
+///
+/// When the context restricts exports, only atoms it names are emitted.
+void Util::addExportInfo(const lld::File &atomFile, NormalizedFile &nFile) {
+  if (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT)
+    return;
+
+  for (SectionInfo *sect : _sectionInfos) {
+    for (const AtomInfo &info : sect->atomsAndOffsets) {
+      const DefinedAtom *atom = info.atom;
+      if (atom->scope() != Atom::scopeGlobal)
+        continue;
+      if (_ctx.exportRestrictMode()) {
+        if (!_ctx.exportSymbolNamed(atom->name()))
+          continue;
+      }
+      Export exprt;
+      exprt.name = atom->name();
+      // Export offsets are relative to the image base address.
+      exprt.offset = _atomToAddress[atom] - _ctx.baseAddress();
+      // NOTE(review): kind is set explicitly so it is never left
+      // uninitialized - confirm against the Export declaration.
+      exprt.kind = EXPORT_SYMBOL_FLAGS_KIND_REGULAR;
+      if (atom->merge() == DefinedAtom::mergeAsWeak)
+        exprt.flags = EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
+      else
+        exprt.flags = 0;
+      exprt.otherOffset = 0;
+      exprt.otherName = StringRef();
+      nFile.exportInfo.push_back(exprt);
+    }
+  }
+}
+/// Computes the mach_header flags word for the output file.
+///
+/// Relocatable output only ever carries MH_SUBSECTIONS_VIA_SYMBOLS; linked
+/// images start from MH_DYLDLINK and add namespace, PIE and TLV flags.
+/// Every path returns a value.
+uint32_t Util::fileFlags() {
+  // FIXME: these need to determined at runtime.
+  if (_ctx.outputMachOType() == MH_OBJECT) {
+    return _subsectionsViaSymbols ? MH_SUBSECTIONS_VIA_SYMBOLS : 0;
+  } else {
+    uint32_t flags = MH_DYLDLINK;
+    // NOTE(review): the exact flag sets OR-ed in the first and last
+    // branches were reconstructed - confirm against the upstream file.
+    if (!_ctx.useFlatNamespace())
+      flags |= MH_TWOLEVEL | MH_NOUNDEFS;
+    if ((_ctx.outputMachOType() == MH_EXECUTE) && _ctx.PIE())
+      flags |= MH_PIE;
+    if (_hasTLVDescriptors)
+      flags |= (MH_PIE | MH_HAS_TLV_DESCRIPTORS);
+    return flags;
+  }
+}
+} // end anonymous namespace
+namespace lld {
+namespace mach_o {
+namespace normalized {
+/// Convert a set of Atoms into a normalized mach-o file.
+///
+/// \param atomFile the resolved atoms to lay out.
+/// \param context  linking options that drive layout and header fields.
+/// \returns the populated NormalizedFile, or the first error reported while
+///          synthesizing debug notes or building the symbol table.
+llvm::Expected<std::unique_ptr<NormalizedFile>>
+normalizedFromAtoms(const lld::File &atomFile,
+                    const MachOLinkingContext &context) {
+  // The util object buffers info until the normalized file can be made.
+  Util util(context);
+  util.processDefinedAtoms(atomFile);
+  util.organizeSections();
+
+  std::unique_ptr<NormalizedFile> f(new NormalizedFile());
+  NormalizedFile &normFile = *f;
+  normFile.arch = context.arch();
+  normFile.fileType = context.outputMachOType();
+  normFile.flags = util.fileFlags();
+  normFile.stackSize = context.stackSize();
+  normFile.installName = context.installName();
+  normFile.currentVersion = context.currentVersion();
+  normFile.compatVersion = context.compatibilityVersion();
+  normFile.os = context.os();
+
+  // If we are emitting an object file, then the min version is the maximum
+  // of the min's of all the source files and the cmdline.
+  if (normFile.fileType == llvm::MachO::MH_OBJECT)
+    normFile.minOSverson = std::max(context.osMinVersion(), util.minVersion());
+  else
+    normFile.minOSverson = context.osMinVersion();
+
+  normFile.minOSVersionKind = util.minVersionCommandType();
+
+  normFile.sdkVersion = context.sdkVersion();
+  normFile.sourceVersion = context.sourceVersion();
+
+  if (context.generateVersionLoadCommand() &&
+      context.os() != MachOLinkingContext::OS::unknown)
+    normFile.hasMinVersionLoadCommand = true;
+  else if (normFile.fileType == llvm::MachO::MH_OBJECT &&
+           util.allSourceFilesHaveMinVersions() &&
+           ((normFile.os != MachOLinkingContext::OS::unknown) ||
+            util.minVersionCommandType())) {
+    // If we emit an object file, then it should contain a min version load
+    // command if all of the source files also contained min version commands.
+    // Also, we either need to have a platform, or found a platform from the
+    // source object files.
+    normFile.hasMinVersionLoadCommand = true;
+  }
+  normFile.generateDataInCodeLoadCommand =
+      context.generateDataInCodeLoadCommand();
+  normFile.pageSize = context.pageSize();
+  normFile.rpaths = context.rpaths();
+  // The calls below run in the original order; later steps read state
+  // (atom addresses, symbol indices) that earlier steps populate.
+  util.addDependentDylibs(atomFile, normFile);
+  util.copySegmentInfo(normFile);
+  util.copySectionInfo(normFile);
+  util.assignAddressesToSections(normFile);
+  util.buildAtomToAddressMap();
+  if (auto err = util.synthesizeDebugNotes(normFile))
+    return std::move(err);
+  util.updateSectionInfo(normFile);
+  util.copySectionContent(normFile);
+  if (auto ec = util.addSymbols(atomFile, normFile)) {
+    return std::move(ec);
+  }
+  util.addIndirectSymbols(atomFile, normFile);
+  util.addRebaseAndBindingInfo(atomFile, normFile);
+  util.addExportInfo(atomFile, normFile);
+  util.addSectionRelocs(atomFile, normFile);
+  util.addFunctionStarts(atomFile, normFile);
+  util.buildDataInCodeArray(atomFile, normFile);
+  util.copyEntryPointAddress(normFile);
+
+  return std::move(f);
+}
+} // namespace normalized
+} // namespace mach_o
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp
new file mode 100644
index 000000000000..473de894894e
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp
@@ -0,0 +1,1635 @@
+//===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+/// \file Converts from in-memory normalized mach-o to in-memory Atoms.
+/// +------------+
+/// | normalized |
+/// +------------+
+/// |
+/// |
+/// v
+/// +-------+
+/// | Atoms |
+/// +-------+
+#include "ArchHandler.h"
+#include "Atoms.h"
+#include "File.h"
+#include "MachONormalizedFile.h"
+#include "MachONormalizedFileBinaryUtils.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Core/Error.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm::MachO;
+using namespace lld::mach_o::normalized;
+#define DEBUG_TYPE "normalized-file-to-atoms"
+namespace lld {
+namespace mach_o {
+namespace { // anonymous
+#define ENTRY(seg, sect, type, atomType) \
+ {seg, sect, type, DefinedAtom::atomType }
+/// One row of the table mapping a mach-o (segment, section, section type)
+/// triple to the lld content type used for atoms from that section.
+struct MachORelocatableSectionToAtomType {
+  StringRef segmentName;
+  StringRef sectionName;
+  SectionType sectionType;
+  DefinedAtom::ContentType atomType;
+};
+
+/// Lookup table scanned in order by atomTypeFromSection(); the first
+/// matching row wins. Empty segment/section names act as wildcards, and the
+/// typeUnknown row terminates the scan.
+const MachORelocatableSectionToAtomType sectsToAtomType[] = {
+  ENTRY("__TEXT", "__text", S_REGULAR, typeCode),
+  ENTRY("__TEXT", "__text", S_REGULAR, typeResolver),
+  ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString),
+  ENTRY("", "", S_CSTRING_LITERALS, typeCString),
+  ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String),
+  ENTRY("__TEXT", "__const", S_REGULAR, typeConstant),
+  ENTRY("__TEXT", "__const_coal", S_COALESCED, typeConstant),
+  ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI),
+  ENTRY("__TEXT", "__eh_frame", S_REGULAR, typeCFI),
+  ENTRY("__TEXT", "__literal4", S_4BYTE_LITERALS, typeLiteral4),
+  ENTRY("__TEXT", "__literal8", S_8BYTE_LITERALS, typeLiteral8),
+  ENTRY("__TEXT", "__literal16", S_16BYTE_LITERALS, typeLiteral16),
+  ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA),
+  ENTRY("__DATA", "__data", S_REGULAR, typeData),
+  ENTRY("__DATA", "__datacoal_nt", S_COALESCED, typeData),
+  ENTRY("__DATA", "__const", S_REGULAR, typeConstData),
+  ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString),
+  ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS,
+        typeInitializerPtr),
+  ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS,
+        typeTerminatorPtr),
+  // NOTE(review): the key halves of the two typeGOT rows were reconstructed
+  // - confirm the segment/section names against the upstream file.
+  ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS,
+        typeGOT),
+  ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill),
+  ENTRY("", "", S_NON_LAZY_SYMBOL_POINTERS,
+        typeGOT),
+  ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples),
+  ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES,
+        typeThunkTLV),
+  ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData),
+  ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL,
+        typeTLVInitialZeroFill),
+  ENTRY("__DATA", "__objc_imageinfo", S_REGULAR, typeObjCImageInfo),
+  ENTRY("__DATA", "__objc_catlist", S_REGULAR, typeObjC2CategoryList),
+  ENTRY("", "", S_INTERPOSING, typeInterposingTuples),
+  ENTRY("__LD", "__compact_unwind", S_REGULAR,
+        typeCompactUnwindInfo),
+  ENTRY("", "", S_REGULAR, typeUnknown)
+};
+#undef ENTRY
+/// Figures out ContentType of a mach-o section.
+///
+/// \param section           the section to classify.
+/// \param customSectionName set to true when the matching table row is a
+///                          pure wildcard (both names empty), false otherwise.
+/// \returns the content type of the first matching sectsToAtomType row;
+///          typeCode for unmatched sections flagged as pure instructions;
+///          typeUnknown otherwise.
+DefinedAtom::ContentType atomTypeFromSection(const Section &section,
+                                             bool &customSectionName) {
+  // First look for match of name and type. Empty names in table are wildcards.
+  customSectionName = false;
+  for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
+       p->atomType != DefinedAtom::typeUnknown; ++p) {
+    if (p->sectionType != section.type)
+      continue;
+    if (!p->segmentName.equals(section.segmentName) && !p->segmentName.empty())
+      continue;
+    if (!p->sectionName.equals(section.sectionName) && !p->sectionName.empty())
+      continue;
+    customSectionName = p->segmentName.empty() && p->sectionName.empty();
+    return p->atomType;
+  }
+  // Look for code denoted by section attributes
+  if (section.attributes & S_ATTR_PURE_INSTRUCTIONS)
+    return DefinedAtom::typeCode;
+
+  return DefinedAtom::typeUnknown;
+}
+/// Strategies for splitting a section's content into atoms; selected per
+/// content type by sectionParseInfo().
+enum AtomizeModel {
+  atomizeAtSymbols,
+  atomizeFixedSize,
+  atomizePointerSize,
+  atomizeUTF8,
+  atomizeUTF16,
+  atomizeCFI,
+  atomizeCU,
+  atomizeCFString
+};
+/// Returns info on how to atomize a section of the specified ContentType.
+///
+/// \param atomType     content type to look up.
+/// \param sizeMultiple set to the size multiple listed for the type.
+/// \param scope        set to the default scope for atoms of the type.
+/// \param merge        set to the default merge policy for the type.
+/// \param atomizeModel set to the splitting strategy for the type.
+///
+/// Types not listed in the table get the same values as typeUnknown.
+void sectionParseInfo(DefinedAtom::ContentType atomType,
+                      unsigned int &sizeMultiple,
+                      DefinedAtom::Scope &scope,
+                      DefinedAtom::Merge &merge,
+                      AtomizeModel &atomizeModel) {
+  struct ParseInfo {
+    DefinedAtom::ContentType atomType;
+    unsigned int sizeMultiple;
+    DefinedAtom::Scope scope;
+    DefinedAtom::Merge merge;
+    AtomizeModel atomizeModel;
+  };
+
+  #define ENTRY(type, size, scope, merge, model) \
+    {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model }
+
+  static const ParseInfo parseInfo[] = {
+    ENTRY(typeCode, 1, scopeGlobal, mergeNo,
+          atomizeAtSymbols),
+    ENTRY(typeData, 1, scopeGlobal, mergeNo,
+          atomizeAtSymbols),
+    ENTRY(typeConstData, 1, scopeGlobal, mergeNo,
+          atomizeAtSymbols),
+    ENTRY(typeZeroFill, 1, scopeGlobal, mergeNo,
+          atomizeAtSymbols),
+    ENTRY(typeConstant, 1, scopeGlobal, mergeNo,
+          atomizeAtSymbols),
+    ENTRY(typeCString, 1, scopeLinkageUnit, mergeByContent,
+          atomizeUTF8),
+    ENTRY(typeUTF16String, 1, scopeLinkageUnit, mergeByContent,
+          atomizeUTF16),
+    ENTRY(typeCFI, 4, scopeTranslationUnit, mergeNo,
+          atomizeCFI),
+    ENTRY(typeLiteral4, 4, scopeLinkageUnit, mergeByContent,
+          atomizeFixedSize),
+    ENTRY(typeLiteral8, 8, scopeLinkageUnit, mergeByContent,
+          atomizeFixedSize),
+    ENTRY(typeLiteral16, 16, scopeLinkageUnit, mergeByContent,
+          atomizeFixedSize),
+    ENTRY(typeCFString, 4, scopeLinkageUnit, mergeByContent,
+          atomizeCFString),
+    ENTRY(typeInitializerPtr, 4, scopeTranslationUnit, mergeNo,
+          atomizePointerSize),
+    ENTRY(typeTerminatorPtr, 4, scopeTranslationUnit, mergeNo,
+          atomizePointerSize),
+    ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo,
+          atomizeCU),
+    ENTRY(typeGOT, 4, scopeLinkageUnit, mergeByContent,
+          atomizePointerSize),
+    ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent,
+          atomizePointerSize),
+    ENTRY(typeUnknown, 1, scopeGlobal, mergeNo,
+          atomizeAtSymbols)
+  };
+  #undef ENTRY
+  const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo);
+  for (int i=0; i < tableLen; ++i) {
+    if (parseInfo[i].atomType == atomType) {
+      sizeMultiple = parseInfo[i].sizeMultiple;
+      scope = parseInfo[i].scope;
+      merge = parseInfo[i].merge;
+      atomizeModel = parseInfo[i].atomizeModel;
+      return;
+    }
+  }
+
+  // Unknown type is atomized by symbols.
+  sizeMultiple = 1;
+  scope = DefinedAtom::scopeGlobal;
+  merge = DefinedAtom::mergeNo;
+  atomizeModel = atomizeAtSymbols;
+}
+/// Maps the N_EXT / N_PEXT scope bits of a symbol to an lld atom scope.
+///
+/// Any combination other than the four handled here is a fatal error
+/// (llvm_unreachable).
+Atom::Scope atomScope(uint8_t scope) {
+  switch (scope) {
+  case N_EXT:
+    return Atom::scopeGlobal;
+  case N_PEXT:
+  case N_PEXT | N_EXT:
+    return Atom::scopeLinkageUnit;
+  case 0:
+    return Atom::scopeTranslationUnit;
+  }
+  llvm_unreachable("unknown scope value!");
+}
+/// Appends to \p outSyms a pointer to every symbol in \p inSymbols that is
+/// a section definition (N_SECT) located in section \p sectionIndex.
+///
+/// The stored pointers refer to elements of \p inSymbols, which must
+/// therefore outlive \p outSyms.
+void appendSymbolsInSection(const std::vector<Symbol> &inSymbols,
+                            uint32_t sectionIndex,
+                            SmallVector<const Symbol *, 64> &outSyms) {
+  for (const Symbol &sym : inSymbols) {
+    // Only look at definition symbols.
+    if ((sym.type & N_TYPE) != N_SECT)
+      continue;
+    if (sym.sect != sectionIndex)
+      continue;
+    outSyms.push_back(&sym);
+  }
+}
+/// Creates one atom in \p file covering [symbolAddr, nextSymbolAddr) of
+/// \p section.
+///
+/// \param atomType        content type chosen for the section.
+/// \param section         section the symbol lives in.
+/// \param file            file that receives the new atom.
+/// \param symbolAddr      address of the symbol that starts the atom.
+/// \param symbolName      name of the atom (may be empty).
+/// \param symbolDescFlags n_desc bits of the symbol.
+/// \param symbolScope     scope to give the atom.
+/// \param nextSymbolAddr  address where the atom ends.
+/// \param scatterable     when false the atom is marked no-dead-strip.
+/// \param copyRefs        forwarded to the atom-creation calls that take it.
+void atomFromSymbol(DefinedAtom::ContentType atomType, const Section &section,
+                    MachOFile &file, uint64_t symbolAddr, StringRef symbolName,
+                    uint16_t symbolDescFlags, Atom::Scope symbolScope,
+                    uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) {
+  // Mach-O symbol table does not have size in it. Instead the size is the
+  // difference between this and the next symbol.
+  uint64_t size = nextSymbolAddr - symbolAddr;
+  uint64_t offset = symbolAddr - section.address;
+  bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable;
+  if (isZeroFillSection(section.type)) {
+    file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size,
+                                noDeadStrip, copyRefs, &section);
+  } else {
+    DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF)
+                              ? DefinedAtom::mergeAsWeak : DefinedAtom::mergeNo;
+    bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF);
+    if (atomType == DefinedAtom::typeUnknown) {
+      // Mach-O needs a segment and section name. Concatenate those two
+      // with a / separator (e.g. "seg/sect") to fit into the lld model
+      // of just a section name.
+      std::string segSectName = section.segmentName.str()
+                                + "/" + section.sectionName.str();
+      // NOTE(review): a literal `true` is passed here where the other
+      // branches forward copyRefs - confirm that is intended.
+      file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType,
+                                         merge, thumb, noDeadStrip, offset,
+                                         size, segSectName, true, &section);
+    } else {
+      // A code symbol flagged as a resolver becomes a resolver atom.
+      if ((atomType == lld::DefinedAtom::typeCode) &&
+          (symbolDescFlags & N_SYMBOL_RESOLVER)) {
+        atomType = lld::DefinedAtom::typeResolver;
+      }
+      file.addDefinedAtom(symbolName, symbolScope, atomType, merge,
+                          offset, size, thumb, noDeadStrip, copyRefs, &section);
+    }
+  }
+}
+/// Atomizes a section whose atom boundaries are defined by its symbols.
+///
+/// Gathers the global and local symbols that belong to \p section, orders
+/// them by address (with tie-breakers for aliases) and calls atomFromSymbol()
+/// once per resulting address range.  Content preceding the first symbol, or
+/// the whole content of a symbol-less section, becomes an anonymous atom.
+/// When \p scatterable is false the atoms of the section are additionally
+/// chained together with kindLayoutAfter references.
+///
+/// \returns llvm::Error::success(); this function has no failing path.
+llvm::Error processSymboledSection(DefinedAtom::ContentType atomType,
+                                   const Section &section,
+                                   const NormalizedFile &normalizedFile,
+                                   MachOFile &file, bool scatterable,
+                                   bool copyRefs) {
+  // Find section's index.  Indexes are 1-based; if the section is not part
+  // of normalizedFile.sections the index ends up one past the last section.
+  uint32_t sectIndex = 1;
+  for (auto &sect : normalizedFile.sections) {
+    if (&sect == &section)
+      break;
+    ++sectIndex;
+  }
+  // Find all symbols in this section.
+  SmallVector<const Symbol *, 64> symbols;
+  appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols);
+  appendSymbolsInSection(normalizedFile.localSymbols, sectIndex, symbols);
+  // Sort symbols.
+  std::sort(symbols.begin(), symbols.end(),
+            [](const Symbol *lhs, const Symbol *rhs) -> bool {
+              if (lhs == rhs)
+                return false;
+              // First by address.
+              uint64_t lhsAddr = lhs->value;
+              uint64_t rhsAddr = rhs->value;
+              if (lhsAddr != rhsAddr)
+                return lhsAddr < rhsAddr;
+              // If same address, one is an alias so sort by scope.
+              Atom::Scope lScope = atomScope(lhs->scope);
+              Atom::Scope rScope = atomScope(rhs->scope);
+              if (lScope != rScope)
+                return lScope < rScope;
+              // If same address and scope, see if one might be better as
+              // the alias.  startswith() is used instead of front() so that
+              // a symbol with an empty name is handled without touching
+              // out-of-range memory.
+              bool lPrivate = lhs->name.startswith("l");
+              bool rPrivate = rhs->name.startswith("l");
+              if (lPrivate != rPrivate)
+                return lPrivate;
+              // If same address and scope, sort by name.
+              return lhs->name < rhs->name;
+            });
+  // If section has no symbols and no content, there are no atoms.
+  if (symbols.empty() && section.content.empty())
+    return llvm::Error::success();
+  if (symbols.empty()) {
+    // Section has no symbols, put all content in one anonymous atom.
+    atomFromSymbol(atomType, section, file, section.address, StringRef(),
+                   0, Atom::scopeTranslationUnit,
+                   section.address + section.content.size(),
+                   scatterable, copyRefs);
+  }
+  else if (symbols.front()->value != section.address) {
+    // Section has anonymous content before first symbol.
+    atomFromSymbol(atomType, section, file, section.address, StringRef(),
+                   0, Atom::scopeTranslationUnit, symbols.front()->value,
+                   scatterable, copyRefs);
+  }
+  // Each symbol's atom ends where the next symbol starts, so an atom is
+  // emitted for the previous symbol once the following one is known.
+  const Symbol *lastSym = nullptr;
+  for (const Symbol *sym : symbols) {
+    if (lastSym != nullptr) {
+      // Ignore any assembler added "ltmpNNN" symbol at start of section
+      // if there is another symbol at the start.
+      if ((lastSym->value != sym->value)
+          || lastSym->value != section.address
+          || !lastSym->name.startswith("ltmp")) {
+        atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
+                       lastSym->desc, atomScope(lastSym->scope), sym->value,
+                       scatterable, copyRefs);
+      }
+    }
+    lastSym = sym;
+  }
+  // The final symbol's atom extends to the end of the section content.
+  if (lastSym != nullptr) {
+    atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
+                   lastSym->desc, atomScope(lastSym->scope),
+                   section.address + section.content.size(),
+                   scatterable, copyRefs);
+  }
+  // If object built without .subsections_via_symbols, add reference chain.
+  if (!scatterable) {
+    MachODefinedAtom *prevAtom = nullptr;
+    file.eachAtomInSection(section,
+                           [&](MachODefinedAtom *atom, uint64_t offset)->void {
+      if (prevAtom)
+        prevAtom->addReference(Reference::KindNamespace::all,
+                               Reference::KindArch::all,
+                               Reference::kindLayoutAfter, 0, atom, 0);
+      prevAtom = atom;
+    });
+  }
+  return llvm::Error::success();
+/// Splits \p section into atoms according to the atomization model that
+/// sectionParseInfo() reports for \p atomType.
+///
+/// Sections atomized at symbols are delegated to processSymboledSection().
+/// For every other model the content is walked front to back and one
+/// anonymous atom is added to \p file per computed chunk; when
+/// \p customSectionName is true the atom is placed in a custom section named
+/// "segment/section".
+///
+/// \returns an error if the section size is not a multiple of the required
+/// size, if a string is not zero terminated, or if a chunk would extend past
+/// the end of the section content.
+llvm::Error processSection(DefinedAtom::ContentType atomType,
+                           const Section &section,
+                           bool customSectionName,
+                           const NormalizedFile &normalizedFile,
+                           MachOFile &file, bool scatterable,
+                           bool copyRefs) {
+  const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
+  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
+  // Get info on how to atomize section.
+  unsigned int sizeMultiple;
+  DefinedAtom::Scope scope;
+  DefinedAtom::Merge merge;
+  AtomizeModel atomizeModel;
+  sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel);
+  // Validate section size.
+  if ((section.content.size() % sizeMultiple) != 0)
+    return llvm::make_error<GenericError>(Twine("Section ")
+                                          + section.segmentName
+                                          + "/" + section.sectionName
+                                          + " has size ("
+                                          + Twine(section.content.size())
+                                          + ") which is not a multiple of "
+                                          + Twine(sizeMultiple));
+  if (atomizeModel == atomizeAtSymbols) {
+    // Break section up into atoms at symbol boundaries.
+    return processSymboledSection(atomType, section, normalizedFile, file,
+                                  scatterable, copyRefs);
+  } else {
+    for (unsigned int offset = 0, e = section.content.size(); offset != e;) {
+      // Bytes left in the section; offset never exceeds e (checked below).
+      const unsigned int remaining = e - offset;
+      // Reset per atom so a stale value can never leak into this iteration.
+      unsigned int size = 0;
+      switch (atomizeModel) {
+      case atomizeFixedSize:
+        // Break section up into atoms each with a fixed size.
+        size = sizeMultiple;
+        break;
+      case atomizePointerSize:
+        // Break section up into atoms each the size of a pointer.
+        size = is64 ? 8 : 4;
+        break;
+      case atomizeUTF8:
+        // Break section up into zero terminated c-strings.
+        size = 0;
+        for (unsigned int i = offset; i < e; ++i) {
+          if (section.content[i] == 0) {
+            size = i + 1 - offset;
+            break;
+          }
+        }
+        break;
+      case atomizeUTF16:
+        // Break section up into zero terminated UTF16 strings.  Only look
+        // at complete 16-bit units so the second byte is always in range.
+        size = 0;
+        for (unsigned int i = offset; i + 1 < e; i += 2) {
+          if ((section.content[i] == 0) && (section.content[i + 1] == 0)) {
+            size = i + 2 - offset;
+            break;
+          }
+        }
+        break;
+      case atomizeCFI: {
+        // Break section up into dwarf unwind CFIs (FDE or CIE).  The record
+        // starts with a 32-bit length that does not count itself; make sure
+        // that field is readable and do the bounds check in 64 bits so a
+        // huge length cannot wrap around.
+        uint64_t cfiSize = 0;
+        if (remaining >= 4)
+          cfiSize = uint64_t(read32(&section.content[offset], isBig)) + 4;
+        if (remaining < 4 || cfiSize > remaining) {
+          return llvm::make_error<GenericError>(Twine("Section ")
+                                                + section.segmentName
+                                                + "/" + section.sectionName
+                                                + " is malformed.  Size of CFI "
+                                                "starting at offset ("
+                                                + Twine(offset)
+                                                + ") is past end of section.");
+        }
+        size = cfiSize;
+        break;
+      }
+      case atomizeCU:
+        // Break section up into compact unwind entries.
+        size = is64 ? 32 : 20;
+        break;
+      case atomizeCFString:
+        // Break section up into NS/CFString objects.
+        size = is64 ? 32 : 16;
+        break;
+      case atomizeAtSymbols:
+        // Handled before the loop; listed only to keep the switch complete.
+        break;
+      }
+      if (size == 0) {
+        return llvm::make_error<GenericError>(Twine("Section ")
+                                              + section.segmentName
+                                              + "/" + section.sectionName
+                                              + " is malformed.  The last atom "
+                                              "is not zero terminated.");
+      }
+      // A fixed-size chunk that does not fit would push offset past e and
+      // the loop condition (offset != e) would never become false.
+      if (size > remaining) {
+        return llvm::make_error<GenericError>(Twine("Section ")
+                                              + section.segmentName
+                                              + "/" + section.sectionName
+                                              + " is malformed.  Atom "
+                                              "starting at offset ("
+                                              + Twine(offset)
+                                              + ") is past end of section.");
+      }
+      if (customSectionName) {
+        // Mach-O needs a segment and section name.  Concatenate those two
+        // with a / separator (e.g. "seg/sect") to fit into the lld model
+        // of just a section name.
+        std::string segSectName = section.segmentName.str()
+                                  + "/" + section.sectionName.str();
+        file.addDefinedAtomInCustomSection(StringRef(), scope, atomType,
+                                           merge, false, false, offset,
+                                           size, segSectName, true, &section);
+      } else {
+        file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size,
+                            false, false, copyRefs, &section);
+      }
+      offset += size;
+    }
+  }
+  return llvm::Error::success();
+/// Returns the first section of \p normalizedFile whose half-open range
+/// [address, address + content size) contains \p address, or nullptr when
+/// no section does.
+const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile,
+                                          uint64_t address) {
+  for (const Section &candidate : normalizedFile.sections) {
+    const uint64_t rangeBegin = candidate.address;
+    const uint64_t rangeEnd = rangeBegin + candidate.content.size();
+    // Skip sections that start after the address or end at/before it.
+    if (address < rangeBegin || address >= rangeEnd)
+      continue;
+    return &candidate;
+  }
+  return nullptr;
+/// Finds the atom in \p file that covers the absolute address \p addr.
+///
+/// The address is first mapped to its section with
+/// findSectionCoveringAddress() and then converted to a section-relative
+/// offset for MachOFile::findAtomCoveringAddress().
+///
+/// \param addend [out] offset reported by the atom lookup; it is set to 0
+///        when no section covers \p addr so callers never read an
+///        indeterminate value.
+/// \returns the covering atom, or nullptr if no section covers \p addr or the
+///          atom lookup yields nothing.
+const MachODefinedAtom *
+findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file,
+                        uint64_t addr, Reference::Addend &addend) {
+  // Give the out-parameter a defined value on every path.
+  addend = 0;
+  const Section *sect = findSectionCoveringAddress(normalizedFile, addr);
+  if (!sect)
+    return nullptr;
+  // Zero-initialised in case the lookup leaves it untouched on a miss.
+  uint32_t offsetInTarget = 0;
+  uint64_t offsetInSect = addr - sect->address;
+  auto atom =
+      file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
+  addend = offsetInTarget;
+  return atom;
+// Walks all relocations for a section in a normalized .o file and
+// creates corresponding lld::Reference objects.
+//
+// For each relocation the containing atom is looked up, the ArchHandler
+// translates the relocation (or relocation pair) into a reference kind,
+// target and addend, and the resulting reference is added to that atom.
+// Returns an error for out-of-range section/symbol indexes, relocations
+// outside of any atom, a paired relocation without its second record, and
+// any error reported by the ArchHandler (wrapped with relocation details).
+llvm::Error convertRelocs(const Section &section,
+                          const NormalizedFile &normalizedFile,
+                          bool scatterable,
+                          MachOFile &file,
+                          ArchHandler &handler) {
+  // Utility function for ArchHandler to find atom by its address.
+  // A section index of 0 means the section is found from the address; any
+  // other value is a 1-based index into normalizedFile.sections.
+  auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr,
+                         const lld::Atom **atom, Reference::Addend *addend)
+                         -> llvm::Error {
+    if (sectIndex > normalizedFile.sections.size())
+      return llvm::make_error<GenericError>(Twine("out of range section "
+                                     "index (") + Twine(sectIndex) + ")");
+    const Section *sect = nullptr;
+    if (sectIndex == 0) {
+      sect = findSectionCoveringAddress(normalizedFile, addr);
+      if (!sect)
+        return llvm::make_error<GenericError>(Twine("address (" + Twine(addr)
+                                       + ") is not in any section"));
+    } else {
+      sect = &normalizedFile.sections[sectIndex-1];
+    }
+    uint32_t offsetInTarget = 0;
+    uint64_t offsetInSect = addr - sect->address;
+    *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
+    *addend = offsetInTarget;
+    return llvm::Error::success();
+  };
+  // Utility function for ArchHandler to find atom by its symbol index.
+  // Symbol indexes run over stabs, then locals, then globals, then undefs.
+  auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result)
+                           -> llvm::Error {
+    // Find symbol from index.
+    const Symbol *sym = nullptr;
+    uint32_t numStabs  = normalizedFile.stabsSymbols.size();
+    uint32_t numLocal  = normalizedFile.localSymbols.size();
+    uint32_t numGlobal = normalizedFile.globalSymbols.size();
+    uint32_t numUndef  = normalizedFile.undefinedSymbols.size();
+    assert(symbolIndex >= numStabs && "Searched for stab via atomBySymbol?");
+    if (symbolIndex < numStabs+numLocal) {
+      sym = &normalizedFile.localSymbols[symbolIndex-numStabs];
+    } else if (symbolIndex < numStabs+numLocal+numGlobal) {
+      sym = &normalizedFile.globalSymbols[symbolIndex-numStabs-numLocal];
+    } else if (symbolIndex < numStabs+numLocal+numGlobal+numUndef) {
+      sym = &normalizedFile.undefinedSymbols[symbolIndex-numStabs-numLocal-
+                                             numGlobal];
+    } else {
+      return llvm::make_error<GenericError>(Twine("symbol index (")
+                                     + Twine(symbolIndex) + ") out of range");
+    }
+    // Find atom from symbol.
+    if ((sym->type & N_TYPE) == N_SECT) {
+      // Section numbers are 1-based, so 0 is as invalid as a too-large one
+      // and must not be turned into sections[-1].
+      if (sym->sect == 0 || sym->sect > normalizedFile.sections.size())
+        return llvm::make_error<GenericError>(Twine("symbol section index (")
+                                        + Twine(sym->sect) + ") out of range ");
+      const Section &symSection = normalizedFile.sections[sym->sect-1];
+      uint64_t targetOffsetInSect = sym->value - symSection.address;
+      MachODefinedAtom *target = file.findAtomCoveringAddress(symSection,
+                                                            targetOffsetInSect);
+      if (target) {
+        *result = target;
+        return llvm::Error::success();
+      }
+      return llvm::make_error<GenericError>("no atom found for defined symbol");
+    } else if ((sym->type & N_TYPE) == N_UNDF) {
+      const lld::Atom *target = file.findUndefAtom(sym->name);
+      if (target) {
+        *result = target;
+        return llvm::Error::success();
+      }
+      return llvm::make_error<GenericError>("no undefined atom found for sym");
+    } else {
+      // Search undefs
+      return llvm::make_error<GenericError>("no atom found for symbol");
+    }
+  };
+  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
+  // Use old-school iterator so that paired relocations can be grouped.
+  for (auto it=section.relocations.begin(), e=section.relocations.end();
+                                                                it != e; ++it) {
+    const Relocation &reloc = *it;
+    // Find atom this relocation is in.
+    if (reloc.offset > section.content.size())
+      return llvm::make_error<GenericError>(
+                                    Twine("r_address (") + Twine(reloc.offset)
+                                    + ") is larger than section size ("
+                                    + Twine(section.content.size()) + ")");
+    uint32_t offsetInAtom = 0;
+    MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section,
+                                                            reloc.offset,
+                                                            &offsetInAtom);
+    // Report instead of asserting: without an atom the reference below
+    // would be added through a null pointer in builds without assertions.
+    if (!inAtom)
+      return llvm::make_error<GenericError>(
+                                    Twine("no atom found covering r_address (")
+                                    + Twine(reloc.offset) + ") in section "
+                                    + section.segmentName + "/"
+                                    + section.sectionName);
+    uint64_t fixupAddress = section.address + reloc.offset;
+    const lld::Atom *target = nullptr;
+    Reference::Addend addend = 0;
+    Reference::KindValue kind;
+    if (handler.isPairedReloc(reloc)) {
+      // Handle paired relocations together.  The second record must exist;
+      // a truncated list would otherwise dereference the end iterator.
+      auto pairIt = it;
+      ++pairIt;
+      if (pairIt == e)
+        return llvm::make_error<GenericError>(
+                                    Twine("paired relocation in section ")
+                                    + section.segmentName + "/"
+                                    + section.sectionName
+                                    + " (r_address="
+                                    + Twine::utohexstr(reloc.offset)
+                                    + ") is missing its second relocation");
+      it = pairIt;
+      const Relocation &reloc2 = *it;
+      auto relocErr = handler.getPairReferenceInfo(
+          reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable,
+          atomByAddr, atomBySymbol, &kind, &target, &addend);
+      if (relocErr) {
+        return handleErrors(std::move(relocErr),
+                            [&](std::unique_ptr<GenericError> GE) {
+          return llvm::make_error<GenericError>(
+            Twine("bad relocation (") + GE->getMessage()
+             + ") in section "
+             + section.segmentName + "/" + section.sectionName
+             + " (r1_address=" + Twine::utohexstr(reloc.offset)
+             + ", r1_type=" + Twine(reloc.type)
+             + ", r1_extern=" + Twine(reloc.isExtern)
+             + ", r1_length=" + Twine((int)reloc.length)
+             + ", r1_pcrel=" + Twine(reloc.pcRel)
+             + (!reloc.scattered ? (Twine(", r1_symbolnum=")
+                                    + Twine(reloc.symbol))
+                                 : (Twine(", r1_scattered=1, r1_value=")
+                                    + Twine(reloc.value)))
+             + ")"
+             + ", (r2_address=" + Twine::utohexstr(reloc2.offset)
+             + ", r2_type=" + Twine(reloc2.type)
+             + ", r2_extern=" + Twine(reloc2.isExtern)
+             + ", r2_length=" + Twine((int)reloc2.length)
+             + ", r2_pcrel=" + Twine(reloc2.pcRel)
+             + (!reloc2.scattered ? (Twine(", r2_symbolnum=")
+                                     + Twine(reloc2.symbol))
+                                  : (Twine(", r2_scattered=1, r2_value=")
+                                     + Twine(reloc2.value)))
+             + ")" );
+          });
+      }
+    }
+    else {
+      // Use ArchHandler to convert relocation record into information
+      // needed to instantiate an lld::Reference object.
+      auto relocErr = handler.getReferenceInfo(
+          reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr,
+          atomBySymbol, &kind, &target, &addend);
+      if (relocErr) {
+        return handleErrors(std::move(relocErr),
+                            [&](std::unique_ptr<GenericError> GE) {
+          return llvm::make_error<GenericError>(
+            Twine("bad relocation (") + GE->getMessage()
+             + ") in section "
+             + section.segmentName + "/" + section.sectionName
+             + " (r_address=" + Twine::utohexstr(reloc.offset)
+             + ", r_type=" + Twine(reloc.type)
+             + ", r_extern=" + Twine(reloc.isExtern)
+             + ", r_length=" + Twine((int)reloc.length)
+             + ", r_pcrel=" + Twine(reloc.pcRel)
+             + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol))
+                                 : (Twine(", r_scattered=1, r_value=")
+                                    + Twine(reloc.value)))
+             + ")" );
+          });
+      }
+    }
+    // Instantiate an lld::Reference object and add to its atom.
+    inAtom->addReference(Reference::KindNamespace::mach_o,
+                         handler.kindArch(),
+                         kind, offsetInAtom, target, addend);
+  }
+  return llvm::Error::success();
+/// Tells whether \p section carries debug info: it must have the
+/// S_ATTR_DEBUG attribute set and live in the "__DWARF" segment.
+bool isDebugInfoSection(const Section &section) {
+  const bool hasDebugAttribute = (section.attributes & S_ATTR_DEBUG) != 0;
+  // The segment name is only consulted when the attribute is present.
+  return hasDebugAttribute && section.segmentName.equals("__DWARF");
+/// Linearly searches the defined atoms of \p file for one whose name equals
+/// \p name and returns the first match, or nullptr if there is none.
+static const Atom* findDefinedAtomByName(MachOFile &file, Twine name) {
+  // Materialise the Twine once so each comparison works on a plain string.
+  const std::string wanted = name.str();
+  for (auto *candidate : file.defined()) {
+    if (candidate->name() == wanted)
+      return candidate;
+  }
+  return nullptr;
+/// Duplicates \p str into memory obtained from \p alloc, appends a
+/// terminating NUL, and returns a StringRef built from the new C string.
+static StringRef copyDebugString(StringRef str, BumpPtrAllocator &alloc) {
+  const size_t length = str.size();
+  // One extra byte for the terminator.
+  char *buffer = alloc.Allocate<char>(length + 1);
+  memcpy(buffer, str.data(), length);
+  buffer[length] = '\0';
+  return buffer;
+/// Converts the stabs symbols of \p normalizedFile into a StabsDebugInfo
+/// attached to \p file.
+///
+/// A two-state machine is used: outside of an N_BNSYM/N_ENSYM bracket only
+/// N_BNSYM, N_SO, N_OSO and N_GSYM are accepted; inside a bracket only
+/// N_FUN and the closing N_ENSYM are accepted, and every stab is associated
+/// with the atom found for the opening N_BNSYM.  When \p copyRefs is true,
+/// stab strings are copied into an allocator that is handed to the debug
+/// info object.
+///
+/// \returns an error for unsupported/unrecognized stabs, for an N_BNSYM or
+/// N_GSYM whose atom cannot be found, and for an N_BNSYM that does not point
+/// at the start of its atom.
+llvm::Error parseStabs(MachOFile &file,
+                       const NormalizedFile &normalizedFile,
+                       bool copyRefs) {
+  if (normalizedFile.stabsSymbols.empty())
+    return llvm::Error::success();
+  // FIXME: Kill this off when we can move to sane yaml parsing.
+  std::unique_ptr<BumpPtrAllocator> allocator;
+  if (copyRefs)
+    allocator = llvm::make_unique<BumpPtrAllocator>();
+  enum { start, inBeginEnd } state = start;
+  const Atom *currentAtom = nullptr;
+  uint64_t currentAtomAddress = 0;
+  StabsDebugInfo::StabsList stabsList;
+  for (const auto &stabSym : normalizedFile.stabsSymbols) {
+    Stab stab(nullptr, stabSym.type, stabSym.sect, stabSym.desc,
+              stabSym.value, stabSym.name);
+    switch (state) {
+    case start:
+      switch (static_cast<StabType>(stabSym.type)) {
+      case N_BNSYM: {
+        state = inBeginEnd;
+        currentAtomAddress = stabSym.value;
+        // Initialised so the check below never reads an indeterminate
+        // value when the lookup fails before writing the addend.
+        Reference::Addend addend = 0;
+        currentAtom = findAtomCoveringAddress(normalizedFile, file,
+                                              currentAtomAddress, addend);
+        if (!currentAtom) {
+          // FIXME: ld64 just issues a warning here - should we match that?
+          return llvm::make_error<GenericError>(
+                   "can't find atom for stabs BNSYM at " +
+                   Twine::utohexstr(stabSym.value) + " in " + file.path());
+        }
+        // The addend is only meaningful once an atom has been found.
+        if (addend != 0)
+          return llvm::make_error<GenericError>(
+                   "Non-zero addend for BNSYM '" + stabSym.name + "' in " +
+                   file.path());
+        stab.atom = currentAtom;
+        break;
+      }
+      case N_SO:
+      case N_OSO:
+        // Not associated with an atom, just copy.
+        if (copyRefs)
+          stab.str = copyDebugString(stabSym.name, *allocator);
+        else
+          stab.str = stabSym.name;
+        break;
+      case N_GSYM: {
+        // The atom is looked up as "_" + the text before the first ':',
+        // or by the whole stab name when there is no ':'.
+        auto colonIdx = stabSym.name.find(':');
+        if (colonIdx != StringRef::npos) {
+          StringRef name = stabSym.name.substr(0, colonIdx);
+          currentAtom = findDefinedAtomByName(file, "_" + name);
+        } else {
+          currentAtom = findDefinedAtomByName(file, stabSym.name);
+        }
+        stab.atom = currentAtom;
+        if (copyRefs)
+          stab.str = copyDebugString(stabSym.name, *allocator);
+        else
+          stab.str = stabSym.name;
+        if (stab.atom == nullptr)
+          return llvm::make_error<GenericError>(
+                   "can't find atom for N_GSYM stabs " + stabSym.name +
+                   " in " + file.path());
+        break;
+      }
+      case N_FUN:
+        return llvm::make_error<GenericError>(
+                 "old-style N_FUN stab '" + stabSym.name + "' unsupported");
+      default:
+        return llvm::make_error<GenericError>(
+                 "unrecognized stab symbol '" + stabSym.name + "'");
+      }
+      break;
+    case inBeginEnd:
+      stab.atom = currentAtom;
+      switch (static_cast<StabType>(stabSym.type)) {
+      case N_ENSYM:
+        state = start;
+        currentAtom = nullptr;
+        break;
+      case N_FUN:
+        // Just copy the string.
+        if (copyRefs)
+          stab.str = copyDebugString(stabSym.name, *allocator);
+        else
+          stab.str = stabSym.name;
+        break;
+      default:
+        return llvm::make_error<GenericError>(
+                 "unrecognized stab symbol '" + stabSym.name + "'");
+      }
+    }
+    // NOTE(review): this writes to llvm::dbgs() for every stab without any
+    // debug-type guard - confirm whether that output is intended.
+    llvm::dbgs() << "Adding to stabsList: " << stab << "\n";
+    stabsList.push_back(stab);
+  }
+  file.setDebugInfo(llvm::make_unique<StabsDebugInfo>(std::move(stabsList)));
+  // FIXME: Kill this off when we fix YAML memory ownership.
+  file.debugInfo()->setAllocator(std::move(allocator));
+  return llvm::Error::success();
+/// Wraps the raw content of section \p S in an llvm::DataExtractor whose
+/// endianness and address size (8 or 4 bytes) follow the architecture of
+/// \p normalizedFile.
+static llvm::DataExtractor
+dataExtractorFromSection(const NormalizedFile &normalizedFile,
+                         const Section &S) {
+  const uint8_t addressSize =
+      MachOLinkingContext::is64Bit(normalizedFile.arch) ? 8 : 4;
+  // DataExtractor takes "is little endian", hence the negation.
+  const bool littleEndian =
+      !MachOLinkingContext::isBigEndian(normalizedFile.arch);
+  const char *bytes = reinterpret_cast<const char*>(S.content.data());
+  StringRef sectionBytes(bytes, S.content.size());
+  return llvm::DataExtractor(sectionBytes, littleEndian, addressSize);
+// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
+//        inspection" code if possible.
+//
+// Scans the abbreviation declarations in abbrevData from offset 0 and
+// returns the offset just past the code of the declaration whose code equals
+// abbrCode (i.e. the offset of that declaration's tag).
+//
+// If the code is not present the scan stops at the first zero code (a zero
+// code is also what reading past the end of the data yields) and the size
+// of the data is returned, so that subsequent reads by the caller produce
+// zeros instead of the scan looping forever.
+static uint32_t getCUAbbrevOffset(llvm::DataExtractor abbrevData,
+                                  uint64_t abbrCode) {
+  uint32_t offset = 0;
+  for (;;) {
+    uint64_t curCode = abbrevData.getULEB128(&offset);
+    if (curCode == abbrCode)
+      return offset;
+    // No further declarations: the requested code does not exist.
+    if (curCode == 0)
+      return abbrevData.getData().size();
+    // Tag
+    abbrevData.getULEB128(&offset);
+    // Has-children flag.
+    abbrevData.getU8(&offset);
+    // Attributes: (name, form) pairs terminated by a (0, 0) pair.
+    while (abbrevData.getULEB128(&offset) | abbrevData.getULEB128(&offset))
+      ;
+  }
+// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
+//        inspection" code if possible.
+//
+// Reads one string attribute value from infoData at infoOffset (which is
+// advanced past the value).  A DW_FORM_string value is returned directly
+// from infoData; a DW_FORM_strp value is read as a 32-bit offset and
+// resolved inside stringsSection.  Any other form yields an error.
+static Expected<const char *>
+getIndexedString(const NormalizedFile &normalizedFile,
+                 llvm::dwarf::Form form, llvm::DataExtractor infoData,
+                 uint32_t &infoOffset, const Section &stringsSection) {
+  const bool isInlineString = (form == llvm::dwarf::DW_FORM_string);
+  if (isInlineString)
+    return infoData.getCStr(&infoOffset);
+  const bool isStringTableRef = (form == llvm::dwarf::DW_FORM_strp);
+  if (!isStringTableRef)
+    return llvm::make_error<GenericError>(
+        "string field encoded without DW_FORM_strp");
+  // Indirect string: the value is an offset into the strings section.
+  uint32_t offsetInStrings = infoData.getU32(&infoOffset);
+  llvm::DataExtractor stringTable =
+    dataExtractorFromSection(normalizedFile, stringsSection);
+  return stringTable.getCStr(&offsetInStrings);
+// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
+//        inspection" code if possible.
+//
+// Parses the header and top-level DIE of the first compile unit in the
+// given __debug_info content and extracts its DW_AT_name and DW_AT_comp_dir
+// into a TranslationUnitSource.  All other attributes are skipped.
+//
+// Returns an error for a malformed unit length, a DWARF version outside
+// 2..4, a top-level DIE that is not DW_TAG_compile_unit, or a string
+// attribute that getIndexedString() rejects.
+static llvm::Expected<TranslationUnitSource>
+readCompUnit(const NormalizedFile &normalizedFile,
+             const Section &info,
+             const Section &abbrev,
+             const Section &strings,
+             StringRef path) {
+  uint32_t offset = 0;
+  llvm::dwarf::DwarfFormat Format = llvm::dwarf::DwarfFormat::DWARF32;
+  auto infoData = dataExtractorFromSection(normalizedFile, info);
+  uint32_t length = infoData.getU32(&offset);
+  if (length == 0xffffffff) {
+    // Escape value: the real length follows as a 64-bit field.
+    Format = llvm::dwarf::DwarfFormat::DWARF64;
+    infoData.getU64(&offset);
+  }
+  else if (length > 0xffffff00)
+    return llvm::make_error<GenericError>("Malformed DWARF in " + path);
+  uint16_t version = infoData.getU16(&offset);
+  if (version < 2 || version > 4)
+    return llvm::make_error<GenericError>("Unsupported DWARF version in " +
+                                          path);
+  infoData.getU32(&offset); // Abbrev offset (should be zero)
+  uint8_t addrSize = infoData.getU8(&offset);
+  uint32_t abbrCode = infoData.getULEB128(&offset);
+  auto abbrevData = dataExtractorFromSection(normalizedFile, abbrev);
+  uint32_t abbrevOffset = getCUAbbrevOffset(abbrevData, abbrCode);
+  uint64_t tag = abbrevData.getULEB128(&abbrevOffset);
+  if (tag != llvm::dwarf::DW_TAG_compile_unit)
+    return llvm::make_error<GenericError>("top level DIE is not a compile unit");
+  // Skip the has-children flag.
+  abbrevData.getU8(&abbrevOffset);
+  llvm::dwarf::FormParams formParams = {version, addrSize, Format};
+  TranslationUnitSource tu;
+  for (;;) {
+    // Read the attribute name and then its form as two separate statements:
+    // both reads advance abbrevOffset, so their order must be fixed rather
+    // than left to the unspecified operand order of a single expression.
+    uint32_t name = abbrevData.getULEB128(&abbrevOffset);
+    llvm::dwarf::Form form = static_cast<llvm::dwarf::Form>(
+        abbrevData.getULEB128(&abbrevOffset));
+    // A (0, 0) pair terminates the attribute list.
+    if (name == 0 && form == 0)
+      break;
+    switch (name) {
+    case llvm::dwarf::DW_AT_name: {
+      if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
+                                        strings))
+        tu.name = *eName;
+      else
+        return eName.takeError();
+      break;
+    }
+    case llvm::dwarf::DW_AT_comp_dir: {
+      if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
+                                        strings))
+        tu.path = *eName;
+      else
+        return eName.takeError();
+      break;
+    }
+    default:
+      llvm::DWARFFormValue::skipValue(form, infoData, &offset, formParams);
+    }
+  }
+  return tu;
+/// Attaches debug information to \p file.
+///
+/// If the object has no __DWARF,__debug_info section the stabs symbols are
+/// parsed instead (see parseStabs()).  Otherwise the first compile unit is
+/// read with readCompUnit() and stored as DwarfDebugInfo.  When \p copyRefs
+/// is true the extracted strings are copied into an allocator owned by the
+/// debug info object.
+///
+/// \returns an error if __debug_info is non-empty but shorter than 12 bytes,
+/// if __debug_abbrev is missing, or if readCompUnit() fails.
+llvm::Error parseDebugInfo(MachOFile &file,
+                           const NormalizedFile &normalizedFile, bool copyRefs) {
+  // Find the interesting debug info sections.
+  const Section *debugInfo = nullptr;
+  const Section *debugAbbrev = nullptr;
+  const Section *debugStrings = nullptr;
+  for (auto &s : normalizedFile.sections) {
+    if (s.segmentName == "__DWARF") {
+      if (s.sectionName == "__debug_info")
+        debugInfo = &s;
+      else if (s.sectionName == "__debug_abbrev")
+        debugAbbrev = &s;
+      else if (s.sectionName == "__debug_str")
+        debugStrings = &s;
+    }
+  }
+  if (!debugInfo)
+    return parseStabs(file, normalizedFile, copyRefs);
+  if (debugInfo->content.size() == 0)
+    return llvm::Error::success();
+  if (debugInfo->content.size() < 12)
+    return llvm::make_error<GenericError>("Malformed __debug_info section in " +
+                                          file.path() + ": too small");
+  if (!debugAbbrev)
+    return llvm::make_error<GenericError>("Missing __debug_abbrev section in " +
+                                          file.path());
+  // The strings section is only consulted for DW_FORM_strp attributes, so
+  // its absence is tolerated; an empty stand-in is passed instead of
+  // dereferencing a null pointer.
+  // NOTE(review): assumes Section is default-constructible with empty
+  // content - confirm against its declaration.
+  const Section emptyStrings;
+  const Section &strings = debugStrings ? *debugStrings : emptyStrings;
+  auto tuOrErr = readCompUnit(normalizedFile, *debugInfo, *debugAbbrev,
+                              strings, file.path());
+  if (!tuOrErr)
+    return tuOrErr.takeError();
+  // FIXME: Kill of allocator and code under 'copyRefs' when we fix YAML
+  //        memory ownership.
+  std::unique_ptr<BumpPtrAllocator> allocator;
+  if (copyRefs) {
+    allocator = llvm::make_unique<BumpPtrAllocator>();
+    tuOrErr->name = copyDebugString(tuOrErr->name, *allocator);
+    tuOrErr->path = copyDebugString(tuOrErr->path, *allocator);
+  }
+  file.setDebugInfo(llvm::make_unique<DwarfDebugInfo>(std::move(*tuOrErr)));
+  if (copyRefs)
+    file.debugInfo()->setAllocator(std::move(allocator));
+  return llvm::Error::success();
+/// Reads a pointer-sized value at \p addr: 64 bits when \p is64 is set,
+/// otherwise 32 bits that are widened as a signed quantity.
+static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) {
+  if (!is64) {
+    // Go through int32_t so the conversion to int64_t sign-extends.
+    const int32_t narrow = read32(addr, isBig);
+    return narrow;
+  }
+  return read64(addr, isBig);
+/// --- Augmentation String Processing ---
+// Facts about one CIE that are gathered from its augmentation string by
+// processAugmentationString() and later consulted when processing the CIE
+// itself and the FDEs that refer to it.
+struct CIEInfo {
+ // True when the augmentation string starts with 'z'.
+ bool _augmentationDataPresent = false;
+ // True when the augmentation string contains "eh".
+ bool _mayHaveEH = false;
+ // Offset within the augmentation data recorded when 'L' is seen.
+ // ~0U is tested by processFDE() to mean "no LSDA entry".
+ uint32_t _offsetOfLSDA = ~0U;
+ // Offset within the augmentation data recorded when 'P' is seen.
+ // ~0U is tested by processCIE() to mean "no personality entry".
+ uint32_t _offsetOfPersonality = ~0U;
+ // Offset within the augmentation data recorded when 'R' is seen;
+ // ~0U presumably means "not present", like the fields above.
+ uint32_t _offsetOfFDEPointerEncoding = ~0U;
+ // Total augmentation data size implied by the string: 1 byte per 'L',
+ // 5 per 'P' and 1 per 'R'.  Left at ~0U for an empty string.
+ uint32_t _augmentationDataLength = ~0U;
+// Per-CIE information keyed by the CIE's atom.
+typedef llvm::DenseMap<const MachODefinedAtom*, CIEInfo> CIEInfoMap;
+/// Interprets the NUL-terminated CIE augmentation string at \p augStr.
+///
+/// An empty string is accepted as is.  Otherwise the string must start with
+/// 'z'; each following 'L', 'P' and 'R' records its offset within the
+/// augmentation data in \p cieInfo, "eh" sets the may-have-EH flag, and any
+/// other character is skipped.
+///
+/// \param len [out] length of the string including its terminator.
+/// \returns an error if the string does not start with 'z' or an 'e' is not
+///          followed by 'h'.
+static llvm::Error processAugmentationString(const uint8_t *augStr,
+                                             CIEInfo &cieInfo,
+                                             unsigned &len) {
+  const uint8_t leading = augStr[0];
+  if (leading == '\0') {
+    len = 1;
+    return llvm::Error::success();
+  }
+  if (leading != 'z')
+    return llvm::make_error<GenericError>("expected 'z' at start of "
+                                          "augmentation string");
+  cieInfo._augmentationDataPresent = true;
+  // Running size of the augmentation data described so far.
+  uint32_t dataOffset = 0;
+  uint64_t pos = 1;
+  while (augStr[pos] != '\0') {
+    switch (augStr[pos]) {
+    case 'L':
+      cieInfo._offsetOfLSDA = dataOffset;
+      // This adds a single byte to the augmentation data.
+      dataOffset += 1;
+      pos += 1;
+      break;
+    case 'P':
+      cieInfo._offsetOfPersonality = dataOffset;
+      // This adds a single byte to the augmentation data for the encoding,
+      // then a number of bytes for the pointer data.
+      // FIXME: We are assuming 4 is correct here for the pointer size as we
+      // always currently use delta32ToGOT.
+      dataOffset += 5;
+      pos += 1;
+      break;
+    case 'R':
+      cieInfo._offsetOfFDEPointerEncoding = dataOffset;
+      // This adds a single byte to the augmentation data.
+      dataOffset += 1;
+      pos += 1;
+      break;
+    case 'e':
+      if (augStr[pos + 1] != 'h')
+        return llvm::make_error<GenericError>("expected 'eh' in "
+                                              "augmentation string");
+      cieInfo._mayHaveEH = true;
+      pos += 2;
+      break;
+    default:
+      // Unknown augmentation character: skipped.
+      pos += 1;
+      break;
+    }
+  }
+  cieInfo._augmentationDataLength = dataOffset;
+  len = pos + 1;
+  return llvm::Error::success();
+/// Processes one CIE atom of the __eh_frame section.
+///
+/// Parses the CIE's augmentation string and records the result in
+/// \p cieInfos under \p atom.  If the string announces a personality
+/// function, the CIE fields are walked up to the personality pointer; an
+/// existing relocation must sit exactly at that pointer, otherwise a
+/// reference to the personality function is generated from the encoded
+/// delta.
+///
+/// \param offset offset of the atom within \p ehFrameSection.
+/// \returns an error for an invalid augmentation string, a mismatching
+/// augmentation data length, or unexpected/misplaced relocations.
+static llvm::Error processCIE(const NormalizedFile &normalizedFile,
+                              MachOFile &file,
+                              mach_o::ArchHandler &handler,
+                              const Section *ehFrameSection,
+                              MachODefinedAtom *atom,
+                              uint64_t offset,
+                              CIEInfoMap &cieInfos) {
+  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
+  const uint8_t *frameData = atom->rawContent().data();
+  CIEInfo cieInfo;
+  // A length of 0xffffffff is followed by a 64-bit extended length, which
+  // shifts every later field by eight bytes.
+  uint32_t size = read32(frameData, isBig);
+  uint64_t cieIDField = size == 0xffffffffU
+                          ? sizeof(uint32_t) + sizeof(uint64_t)
+                          : sizeof(uint32_t);
+  uint64_t versionField = cieIDField + sizeof(uint32_t);
+  uint64_t augmentationStringField = versionField + sizeof(uint8_t);
+  unsigned augmentationStringLength = 0;
+  if (auto err = processAugmentationString(frameData + augmentationStringField,
+                                           cieInfo, augmentationStringLength))
+    return err;
+  if (cieInfo._offsetOfPersonality != ~0U) {
+    // If we have augmentation data for the personality function, then we may
+    // need to implicitly generate its relocation.
+    // Parse the EH Data field which is pointer sized.
+    uint64_t EHDataField = augmentationStringField + augmentationStringLength;
+    const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
+    unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 8 : 4) : 0);
+    // Parse Code Align Factor which is a ULEB128.
+    uint64_t CodeAlignField = EHDataField + EHDataFieldSize;
+    unsigned lengthFieldSize = 0;
+    llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize);
+    // Parse Data Align Factor which is a SLEB128.
+    uint64_t DataAlignField = CodeAlignField + lengthFieldSize;
+    llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize);
+    // Parse Return Address Register which is a byte.
+    uint64_t ReturnAddressField = DataAlignField + lengthFieldSize;
+    // Parse the augmentation length which is a ULEB128.
+    uint64_t AugmentationLengthField = ReturnAddressField + 1;
+    uint64_t AugmentationLength =
+      llvm::decodeULEB128(frameData + AugmentationLengthField,
+                          &lengthFieldSize);
+    if (AugmentationLength != cieInfo._augmentationDataLength)
+      return llvm::make_error<GenericError>("CIE augmentation data length "
+                                            "mismatch");
+    // Get the start address of the augmentation data.
+    uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize;
+    // Parse the personality function from the augmentation data.
+    uint64_t PersonalityField =
+      AugmentationDataField + cieInfo._offsetOfPersonality;
+    // Parse the personality encoding.
+    // FIXME: Verify that this is a 32-bit pcrel offset.
+    uint64_t PersonalityFunctionField = PersonalityField + 1;
+    if (atom->begin() != atom->end()) {
+      // If we have an explicit relocation, then make sure it matches this
+      // offset as this is where we'd expect it to be applied to.
+      DefinedAtom::reference_iterator CurrentRef = atom->begin();
+      if (CurrentRef->offsetInAtom() != PersonalityFunctionField)
+        return llvm::make_error<GenericError>("CIE personality reloc at "
+                                              "wrong offset");
+      if (++CurrentRef != atom->end())
+        return llvm::make_error<GenericError>("CIE contains too many relocs");
+    } else {
+      // Implicitly generate the personality function reloc.  It's assumed to
+      // be a delta32 offset to a GOT entry.
+      // FIXME: Parse the encoding and check this.
+      int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig);
+      uint64_t funcAddress = ehFrameSection->address + offset +
+                             PersonalityFunctionField;
+      funcAddress += funcDelta;
+      const MachODefinedAtom *func = nullptr;
+      // Initialised so that a failed lookup cannot put an indeterminate
+      // addend into the new reference.
+      Reference::Addend addend = 0;
+      func = findAtomCoveringAddress(normalizedFile, file, funcAddress,
+                                     addend);
+      atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
+                         handler.unwindRefToPersonalityFunctionKind(),
+                         PersonalityFunctionField, func, addend);
+    }
+  } else if (atom->begin() != atom->end()) {
+    // Otherwise, we expect there to be no relocations in this atom as the only
+    // relocation would have been to the personality function.
+    return llvm::make_error<GenericError>("unexpected relocation in CIE");
+  }
+  cieInfos[atom] = std::move(cieInfo);
+  return llvm::Error::success();
+/// Processes one FDE atom of the __eh_frame section.
+///
+/// Ensures the FDE has references to its parent CIE, to the function it
+/// describes and, when the CIE's augmentation announces one and the FDE has
+/// augmentation data, to its LSDA.  References already present on the atom
+/// are verified in offset order; missing ones are generated from the values
+/// encoded in the FDE.
+///
+/// \param offset   offset of the atom within \p ehFrameSection.
+/// \param cieInfos information recorded for the CIEs of this section.
+/// \returns an error if the FDE's CIE field does not lead to a known CIE.
+static llvm::Error processFDE(const NormalizedFile &normalizedFile,
+                              MachOFile &file,
+                              mach_o::ArchHandler &handler,
+                              const Section *ehFrameSection,
+                              MachODefinedAtom *atom,
+                              uint64_t offset,
+                              const CIEInfoMap &cieInfos) {
+  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
+  const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
+  // Compiler wasn't lazy and actually told us what it meant.
+  // Unfortunately, the compiler may not have generated references for all of
+  // [cie, func, lsda] and so we still need to parse the FDE and add references
+  // for any the compiler didn't generate.
+  if (atom->begin() != atom->end())
+    atom->sortReferences();
+  DefinedAtom::reference_iterator CurrentRef = atom->begin();
+  // This helper returns the reference (if one exists) at the offset we are
+  // currently processing.  It automatically increments the ref iterator if we
+  // do return a ref, and throws an error if we pass over a ref without
+  // consuming it.
+  auto currentRefGetter = [&CurrentRef,
+                           &atom](uint64_t Offset)->const Reference* {
+    // If there are no more refs found, then we are done.
+    if (CurrentRef == atom->end())
+      return nullptr;
+    const Reference *Ref = *CurrentRef;
+    // If we haven't reached the offset for this reference, then return that
+    // we don't yet have a reference to process.
+    if (Offset < Ref->offsetInAtom())
+      return nullptr;
+    // If the offset is equal, then we want to process this ref.
+    if (Offset == Ref->offsetInAtom()) {
+      ++CurrentRef;
+      return Ref;
+    }
+    // The current ref is at an offset which is earlier than the current
+    // offset, then we failed to consume it when we should have.  In this case
+    // throw an error.
+    llvm::report_fatal_error("Skipped reference when processing FDE");
+  };
+  // Helper to either get the reference at this current location, and verify
+  // that it is of the expected type, or add a reference of that type.
+  // Returns the reference target.
+  auto verifyOrAddReference = [&](uint64_t targetAddress,
+                                  Reference::KindValue refKind,
+                                  uint64_t refAddress,
+                                  bool allowsAddend)->const Atom* {
+    if (auto *ref = currentRefGetter(refAddress)) {
+      // The compiler already emitted a relocation for the CIE ref.  This should
+      // have been converted to the correct type of reference in
+      // get[Pair]ReferenceInfo().
+      assert(ref->kindValue() == refKind &&
+             "Incorrect EHFrame reference kind");
+      return ref->target();
+    }
+    // Initialised so that a failed lookup cannot put an indeterminate
+    // addend into the new reference.
+    Reference::Addend addend = 0;
+    auto *target = findAtomCoveringAddress(normalizedFile, file,
+                                           targetAddress, addend);
+    atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
+                       refKind, refAddress, target, addend);
+    if (!allowsAddend)
+      assert(!addend && "EHFrame reference cannot have addend");
+    return target;
+  };
+  const uint8_t *startFrameData = atom->rawContent().data();
+  const uint8_t *frameData = startFrameData;
+  // A length of 0xffffffff is followed by a 64-bit extended length, which
+  // shifts every later field by eight bytes.
+  uint32_t size = read32(frameData, isBig);
+  uint64_t cieFieldInFDE = size == 0xffffffffU
+                             ? sizeof(uint32_t) + sizeof(uint64_t)
+                             : sizeof(uint32_t);
+  // Linker needs to fixup a reference from the FDE to its parent CIE (a
+  // 32-bit byte offset backwards in the __eh_frame section).
+  uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig);
+  uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE;
+  cieAddress -= cieDelta;
+  auto *cieRefTarget = verifyOrAddReference(cieAddress,
+                                            handler.unwindRefToCIEKind(),
+                                            cieFieldInFDE, false);
+  // The target may be null or not a CIE when the input is malformed, so
+  // report that instead of relying on an assertion.
+  const MachODefinedAtom *cie =
+      llvm::dyn_cast_or_null<MachODefinedAtom>(cieRefTarget);
+  if (!cie || cie->contentType() != DefinedAtom::typeCFI)
+    return llvm::make_error<GenericError>(
+        "FDE's CIE field does not point at the start of a CIE.");
+  auto cieInfoIt = cieInfos.find(cie);
+  if (cieInfoIt == cieInfos.end())
+    return llvm::make_error<GenericError>(
+        "FDE refers to a CIE that has not been processed");
+  const CIEInfo &cieInfo = cieInfoIt->second;
+  // Linker needs to fixup reference from the FDE to the function it's
+  // describing. FIXME: there are actually different ways to do this, and the
+  // particular method used is specified in the CIE's augmentation fields
+  // (hopefully)
+  uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t);
+  int64_t functionFromFDE = readSPtr(is64, isBig,
+                                     frameData + rangeFieldInFDE);
+  uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE;
+  rangeStart += functionFromFDE;
+  verifyOrAddReference(rangeStart,
+                       handler.unwindRefToFunctionKind(),
+                       rangeFieldInFDE, true);
+  // Handle the augmentation data if there is any.
+  if (cieInfo._augmentationDataPresent) {
+    // First process the augmentation data length field.  It follows the
+    // two pointer-sized range fields (start and length).
+    uint64_t augmentationDataLengthFieldInFDE =
+      rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t) : sizeof(uint32_t));
+    unsigned lengthFieldSize = 0;
+    uint64_t augmentationDataLength =
+      llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE,
+                          &lengthFieldSize);
+    if (cieInfo._offsetOfLSDA != ~0U && augmentationDataLength > 0) {
+      // Look at the augmentation data field.
+      uint64_t augmentationDataFieldInFDE =
+        augmentationDataLengthFieldInFDE + lengthFieldSize;
+      int64_t lsdaFromFDE = readSPtr(is64, isBig,
+                                     frameData + augmentationDataFieldInFDE);
+      uint64_t lsdaStart =
+        ehFrameSection->address + offset + augmentationDataFieldInFDE +
+        lsdaFromFDE;
+      verifyOrAddReference(lsdaStart,
+                           handler.unwindRefToFunctionKind(),
+                           augmentationDataFieldInFDE, true);
+    }
+  }
+  return llvm::Error::success();
+/// Walks the atoms of the __TEXT,__eh_frame section (when the file has one)
+/// and hands each atom to processCIE() or processFDE(). After the first
+/// failure the remaining atoms are skipped and that failure is returned.
+llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile,
+                                 MachOFile &file,
+                                 mach_o::ArchHandler &handler) {
+  // Locate the first section named __TEXT,__eh_frame.
+  const Section *ehFrameSection = nullptr;
+  for (auto &candidate : normalizedFile.sections) {
+    if (candidate.segmentName != "__TEXT")
+      continue;
+    if (candidate.sectionName != "__eh_frame")
+      continue;
+    ehFrameSection = &candidate;
+    break;
+  }
+  // Without an __eh_frame section there is nothing to add.
+  if (ehFrameSection == nullptr)
+    return llvm::Error::success();
+  llvm::Error ehFrameErr = llvm::Error::success();
+  CIEInfoMap cieInfos;
+  auto visitAtom = [&](MachODefinedAtom *atom, uint64_t offset) -> void {
+    assert(atom->contentType() == DefinedAtom::typeCFI);
+    // Once an error has been recorded, leave the remaining atoms alone.
+    if (ehFrameErr)
+      return;
+    const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
+    // Each atom is either a CIE or an FDE; pick the matching processor.
+    ehFrameErr = ArchHandler::isDwarfCIE(isBig, atom)
+                     ? processCIE(normalizedFile, file, handler,
+                                  ehFrameSection, atom, offset, cieInfos)
+                     : processFDE(normalizedFile, file, handler,
+                                  ehFrameSection, atom, offset, cieInfos);
+  };
+  file.eachAtomInSection(*ehFrameSection, visitAtom);
+  return ehFrameErr;
+/// Validates the 8-byte objc_image_info payload of \p sect and records the
+/// ObjC constraint and Swift version it encodes on \p file. Returns an error
+/// when the size is not 8, the version is non-zero, or a GC flag is set.
+llvm::Error parseObjCImageInfo(const Section &sect,
+                               const NormalizedFile &normalizedFile,
+                               MachOFile &file) {
+  // Payload layout:
+  //   struct objc_image_info {
+  //     uint32_t version; // initially 0
+  //     uint32_t flags;
+  //   };
+  // Every rejection shares the "<segment>/<section> in file <path>" prefix
+  // and differs only in the trailing reason.
+  auto reject = [&](const char *reason) -> llvm::Error {
+    return llvm::make_error<GenericError>(sect.segmentName + "/" +
+                                          sect.sectionName +
+                                          " in file " + file.path() +
+                                          reason);
+  };
+  const ArrayRef<uint8_t> payload = sect.content;
+  if (payload.size() != 8)
+    return reject(" should be 8 bytes in size");
+  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
+  // First word: version, which must be zero.
+  if (read32(payload.data(), isBig))
+    return reject(" should have version=0");
+  // Second word: flags.
+  const uint32_t flags = read32(payload.data() + 4, isBig);
+  if (flags & (MachOLinkingContext::objc_supports_gc |
+               MachOLinkingContext::objc_gc_only))
+    return reject(" uses GC.  This is not supported");
+  file.setObjcConstraint(
+      (flags & MachOLinkingContext::objc_retainReleaseForSimulator)
+          ? MachOLinkingContext::objc_retainReleaseForSimulator
+          : MachOLinkingContext::objc_retainRelease);
+  // Bits 8..15 of the flags word hold the Swift version.
+  file.setSwiftVersion((flags >> 8) & 0xFF);
+  return llvm::Error::success();
+/// Creates a MachOFile for \p path, fills it from \p normalizedFile via
+/// normalizedObjectToAtoms(), and returns it as a generic File. Any error
+/// from the conversion is returned instead of the file.
+objectToAtoms(const NormalizedFile &normalizedFile, StringRef path,
+              bool copyRefs) {
+  auto objectFile = std::unique_ptr<MachOFile>(new MachOFile(path));
+  auto err =
+      normalizedObjectToAtoms(objectFile.get(), normalizedFile, copyRefs);
+  if (err)
+    return std::move(err);
+  return std::unique_ptr<File>(std::move(objectFile));
+dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path,
+             bool copyRefs) {
+  // Create the MachODylibFile for this path, then let
+  // normalizedDylibToAtoms() populate it; a failure there is returned as-is.
+  auto dylibFile = std::unique_ptr<MachODylibFile>(new MachODylibFile(path));
+  auto err = normalizedDylibToAtoms(dylibFile.get(), normalizedFile, copyRefs);
+  if (err)
+    return std::move(err);
+  return std::unique_ptr<File>(std::move(dylibFile));
+} // anonymous namespace
+namespace normalized {
+/// Returns true when \p sect is named __OBJC,__image_info or
+/// __DATA,__objc_imageinfo.
+static bool isObjCImageInfo(const Section &sect) {
+  // First accepted spelling.
+  if (sect.segmentName == "__OBJC" && sect.sectionName == "__image_info")
+    return true;
+  // Second (and last) accepted spelling.
+  return sect.segmentName == "__DATA" &&
+         sect.sectionName == "__objc_imageinfo";
+normalizedObjectToAtoms(MachOFile *file,
+ const NormalizedFile &normalizedFile,
+ bool copyRefs) { // Populates *file with atoms, references and cached attributes taken from normalizedFile; returns the first error hit.
+ LLVM_DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: "
+ << file->path() << "\n");
+ bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0);
+ // Create atoms from each section.
+ for (auto &sect : normalizedFile.sections) {
+ // Debug-info sections get no atoms here (presumably they are consumed by parseDebugInfo() at the end of this function).
+ if (isDebugInfoSection(sect))
+ continue;
+ // If the file contains an objc_image_info struct, then we should parse the
+ // ObjC flags and Swift version.
+ if (isObjCImageInfo(sect)) {
+ if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file))
+ return ec;
+ // We then skip adding atoms for this section as we use the ObjCPass to
+ // re-emit this data after it has been aggregated for all files.
+ continue;
+ }
+ bool customSectionName;
+ DefinedAtom::ContentType atomType = atomTypeFromSection(sect,
+ customSectionName);
+ if (auto ec = processSection(atomType, sect, customSectionName,
+ normalizedFile, *file, scatterable, copyRefs))
+ return ec;
+ }
+ // Create atoms from undefined symbols.
+ for (auto &sym : normalizedFile.undefinedSymbols) {
+ // Undefined symbols with n_value != 0 are actually tentative definitions.
+ if (sym.value == Hex64(0)) {
+ file->addUndefinedAtom(sym.name, copyRefs);
+ } else {
+ file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value,
+ DefinedAtom::Alignment(1 << (sym.desc >> 8)), // alignment is 2^(desc >> 8)
+ copyRefs);
+ }
+ }
+ // Convert mach-o relocations to References
+ std::unique_ptr<mach_o::ArchHandler> handler
+ = ArchHandler::create(normalizedFile.arch);
+ for (auto &sect : normalizedFile.sections) {
+ if (isDebugInfoSection(sect))
+ continue;
+ if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable,
+ *file, *handler))
+ return ec;
+ }
+ // Add additional arch-specific References
+ file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void {
+ handler->addAdditionalReferences(*atom);
+ });
+ // Each __eh_frame section needs references to both __text (the function we're
+ // providing unwind info for) and itself (FDE -> CIE). These aren't
+ // represented in the relocations on some architectures, so we have to add
+ // them back in manually there.
+ if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler))
+ return ec;
+ // Process mach-o data-in-code regions array. That information is encoded in
+ // atoms as References at each transition point.
+ unsigned nextIndex = 0; // index of the entry that follows the one being processed
+ for (const DataInCode &entry : normalizedFile.dataInCode) {
+ ++nextIndex;
+ const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset);
+ if (!s) {
+ return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address ("
+ + Twine(entry.offset)
+ + ") is not in any section"));
+ }
+ uint64_t offsetInSect = entry.offset - s->address;
+ uint32_t offsetInAtom;
+ MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect,
+ &offsetInAtom); // NOTE(review): atom is dereferenced below without a null check - confirm the lookup cannot fail here
+ if (offsetInAtom + entry.length > atom->size()) {
+ return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE entry "
+ "(offset="
+ + Twine(entry.offset)
+ + ", length="
+ + Twine(entry.length)
+ + ") crosses atom boundary."));
+ }
+ // Add reference that marks start of data-in-code.
+ atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
+ handler->dataInCodeTransitionStart(*atom),
+ offsetInAtom, atom, entry.kind);
+ // Peek at next entry, if it starts where this one ends, skip ending ref.
+ if (nextIndex < normalizedFile.dataInCode.size()) {
+ const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex];
+ if (nextEntry.offset == (entry.offset + entry.length))
+ continue;
+ }
+ // If the data runs to the end of the atom, skip the ending ref.
+ if ((offsetInAtom + entry.length) == atom->size())
+ continue;
+ // Add reference that marks end of data-in-code.
+ atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
+ handler->dataInCodeTransitionEnd(*atom),
+ offsetInAtom+entry.length, atom, 0);
+ }
+ // Cache some attributes on the file for use later.
+ file->setFlags(normalizedFile.flags);
+ file->setArch(normalizedFile.arch);
+ file->setOS(normalizedFile.os);
+ file->setMinVersion(normalizedFile.minOSverson);
+ file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind);
+ // Sort references in each atom to their canonical order.
+ for (const DefinedAtom* defAtom : file->defined()) {
+ reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences(); // assumes every defined atom of this file is a SimpleDefinedAtom - TODO confirm
+ }
+ if (auto err = parseDebugInfo(*file, normalizedFile, copyRefs))
+ return err;
+ return llvm::Error::success();
+normalizedDylibToAtoms(MachODylibFile *file,
+                       const NormalizedFile &normalizedFile,
+                       bool copyRefs) {
+  // Copy the dylib's install name and versions onto the file object.
+  file->setInstallName(normalizedFile.installName);
+  file->setCompatVersion(normalizedFile.compatVersion);
+  file->setCurrentVersion(normalizedFile.currentVersion);
+  // Register every exported symbol. The export info wins whenever it has
+  // entries; the traditional global symbol list is used only when it is
+  // empty.
+  if (normalizedFile.exportInfo.empty()) {
+    for (auto &global : normalizedFile.globalSymbols) {
+      assert((global.scope & N_EXT) && "only expect external symbols here");
+      bool isWeak = (global.desc & N_WEAK_DEF);
+      file->addExportedSymbol(global.name, isWeak, copyRefs);
+    }
+  } else {
+    for (const Export &entry : normalizedFile.exportInfo) {
+      bool isWeak = (entry.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION);
+      // StringRefs from export iterator are ephemeral, so force copy.
+      file->addExportedSymbol(entry.name, isWeak, true);
+    }
+  }
+  // Record the dylibs this one re-exports; other dependency kinds are
+  // ignored here.
+  for (const DependentDylib &dep : normalizedFile.dependentDylibs) {
+    if (dep.kind != llvm::MachO::LC_REEXPORT_DYLIB)
+      continue;
+    file->addReExportedDylib(dep.path);
+  }
+  return llvm::Error::success();
+/// Reverse lookup in sectsToAtomType: given an atom content type, reports the
+/// segment/section names, section type and section attributes to use for it
+/// in a relocatable file. The first table entry whose atom type matches and
+/// whose names are both non-empty is used.
+///
+/// \param atomType       content type to look up.
+/// \param segmentName    [out] segment name of the matching entry.
+/// \param sectionName    [out] section name of the matching entry.
+/// \param sectionType    [out] section type of the matching entry.
+/// \param sectionAttrs   [out] S_ATTR_PURE_INSTRUCTIONS for code, else 0.
+/// \param relocsToDefinedCanBeImplicit [out] true only for CFI atoms.
+///
+/// Hits llvm_unreachable() when no table entry matches.
+void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType,
+                                          StringRef &segmentName,
+                                          StringRef &sectionName,
+                                          SectionType &sectionType,
+                                          SectionAttr &sectionAttrs,
+                                          bool &relocsToDefinedCanBeImplicit) {
+  for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
+       p->atomType != DefinedAtom::typeUnknown; ++p) {
+    if (p->atomType != atomType)
+      continue;
+    // Wild carded entries are ignored for reverse lookups.
+    if (p->segmentName.empty() || p->sectionName.empty())
+      continue;
+    segmentName = p->segmentName;
+    sectionName = p->sectionName;
+    sectionType = p->sectionType;
+    sectionAttrs = 0;
+    relocsToDefinedCanBeImplicit = false;
+    // The two special cases below are independent of each other. Each `if`
+    // needs its own statement: nesting the typeCFI test under the typeCode
+    // test would make the CFI case unreachable.
+    if (atomType == DefinedAtom::typeCode)
+      sectionAttrs = llvm::MachO::S_ATTR_PURE_INSTRUCTIONS;
+    if (atomType == DefinedAtom::typeCFI)
+      relocsToDefinedCanBeImplicit = true;
+    return;
+  }
+  llvm_unreachable("content type not yet supported");
+normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
+                  bool copyRefs) {
+  // Dispatch on the Mach-O file type. Only dylibs and relocatable objects
+  // are handled; any other type is a programming error.
+  const auto kind = normalizedFile.fileType;
+  if (kind == MH_DYLIB)
+    return dylibToAtoms(normalizedFile, path, copyRefs);
+  if (kind == MH_OBJECT)
+    return objectToAtoms(normalizedFile, path, copyRefs);
+  llvm_unreachable("unhandled MachO file type!");
+} // namespace normalized
+} // namespace mach_o
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp
new file mode 100644
index 000000000000..92a646dab5e0
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp
@@ -0,0 +1,843 @@
+//===- lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp -----------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+/// \file For mach-o object files, this implementation uses YAML I/O to
+/// convert between YAML and the normalized mach-o (NM) representation.
+/// +------------+ +------+
+/// | normalized | <-> | yaml |
+/// +------------+ +------+
+#include "MachONormalizedFile.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Core/Error.h"
+#include "lld/ReaderWriter/YamlContext.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+#include <system_error>
+using llvm::StringRef;
+using namespace llvm::yaml;
+using namespace llvm::MachO;
+using namespace lld::mach_o::normalized;
+using lld::YamlContext;
+// for compatibility with gcc-4.7 in C++11 mode, add extra namespace
+namespace llvm {
+namespace yaml {
+// std::vector<Section> is mapped as a YAML sequence.
+struct SequenceTraits< std::vector<Section> > {
+  /// Number of elements currently in the sequence.
+  static size_t size(IO &io, std::vector<Section> &seq) {
+    return seq.size();
+  }
+  /// Returns the element at \p index, growing the vector to index+1 elements
+  /// first when the index is not yet valid.
+  static Section& element(IO &io, std::vector<Section> &seq, size_t index) {
+    const bool inRange = index < seq.size();
+    if (!inRange)
+      seq.resize(index + 1);
+    return seq[index];
+  }
+// std::vector<Symbol> is mapped as a YAML sequence.
+struct SequenceTraits< std::vector<Symbol> > {
+  /// Number of elements currently in the sequence.
+  static size_t size(IO &io, std::vector<Symbol> &seq) {
+    return seq.size();
+  }
+  /// Returns the element at \p index, growing the vector to index+1 elements
+  /// first when the index is not yet valid.
+  static Symbol& element(IO &io, std::vector<Symbol> &seq, size_t index) {
+    const bool inRange = index < seq.size();
+    if (!inRange)
+      seq.resize(index + 1);
+    return seq[index];
+  }
+// Relocations is mapped as a YAML sequence of Relocation entries.
+struct SequenceTraits< Relocations > {
+  /// Number of elements currently in the sequence.
+  static size_t size(IO &io, Relocations &seq) {
+    return seq.size();
+  }
+  /// Returns the element at \p index, growing the container to index+1
+  /// elements first when the index is not yet valid.
+  static Relocation& element(IO &io, Relocations &seq, size_t index) {
+    const bool inRange = index < seq.size();
+    if (!inRange)
+      seq.resize(index + 1);
+    return seq[index];
+  }
+// Section content is written as a flow sequence of hex bytes (Hex8).
+struct SequenceTraits< ContentBytes > {
+  /// Number of bytes currently in the sequence.
+  static size_t size(IO &io, ContentBytes &seq) {
+    return seq.size();
+  }
+  /// Returns the byte at \p index, growing the container to index+1 bytes
+  /// first when the index is not yet valid.
+  static Hex8& element(IO &io, ContentBytes &seq, size_t index) {
+    const bool inRange = index < seq.size();
+    if (!inRange)
+      seq.resize(index + 1);
+    return seq[index];
+  }
+  // Selects the inline "[ a, b, c ]" flow form.
+  static const bool flow = true;
+// A section's indirect symbols are written as a flow sequence of numbers
+// (symbol table indexes).
+struct SequenceTraits< IndirectSymbols > {
+  /// Number of indexes currently in the sequence.
+  static size_t size(IO &io, IndirectSymbols &seq) {
+    return seq.size();
+  }
+  /// Returns the index slot at \p index, growing the container to index+1
+  /// entries first when the index is not yet valid.
+  static uint32_t& element(IO &io, IndirectSymbols &seq, size_t index) {
+    const bool inRange = index < seq.size();
+    if (!inRange)
+      seq.resize(index + 1);
+    return seq[index];
+  }
+  // Selects the inline "[ a, b, c ]" flow form.
+  static const bool flow = true;
+template <>
+struct ScalarEnumerationTraits<lld::MachOLinkingContext::Arch> {
+  /// Registers the YAML spelling of each architecture, in table order.
+  static void enumeration(IO &io, lld::MachOLinkingContext::Arch &value) {
+    typedef lld::MachOLinkingContext Ctx;
+    static const struct {
+      const char *name;
+      Ctx::Arch arch;
+    } spellings[] = {
+        {"unknown", Ctx::arch_unknown},
+        {"ppc", Ctx::arch_ppc},
+        {"x86", Ctx::arch_x86},
+        {"x86_64", Ctx::arch_x86_64},
+        {"armv6", Ctx::arch_armv6},
+        {"armv7", Ctx::arch_armv7},
+        {"armv7s", Ctx::arch_armv7s},
+        {"arm64", Ctx::arch_arm64},
+    };
+    for (const auto &entry : spellings)
+      io.enumCase(value, entry.name, entry.arch);
+  }
+template <>
+struct ScalarEnumerationTraits<lld::MachOLinkingContext::OS> {
+  /// Registers the YAML spelling of each OS value, in table order.
+  static void enumeration(IO &io, lld::MachOLinkingContext::OS &value) {
+    typedef lld::MachOLinkingContext::OS OS;
+    static const struct {
+      const char *name;
+      OS os;
+    } spellings[] = {
+        {"unknown", OS::unknown},
+        {"Mac OS X", OS::macOSX},
+        {"iOS", OS::iOS},
+        {"iOS Simulator", OS::iOS_simulator},
+    };
+    for (const auto &entry : spellings)
+      io.enumCase(value, entry.name, entry.os);
+  }
+template <>
+struct ScalarEnumerationTraits<HeaderFileType> {
+  /// Registers the YAML spelling of each accepted file type, in table order.
+  static void enumeration(IO &io, HeaderFileType &value) {
+    static const struct {
+      const char *name;
+      HeaderFileType type;
+    } spellings[] = {
+        {"MH_OBJECT", llvm::MachO::MH_OBJECT},
+        {"MH_DYLIB", llvm::MachO::MH_DYLIB},
+        {"MH_EXECUTE", llvm::MachO::MH_EXECUTE},
+        {"MH_BUNDLE", llvm::MachO::MH_BUNDLE},
+    };
+    for (const auto &entry : spellings)
+      io.enumCase(value, entry.name, entry.type);
+  }
+template <>
+struct ScalarBitSetTraits<FileFlags> {
+ static void bitset(IO &io, FileFlags &value) {
+ io.bitSetCase(value, "MH_TWOLEVEL",
+ llvm::MachO::MH_TWOLEVEL);
+ io.bitSetCase(value, "MH_SUBSECTIONS_VIA_SYMBOLS",
+ }
+template <>
+struct ScalarEnumerationTraits<SectionType> {
+ static void enumeration(IO &io, SectionType &value) {
+ io.enumCase(value, "S_REGULAR",
+ llvm::MachO::S_REGULAR);
+ io.enumCase(value, "S_ZEROFILL",
+ llvm::MachO::S_ZEROFILL);
+ io.enumCase(value, "S_CSTRING_LITERALS",
+ io.enumCase(value, "S_4BYTE_LITERALS",
+ llvm::MachO::S_4BYTE_LITERALS);
+ io.enumCase(value, "S_8BYTE_LITERALS",
+ llvm::MachO::S_8BYTE_LITERALS);
+ io.enumCase(value, "S_LITERAL_POINTERS",
+ io.enumCase(value, "S_NON_LAZY_SYMBOL_POINTERS",
+ io.enumCase(value, "S_LAZY_SYMBOL_POINTERS",
+ io.enumCase(value, "S_SYMBOL_STUBS",
+ llvm::MachO::S_SYMBOL_STUBS);
+ io.enumCase(value, "S_MOD_INIT_FUNC_POINTERS",
+ io.enumCase(value, "S_MOD_TERM_FUNC_POINTERS",
+ io.enumCase(value, "S_COALESCED",
+ llvm::MachO::S_COALESCED);
+ io.enumCase(value, "S_GB_ZEROFILL",
+ llvm::MachO::S_GB_ZEROFILL);
+ io.enumCase(value, "S_INTERPOSING",
+ llvm::MachO::S_INTERPOSING);
+ io.enumCase(value, "S_16BYTE_LITERALS",
+ llvm::MachO::S_16BYTE_LITERALS);
+ io.enumCase(value, "S_DTRACE_DOF",
+ llvm::MachO::S_DTRACE_DOF);
+ io.enumCase(value, "S_LAZY_DYLIB_SYMBOL_POINTERS",
+ io.enumCase(value, "S_THREAD_LOCAL_REGULAR",
+ io.enumCase(value, "S_THREAD_LOCAL_ZEROFILL",
+ io.enumCase(value, "S_THREAD_LOCAL_VARIABLES",
+ }
+template <>
+struct ScalarBitSetTraits<SectionAttr> {
+ static void bitset(IO &io, SectionAttr &value) {
+ io.bitSetCase(value, "S_ATTR_PURE_INSTRUCTIONS",
+ io.bitSetCase(value, "S_ATTR_SOME_INSTRUCTIONS",
+ io.bitSetCase(value, "S_ATTR_NO_DEAD_STRIP",
+ llvm::MachO::S_ATTR_NO_DEAD_STRIP);
+ io.bitSetCase(value, "S_ATTR_EXT_RELOC",
+ llvm::MachO::S_ATTR_EXT_RELOC);
+ io.bitSetCase(value, "S_ATTR_LOC_RELOC",
+ llvm::MachO::S_ATTR_LOC_RELOC);
+ io.bitSetCase(value, "S_ATTR_DEBUG",
+ llvm::MachO::S_ATTR_DEBUG);
+ }
+/// Custom scalar formatter for SectionAlignment. input() accepts only powers
+/// of 2 and stores the number unchanged, so the YAML value is the alignment itself rather than the exponent n in 2^n.
+template <> struct ScalarTraits<SectionAlignment> {
+ static void output(const SectionAlignment &value, void *ctxt,
+ raw_ostream &out) {
+ out << llvm::format("%d", (uint32_t)value); // NOTE(review): %d is used with a uint32_t argument; %u would match the type
+ }
+ static StringRef input(StringRef scalar, void *ctxt,
+ SectionAlignment &value) {
+ uint32_t alignment;
+ if (scalar.getAsInteger(0, alignment)) {
+ return "malformed alignment value";
+ }
+ if (!llvm::isPowerOf2_32(alignment))
+ return "alignment must be a power of 2";
+ value = alignment;
+ return StringRef(); // returning empty string means success
+ }
+ static QuotingType mustQuote(StringRef) { return QuotingType::None; }
+template <>
+struct ScalarEnumerationTraits<NListType> {
+  /// Registers the YAML spelling of each symbol type, in table order.
+  static void enumeration(IO &io, NListType &value) {
+    static const struct {
+      const char *name;
+      NListType type;
+    } spellings[] = {
+        {"N_UNDF", llvm::MachO::N_UNDF},
+        {"N_ABS", llvm::MachO::N_ABS},
+        {"N_SECT", llvm::MachO::N_SECT},
+        {"N_PBUD", llvm::MachO::N_PBUD},
+        {"N_INDR", llvm::MachO::N_INDR},
+    };
+    for (const auto &entry : spellings)
+      io.enumCase(value, entry.name, entry.type);
+  }
+template <>
+struct ScalarBitSetTraits<SymbolScope> {
+  /// Registers the YAML names of the scope bits, in table order.
+  static void bitset(IO &io, SymbolScope &value) {
+    static const struct {
+      const char *name;
+      uint32_t bit;
+    } scopeBits[] = {
+        {"N_EXT", llvm::MachO::N_EXT},
+        {"N_PEXT", llvm::MachO::N_PEXT},
+    };
+    for (const auto &entry : scopeBits)
+      io.bitSetCase(value, entry.name, entry.bit);
+  }
+template <>
+struct ScalarBitSetTraits<SymbolDesc> {
+  /// Registers the YAML names of the desc option bits, in table order.
+  static void bitset(IO &io, SymbolDesc &value) {
+    static const struct {
+      const char *name;
+      uint32_t bit;
+    } descBits[] = {
+        {"N_NO_DEAD_STRIP", llvm::MachO::N_NO_DEAD_STRIP},
+        {"N_WEAK_REF", llvm::MachO::N_WEAK_REF},
+        {"N_WEAK_DEF", llvm::MachO::N_WEAK_DEF},
+        {"N_ARM_THUMB_DEF", llvm::MachO::N_ARM_THUMB_DEF},
+        {"N_SYMBOL_RESOLVER", llvm::MachO::N_SYMBOL_RESOLVER},
+    };
+    for (const auto &entry : descBits)
+      io.bitSetCase(value, entry.name, entry.bit);
+  }
+template <>
+struct MappingTraits<Section> {
+ struct NormalizedContentBytes; // NOTE(review): this forward declaration looks unused; the helper defined below is named NormalizedContent
+ static void mapping(IO &io, Section &sect) {
+ io.mapRequired("segment", sect.segmentName);
+ io.mapRequired("section", sect.sectionName);
+ io.mapRequired("type", sect.type);
+ io.mapOptional("attributes", sect.attributes);
+ io.mapOptional("alignment", sect.alignment, (SectionAlignment)1);
+ io.mapRequired("address", sect.address);
+ if (isZeroFillSection(sect.type)) {
+ // Zero-fill sections (per isZeroFillSection) use "size:" instead of "content:"
+ uint64_t size = sect.content.size();
+ io.mapOptional("size", size);
+ if (!io.outputting()) {
+ uint8_t *bytes = nullptr; // when reading, the content keeps only a length: null data pointer, `size` elements
+ sect.content = makeArrayRef(bytes, size);
+ }
+ } else {
+ MappingNormalization<NormalizedContent, ArrayRef<uint8_t>> content(
+ io, sect.content);
+ io.mapOptional("content", content->_normalizedContent);
+ }
+ io.mapOptional("relocations", sect.relocations);
+ io.mapOptional("indirect-syms", sect.indirectSymbols);
+ }
+ struct NormalizedContent { // Hex8-vector stand-in for the section's byte array, used with MappingNormalization above
+ NormalizedContent(IO &io) : _io(io) {}
+ NormalizedContent(IO &io, ArrayRef<uint8_t> content) : _io(io) {
+ // When writing yaml, copy content byte array to Hex8 vector.
+ for (auto &c : content) {
+ _normalizedContent.push_back(c);
+ }
+ }
+ ArrayRef<uint8_t> denormalize(IO &io) {
+ // When reading yaml, allocate byte array owned by NormalizedFile and
+ // copy Hex8 vector to byte array. The NormalizedFile is taken from the YamlContext stored in io.getContext().
+ YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext());
+ assert(info != nullptr);
+ NormalizedFile *file = info->_normalizeMachOFile;
+ assert(file != nullptr);
+ size_t size = _normalizedContent.size();
+ if (!size)
+ return None; // empty content: nothing is allocated
+ uint8_t *bytes = file->ownedAllocations.Allocate<uint8_t>(size);
+ std::copy(_normalizedContent.begin(), _normalizedContent.end(), bytes);
+ return makeArrayRef(bytes, size);
+ }
+ IO &_io;
+ ContentBytes _normalizedContent;
+ };
+template <>
+struct MappingTraits<Relocation> {
+  /// Maps one relocation. "scattered" is mapped before the keys that depend
+  /// on it: scattered relocations carry "value", while non-scattered ones
+  /// carry "extern" and "symbol".
+  static void mapping(IO &io, Relocation &reloc) {
+    // Keys common to both forms.
+    io.mapRequired("offset", reloc.offset);
+    io.mapOptional("scattered", reloc.scattered, false);
+    io.mapRequired("type", reloc.type);
+    io.mapRequired("length", reloc.length);
+    io.mapRequired("pc-rel", reloc.pcRel);
+    // Form-specific keys.
+    if (reloc.scattered) {
+      io.mapRequired("value", reloc.value);
+    } else {
+      io.mapRequired("extern", reloc.isExtern);
+      io.mapRequired("symbol", reloc.symbol);
+    }
+  }
+template <>
+struct ScalarEnumerationTraits<RelocationInfoType> {
+ static void enumeration(IO &io, RelocationInfoType &value) {
+ YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext());
+ assert(info != nullptr);
+ NormalizedFile *file = info->_normalizeMachOFile;
+ assert(file != nullptr);
+ switch (file->arch) {
+ case lld::MachOLinkingContext::arch_x86_64:
+ io.enumCase(value, "X86_64_RELOC_UNSIGNED",
+ llvm::MachO::X86_64_RELOC_UNSIGNED);
+ io.enumCase(value, "X86_64_RELOC_SIGNED",
+ llvm::MachO::X86_64_RELOC_SIGNED);
+ io.enumCase(value, "X86_64_RELOC_BRANCH",
+ llvm::MachO::X86_64_RELOC_BRANCH);
+ io.enumCase(value, "X86_64_RELOC_GOT_LOAD",
+ llvm::MachO::X86_64_RELOC_GOT_LOAD);
+ io.enumCase(value, "X86_64_RELOC_GOT",
+ llvm::MachO::X86_64_RELOC_GOT);
+ io.enumCase(value, "X86_64_RELOC_SUBTRACTOR",
+ llvm::MachO::X86_64_RELOC_SUBTRACTOR);
+ io.enumCase(value, "X86_64_RELOC_SIGNED_1",
+ llvm::MachO::X86_64_RELOC_SIGNED_1);
+ io.enumCase(value, "X86_64_RELOC_SIGNED_2",
+ llvm::MachO::X86_64_RELOC_SIGNED_2);
+ io.enumCase(value, "X86_64_RELOC_SIGNED_4",
+ llvm::MachO::X86_64_RELOC_SIGNED_4);
+ io.enumCase(value, "X86_64_RELOC_TLV",
+ llvm::MachO::X86_64_RELOC_TLV);
+ break;
+ case lld::MachOLinkingContext::arch_x86:
+ io.enumCase(value, "GENERIC_RELOC_VANILLA",
+ io.enumCase(value, "GENERIC_RELOC_PAIR",
+ io.enumCase(value, "GENERIC_RELOC_SECTDIFF",
+ io.enumCase(value, "GENERIC_RELOC_LOCAL_SECTDIFF",
+ io.enumCase(value, "GENERIC_RELOC_TLV",
+ llvm::MachO::GENERIC_RELOC_TLV);
+ break;
+ case lld::MachOLinkingContext::arch_armv6:
+ case lld::MachOLinkingContext::arch_armv7:
+ case lld::MachOLinkingContext::arch_armv7s:
+ io.enumCase(value, "ARM_RELOC_VANILLA",
+ llvm::MachO::ARM_RELOC_VANILLA);
+ io.enumCase(value, "ARM_RELOC_PAIR",
+ llvm::MachO::ARM_RELOC_PAIR);
+ io.enumCase(value, "ARM_RELOC_SECTDIFF",
+ io.enumCase(value, "ARM_RELOC_LOCAL_SECTDIFF",
+ io.enumCase(value, "ARM_RELOC_BR24",
+ llvm::MachO::ARM_RELOC_BR24);
+ io.enumCase(value, "ARM_THUMB_RELOC_BR22",
+ llvm::MachO::ARM_THUMB_RELOC_BR22);
+ io.enumCase(value, "ARM_RELOC_HALF",
+ llvm::MachO::ARM_RELOC_HALF);
+ io.enumCase(value, "ARM_RELOC_HALF_SECTDIFF",
+ break;
+ case lld::MachOLinkingContext::arch_arm64:
+ io.enumCase(value, "ARM64_RELOC_UNSIGNED",
+ llvm::MachO::ARM64_RELOC_UNSIGNED);
+ io.enumCase(value, "ARM64_RELOC_SUBTRACTOR",
+ io.enumCase(value, "ARM64_RELOC_BRANCH26",
+ llvm::MachO::ARM64_RELOC_BRANCH26);
+ io.enumCase(value, "ARM64_RELOC_PAGE21",
+ llvm::MachO::ARM64_RELOC_PAGE21);
+ io.enumCase(value, "ARM64_RELOC_PAGEOFF12",
+ llvm::MachO::ARM64_RELOC_PAGEOFF12);
+ io.enumCase(value, "ARM64_RELOC_GOT_LOAD_PAGE21",
+ llvm::MachO::ARM64_RELOC_GOT_LOAD_PAGE21);
+ io.enumCase(value, "ARM64_RELOC_GOT_LOAD_PAGEOFF12",
+ io.enumCase(value, "ARM64_RELOC_POINTER_TO_GOT",
+ io.enumCase(value, "ARM64_RELOC_TLVP_LOAD_PAGE21",
+ llvm::MachO::ARM64_RELOC_TLVP_LOAD_PAGE21);
+ io.enumCase(value, "ARM64_RELOC_TLVP_LOAD_PAGEOFF12",
+ io.enumCase(value, "ARM64_RELOC_ADDEND",
+ llvm::MachO::ARM64_RELOC_ADDEND);
+ break;
+ default:
+ llvm_unreachable("unknown architecture");
+ }
+ }
+template <>
+struct MappingTraits<Symbol> {
+ static void mapping(IO &io, Symbol& sym) {
+ io.mapRequired("name", sym.name);
+ io.mapRequired("type", sym.type);
+ io.mapOptional("scope", sym.scope, SymbolScope(0));
+ io.mapOptional("sect", sym.sect, (uint8_t)0);
+ if (sym.type == llvm::MachO::N_UNDF) { // "type" was mapped above, so sym.type is already known here
+ // In undef symbols, desc field contains alignment/ordinal info
+ // which is better represented as a hex value.
+ uint16_t t1 = sym.desc;
+ Hex16 t2 = t1; // go through a Hex16 temporary so the key is mapped as hex
+ io.mapOptional("desc", t2, Hex16(0));
+ sym.desc = t2; // copy the (possibly just-read) value back into the symbol
+ } else {
+ // In defined symbols, the desc field is a set of option bits.
+ io.mapOptional("desc", sym.desc, SymbolDesc(0));
+ }
+ io.mapRequired("value", sym.value);
+ }
+// VMProtect is written as three characters, e.g. "r-x": one position each
+// for read, write and execute, with '-' meaning the permission is absent.
+template <>
+struct ScalarTraits<VMProtect> {
+  /// Emits 'r', 'w' and 'x' for the bits that are set and '-' for the rest.
+  static void output(const VMProtect &value, void*, raw_ostream &out) {
+    out << ( (value & llvm::MachO::VM_PROT_READ) ? 'r' : '-')
+        << ( (value & llvm::MachO::VM_PROT_WRITE) ? 'w' : '-')
+        << ( (value & llvm::MachO::VM_PROT_EXECUTE) ? 'x' : '-');
+  }
+  /// Parses the three-character form. Returns an empty string on success and
+  /// a diagnostic otherwise; \p value is reset to 0 before parsing starts.
+  static StringRef input(StringRef scalar, void*, VMProtect &value) {
+    value = 0;
+    if (scalar.size() != 3)
+      return "segment access protection must be three chars (e.g. \"r-x\")";
+    // Position 0: read.
+    if (scalar[0] == 'r')
+      value = llvm::MachO::VM_PROT_READ;
+    else if (scalar[0] != '-')
+      return "segment access protection first char must be 'r' or '-'";
+    // Position 1: write.
+    if (scalar[1] == 'w')
+      value = value | llvm::MachO::VM_PROT_WRITE;
+    else if (scalar[1] != '-')
+      return "segment access protection second char must be 'w' or '-'";
+    // Position 2: execute.
+    if (scalar[2] == 'x')
+      value = value | llvm::MachO::VM_PROT_EXECUTE;
+    else if (scalar[2] != '-')
+      return "segment access protection third char must be 'x' or '-'";
+    // An empty string signals success.
+    return StringRef();
+  }
+  static QuotingType mustQuote(StringRef) { return QuotingType::None; }
+template <>
+struct MappingTraits<Segment> {
+  /// Maps one segment; all five keys are required and are visited in the
+  /// order listed.
+  static void mapping(IO &io, Segment& segment) {
+    io.mapRequired("name", segment.name);
+    io.mapRequired("address", segment.address);
+    io.mapRequired("size", segment.size);
+    // Access protections, initial first and then maximum.
+    io.mapRequired("init-access", segment.init_access);
+    io.mapRequired("max-access", segment.max_access);
+  }
+template <>
+struct ScalarEnumerationTraits<LoadCommandType> {
+ static void enumeration(IO &io, LoadCommandType &value) {
+ io.enumCase(value, "LC_LOAD_DYLIB",
+ llvm::MachO::LC_LOAD_DYLIB);
+ io.enumCase(value, "LC_LOAD_WEAK_DYLIB",
+ llvm::MachO::LC_LOAD_WEAK_DYLIB);
+ io.enumCase(value, "LC_REEXPORT_DYLIB",
+ llvm::MachO::LC_REEXPORT_DYLIB);
+ io.enumCase(value, "LC_LOAD_UPWARD_DYLIB",
+ io.enumCase(value, "LC_LAZY_LOAD_DYLIB",
+ llvm::MachO::LC_LAZY_LOAD_DYLIB);
+ io.enumCase(value, "LC_VERSION_MIN_MACOSX",
+ io.enumCase(value, "LC_VERSION_MIN_IPHONEOS",
+ io.enumCase(value, "LC_VERSION_MIN_TVOS",
+ llvm::MachO::LC_VERSION_MIN_TVOS);
+ io.enumCase(value, "LC_VERSION_MIN_WATCHOS",
+ }
+template <>
+struct MappingTraits<DependentDylib> {
+  /// Maps one dependent dylib. Only "path" is required; "kind" defaults to
+  /// LC_LOAD_DYLIB and both versions default to PackedVersion(0x10000).
+  static void mapping(IO &io, DependentDylib& dependent) {
+    const PackedVersion defaultVersion(0x10000);
+    io.mapRequired("path", dependent.path);
+    io.mapOptional("kind", dependent.kind,
+                   llvm::MachO::LC_LOAD_DYLIB);
+    io.mapOptional("compat-version", dependent.compatVersion,
+                   defaultVersion);
+    io.mapOptional("current-version", dependent.currentVersion,
+                   defaultVersion);
+  }
+template <>
+struct ScalarEnumerationTraits<RebaseType> {
+ static void enumeration(IO &io, RebaseType &value) {
+ io.enumCase(value, "REBASE_TYPE_POINTER",
+ io.enumCase(value, "REBASE_TYPE_TEXT_PCREL32",
+ io.enumCase(value, "REBASE_TYPE_TEXT_ABSOLUTE32",
+ }
+template <>
+struct MappingTraits<RebaseLocation> {
+ static void mapping(IO &io, RebaseLocation& rebase) {
+ io.mapRequired("segment-index", rebase.segIndex);
+ io.mapRequired("segment-offset", rebase.segOffset);
+ io.mapOptional("kind", rebase.kind,
+ }
+template <>
+struct ScalarEnumerationTraits<BindType> {
+ static void enumeration(IO &io, BindType &value) {
+ io.enumCase(value, "BIND_TYPE_POINTER",
+ llvm::MachO::BIND_TYPE_POINTER);
+ io.enumCase(value, "BIND_TYPE_TEXT_ABSOLUTE32",
+ io.enumCase(value, "BIND_TYPE_TEXT_PCREL32",
+ llvm::MachO::BIND_TYPE_TEXT_PCREL32);
+ }
+template <>
+struct MappingTraits<BindLocation> {
+  /// Maps one bind location. Optional keys and their defaults: "kind"
+  /// (BIND_TYPE_POINTER), "can-be-null" (false) and "addend" (0).
+  static void mapping(IO &io, BindLocation &location) {
+    // Where the binding is applied.
+    io.mapRequired("segment-index", location.segIndex);
+    io.mapRequired("segment-offset", location.segOffset);
+    io.mapOptional("kind", location.kind,
+                   llvm::MachO::BIND_TYPE_POINTER);
+    io.mapOptional("can-be-null", location.canBeNull, false);
+    // What it is bound to.
+    io.mapRequired("ordinal", location.ordinal);
+    io.mapRequired("symbol-name", location.symbolName);
+    io.mapOptional("addend", location.addend, Hex64(0));
+  }
+template <>
+struct ScalarEnumerationTraits<ExportSymbolKind> {
+ static void enumeration(IO &io, ExportSymbolKind &value) {
+ }
+template <>
+struct ScalarBitSetTraits<ExportFlags> {
+ static void bitset(IO &io, ExportFlags &value) {
+ io.bitSetCase(value, "EXPORT_SYMBOL_FLAGS_REEXPORT",
+ }
+template <>
+struct MappingTraits<Export> {
+ static void mapping(IO &io, Export &exp) {
+ io.mapRequired("name", exp.name);
+ io.mapOptional("offset", exp.offset);
+ io.mapOptional("kind", exp.kind,
+ if (!io.outputting() || exp.flags)
+ io.mapOptional("flags", exp.flags);
+ io.mapOptional("other", exp.otherOffset, Hex32(0));
+ io.mapOptional("other-name", exp.otherName, StringRef());
+ }
+template <>
+struct ScalarEnumerationTraits<DataRegionType> {
+ static void enumeration(IO &io, DataRegionType &value) {
+ io.enumCase(value, "DICE_KIND_DATA",
+ llvm::MachO::DICE_KIND_DATA);
+ io.enumCase(value, "DICE_KIND_JUMP_TABLE8",
+ llvm::MachO::DICE_KIND_JUMP_TABLE8);
+ io.enumCase(value, "DICE_KIND_JUMP_TABLE16",
+ llvm::MachO::DICE_KIND_JUMP_TABLE16);
+ io.enumCase(value, "DICE_KIND_JUMP_TABLE32",
+ llvm::MachO::DICE_KIND_JUMP_TABLE32);
+ io.enumCase(value, "DICE_KIND_ABS_JUMP_TABLE32",
+ }
+template <>
+struct MappingTraits<DataInCode> {
+ static void mapping(IO &io, DataInCode &entry) {
+ io.mapRequired("offset", entry.offset);
+ io.mapRequired("length", entry.length);
+ io.mapRequired("kind", entry.kind);
+ }
+template <>
+struct ScalarTraits<PackedVersion> {
+ /// Prints a packed version as dotted decimal: the bits above bit 15, then
+ /// bits 8-15, and a third component (bits 0-7) only when it is non-zero.
+ static void output(const PackedVersion &value, void*, raw_ostream &out) {
+   out << llvm::format("%d.%d", (value >> 16), (value >> 8) & 0xFF);
+   // A zero low byte is omitted, so such versions print with two components.
+   if ((value & 0xFF) == 0)
+     return;
+   out << llvm::format(".%d", (value & 0xFF));
+ }
+ /// Parses \p scalar into \p result using
+ /// MachOLinkingContext::parsePackedVersion().
+ ///
+ /// \returns an empty string on success, otherwise an error message; \p result
+ /// is left untouched on failure.
+ static StringRef input(StringRef scalar, void*, PackedVersion &result) {
+   uint32_t parsed;
+   // parsePackedVersion() yields a true value when the text is rejected.
+   if (!lld::MachOLinkingContext::parsePackedVersion(scalar, parsed)) {
+     result = parsed;
+     return StringRef();
+   }
+   return "malformed version number";
+ }
+ static QuotingType mustQuote(StringRef) { return QuotingType::None; }
+template <>
+struct MappingTraits<NormalizedFile> {
+ static void mapping(IO &io, NormalizedFile &file) {
+ io.mapRequired("arch", file.arch);
+ io.mapRequired("file-type", file.fileType);
+ io.mapOptional("flags", file.flags);
+ io.mapOptional("dependents", file.dependentDylibs);
+ io.mapOptional("install-name", file.installName, StringRef());
+ io.mapOptional("compat-version", file.compatVersion, PackedVersion(0x10000));
+ io.mapOptional("current-version", file.currentVersion, PackedVersion(0x10000));
+ io.mapOptional("has-UUID", file.hasUUID, true);
+ io.mapOptional("rpaths", file.rpaths);
+ io.mapOptional("entry-point", file.entryAddress, Hex64(0));
+ io.mapOptional("stack-size", file.stackSize, Hex64(0));
+ io.mapOptional("source-version", file.sourceVersion, Hex64(0));
+ io.mapOptional("OS", file.os);
+ io.mapOptional("min-os-version", file.minOSverson, PackedVersion(0));
+ io.mapOptional("min-os-version-kind", file.minOSVersionKind, (LoadCommandType)0);
+ io.mapOptional("sdk-version", file.sdkVersion, PackedVersion(0));
+ io.mapOptional("segments", file.segments);
+ io.mapOptional("sections", file.sections);
+ io.mapOptional("local-symbols", file.localSymbols);
+ io.mapOptional("global-symbols", file.globalSymbols);
+ io.mapOptional("undefined-symbols",file.undefinedSymbols);
+ io.mapOptional("page-size", file.pageSize, Hex32(4096));
+ io.mapOptional("rebasings", file.rebasingInfo);
+ io.mapOptional("bindings", file.bindingInfo);
+ io.mapOptional("weak-bindings", file.weakBindingInfo);
+ io.mapOptional("lazy-bindings", file.lazyBindingInfo);
+ io.mapOptional("exports", file.exportInfo);
+ io.mapOptional("dataInCode", file.dataInCode);
+ }
+ static StringRef validate(IO &io, NormalizedFile &file) {
+ return StringRef();
+ }
+} // namespace yaml
+} // namespace llvm
+namespace lld {
+namespace mach_o {
+/// Handles !mach-o tagged yaml documents.
+///
+/// Maps the current document into a NormalizedFile, converts that to atoms
+/// with normalizedToAtoms(), and on success hands the resulting file to the
+/// caller through \p file.
+///
+/// \returns true when the document carried the !mach-o tag and was converted
+/// successfully. Returns false when the tag does not match, and also after a
+/// conversion failure or an architecture mismatch has been reported through
+/// io.setError().
+bool MachOYamlIOTaggedDocumentHandler::handledDocTag(llvm::yaml::IO &io,
+                                               const lld::File *&file) const {
+  if (!io.mapTag("!mach-o"))
+    return false;
+  // Step 1: parse yaml into normalized mach-o struct.
+  NormalizedFile nf;
+  YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext());
+  assert(info != nullptr);
+  assert(info->_normalizeMachOFile == nullptr);
+  info->_normalizeMachOFile = &nf;
+  MappingTraits<NormalizedFile>::mapping(io, nf);
+  // Step 2: parse normalized mach-o struct into atoms.
+  auto fileOrError = normalizedToAtoms(nf, info->_path, true);
+  // 'nf' lives on this function's stack, so the shared context must stop
+  // pointing at it before any return below. Clearing it here (rather than
+  // only on the success path) keeps the pointer from dangling after an error
+  // and keeps the assert above valid for the next document.
+  info->_normalizeMachOFile = nullptr;
+  // Check that we parsed successfully.
+  if (!fileOrError) {
+    std::string buffer;
+    llvm::raw_string_ostream stream(buffer);
+    handleAllErrors(fileOrError.takeError(),
+                    [&](const llvm::ErrorInfoBase &EI) {
+      EI.log(stream);
+      stream << "\n";
+    });
+    io.setError(stream.str());
+    return false;
+  }
+  if (nf.arch != _arch) {
+    io.setError(Twine("file is wrong architecture. Expected ("
+                      + MachOLinkingContext::nameFromArch(_arch)
+                      + ") found ("
+                      + MachOLinkingContext::nameFromArch(nf.arch)
+                      + ")"));
+    return false;
+  }
+  file = fileOrError->release();
+  return true;
+namespace normalized {
+/// Parses a yaml encoded mach-o file to produce an in-memory normalized view.
+readYaml(std::unique_ptr<MemoryBuffer> &mb) {
+ // Make empty NormalizedFile.
+ std::unique_ptr<NormalizedFile> f(new NormalizedFile());
+ // Create YAML Input parser.
+ YamlContext yamlContext;
+ yamlContext._normalizeMachOFile = f.get();
+ llvm::yaml::Input yin(mb->getBuffer(), &yamlContext);
+ // Fill NormalizedFile by parsing yaml.
+ yin >> *f;
+ // Return error if there were parsing problems.
+ if (auto ec = yin.error())
+ return llvm::make_error<GenericError>(Twine("YAML parsing error: ")
+ + ec.message());
+ // Hand ownership of instantiated NormalizedFile to caller.
+ return std::move(f);
+/// Writes a yaml encoded mach-o file from an in-memory normalized view.
+std::error_code writeYaml(const NormalizedFile &file, raw_ostream &out) {
+ // YAML I/O is not const aware, so need to cast away ;-(
+ NormalizedFile *f = const_cast<NormalizedFile*>(&file);
+ // Create yaml Output writer, using yaml options for context.
+ YamlContext yamlContext;
+ yamlContext._normalizeMachOFile = f;
+ llvm::yaml::Output yout(out, &yamlContext);
+ // Stream out yaml.
+ yout << *f;
+ return std::error_code();
+} // namespace normalized
+} // namespace mach_o
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachOPasses.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachOPasses.h
new file mode 100644
index 000000000000..cd01d4aa2c93
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachOPasses.h
@@ -0,0 +1,30 @@
+//===- lib/ReaderWriter/MachO/MachOPasses.h -------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Core/PassManager.h"
+#include "lld/ReaderWriter/MachOLinkingContext.h"
+namespace lld {
+namespace mach_o {
+void addLayoutPass(PassManager &pm, const MachOLinkingContext &ctx);
+void addStubsPass(PassManager &pm, const MachOLinkingContext &ctx);
+void addGOTPass(PassManager &pm, const MachOLinkingContext &ctx);
+void addTLVPass(PassManager &pm, const MachOLinkingContext &ctx);
+void addCompactUnwindPass(PassManager &pm, const MachOLinkingContext &ctx);
+void addObjCPass(PassManager &pm, const MachOLinkingContext &ctx);
+void addShimPass(PassManager &pm, const MachOLinkingContext &ctx);
+} // namespace mach_o
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ObjCPass.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ObjCPass.cpp
new file mode 100644
index 000000000000..23c71e0f5ecd
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ObjCPass.cpp
@@ -0,0 +1,132 @@
+//===- lib/ReaderWriter/MachO/ObjCPass.cpp -------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "ArchHandler.h"
+#include "File.h"
+#include "MachONormalizedFileBinaryUtils.h"
+#include "MachOPasses.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Core/DefinedAtom.h"
+#include "lld/Core/File.h"
+#include "lld/Core/Reference.h"
+#include "lld/Core/Simple.h"
+#include "lld/ReaderWriter/MachOLinkingContext.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+namespace lld {
+namespace mach_o {
+/// ObjC Image Info Atom created by the ObjC pass.
+class ObjCImageInfoAtom : public SimpleDefinedAtom {
+ /// Builds the 8-byte image-info payload: a zero version word followed by a
+ /// flags word derived from \p objCConstraint and \p swiftVersion.
+ ObjCImageInfoAtom(const File &file, bool isBig,
+                   MachOLinkingContext::ObjCConstraint objCConstraint,
+                   uint32_t swiftVersion)
+     : SimpleDefinedAtom(file) {
+   Data.info.version = 0;
+   switch (objCConstraint) {
+   case MachOLinkingContext::objc_retainRelease:
+     // Plain retain/release needs no flag bits.
+     Data.info.flags = 0;
+     break;
+   case MachOLinkingContext::objc_retainReleaseForSimulator:
+     // The enumerator's numeric value is already the encoded flag value,
+     // so it is stored as-is.
+     Data.info.flags = (uint32_t)objCConstraint;
+     break;
+   case MachOLinkingContext::objc_unknown:
+     llvm_unreachable("Shouldn't run the objc pass without a constraint");
+   case MachOLinkingContext::objc_supports_gc:
+   case MachOLinkingContext::objc_gc_only:
+     llvm_unreachable("GC is not supported");
+   }
+   // The Swift version is merged in starting at bit 8.
+   Data.info.flags |= (swiftVersion << 8);
+   // Re-store the flags word (second half of the payload) through write32();
+   // presumably this puts it in the byte order selected by isBig.
+   normalized::write32(Data.bytes + 4, Data.info.flags, isBig);
+ }
+ ~ObjCImageInfoAtom() override = default;
+ ContentType contentType() const override {
+ return DefinedAtom::typeObjCImageInfo;
+ }
+ Alignment alignment() const override {
+ return 4;
+ }
+ uint64_t size() const override {
+ return 8;
+ }
+ ContentPermissions permissions() const override {
+ return DefinedAtom::permR__;
+ }
+ ArrayRef<uint8_t> rawContent() const override {
+ return llvm::makeArrayRef(Data.bytes, size());
+ }
+ struct objc_image_info {
+ uint32_t version;
+ uint32_t flags;
+ };
+ union {
+ objc_image_info info;
+ uint8_t bytes[8];
+ } Data;
+class ObjCPass : public Pass {
+ ObjCPass(const MachOLinkingContext &context)
+ : _ctx(context),
+ _file(*_ctx.make_file<MachOFile>("<mach-o objc pass>")) {
+ _file.setOrdinal(_ctx.getNextOrdinalAndIncrement());
+ }
+ llvm::Error perform(SimpleFile &mergedFile) override {
+ // Add the image info.
+ mergedFile.addAtom(*getImageInfo());
+ return llvm::Error::success();
+ }
+ /// Allocates a new ObjCImageInfoAtom in this pass's file, configured from the
+ /// linking context's architecture, ObjC constraint and Swift version.
+ const DefinedAtom* getImageInfo() {
+   const bool bigEndian = MachOLinkingContext::isBigEndian(_ctx.arch());
+   // Placement-new into the file's allocator, so the atom is not freed
+   // individually.
+   auto *imageInfo = new (_file.allocator())
+       ObjCImageInfoAtom(_file, bigEndian, _ctx.objcConstraint(),
+                         _ctx.swiftVersion());
+   return imageInfo;
+ }
+ const MachOLinkingContext &_ctx;
+ MachOFile &_file;
+void addObjCPass(PassManager &pm, const MachOLinkingContext &ctx) {
+ pm.add(llvm::make_unique<ObjCPass>(ctx));
+} // end namespace mach_o
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/SectCreateFile.h b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/SectCreateFile.h
new file mode 100644
index 000000000000..49e65f63151d
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/SectCreateFile.h
@@ -0,0 +1,102 @@
+//===---- lib/ReaderWriter/MachO/SectCreateFile.h ---------------*- c++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Core/DefinedAtom.h"
+#include "lld/Core/Simple.h"
+#include "lld/ReaderWriter/MachOLinkingContext.h"
+namespace lld {
+namespace mach_o {
+// A SectCreateFile holds one atom for each section registered through
+// addSection(); each atom's content is the MemoryBuffer supplied with it.
+class SectCreateFile : public File {
+ /// Atom whose content is an entire caller-supplied MemoryBuffer and which
+ /// must be placed in the custom section named "<segName>/<sectName>".
+ class SectCreateAtom : public SimpleDefinedAtom {
+ public:
+   /// \param file     File that owns this atom.
+   /// \param segName  Segment part of the custom section name.
+   /// \param sectName Section part of the custom section name.
+   /// \param content  Buffer providing the atom's bytes; ownership is taken.
+   SectCreateAtom(const File &file, StringRef segName, StringRef sectName,
+                  std::unique_ptr<MemoryBuffer> content)
+       : SimpleDefinedAtom(file),
+         _combinedName((segName + "/" + sectName).str()),
+         // _segName/_sectName were previously never initialized, so the
+         // accessors below always returned empty strings. They are set up as
+         // views into _combinedName (declared, and therefore initialized,
+         // before them) so they do not depend on the lifetime of the
+         // caller's strings.
+         _segName(StringRef(_combinedName).substr(0, segName.size())),
+         _sectName(StringRef(_combinedName).substr(segName.size() + 1)),
+         _content(std::move(content)) {}
+   ~SectCreateAtom() override = default;
+   /// Size of the atom is the size of the backing buffer.
+   uint64_t size() const override { return _content->getBufferSize(); }
+   Scope scope() const override { return scopeGlobal; }
+   ContentType contentType() const override { return typeSectCreate; }
+   SectionChoice sectionChoice() const override { return sectionCustomRequired; }
+   /// Returns the combined "<segName>/<sectName>" string.
+   StringRef customSectionName() const override { return _combinedName; }
+   DeadStripKind deadStrip() const override { return deadStripNever; }
+   /// Exposes the backing buffer's bytes without copying them.
+   ArrayRef<uint8_t> rawContent() const override {
+     const uint8_t *data =
+         reinterpret_cast<const uint8_t*>(_content->getBufferStart());
+     return ArrayRef<uint8_t>(data, _content->getBufferSize());
+   }
+   /// Segment part of the section name given at construction.
+   StringRef segmentName() const { return _segName; }
+   /// Section part of the section name given at construction.
+   StringRef sectionName() const { return _sectName; }
+ private:
+   std::string _combinedName;               // "<segName>/<sectName>"
+   StringRef _segName;                      // view into _combinedName
+   StringRef _sectName;                     // view into _combinedName
+   std::unique_ptr<MemoryBuffer> _content;  // owns the atom's bytes
+ };
+ SectCreateFile() : File("sectcreate", kindSectCreateObject) {}
+ void addSection(StringRef seg, StringRef sect,
+ std::unique_ptr<MemoryBuffer> content) {
+ _definedAtoms.push_back(
+ new (allocator()) SectCreateAtom(*this, seg, sect, std::move(content)));
+ }
+ const AtomRange<DefinedAtom> defined() const override {
+ return _definedAtoms;
+ }
+ const AtomRange<UndefinedAtom> undefined() const override {
+ return _noUndefinedAtoms;
+ }
+ const AtomRange<SharedLibraryAtom> sharedLibrary() const override {
+ return _noSharedLibraryAtoms;
+ }
+ const AtomRange<AbsoluteAtom> absolute() const override {
+ return _noAbsoluteAtoms;
+ }
+ void clearAtoms() override {
+ _definedAtoms.clear();
+ _noUndefinedAtoms.clear();
+ _noSharedLibraryAtoms.clear();
+ _noAbsoluteAtoms.clear();
+ }
+ AtomVector<DefinedAtom> _definedAtoms;
+} // namespace mach_o
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ShimPass.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ShimPass.cpp
new file mode 100644
index 000000000000..8a2d2e910cad
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/ShimPass.cpp
@@ -0,0 +1,129 @@
+//===- lib/ReaderWriter/MachO/ShimPass.cpp -------------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This linker pass updates branch-sites whose target is a different mode
+// (thumb vs arm).
+// Arm code has two instruction encodings thumb and arm. When branching from
+// one code encoding to another, you need to use an instruction that switches
+// the instruction mode. Usually the transition only happens at call sites, and
+// the linker can transform a BL instruction in BLX (or vice versa). But if the
+// compiler did a tail call optimization and a function ends with a branch (not
+// branch and link), there is no pc-rel BX instruction.
+// The ShimPass looks for pc-rel B instructions that will need to switch mode.
+// For those cases it synthesizes a shim which does the transition, then
+// modifies the original atom with the B instruction to target to the shim atom.
+#include "ArchHandler.h"
+#include "File.h"
+#include "MachOPasses.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Core/DefinedAtom.h"
+#include "lld/Core/File.h"
+#include "lld/Core/Reference.h"
+#include "lld/Core/Simple.h"
+#include "lld/ReaderWriter/MachOLinkingContext.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+namespace lld {
+namespace mach_o {
+class ShimPass : public Pass {
+ ShimPass(const MachOLinkingContext &context)
+ : _ctx(context), _archHandler(_ctx.archHandler()),
+ _stubInfo(_archHandler.stubInfo()),
+ _file(*_ctx.make_file<MachOFile>("<mach-o shim pass>")) {
+ _file.setOrdinal(_ctx.getNextOrdinalAndIncrement());
+ }
+ /// Retargets every non-call branch whose source and target atoms differ in
+ /// thumb-ness to a shim atom, then appends the shims, ordered by name, to
+ /// \p mergedFile. Always returns success.
+ llvm::Error perform(SimpleFile &mergedFile) override {
+   for (const DefinedAtom *atom : mergedFile.defined()) {
+     for (const Reference *ref : *atom) {
+       // Only non-call branches are of interest here.
+       if (!_archHandler.isNonCallBranch(*ref))
+         continue;
+       const Atom *target = ref->target();
+       assert(target != nullptr);
+       // Targets that are not defined atoms are left alone.
+       const lld::DefinedAtom *definedTarget = dyn_cast<DefinedAtom>(target);
+       if (!definedTarget)
+         continue;
+       const bool fromThumb = _archHandler.isThumbFunction(*atom);
+       const bool toThumb = _archHandler.isThumbFunction(*definedTarget);
+       if (fromThumb != toThumb)
+         updateBranchToUseShim(fromThumb, *definedTarget, ref);
+     }
+   }
+   // Nothing to add when no branch needed a shim.
+   if (_targetToShim.empty())
+     return llvm::Error::success();
+   // Collect the shims and order them by name so the layout is stable
+   // regardless of the map's iteration order.
+   std::vector<const DefinedAtom *> ordered;
+   ordered.reserve(_targetToShim.size());
+   for (const auto &entry : _targetToShim)
+     ordered.push_back(entry.second);
+   std::sort(ordered.begin(), ordered.end(),
+             [](const DefinedAtom *lhs, const DefinedAtom *rhs) {
+               return lhs->name() < rhs->name();
+             });
+   for (const DefinedAtom *shimAtom : ordered)
+     mergedFile.addAtom(*shimAtom);
+   return llvm::Error::success();
+ }
+ /// Points \p ref at the shim for \p target, creating the shim if needed.
+ void updateBranchToUseShim(bool thumbToArm, const DefinedAtom& target,
+                            const Reference *ref) {
+   // Look up (or create) the shim atom that stands in for this target.
+   const DefinedAtom *shimAtom = getShim(thumbToArm, target);
+   assert(shimAtom != nullptr);
+   // The reference arrives const; cast it away to retarget the branch site.
+   const_cast<Reference *>(ref)->setTarget(shimAtom);
+ }
+ /// Returns the shim recorded for \p target, creating and recording one via
+ /// the arch handler on first use. Shims are keyed by target only.
+ const DefinedAtom* getShim(bool thumbToArm, const DefinedAtom& target) {
+   auto found = _targetToShim.find(&target);
+   if (found != _targetToShim.end()) {
+     // A shim was already made for this target; hand it back.
+     assert(found->second != nullptr);
+     return found->second;
+   }
+   // First request for this target: build a shim and remember it.
+   const DefinedAtom *created =
+       _archHandler.createShim(_file, thumbToArm, target);
+   _targetToShim[&target] = created;
+   return created;
+ }
+ const MachOLinkingContext &_ctx;
+ mach_o::ArchHandler &_archHandler;
+ const ArchHandler::StubInfo &_stubInfo;
+ MachOFile &_file;
+ llvm::DenseMap<const Atom*, const DefinedAtom*> _targetToShim;
+void addShimPass(PassManager &pm, const MachOLinkingContext &ctx) {
+ pm.add(llvm::make_unique<ShimPass>(ctx));
+} // end namespace mach_o
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/StubsPass.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/StubsPass.cpp
new file mode 100644
index 000000000000..04c586df336c
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/StubsPass.cpp
@@ -0,0 +1,379 @@
+//===- lib/ReaderWriter/MachO/StubsPass.cpp ---------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This linker pass updates call-sites which have references to shared library
+// atoms to instead have a reference to a stub (PLT entry) for the specified
+// symbol. Each file format defines a subclass of StubsPass which implements
+// the abstract methods for creating the file format specific StubAtoms.
+#include "ArchHandler.h"
+#include "File.h"
+#include "MachOPasses.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Core/DefinedAtom.h"
+#include "lld/Core/File.h"
+#include "lld/Core/Reference.h"
+#include "lld/Core/Simple.h"
+#include "lld/ReaderWriter/MachOLinkingContext.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+namespace lld {
+namespace mach_o {
+// Lazy Pointer Atom created by the stubs pass.
+class LazyPointerAtom : public SimpleDefinedAtom {
+ LazyPointerAtom(const File &file, bool is64)
+ : SimpleDefinedAtom(file), _is64(is64) { }
+ ~LazyPointerAtom() override = default;
+ ContentType contentType() const override {
+ return DefinedAtom::typeLazyPointer;
+ }
+ Alignment alignment() const override {
+ return _is64 ? 8 : 4;
+ }
+ uint64_t size() const override {
+ return _is64 ? 8 : 4;
+ }
+ ContentPermissions permissions() const override {
+ return DefinedAtom::permRW_;
+ }
+ ArrayRef<uint8_t> rawContent() const override {
+ static const uint8_t zeros[] =
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+ return llvm::makeArrayRef(zeros, size());
+ }
+ const bool _is64;
+// NonLazyPointer (GOT) Atom created by the stubs pass.
+class NonLazyPointerAtom : public SimpleDefinedAtom {
+ NonLazyPointerAtom(const File &file, bool is64, ContentType contentType)
+ : SimpleDefinedAtom(file), _is64(is64), _contentType(contentType) { }
+ ~NonLazyPointerAtom() override = default;
+ ContentType contentType() const override {
+ return _contentType;
+ }
+ Alignment alignment() const override {
+ return _is64 ? 8 : 4;
+ }
+ uint64_t size() const override {
+ return _is64 ? 8 : 4;
+ }
+ ContentPermissions permissions() const override {
+ return DefinedAtom::permRW_;
+ }
+ ArrayRef<uint8_t> rawContent() const override {
+ static const uint8_t zeros[] =
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+ return llvm::makeArrayRef(zeros, size());
+ }
+ const bool _is64;
+ const ContentType _contentType;
+// Stub Atom created by the stubs pass.
+class StubAtom : public SimpleDefinedAtom {
+ StubAtom(const File &file, const ArchHandler::StubInfo &stubInfo)
+ : SimpleDefinedAtom(file), _stubInfo(stubInfo){ }
+ ~StubAtom() override = default;
+ ContentType contentType() const override {
+ return DefinedAtom::typeStub;
+ }
+ Alignment alignment() const override {
+ return 1 << _stubInfo.codeAlignment;
+ }
+ uint64_t size() const override {
+ return _stubInfo.stubSize;
+ }
+ ContentPermissions permissions() const override {
+ return DefinedAtom::permR_X;
+ }
+ ArrayRef<uint8_t> rawContent() const override {
+ return llvm::makeArrayRef(_stubInfo.stubBytes, _stubInfo.stubSize);
+ }
+ const ArchHandler::StubInfo &_stubInfo;
+// Stub Helper Atom created by the stubs pass.
+class StubHelperAtom : public SimpleDefinedAtom {
+ StubHelperAtom(const File &file, const ArchHandler::StubInfo &stubInfo)
+ : SimpleDefinedAtom(file), _stubInfo(stubInfo) { }
+ ~StubHelperAtom() override = default;
+ ContentType contentType() const override {
+ return DefinedAtom::typeStubHelper;
+ }
+ Alignment alignment() const override {
+ return 1 << _stubInfo.codeAlignment;
+ }
+ uint64_t size() const override {
+ return _stubInfo.stubHelperSize;
+ }
+ ContentPermissions permissions() const override {
+ return DefinedAtom::permR_X;
+ }
+ ArrayRef<uint8_t> rawContent() const override {
+ return llvm::makeArrayRef(_stubInfo.stubHelperBytes,
+ _stubInfo.stubHelperSize);
+ }
+ const ArchHandler::StubInfo &_stubInfo;
+// Stub Helper Common Atom created by the stubs pass.
+class StubHelperCommonAtom : public SimpleDefinedAtom {
+ StubHelperCommonAtom(const File &file, const ArchHandler::StubInfo &stubInfo)
+ : SimpleDefinedAtom(file), _stubInfo(stubInfo) { }
+ ~StubHelperCommonAtom() override = default;
+ ContentType contentType() const override {
+ return DefinedAtom::typeStubHelper;
+ }
+ Alignment alignment() const override {
+ return 1 << _stubInfo.stubHelperCommonAlignment;
+ }
+ uint64_t size() const override {
+ return _stubInfo.stubHelperCommonSize;
+ }
+ ContentPermissions permissions() const override {
+ return DefinedAtom::permR_X;
+ }
+ ArrayRef<uint8_t> rawContent() const override {
+ return llvm::makeArrayRef(_stubInfo.stubHelperCommonBytes,
+ _stubInfo.stubHelperCommonSize);
+ }
+ const ArchHandler::StubInfo &_stubInfo;
+class StubsPass : public Pass {
+ StubsPass(const MachOLinkingContext &context)
+ : _ctx(context), _archHandler(_ctx.archHandler()),
+ _stubInfo(_archHandler.stubInfo()),
+ _file(*_ctx.make_file<MachOFile>("<mach-o Stubs pass>")) {
+ _file.setOrdinal(_ctx.getNextOrdinalAndIncrement());
+ }
+ /// Redirects call sites that target shared-library atoms or interposable
+ /// defined atoms to per-target stub atoms, and adds the stubs, lazy
+ /// pointers, helpers and the shared helper-common / non-lazy-pointer atoms
+ /// to \p mergedFile.
+ ///
+ /// \returns success, or an error if stubs are needed but no shared-library
+ /// atom named _stubInfo.binderSymbolName is present in \p mergedFile.
+ llvm::Error perform(SimpleFile &mergedFile) override {
+   // Skip this pass if output format uses text relocations instead of stubs.
+   if (!this->noTextRelocs())
+     return llvm::Error::success();
+   // Scan all references in all atoms.
+   for (const DefinedAtom *atom : mergedFile.defined()) {
+     for (const Reference *ref : *atom) {
+       // Look at call-sites.
+       if (!this->isCallSite(*ref))
+         continue;
+       const Atom *target = ref->target();
+       assert(target != nullptr);
+       if (isa<SharedLibraryAtom>(target)) {
+         // Calls to shared libraries go through stubs.
+         _targetToUses[target].push_back(ref);
+         continue;
+       }
+       const DefinedAtom *defTarget = dyn_cast<DefinedAtom>(target);
+       if (defTarget && defTarget->interposable() != DefinedAtom::interposeNo){
+         // Calls to interposable functions in same linkage unit must also go
+         // through a stub.
+         assert(defTarget->scope() != DefinedAtom::scopeTranslationUnit);
+         _targetToUses[target].push_back(ref);
+       }
+     }
+   }
+   // Exit early if no stubs needed.
+   if (_targetToUses.empty())
+     return llvm::Error::success();
+   // First add help-common and GOT slots used by lazy binding.
+   SimpleDefinedAtom *helperCommonAtom =
+       new (_file.allocator()) StubHelperCommonAtom(_file, _stubInfo);
+   SimpleDefinedAtom *helperCacheNLPAtom =
+       new (_file.allocator()) NonLazyPointerAtom(_file, _ctx.is64Bit(),
+                                   _stubInfo.stubHelperImageCacheContentType);
+   SimpleDefinedAtom *helperBinderNLPAtom =
+       new (_file.allocator()) NonLazyPointerAtom(_file, _ctx.is64Bit(),
+                                   _stubInfo.stubHelperImageCacheContentType);
+   addReference(helperCommonAtom, _stubInfo.stubHelperCommonReferenceToCache,
+                helperCacheNLPAtom);
+   addOptReference(
+       helperCommonAtom, _stubInfo.stubHelperCommonReferenceToCache,
+       _stubInfo.optStubHelperCommonReferenceToCache, helperCacheNLPAtom);
+   addReference(helperCommonAtom, _stubInfo.stubHelperCommonReferenceToBinder,
+                helperBinderNLPAtom);
+   addOptReference(
+       helperCommonAtom, _stubInfo.stubHelperCommonReferenceToBinder,
+       _stubInfo.optStubHelperCommonReferenceToBinder, helperBinderNLPAtom);
+   mergedFile.addAtom(*helperCommonAtom);
+   mergedFile.addAtom(*helperBinderNLPAtom);
+   mergedFile.addAtom(*helperCacheNLPAtom);
+   // Add reference to dyld_stub_binder in libSystem.dylib
+   auto I = std::find_if(
+       mergedFile.sharedLibrary().begin(), mergedFile.sharedLibrary().end(),
+       [&](const SharedLibraryAtom *atom) {
+         return atom->name().equals(_stubInfo.binderSymbolName);
+       });
+   // Report a missing binder as a link error. An assert alone would let a
+   // build without assertions dereference the end iterator below.
+   if (I == mergedFile.sharedLibrary().end())
+     return llvm::make_error<GenericError>("dyld_stub_binder not found");
+   addReference(helperBinderNLPAtom, _stubInfo.nonLazyPointerReferenceToBinder, *I);
+   // Sort targets by name, so stubs and lazy pointers are consistent
+   std::vector<const Atom *> targetsNeedingStubs;
+   targetsNeedingStubs.reserve(_targetToUses.size());
+   // Iterate by reference: each map value is a SmallVector, and copying the
+   // whole entry just to read its key is wasted work.
+   for (const auto &entry : _targetToUses)
+     targetsNeedingStubs.push_back(entry.first);
+   std::sort(targetsNeedingStubs.begin(), targetsNeedingStubs.end(),
+             [](const Atom * left, const Atom * right) {
+     return (left->name().compare(right->name()) < 0);
+   });
+   // Make and append stubs, lazy pointers, and helpers in alphabetical order.
+   unsigned lazyOffset = 0;
+   for (const Atom *target : targetsNeedingStubs) {
+     auto *stub = new (_file.allocator()) StubAtom(_file, _stubInfo);
+     auto *lp =
+         new (_file.allocator()) LazyPointerAtom(_file, _ctx.is64Bit());
+     auto *helper = new (_file.allocator()) StubHelperAtom(_file, _stubInfo);
+     addReference(stub, _stubInfo.stubReferenceToLP, lp);
+     addOptReference(stub, _stubInfo.stubReferenceToLP,
+                     _stubInfo.optStubReferenceToLP, lp);
+     addReference(lp, _stubInfo.lazyPointerReferenceToHelper, helper);
+     addReference(lp, _stubInfo.lazyPointerReferenceToFinal, target);
+     addReference(helper, _stubInfo.stubHelperReferenceToImm, helper);
+     // Same reference kind again, this time carrying the running lazy offset
+     // as its addend.
+     addReferenceAddend(helper, _stubInfo.stubHelperReferenceToImm, helper,
+                        lazyOffset);
+     addReference(helper, _stubInfo.stubHelperReferenceToHelperCommon,
+                  helperCommonAtom);
+     mergedFile.addAtom(*stub);
+     mergedFile.addAtom(*lp);
+     mergedFile.addAtom(*helper);
+     // Update each reference to use stub.
+     for (const Reference *ref : _targetToUses[target]) {
+       assert(ref->target() == target);
+       // Switch call site to reference stub atom instead.
+       const_cast<Reference *>(ref)->setTarget(stub);
+     }
+     // Calculate new offset
+     lazyOffset += target->name().size() + 12;
+   }
+   return llvm::Error::success();
+ }
+ bool noTextRelocs() {
+ return true;
+ }
+ bool isCallSite(const Reference &ref) {
+ return _archHandler.isCallSite(ref);
+ }
+ void addReference(SimpleDefinedAtom* atom,
+ const ArchHandler::ReferenceInfo &refInfo,
+ const lld::Atom* target) {
+ atom->addReference(Reference::KindNamespace::mach_o,
+ refInfo.arch, refInfo.kind, refInfo.offset,
+ target, refInfo.addend);
+ }
+ void addReferenceAddend(SimpleDefinedAtom *atom,
+ const ArchHandler::ReferenceInfo &refInfo,
+ const lld::Atom *target, uint64_t addend) {
+ atom->addReference(Reference::KindNamespace::mach_o, refInfo.arch,
+ refInfo.kind, refInfo.offset, target, addend);
+ }
+ void addOptReference(SimpleDefinedAtom* atom,
+ const ArchHandler::ReferenceInfo &refInfo,
+ const ArchHandler::OptionalRefInfo &optRef,
+ const lld::Atom* target) {
+ if (!optRef.used)
+ return;
+ atom->addReference(Reference::KindNamespace::mach_o,
+ refInfo.arch, optRef.kind, optRef.offset,
+ target, optRef.addend);
+ }
+ typedef llvm::DenseMap<const Atom*,
+ llvm::SmallVector<const Reference *, 8>> TargetToUses;
+ const MachOLinkingContext &_ctx;
+ mach_o::ArchHandler &_archHandler;
+ const ArchHandler::StubInfo &_stubInfo;
+ MachOFile &_file;
+ TargetToUses _targetToUses;
+void addStubsPass(PassManager &pm, const MachOLinkingContext &ctx) {
+ pm.add(std::unique_ptr<Pass>(new StubsPass(ctx)));
+} // end namespace mach_o
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/TLVPass.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/TLVPass.cpp
new file mode 100644
index 000000000000..e362e507ebf2
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/TLVPass.cpp
@@ -0,0 +1,141 @@
+//===- lib/ReaderWriter/MachO/TLVPass.cpp -----------------------*- C++ -*-===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+/// \file
+/// This linker pass transforms all TLV references to real references.
+#include "ArchHandler.h"
+#include "File.h"
+#include "MachOPasses.h"
+#include "lld/Core/Simple.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+namespace lld {
+namespace mach_o {
+// TLVP Entry Atom created by the TLV pass.
+class TLVPEntryAtom : public SimpleDefinedAtom {
+ TLVPEntryAtom(const File &file, bool is64, StringRef name)
+ : SimpleDefinedAtom(file), _is64(is64), _name(name) {}
+ ~TLVPEntryAtom() override = default;
+ ContentType contentType() const override {
+ return DefinedAtom::typeTLVInitializerPtr;
+ }
+ Alignment alignment() const override {
+ return _is64 ? 8 : 4;
+ }
+ uint64_t size() const override {
+ return _is64 ? 8 : 4;
+ }
+ ContentPermissions permissions() const override {
+ return DefinedAtom::permRW_;
+ }
+ ArrayRef<uint8_t> rawContent() const override {
+ static const uint8_t zeros[] =
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+ return llvm::makeArrayRef(zeros, size());
+ }
+ StringRef slotName() const {
+ return _name;
+ }
+ const bool _is64;
+ StringRef _name;
+class TLVPass : public Pass {
+ TLVPass(const MachOLinkingContext &context)
+ : _ctx(context), _archHandler(_ctx.archHandler()),
+ _file(*_ctx.make_file<MachOFile>("<mach-o TLV pass>")) {
+ _file.setOrdinal(_ctx.getNextOrdinalAndIncrement());
+ }
+ /// Retargets every TLV-access reference in \p mergedFile to a TLVP entry
+ /// atom for its original target, then appends the entries, ordered by slot
+ /// name, to \p mergedFile.
+ ///
+ /// \returns an error as soon as a TLV access is found while the minOS check
+ /// below fails; success otherwise.
+ llvm::Error perform(SimpleFile &mergedFile) override {
+   const bool tlvSupported = _ctx.minOS("10.7", "1.0");
+   for (const DefinedAtom *atom : mergedFile.defined()) {
+     for (const Reference *ref : *atom) {
+       if (!_archHandler.isTLVAccess(*ref))
+         continue;
+       // The OS check only matters once a TLV access is actually present.
+       if (!tlvSupported)
+         return llvm::make_error<GenericError>(
+           "targeted OS version does not support use of thread local "
+           "variables in " + atom->name() + " for architecture " +
+           _ctx.archName());
+       const Atom *target = ref->target();
+       assert(target != nullptr);
+       // Point the reference at the entry atom and let the arch handler
+       // rewrite the reference to match.
+       const DefinedAtom *entryAtom = makeTLVPEntry(target);
+       const_cast<Reference*>(ref)->setTarget(entryAtom);
+       _archHandler.updateReferenceToTLV(ref);
+     }
+   }
+   // Emit the entries in slot-name order rather than map iteration order.
+   std::vector<const TLVPEntryAtom*> ordered;
+   ordered.reserve(_targetToTLVP.size());
+   for (const auto &mapped : _targetToTLVP)
+     ordered.push_back(mapped.second);
+   std::sort(ordered.begin(), ordered.end(),
+             [](const TLVPEntryAtom *a, const TLVPEntryAtom *b) {
+               return a->slotName().compare(b->slotName()) < 0;
+             });
+   for (const TLVPEntryAtom *entryAtom : ordered)
+     mergedFile.addAtom(*entryAtom);
+   return llvm::Error::success();
+ }
+ /// Returns the TLVP entry atom for \p target, creating it (with a single
+ /// reference back to \p target at offset 0) the first time it is requested.
+ const DefinedAtom *makeTLVPEntry(const Atom *target) {
+   auto existing = _targetToTLVP.find(target);
+   if (existing != _targetToTLVP.end())
+     return existing->second;
+   auto *entry = new (_file.allocator())
+       TLVPEntryAtom(_file, _ctx.is64Bit(), target->name());
+   // The arch and kind are borrowed from the stub info's
+   // non-lazy-pointer-to-binder reference; offset and addend are both 0.
+   const ArchHandler::ReferenceInfo &nlInfo =
+       _archHandler.stubInfo().nonLazyPointerReferenceToBinder;
+   entry->addReference(Reference::KindNamespace::mach_o, nlInfo.arch,
+                       nlInfo.kind, 0, target, 0);
+   _targetToTLVP[target] = entry;
+   return entry;
+ }
+ const MachOLinkingContext &_ctx;
+ mach_o::ArchHandler &_archHandler;
+ MachOFile &_file;
+ llvm::DenseMap<const Atom*, const TLVPEntryAtom*> _targetToTLVP;
+void addTLVPass(PassManager &pm, const MachOLinkingContext &ctx) {
+ assert(ctx.needsTLVPass());
+ pm.add(llvm::make_unique<TLVPass>(ctx));
+} // end namespace mach_o
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/WriterMachO.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/WriterMachO.cpp
new file mode 100644
index 000000000000..c457e7b55a43
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/MachO/WriterMachO.cpp
@@ -0,0 +1,71 @@
+//===- lib/ReaderWriter/MachO/WriterMachO.cpp -----------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "ExecutableAtoms.h"
+#include "MachONormalizedFile.h"
+#include "lld/Core/File.h"
+#include "lld/Core/Writer.h"
+#include "lld/ReaderWriter/MachOLinkingContext.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <system_error>
+using lld::mach_o::normalized::NormalizedFile;
+namespace lld {
+namespace mach_o {
+/// Writer that turns the linked atom graph into a mach-o binary.
+class MachOWriter : public Writer {
+  MachOWriter(const MachOLinkingContext &ctxt) : _ctx(ctxt) {}
+
+  /// Normalize \p file and write it to \p path as a mach-o binary.
+  /// When the context's printAtoms() is set, \p file is first dumped through
+  /// the YAML writer to "-". Any error from a step is returned immediately.
+  llvm::Error writeFile(const lld::File &file, StringRef path) override {
+    // Construct empty normalized file from atoms.
+    llvm::Expected<std::unique_ptr<NormalizedFile>> normFile =
+        normalized::normalizedFromAtoms(file, _ctx);
+    if (auto err = normFile.takeError())
+      return err;
+
+    // For testing, write out yaml form of normalized file.
+    if (_ctx.printAtoms()) {
+      std::unique_ptr<Writer> yamlDumper = createWriterYAML(_ctx);
+      if (auto err = yamlDumper->writeFile(file, "-"))
+        return err;
+    }
+
+    // Write normalized file as mach-o binary.
+    return writeBinary(**normFile, path);
+  }
+
+  /// Append the implicit helper files this output needs to \p r.
+  void createImplicitFiles(std::vector<std::unique_ptr<File>> &r) override {
+    // When building main executables, add _main as required entry point.
+    if (_ctx.outputTypeHasEntry())
+      r.emplace_back(new CEntryFile(_ctx));
+
+    // If this can link with dylibs, need helper function (dyld_stub_binder).
+    if (_ctx.needsStubsPass())
+      r.emplace_back(new StubHelperFile(_ctx));
+
+    // Final linked images can access a symbol for their mach_header.
+    const bool isFinalImage =
+        _ctx.outputMachOType() != llvm::MachO::MH_OBJECT;
+    if (isFinalImage)
+      r.emplace_back(new MachHeaderAliasFile(_ctx));
+  }
+
+  // Linking context this writer was created for.
+  const MachOLinkingContext &_ctx;
+};
+} // namespace mach_o
+/// Factory: build the mach-o Writer bound to \p context.
+std::unique_ptr<Writer> createWriterMachO(const MachOLinkingContext &context) {
+  std::unique_ptr<Writer> writer(new lld::mach_o::MachOWriter(context));
+  return writer;
+} // namespace lld
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/YAML/CMakeLists.txt b/contrib/llvm/tools/lld/lib/ReaderWriter/YAML/CMakeLists.txt
new file mode 100644
index 000000000000..0e63574a63d2
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/YAML/CMakeLists.txt
@@ -0,0 +1,9 @@
+ ReaderWriterYAML.cpp
+ Support
+ lldCore
+ )
diff --git a/contrib/llvm/tools/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp b/contrib/llvm/tools/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp
new file mode 100644
index 000000000000..59548684e677
--- /dev/null
+++ b/contrib/llvm/tools/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp
@@ -0,0 +1,1404 @@
+//===- lib/ReaderWriter/YAML/ReaderWriterYAML.cpp -------------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "lld/Core/AbsoluteAtom.h"
+#include "lld/Core/ArchiveLibraryFile.h"
+#include "lld/Core/Atom.h"
+#include "lld/Core/DefinedAtom.h"
+#include "lld/Core/Error.h"
+#include "lld/Core/File.h"
+#include "lld/Core/LinkingContext.h"
+#include "lld/Core/Reader.h"
+#include "lld/Core/Reference.h"
+#include "lld/Core/SharedLibraryAtom.h"
+#include "lld/Core/Simple.h"
+#include "lld/Core/UndefinedAtom.h"
+#include "lld/Core/Writer.h"
+#include "lld/ReaderWriter/YamlContext.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <vector>
+using llvm::file_magic;
+using llvm::yaml::MappingTraits;
+using llvm::yaml::ScalarEnumerationTraits;
+using llvm::yaml::ScalarTraits;
+using llvm::yaml::IO;
+using llvm::yaml::SequenceTraits;
+using llvm::yaml::DocumentListTraits;
+using namespace lld;
+/// The conversion of Atoms to and from YAML uses LLVM's YAML I/O. This
+/// file just defines template specializations on the lld types which control
+/// how the mapping is done to and from YAML.
+namespace {
+/// Used when writing yaml files.
+/// In most cases, atom names are unambiguous, so references can just
+/// use the atom name as the target (e.g. target: foo). But in a few
+/// cases that does not work, so ref-names are added. These are labels
+/// used only in yaml. The labels do not exist in the Atom model.
+/// One need for ref-names is when atoms have no user-supplied name
+/// (e.g. c-string literal). Another case is when two object files with
+/// identically named static functions are merged (ld -r) into one object file.
+/// In that case referencing the function by name is ambiguous, so a unique
+/// ref-name is added.
+class RefNameBuilder {
+  /// Scan every atom of \p file and assign ref-names where a plain name
+  /// cannot identify an atom.
+  ///
+  /// Named atoms (defined, undefined, shared-library, absolute) go through
+  /// buildDuplicateNameMap() to detect name collisions. Every reference whose
+  /// target has an empty name gets a generated "L%03d" ref-name.
+  RefNameBuilder(const lld::File &file)
+      : _collisionCount(0), _unnamedCounter(0) {
+    // visit all atoms
+    for (const lld::DefinedAtom *atom : file.defined()) {
+      // Build map of atoms names to detect duplicates
+      if (!atom->name().empty())
+        buildDuplicateNameMap(*atom);
+
+      // Find references to unnamed atoms and create ref-names for them.
+      for (const lld::Reference *ref : *atom) {
+        // create refname for any unnamed reference target
+        const lld::Atom *target = ref->target();
+        if ((target != nullptr) && target->name().empty()) {
+          std::string storage;
+          llvm::raw_string_ostream buffer(storage);
+          buffer << llvm::format("L%03d", _unnamedCounter++);
+          StringRef newName = copyString(buffer.str());
+          _refNames[target] = newName;
+          // Trace output is gated on the "WriterYAML" debug type, like the
+          // other diagnostics in this file.
+          DEBUG_WITH_TYPE("WriterYAML",
+                          llvm::dbgs() << "unnamed atom: creating ref-name: '"
+                                       << newName << "' ("
+                                       << (const void *)newName.data() << ", "
+                                       << newName.size() << ")\n");
+        }
+      }
+    }
+    for (const lld::UndefinedAtom *undefAtom : file.undefined()) {
+      buildDuplicateNameMap(*undefAtom);
+    }
+    for (const lld::SharedLibraryAtom *shlibAtom : file.sharedLibrary()) {
+      buildDuplicateNameMap(*shlibAtom);
+    }
+    for (const lld::AbsoluteAtom *absAtom : file.absolute()) {
+      if (!absAtom->name().empty())
+        buildDuplicateNameMap(*absAtom);
+    }
+  }
+  /// Record \p atom's name and, if another atom already uses it, give both
+  /// atoms a unique "<name>.NNN" ref-name.
+  ///
+  /// \p atom must have a non-empty name (asserted). The atom that was seen
+  /// first only receives a ref-name if it does not have one already.
+  void buildDuplicateNameMap(const lld::Atom &atom) {
+    assert(!atom.name().empty());
+    NameToAtom::iterator pos = _nameMap.find(atom.name());
+    if (pos != _nameMap.end()) {
+      // Found name collision, give each a unique ref-name.
+      std::string Storage;
+      llvm::raw_string_ostream buffer(Storage);
+      buffer << atom.name() << llvm::format(".%03d", ++_collisionCount);
+      StringRef newName = copyString(buffer.str());
+      _refNames[&atom] = newName;
+      DEBUG_WITH_TYPE("WriterYAML",
+                      llvm::dbgs() << "name collsion: creating ref-name: '"
+                                   << newName << "' ("
+                                   << (const void *)newName.data()
+                                   << ", " << newName.size() << ")\n");
+      const lld::Atom *prevAtom = pos->second;
+      AtomToRefName::iterator pos2 = _refNames.find(prevAtom);
+      if (pos2 == _refNames.end()) {
+        // Only create ref-name for previous if none already created.
+        std::string Storage2;
+        llvm::raw_string_ostream buffer2(Storage2);
+        buffer2 << prevAtom->name() << llvm::format(".%03d", ++_collisionCount);
+        StringRef newName2 = copyString(buffer2.str());
+        _refNames[prevAtom] = newName2;
+        DEBUG_WITH_TYPE("WriterYAML",
+                        llvm::dbgs() << "name collsion: creating ref-name: '"
+                                     << newName2 << "' ("
+                                     << (const void *)newName2.data() << ", "
+                                     << newName2.size() << ")\n");
+      }
+    } else {
+      // First time we've seen this name, just add it to map.
+      _nameMap[atom.name()] = &atom;
+      DEBUG_WITH_TYPE("WriterYAML", llvm::dbgs()
+                                        << "atom name seen for first time: '"
+                                        << atom.name() << "' ("
+                                        << (const void *)atom.name().data()
+                                        << ", " << atom.name().size() << ")\n");
+    }
+  }
+  /// True when a ref-name has been recorded for \p atom.
+  bool hasRefName(const lld::Atom *atom) {
+    return _refNames.find(atom) != _refNames.end();
+  }
+  /// Return the ref-name recorded for \p atom.
+  ///
+  /// Precondition: hasRefName(atom) is true. The lookup result used to be
+  /// dereferenced unchecked; the precondition is now asserted.
+  StringRef refName(const lld::Atom *atom) {
+    AtomToRefName::iterator pos = _refNames.find(atom);
+    assert(pos != _refNames.end() && "atom has no ref-name");
+    return pos->second;
+  }
+ // Atom name -> first atom seen with that name.
+ typedef llvm::StringMap<const lld::Atom *> NameToAtom;
+ // Atom -> ref-name assigned to it.
+ typedef llvm::DenseMap<const lld::Atom *, std::string> AtomToRefName;
+  // Duplicate the characters of \p str into _storage and return a StringRef
+  // over the copy, so the strings can be freed when RefNameBuilder is
+  // destroyed. No terminating NUL is written.
+  StringRef copyString(StringRef str) {
+    const size_t len = str.size();
+    char *copy = _storage.Allocate<char>(len);
+    memcpy(copy, str.data(), len);
+    return StringRef(copy, len);
+  }
+ // Running counter used for the ".NNN" suffix of colliding names.
+ unsigned int _collisionCount;
+ // Running counter used for the "LNNN" names of unnamed targets.
+ unsigned int _unnamedCounter;
+ // Names seen so far, for collision detection.
+ NameToAtom _nameMap;
+ // Ref-names assigned so far.
+ AtomToRefName _refNames;
+ // Backing store for strings returned by copyString().
+ llvm::BumpPtrAllocator _storage;
+/// Used when reading yaml files to find the target of a reference
+/// that could be a name or ref-name.
+class RefNameResolver {
+ RefNameResolver(const lld::File *file, IO &io);
+  /// Find the atom registered under \p name.
+  /// On a miss, an error is set on the IO object and nullptr is returned.
+  const lld::Atom *lookup(StringRef name) const {
+    NameToAtom::const_iterator found = _nameMap.find(name);
+    if (found == _nameMap.end()) {
+      _io.setError(Twine("no such atom name: ") + name);
+      return nullptr;
+    }
+    return found->second;
+  }
+ typedef llvm::StringMap<const lld::Atom *> NameToAtom;
+  /// Register \p atom under \p name. A name that is already registered is
+  /// left untouched and reported as an error on the IO object.
+  void add(StringRef name, const lld::Atom *atom) {
+    if (!_nameMap.count(name)) {
+      _nameMap[name] = atom;
+      return;
+    }
+    _io.setError(Twine("duplicate atom name: ") + name);
+  }
+ // YAML IO object on which lookup()/add() report errors.
+ IO &_io;
+ // Name (or ref-name) -> atom.
+ NameToAtom _nameMap;
+/// Mapping of Atoms.
+/// Thin wrapper around a vector of OwningAtomPtr<T>; begin()/end() iterate
+/// that vector directly.
+template <typename T> class AtomList {
+ using Ty = std::vector<OwningAtomPtr<T>>;
+ typename Ty::iterator begin() { return _atoms.begin(); }
+ typename Ty::iterator end() { return _atoms.end(); }
+ // The owned atoms.
+ Ty _atoms;
+/// Mapping of kind: field in yaml files.
+/// The yaml spellings of these values are defined by
+/// ScalarEnumerationTraits<FileKinds>.
+enum FileKinds {
+ fileKindObjectAtoms, // atom based object file encoded in yaml
+ fileKindArchive, // static archive library encoded in yaml
+ fileKindObjectMachO // mach-o object files encoded in yaml
+// One member of an archive encoded in yaml; mapped by
+// MappingTraits<ArchMember> using the keys "kind", "name" and "content".
+struct ArchMember {
+ FileKinds _kind;
+ StringRef _name;
+ const lld::File *_content;
+// The content bytes in a DefinedAtom are just uint8_t but we want
+// special formatting, so define a strong type.
+LLVM_YAML_STRONG_TYPEDEF(uint8_t, ImplicitHex8)
+// SharedLibraryAtoms have a bool canBeNull() method which we'd like to be
+// more readable than just true/false.
+// lld::Reference::Kind is a tuple of <namespace, arch, value>.
+// For yaml, we just want one string that encapsulates the tuple.
+// ScalarTraits<RefKind> performs the conversion to and from that string.
+struct RefKind {
+ Reference::KindNamespace ns;
+ Reference::KindArch arch;
+ Reference::KindValue value;
+} // end anonymous namespace
+LLVM_YAML_IS_SEQUENCE_VECTOR(const lld::Reference *)
+// Always write DefinedAtoms content bytes as a flow sequence.
+// for compatibility with gcc-4.7 in C++11 mode, add extra namespace
+namespace llvm {
+namespace yaml {
+// Custom scalar formatter for RefKind. Both directions need a YamlContext
+// with a registry as the context pointer (asserted).
+template <> struct ScalarTraits<RefKind> {
+  /// Write \p kind using the registry's name for it; when the registry has
+  /// no name, fall back to the numeric form "<ns>-<arch>-<value>".
+  static void output(const RefKind &kind, void *ctxt, raw_ostream &out) {
+    assert(ctxt != nullptr);
+    YamlContext *yamlCtx = reinterpret_cast<YamlContext *>(ctxt);
+    assert(yamlCtx->_registry);
+    StringRef kindName;
+    const bool known = yamlCtx->_registry->referenceKindToString(
+        kind.ns, kind.arch, kind.value, kindName);
+    if (!known) {
+      out << (int)(kind.ns) << "-" << (int)(kind.arch) << "-" << kind.value;
+      return;
+    }
+    out << kindName;
+  }
+
+  /// Parse \p scalar through the registry into \p kind.
+  /// Returns an empty string on success, otherwise an error message.
+  static StringRef input(StringRef scalar, void *ctxt, RefKind &kind) {
+    assert(ctxt != nullptr);
+    YamlContext *yamlCtx = reinterpret_cast<YamlContext *>(ctxt);
+    assert(yamlCtx->_registry);
+    const bool parsed = yamlCtx->_registry->referenceKindFromString(
+        scalar, kind.ns, kind.arch, kind.value);
+    if (parsed)
+      return StringRef();
+    return StringRef("unknown reference kind");
+  }
+
+  static QuotingType mustQuote(StringRef) { return QuotingType::None; }
+/// YAML spellings of lld::File::Kind.
+template <> struct ScalarEnumerationTraits<lld::File::Kind> {
+  static void enumeration(IO &io, lld::File::Kind &value) {
+    // Spelling/value pairs, offered to the IO object in this order.
+    static const struct {
+      const char *spelling;
+      lld::File::Kind kind;
+    } cases[] = {
+        {"error-object", lld::File::kindErrorObject},
+        {"object", lld::File::kindMachObject},
+        {"shared-library", lld::File::kindSharedLibrary},
+        {"static-library", lld::File::kindArchiveLibrary},
+    };
+    for (const auto &entry : cases)
+      io.enumCase(value, entry.spelling, entry.kind);
+  }
+/// YAML spellings of lld::Atom::Scope.
+template <> struct ScalarEnumerationTraits<lld::Atom::Scope> {
+  static void enumeration(IO &io, lld::Atom::Scope &value) {
+    // Spelling/value pairs, offered to the IO object in this order.
+    static const struct {
+      const char *spelling;
+      lld::Atom::Scope scope;
+    } cases[] = {
+        {"global", lld::Atom::scopeGlobal},
+        {"hidden", lld::Atom::scopeLinkageUnit},
+        {"static", lld::Atom::scopeTranslationUnit},
+    };
+    for (const auto &entry : cases)
+      io.enumCase(value, entry.spelling, entry.scope);
+  }
+/// YAML spellings of lld::DefinedAtom::SectionChoice.
+template <> struct ScalarEnumerationTraits<lld::DefinedAtom::SectionChoice> {
+  static void enumeration(IO &io, lld::DefinedAtom::SectionChoice &value) {
+    // Spelling/value pairs, offered to the IO object in this order.
+    static const struct {
+      const char *spelling;
+      lld::DefinedAtom::SectionChoice choice;
+    } cases[] = {
+        {"content", lld::DefinedAtom::sectionBasedOnContent},
+        {"custom", lld::DefinedAtom::sectionCustomPreferred},
+        {"custom-required", lld::DefinedAtom::sectionCustomRequired},
+    };
+    for (const auto &entry : cases)
+      io.enumCase(value, entry.spelling, entry.choice);
+  }
+/// YAML spellings of lld::DefinedAtom::Interposable.
+template <> struct ScalarEnumerationTraits<lld::DefinedAtom::Interposable> {
+  static void enumeration(IO &io, lld::DefinedAtom::Interposable &value) {
+    // Spelling/value pairs, offered to the IO object in this order.
+    static const struct {
+      const char *spelling;
+      lld::DefinedAtom::Interposable interpose;
+    } cases[] = {
+        {"no", DefinedAtom::interposeNo},
+        {"yes", DefinedAtom::interposeYes},
+        {"yes-and-weak", DefinedAtom::interposeYesAndRuntimeWeak},
+    };
+    for (const auto &entry : cases)
+      io.enumCase(value, entry.spelling, entry.interpose);
+  }
+/// YAML spellings of lld::DefinedAtom::Merge.
+template <> struct ScalarEnumerationTraits<lld::DefinedAtom::Merge> {
+  static void enumeration(IO &io, lld::DefinedAtom::Merge &value) {
+    // Spelling/value pairs, offered to the IO object in this order.
+    static const struct {
+      const char *spelling;
+      lld::DefinedAtom::Merge merge;
+    } cases[] = {
+        {"no", lld::DefinedAtom::mergeNo},
+        {"as-tentative", lld::DefinedAtom::mergeAsTentative},
+        {"as-weak", lld::DefinedAtom::mergeAsWeak},
+        {"as-addressed-weak", lld::DefinedAtom::mergeAsWeakAndAddressUsed},
+        {"by-content", lld::DefinedAtom::mergeByContent},
+        {"same-name-and-size", lld::DefinedAtom::mergeSameNameAndSize},
+        {"largest", lld::DefinedAtom::mergeByLargestSection},
+    };
+    for (const auto &entry : cases)
+      io.enumCase(value, entry.spelling, entry.merge);
+  }
+/// YAML spellings of lld::DefinedAtom::DeadStripKind.
+template <> struct ScalarEnumerationTraits<lld::DefinedAtom::DeadStripKind> {
+  static void enumeration(IO &io, lld::DefinedAtom::DeadStripKind &value) {
+    // Spelling/value pairs, offered to the IO object in this order.
+    static const struct {
+      const char *spelling;
+      lld::DefinedAtom::DeadStripKind strip;
+    } cases[] = {
+        {"normal", lld::DefinedAtom::deadStripNormal},
+        {"never", lld::DefinedAtom::deadStripNever},
+        {"always", lld::DefinedAtom::deadStripAlways},
+    };
+    for (const auto &entry : cases)
+      io.enumCase(value, entry.spelling, entry.strip);
+  }
+/// YAML spellings of lld::DefinedAtom::DynamicExport.
+template <> struct ScalarEnumerationTraits<lld::DefinedAtom::DynamicExport> {
+  static void enumeration(IO &io, lld::DefinedAtom::DynamicExport &value) {
+    // Spelling/value pairs, offered to the IO object in this order.
+    static const struct {
+      const char *spelling;
+      lld::DefinedAtom::DynamicExport exportKind;
+    } cases[] = {
+        {"normal", lld::DefinedAtom::dynamicExportNormal},
+        {"always", lld::DefinedAtom::dynamicExportAlways},
+    };
+    for (const auto &entry : cases)
+      io.enumCase(value, entry.spelling, entry.exportKind);
+  }
+/// YAML spellings of lld::DefinedAtom::CodeModel.
+template <> struct ScalarEnumerationTraits<lld::DefinedAtom::CodeModel> {
+  static void enumeration(IO &io, lld::DefinedAtom::CodeModel &value) {
+    // Spelling/value pairs, offered to the IO object in this order.
+    static const struct {
+      const char *spelling;
+      lld::DefinedAtom::CodeModel model;
+    } cases[] = {
+        {"none", lld::DefinedAtom::codeNA},
+        {"mips-pic", lld::DefinedAtom::codeMipsPIC},
+        {"mips-micro", lld::DefinedAtom::codeMipsMicro},
+        {"mips-micro-pic", lld::DefinedAtom::codeMipsMicroPIC},
+        {"mips-16", lld::DefinedAtom::codeMips16},
+        {"arm-thumb", lld::DefinedAtom::codeARMThumb},
+        {"arm-a", lld::DefinedAtom::codeARM_a},
+        {"arm-d", lld::DefinedAtom::codeARM_d},
+        {"arm-t", lld::DefinedAtom::codeARM_t},
+    };
+    for (const auto &entry : cases)
+      io.enumCase(value, entry.spelling, entry.model);
+  }
+/// YAML spellings of lld::DefinedAtom::ContentPermissions.
+template <>
+struct ScalarEnumerationTraits<lld::DefinedAtom::ContentPermissions> {
+  static void enumeration(IO &io, lld::DefinedAtom::ContentPermissions &value) {
+    // Spelling/value pairs, offered to the IO object in this order.
+    static const struct {
+      const char *spelling;
+      lld::DefinedAtom::ContentPermissions perms;
+    } cases[] = {
+        {"---", lld::DefinedAtom::perm___},
+        {"r--", lld::DefinedAtom::permR__},
+        {"r-x", lld::DefinedAtom::permR_X},
+        {"rw-", lld::DefinedAtom::permRW_},
+        {"rwx", lld::DefinedAtom::permRWX},
+        {"rw-l", lld::DefinedAtom::permRW_L},
+        {"unknown", lld::DefinedAtom::permUnknown},
+    };
+    for (const auto &entry : cases)
+      io.enumCase(value, entry.spelling, entry.perms);
+  }
+// YAML spellings of lld::DefinedAtom::ContentType. Each enumCase() call pairs
+// one yaml string with one enum value; the calls are made in the order listed.
+template <> struct ScalarEnumerationTraits<lld::DefinedAtom::ContentType> {
+ static void enumeration(IO &io, lld::DefinedAtom::ContentType &value) {
+ io.enumCase(value, "unknown", DefinedAtom::typeUnknown);
+ io.enumCase(value, "code", DefinedAtom::typeCode);
+ io.enumCase(value, "stub", DefinedAtom::typeStub);
+ io.enumCase(value, "constant", DefinedAtom::typeConstant);
+ io.enumCase(value, "data", DefinedAtom::typeData);
+ io.enumCase(value, "quick-data", DefinedAtom::typeDataFast);
+ io.enumCase(value, "zero-fill", DefinedAtom::typeZeroFill);
+ io.enumCase(value, "zero-fill-quick", DefinedAtom::typeZeroFillFast);
+ io.enumCase(value, "const-data", DefinedAtom::typeConstData);
+ io.enumCase(value, "got", DefinedAtom::typeGOT);
+ io.enumCase(value, "resolver", DefinedAtom::typeResolver);
+ io.enumCase(value, "branch-island", DefinedAtom::typeBranchIsland);
+ io.enumCase(value, "branch-shim", DefinedAtom::typeBranchShim);
+ io.enumCase(value, "stub-helper", DefinedAtom::typeStubHelper);
+ io.enumCase(value, "c-string", DefinedAtom::typeCString);
+ io.enumCase(value, "utf16-string", DefinedAtom::typeUTF16String);
+ io.enumCase(value, "unwind-cfi", DefinedAtom::typeCFI);
+ io.enumCase(value, "unwind-lsda", DefinedAtom::typeLSDA);
+ io.enumCase(value, "const-4-byte", DefinedAtom::typeLiteral4);
+ io.enumCase(value, "const-8-byte", DefinedAtom::typeLiteral8);
+ io.enumCase(value, "const-16-byte", DefinedAtom::typeLiteral16);
+ io.enumCase(value, "lazy-pointer", DefinedAtom::typeLazyPointer);
+ io.enumCase(value, "lazy-dylib-pointer",
+ DefinedAtom::typeLazyDylibPointer);
+ io.enumCase(value, "cfstring", DefinedAtom::typeCFString);
+ io.enumCase(value, "initializer-pointer",
+ DefinedAtom::typeInitializerPtr);
+ io.enumCase(value, "terminator-pointer",
+ DefinedAtom::typeTerminatorPtr);
+ io.enumCase(value, "c-string-pointer",DefinedAtom::typeCStringPtr);
+ io.enumCase(value, "objc-class-pointer",
+ DefinedAtom::typeObjCClassPtr);
+ io.enumCase(value, "objc-category-list",
+ DefinedAtom::typeObjC2CategoryList);
+ io.enumCase(value, "objc-image-info",
+ DefinedAtom::typeObjCImageInfo);
+ io.enumCase(value, "objc-method-list",
+ DefinedAtom::typeObjCMethodList);
+ io.enumCase(value, "objc-class1", DefinedAtom::typeObjC1Class);
+ io.enumCase(value, "dtraceDOF", DefinedAtom::typeDTraceDOF);
+ io.enumCase(value, "interposing-tuples",
+ DefinedAtom::typeInterposingTuples);
+ io.enumCase(value, "lto-temp", DefinedAtom::typeTempLTO);
+ io.enumCase(value, "compact-unwind", DefinedAtom::typeCompactUnwindInfo);
+ io.enumCase(value, "unwind-info", DefinedAtom::typeProcessedUnwindInfo);
+ io.enumCase(value, "tlv-thunk", DefinedAtom::typeThunkTLV);
+ io.enumCase(value, "tlv-data", DefinedAtom::typeTLVInitialData);
+ io.enumCase(value, "tlv-zero-fill", DefinedAtom::typeTLVInitialZeroFill);
+ io.enumCase(value, "tlv-initializer-ptr",
+ DefinedAtom::typeTLVInitializerPtr);
+ io.enumCase(value, "mach_header", DefinedAtom::typeMachHeader);
+ io.enumCase(value, "dso_handle", DefinedAtom::typeDSOHandle);
+ io.enumCase(value, "sectcreate", DefinedAtom::typeSectCreate);
+ }
+/// YAML spellings of lld::UndefinedAtom::CanBeNull.
+template <> struct ScalarEnumerationTraits<lld::UndefinedAtom::CanBeNull> {
+  static void enumeration(IO &io, lld::UndefinedAtom::CanBeNull &value) {
+    // Spelling/value pairs, offered to the IO object in this order.
+    static const struct {
+      const char *spelling;
+      lld::UndefinedAtom::CanBeNull nullness;
+    } cases[] = {
+        {"never", lld::UndefinedAtom::canBeNullNever},
+        {"at-runtime", lld::UndefinedAtom::canBeNullAtRuntime},
+        {"at-buildtime", lld::UndefinedAtom::canBeNullAtBuildtime},
+    };
+    for (const auto &entry : cases)
+      io.enumCase(value, entry.spelling, entry.nullness);
+  }
+// YAML spellings of ShlibCanBeNull: "never" pairs with false and
+// "at-runtime" pairs with true.
+template <> struct ScalarEnumerationTraits<ShlibCanBeNull> {
+ static void enumeration(IO &io, ShlibCanBeNull &value) {
+ io.enumCase(value, "never", false);
+ io.enumCase(value, "at-runtime", true);
+ }
+/// YAML spellings of lld::SharedLibraryAtom::Type.
+template <>
+struct ScalarEnumerationTraits<lld::SharedLibraryAtom::Type> {
+  static void enumeration(IO &io, lld::SharedLibraryAtom::Type &value) {
+    // Spelling/value pairs, offered to the IO object in this order.
+    static const struct {
+      const char *spelling;
+      lld::SharedLibraryAtom::Type type;
+    } cases[] = {
+        {"code", lld::SharedLibraryAtom::Type::Code},
+        {"data", lld::SharedLibraryAtom::Type::Data},
+        {"unknown", lld::SharedLibraryAtom::Type::Unknown},
+    };
+    for (const auto &entry : cases)
+      io.enumCase(value, entry.spelling, entry.type);
+  }
+/// Custom formatter for lld::DefinedAtom::Alignment. Values look like:
+///   8          # 8-byte aligned
+///   7 mod 16   # 16-byte aligned plus 7 bytes
+template <> struct ScalarTraits<lld::DefinedAtom::Alignment> {
+  /// Write "<value>" when the modulus is zero, else "<modulus> mod <value>".
+  static void output(const lld::DefinedAtom::Alignment &value, void *ctxt,
+                     raw_ostream &out) {
+    if (value.modulus != 0) {
+      out << llvm::format("%d mod %d", value.modulus, value.value);
+      return;
+    }
+    out << llvm::format("%d", value.value);
+  }
+
+  /// Parse "<value>" or "<modulus> mod <value>" into \p value.
+  /// Returns an empty string on success, otherwise an error message.
+  static StringRef input(StringRef scalar, void *ctxt,
+                         lld::DefinedAtom::Alignment &value) {
+    value.modulus = 0;
+    const size_t modPos = scalar.find("mod");
+    if (modPos != StringRef::npos) {
+      // Text before "mod" is the modulus; what follows is the alignment.
+      StringRef modulusText = scalar.slice(0, modPos).rtrim();
+      unsigned int parsedModulus;
+      if (modulusText.getAsInteger(0, parsedModulus))
+        return "malformed alignment modulus";
+      value.modulus = parsedModulus;
+      scalar = scalar.drop_front(modPos + 3).ltrim();
+    }
+    unsigned int parsedAlign;
+    if (scalar.getAsInteger(0, parsedAlign))
+      return "malformed alignment power";
+    value.value = parsedAlign;
+    // The modulus must be strictly smaller than the alignment.
+    if (value.modulus >= parsedAlign)
+      return "malformed alignment, modulus too large for power";
+    return StringRef(); // returning empty string means success
+  }
+
+  static QuotingType mustQuote(StringRef) { return QuotingType::None; }
+/// YAML spellings of FileKinds.
+template <> struct ScalarEnumerationTraits<FileKinds> {
+  static void enumeration(IO &io, FileKinds &value) {
+    // Spelling/value pairs, offered to the IO object in this order.
+    static const struct {
+      const char *spelling;
+      FileKinds kind;
+    } cases[] = {
+        {"object", fileKindObjectAtoms},
+        {"archive", fileKindArchive},
+        {"object-mach-o", fileKindObjectMachO},
+    };
+    for (const auto &entry : cases)
+      io.enumCase(value, entry.spelling, entry.kind);
+  }
+// YAML mapping for one archive member: optional "kind" (defaulting to
+// fileKindObjectAtoms), optional "name", and required "content".
+template <> struct MappingTraits<ArchMember> {
+ static void mapping(IO &io, ArchMember &member) {
+ io.mapOptional("kind", member._kind, fileKindObjectAtoms);
+ io.mapOptional("name", member._name);
+ io.mapRequired("content", member._content);
+ }
+// Declare that an AtomList is a yaml sequence.
+template <typename T> struct SequenceTraits<AtomList<T> > {
+  /// Number of atoms currently held by \p seq.
+  static size_t size(IO &io, AtomList<T> &seq) { return seq._atoms.size(); }
+
+  /// Slot \p index of \p seq; the list is grown first when \p index is past
+  /// its current end.
+  static T *&element(IO &io, AtomList<T> &seq, size_t index) {
+    auto &atoms = seq._atoms;
+    if (atoms.size() <= index)
+      atoms.resize(index + 1);
+    return atoms[index].get();
+  }
+// Declare that an AtomRange is a yaml sequence.
+// element() never grows the range: it asserts that the IO object is
+// outputting and that the index is in range.
+template <typename T> struct SequenceTraits<File::AtomRange<T> > {
+ static size_t size(IO &io, File::AtomRange<T> &seq) { return seq.size(); }
+ static T *&element(IO &io, File::AtomRange<T> &seq, size_t index) {
+ assert(io.outputting() && "AtomRange only used when outputting");
+ assert(index < seq.size() && "Out of range access");
+ return seq[index].get();
+ }
+// Used to allow DefinedAtom content bytes to be a flow sequence of
+// two-digit hex numbers without the leading 0x (e.g. FF, 04, 0A)
+template <> struct ScalarTraits<ImplicitHex8> {
+  /// Write \p val as two upper-case hex digits.
+  static void output(const ImplicitHex8 &val, void *, raw_ostream &out) {
+    uint8_t byte = val;
+    out << llvm::format("%02X", byte);
+  }
+
+  /// Parse \p str as a base-16 number no larger than 0xFF into \p val.
+  /// Returns an empty string on success, otherwise an error message.
+  static StringRef input(StringRef str, void *, ImplicitHex8 &val) {
+    unsigned long long parsed;
+    const bool malformed = getAsUnsignedInteger(str, 16, parsed);
+    if (malformed)
+      return "invalid two-digit-hex number";
+    if (parsed > 0xFF)
+      return "out of range two-digit-hex number";
+    val = parsed;
+    return StringRef(); // returning empty string means success
+  }
+
+  static QuotingType mustQuote(StringRef) { return QuotingType::None; }
+// YAML conversion for std::vector<const lld::File*>
+template <> struct DocumentListTraits<std::vector<const lld::File *> > {
+  /// Number of documents (files) in \p seq.
+  static size_t size(IO &io, std::vector<const lld::File *> &seq) {
+    return seq.size();
+  }
+
+  /// Slot \p index of \p seq; the vector is grown first when \p index is
+  /// past its current end.
+  static const lld::File *&element(IO &io, std::vector<const lld::File *> &seq,
+                                   size_t index) {
+    if (seq.size() <= index)
+      seq.resize(index + 1);
+    return seq[index];
+  }
+// YAML conversion for const lld::File*
+template <> struct MappingTraits<const lld::File *> {
+  /// Normalized form of an archive used by the YAML mapping: a path plus a
+  /// list of members. All atom ranges it exposes are the empty ones.
+  class NormArchiveFile : public lld::ArchiveLibraryFile {
+  public:
+    NormArchiveFile(IO &io) : ArchiveLibraryFile("") {}
+
+    NormArchiveFile(IO &io, const lld::File *file)
+        : ArchiveLibraryFile(file->path()), _path(file->path()) {
+      // If we want to support writing archives, this constructor would
+      // need to populate _members.
+    }
+
+    /// The normalized object is itself the denormalized file.
+    const lld::File *denormalize(IO &io) { return this; }
+
+    const AtomRange<lld::DefinedAtom> defined() const override {
+      return _noDefinedAtoms;
+    }
+
+    const AtomRange<lld::UndefinedAtom> undefined() const override {
+      return _noUndefinedAtoms;
+    }
+
+    const AtomRange<lld::SharedLibraryAtom> sharedLibrary() const override {
+      return _noSharedLibraryAtoms;
+    }
+
+    const AtomRange<lld::AbsoluteAtom> absolute() const override {
+      return _noAbsoluteAtoms;
+    }
+
+    void clearAtoms() override {
+      _noDefinedAtoms.clear();
+      _noUndefinedAtoms.clear();
+      _noSharedLibraryAtoms.clear();
+      _noAbsoluteAtoms.clear();
+    }
+
+    /// Return the first member that defines an atom called \p name, or
+    /// nullptr when no member does.
+    File *find(StringRef name) override {
+      for (const ArchMember &member : _members) {
+        const lld::File *content = member._content;
+        for (const lld::DefinedAtom *atom : content->defined()) {
+          if (atom->name() == name)
+            return const_cast<File *>(content);
+        }
+      }
+      return nullptr;
+    }
+
+    /// Does nothing and reports success; \p result is left untouched.
+    std::error_code
+    parseAllMembers(std::vector<std::unique_ptr<File>> &result) override {
+      return std::error_code();
+    }
+
+    StringRef _path;
+    std::vector<ArchMember> _members;
+  };
+ // Normalized form of an atom-based file used by the YAML mapping.
+ // It holds owning AtomLists plus AtomRange views; the accessors below
+ // return the views.
+ class NormalizedFile : public lld::File {
+ public:
+ // Constructor taking only the IO object: no RefNameBuilder is created and
+ // the range views refer to this object's own atom lists.
+ NormalizedFile(IO &io)
+ : File("", kindNormalizedObject), _io(io), _rnb(nullptr),
+ _definedAtomsRef(_definedAtoms._atoms),
+ _undefinedAtomsRef(_undefinedAtoms._atoms),
+ _sharedLibraryAtomsRef(_sharedLibraryAtoms._atoms),
+ _absoluteAtomsRef(_absoluteAtoms._atoms) {}
+ // Constructor wrapping an existing file: a RefNameBuilder is built for
+ // \p file and the range views refer to \p file's atoms.
+ NormalizedFile(IO &io, const lld::File *file)
+ : File(file->path(), kindNormalizedObject), _io(io),
+ _rnb(new RefNameBuilder(*file)), _path(file->path()),
+ _definedAtomsRef(file->defined()),
+ _undefinedAtomsRef(file->undefined()),
+ _sharedLibraryAtomsRef(file->sharedLibrary()),
+ _absoluteAtomsRef(file->absolute()) {
+ }
+ ~NormalizedFile() override {
+ }
+ // Defined outside this class body.
+ const lld::File *denormalize(IO &io);
+ const AtomRange<lld::DefinedAtom> defined() const override {
+ return _definedAtomsRef;
+ }
+ const AtomRange<lld::UndefinedAtom> undefined() const override {
+ return _undefinedAtomsRef;
+ }
+ const AtomRange<lld::SharedLibraryAtom> sharedLibrary() const override {
+ return _sharedLibraryAtomsRef;
+ }
+ const AtomRange<lld::AbsoluteAtom> absolute() const override {
+ return _absoluteAtomsRef;
+ }
+ // Empties the four owning atom lists.
+ void clearAtoms() override {
+ _definedAtoms._atoms.clear();
+ _undefinedAtoms._atoms.clear();
+ _sharedLibraryAtoms._atoms.clear();
+ _absoluteAtoms._atoms.clear();
+ }
+ // Allocate a new copy of this string in _storage, so the strings
+ // can be freed when File is destroyed.
+ StringRef copyString(StringRef str) {
+ char *s = _storage.Allocate<char>(str.size());
+ memcpy(s, str.data(), str.size());
+ return StringRef(s, str.size());
+ }
+ IO &_io;
+ // Ref-name builder; null when constructed from the IO object alone.
+ std::unique_ptr<RefNameBuilder> _rnb;
+ StringRef _path;
+ // Owning lists. They are declared before the range views because the
+ // IO-only constructor initializes the views from them.
+ AtomList<lld::DefinedAtom> _definedAtoms;
+ AtomList<lld::UndefinedAtom> _undefinedAtoms;
+ AtomList<lld::SharedLibraryAtom> _sharedLibraryAtoms;
+ AtomList<lld::AbsoluteAtom> _absoluteAtoms;
+ // Range views returned by the accessors.
+ AtomRange<lld::DefinedAtom> _definedAtomsRef;
+ AtomRange<lld::UndefinedAtom> _undefinedAtomsRef;
+ AtomRange<lld::SharedLibraryAtom> _sharedLibraryAtomsRef;
+ AtomRange<lld::AbsoluteAtom> _absoluteAtomsRef;
+ // Backing store for copyString().
+ llvm::BumpPtrAllocator _storage;
+ };
+  /// Entry point for mapping one yaml document to or from a File.
+  /// A registered tag handler gets the first chance; otherwise documents
+  /// tagged "!native" or carrying the plain yaml map tag are handled by
+  /// mappingAtoms().
+  static void mapping(IO &io, const lld::File *&file) {
+    YamlContext *yamlCtx = reinterpret_cast<YamlContext *>(io.getContext());
+    assert(yamlCtx != nullptr);
+
+    // Let any register tag handler process this.
+    const bool claimed =
+        yamlCtx->_registry && yamlCtx->_registry->handleTaggedDoc(io, file);
+    if (claimed)
+      return;
+
+    // If no registered handler claims this tag and there is no tag,
+    // grandfather in as "!native".
+    if (!io.mapTag("!native", true) && !io.mapTag("tag:yaml.org,2002:map"))
+      return;
+    mappingAtoms(io, file);
+  }
+  /// Map an atom-based file. The normalized file becomes the context's
+  /// current file. The same five keys are mapped in both directions: the
+  /// range views are used when outputting and the owning lists otherwise.
+  static void mappingAtoms(IO &io, const lld::File *&file) {
+    YamlContext *yamlCtx = reinterpret_cast<YamlContext *>(io.getContext());
+    MappingNormalizationHeap<NormalizedFile, const lld::File *>
+        norm(io, file, nullptr);
+    assert(yamlCtx != nullptr);
+    yamlCtx->_file = norm.operator->();
+
+    io.mapOptional("path", norm->_path);
+
+    if (!io.outputting()) {
+      io.mapOptional("defined-atoms", norm->_definedAtoms);
+      io.mapOptional("undefined-atoms", norm->_undefinedAtoms);
+      io.mapOptional("shared-library-atoms", norm->_sharedLibraryAtoms);
+      io.mapOptional("absolute-atoms", norm->_absoluteAtoms);
+    } else {
+      io.mapOptional("defined-atoms", norm->_definedAtomsRef);
+      io.mapOptional("undefined-atoms", norm->_undefinedAtomsRef);
+      io.mapOptional("shared-library-atoms", norm->_sharedLibraryAtomsRef);
+      io.mapOptional("absolute-atoms", norm->_absoluteAtomsRef);
+    }
+  }
+  /// Map an archive file through the keys "path" and "members".
+  ///
+  /// The normalized archive is allocated from the current file's allocator,
+  /// so the yaml context and its current file must already be set; both are
+  /// asserted before they are dereferenced.
+  static void mappingArchive(IO &io, const lld::File *&file) {
+    YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext());
+    assert(info != nullptr);
+    assert(info->_file != nullptr);
+    MappingNormalizationHeap<NormArchiveFile, const lld::File *>
+        keys(io, file, &info->_file->allocator());
+
+    io.mapOptional("path", keys->_path);
+    io.mapOptional("members", keys->_members);
+  }
+// YAML conversion for const lld::Reference*
+template <> struct MappingTraits<const lld::Reference *> {
+ // Normalized form of a Reference used by the YAML mapping. The target is
+ // held by name in _targetName and the kind tuple in _mappedKind.
+ class NormalizedReference : public lld::Reference {
+ public:
+ // Constructor taking only the IO object: starts with no target and zero
+ // offset, addend and tag.
+ NormalizedReference(IO &io)
+ : lld::Reference(lld::Reference::KindNamespace::all,
+ lld::Reference::KindArch::all, 0),
+ _target(nullptr), _offset(0), _addend(0), _tag(0) {}
+ // Constructor copying the fields of an existing reference; the target is
+ // recorded through targetName().
+ NormalizedReference(IO &io, const lld::Reference *ref)
+ : lld::Reference(ref->kindNamespace(), ref->kindArch(),
+ ref->kindValue()),
+ _target(nullptr), _targetName(targetName(io, ref)),
+ _offset(ref->offsetInAtom()), _addend(ref->addend()),
+ _tag(ref->tag()) {
+ _mappedKind.ns = ref->kindNamespace();
+ _mappedKind.arch = ref->kindArch();
+ _mappedKind.value = ref->kindValue();
+ }
+ // Copies a non-empty target name into the current file's storage, applies
+ // the mapped kind tuple to this reference, and returns this object.
+ const lld::Reference *denormalize(IO &io) {
+ YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext());
+ assert(info != nullptr);
+ typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile;
+ NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file);
+ if (!_targetName.empty())
+ _targetName = f->copyString(_targetName);
+ DEBUG_WITH_TYPE("WriterYAML", llvm::dbgs()
+ << "created Reference to name: '"
+ << _targetName << "' ("
+ << (const void *)_targetName.data()
+ << ", " << _targetName.size() << ")\n");
+ setKindNamespace(_mappedKind.ns);
+ setKindArch(_mappedKind.arch);
+ setKindValue(_mappedKind.value);
+ return this;
+ }
+ // Both defined outside this class body.
+ void bind(const RefNameResolver &);
+ static StringRef targetName(IO &io, const lld::Reference *ref);
+ uint64_t offsetInAtom() const override { return _offset; }
+ const lld::Atom *target() const override { return _target; }
+ Addend addend() const override { return _addend; }
+ void setAddend(Addend a) override { _addend = a; }
+ void setTarget(const lld::Atom *a) override { _target = a; }
+ // Target atom; null in both constructors.
+ const lld::Atom *_target;
+ // Name mapped to the yaml "target" key.
+ StringRef _targetName;
+ uint32_t _offset;
+ Addend _addend;
+ // Kind tuple mapped to the yaml "kind" key.
+ RefKind _mappedKind;
+ uint32_t _tag;
+ };
+  /// Map one reference: required "kind", then optional "offset", "target",
+  /// "addend" (default 0) and "tag" (default 0).
+  ///
+  /// The normalized reference is allocated from the current file's
+  /// allocator, so the yaml context and its current file must already be
+  /// set; both are asserted before they are dereferenced.
+  static void mapping(IO &io, const lld::Reference *&ref) {
+    YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext());
+    assert(info != nullptr);
+    assert(info->_file != nullptr);
+    MappingNormalizationHeap<NormalizedReference, const lld::Reference *> keys(
+        io, ref, &info->_file->allocator());
+
+    io.mapRequired("kind", keys->_mappedKind);
+    io.mapOptional("offset", keys->_offset);
+    io.mapOptional("target", keys->_targetName);
+    io.mapOptional("addend", keys->_addend, (lld::Reference::Addend)0);
+    io.mapOptional("tag", keys->_tag, 0u);
+  }
+// YAML conversion for const lld::DefinedAtom*
+template <> struct MappingTraits<const lld::DefinedAtom *> {
+ class NormalizedAtom : public lld::DefinedAtom {
+ public:
+ // Constructor taking only the IO object. The owning file comes from the
+ // yaml context and the alignment starts at 1. A function-local static
+ // counter, starting at 1, gives each atom built here the next ordinal.
+ NormalizedAtom(IO &io)
+ : _file(fileFromContext(io)), _contentType(), _alignment(1) {
+ static uint32_t ordinalCounter = 1;
+ _ordinal = ordinalCounter++;
+ }
+    /// Build the normalized form of an existing atom: copy its attributes
+    /// and its references, and copy its raw content bytes only when the
+    /// atom occupies disk space.
+    NormalizedAtom(IO &io, const lld::DefinedAtom *atom)
+        : _file(fileFromContext(io)), _name(atom->name()),
+          _scope(atom->scope()), _interpose(atom->interposable()),
+          _merge(atom->merge()), _contentType(atom->contentType()),
+          _alignment(atom->alignment()), _sectionChoice(atom->sectionChoice()),
+          _deadStrip(atom->deadStrip()), _dynamicExport(atom->dynamicExport()),
+          _codeModel(atom->codeModel()),
+          _permissions(atom->permissions()), _size(atom->size()),
+          _sectionName(atom->customSectionName()),
+          _sectionSize(atom->sectionSize()) {
+      for (const lld::Reference *ref : *atom)
+        _references.push_back(ref);
+
+      if (atom->occupiesDiskSpace()) {
+        ArrayRef<uint8_t> bytes = atom->rawContent();
+        _content.reserve(bytes.size());
+        for (uint8_t byte : bytes)
+          _content.push_back(byte);
+      }
+    }
+ ~NormalizedAtom() override = default;
+ const lld::DefinedAtom *denormalize(IO &io) { // Finalizes a parsed atom: replaces its strings with copies from the file, then returns itself.
+ YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext());
+ assert(info != nullptr);
+ typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile;
+ NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); // the context's file is treated as a NormalizedFile here
+ if (!_name.empty())
+ _name = f->copyString(_name); // presumably copies into file-owned storage so the string outlives the YAML input -- TODO confirm
+ if (!_refName.empty())
+ _refName = f->copyString(_refName);
+ if (!_sectionName.empty())
+ _sectionName = f->copyString(_sectionName);
+ llvm::dbgs() << "created DefinedAtom named: '" << _name // NOTE(review): the final ")" of this statement has no visible opener; a wrapping macro line appears to be missing from this listing
+ << "' (" << (const void *)_name.data()
+ << ", " << _name.size() << ")\n");
+ return this; // the normalized object itself serves as the resulting atom
+ }
+ void bind(const RefNameResolver &);
+ // Returns the lld::File recorded in the YamlContext attached to this IO
+ // object. Both the context and its file pointer are asserted to be set.
+ const lld::File &fileFromContext(IO &io) {
+   auto *info = static_cast<YamlContext *>(io.getContext());
+   assert(info != nullptr);
+   assert(info->_file != nullptr);
+   const lld::File &current = *info->_file;
+   return current;
+ }
+ const lld::File &file() const override { return _file; }
+ StringRef name() const override { return _name; }
+ uint64_t size() const override { return _size; }
+ Scope scope() const override { return _scope; }
+ Interposable interposable() const override { return _interpose; }
+ Merge merge() const override { return _merge; }
+ ContentType contentType() const override { return _contentType; }
+ Alignment alignment() const override { return _alignment; }
+ SectionChoice sectionChoice() const override { return _sectionChoice; }
+ StringRef customSectionName() const override { return _sectionName; }
+ uint64_t sectionSize() const override { return _sectionSize; }
+ DeadStripKind deadStrip() const override { return _deadStrip; }
+ DynamicExport dynamicExport() const override { return _dynamicExport; }
+ CodeModel codeModel() const override { return _codeModel; }
+ ContentPermissions permissions() const override { return _permissions; }
+ ArrayRef<uint8_t> rawContent() const override {
+   // Atoms that take no disk space expose an empty view; otherwise the view
+   // covers the bytes held in _content.
+   ArrayRef<uint8_t> bytes;
+   if (occupiesDiskSpace())
+     bytes = ArrayRef<uint8_t>(
+         reinterpret_cast<const uint8_t *>(_content.data()), _content.size());
+   return bytes;
+ }
+ uint64_t ordinal() const override { return _ordinal; }
+ reference_iterator begin() const override {
+   // The iterator's opaque pointer carries an index into _references;
+   // iteration starts at index 0.
+   return reference_iterator(
+       *this, reinterpret_cast<const void *>(static_cast<uintptr_t>(0)));
+ }
+ reference_iterator end() const override {
+   // One-past-the-last position: the reference count encoded as the
+   // iterator's opaque pointer.
+   const uintptr_t count = _references.size();
+   return reference_iterator(*this, reinterpret_cast<const void *>(count));
+ }
+ const lld::Reference *derefIterator(const void *it) const override {
+   // Decode the opaque pointer back into an index into _references.
+   const uintptr_t index = reinterpret_cast<uintptr_t>(it);
+   assert(index < _references.size());
+   const lld::Reference *found = _references[index];
+   return found;
+ }
+ void incrementIterator(const void *&it) const override {
+   // Advance the index encoded in the opaque pointer by one.
+   it = reinterpret_cast<const void *>(reinterpret_cast<uintptr_t>(it) + 1);
+ }
+ void addReference(Reference::KindNamespace ns,
+                   Reference::KindArch arch,
+                   Reference::KindValue kindValue, uint64_t off,
+                   const Atom *target, Reference::Addend a) override {
+   assert(target && "trying to create reference to nothing");
+   // The new reference is placement-constructed in the owning file's
+   // allocator and appended to this atom's reference list.
+   _references.push_back(new (file().allocator()) SimpleReference(
+       ns, arch, kindValue, off, target, a));
+ }
+ const lld::File &_file;
+ StringRef _name;
+ StringRef _refName;
+ Scope _scope;
+ Interposable _interpose;
+ Merge _merge;
+ ContentType _contentType;
+ Alignment _alignment;
+ SectionChoice _sectionChoice;
+ DeadStripKind _deadStrip;
+ DynamicExport _dynamicExport;
+ CodeModel _codeModel;
+ ContentPermissions _permissions;
+ uint32_t _ordinal;
+ std::vector<ImplicitHex8> _content;
+ uint64_t _size;
+ StringRef _sectionName;
+ uint64_t _sectionSize;
+ std::vector<const lld::Reference *> _references;
+ };
+ static void mapping(IO &io, const lld::DefinedAtom *&atom) {
+   // Maps a DefinedAtom to/from its YAML keys through a heap-allocated
+   // NormalizedAtom.
+   YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext());
+   // Validate the context before its first use; the allocator lookup below
+   // dereferences both pointers.
+   assert(info != nullptr);
+   assert(info->_file != nullptr);
+   MappingNormalizationHeap<NormalizedAtom, const lld::DefinedAtom *> keys(
+       io, atom, &info->_file->allocator());
+   if (io.outputting()) {
+     // If writing YAML, check if atom needs a ref-name.
+     typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile;
+     NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file);
+     assert(f->_rnb);
+     if (f->_rnb->hasRefName(atom)) {
+       keys->_refName = f->_rnb->refName(atom);
+     }
+   }
+   io.mapOptional("name", keys->_name, StringRef());
+   io.mapOptional("ref-name", keys->_refName, StringRef());
+   io.mapOptional("scope", keys->_scope,
+                  DefinedAtom::scopeTranslationUnit);
+   io.mapOptional("type", keys->_contentType,
+                  DefinedAtom::typeCode);
+   io.mapOptional("content", keys->_content);
+   // "content" is mapped first so that the default for "size" is the number
+   // of content bytes mapped just above.
+   io.mapOptional("size", keys->_size, (uint64_t)keys->_content.size());
+   io.mapOptional("interposable", keys->_interpose,
+                  DefinedAtom::interposeNo);
+   io.mapOptional("merge", keys->_merge, DefinedAtom::mergeNo);
+   io.mapOptional("alignment", keys->_alignment,
+                  DefinedAtom::Alignment(1));
+   io.mapOptional("section-choice", keys->_sectionChoice,
+                  DefinedAtom::sectionBasedOnContent);
+   io.mapOptional("section-name", keys->_sectionName, StringRef());
+   io.mapOptional("section-size", keys->_sectionSize, (uint64_t)0);
+   io.mapOptional("dead-strip", keys->_deadStrip,
+                  DefinedAtom::deadStripNormal);
+   io.mapOptional("dynamic-export", keys->_dynamicExport,
+                  DefinedAtom::dynamicExportNormal);
+   io.mapOptional("code-model", keys->_codeModel, DefinedAtom::codeNA);
+   // default permissions based on content type (mapped earlier as "type")
+   io.mapOptional("permissions", keys->_permissions,
+                  DefinedAtom::permissions(
+                      keys->_contentType));
+   io.mapOptional("references", keys->_references);
+ }
+template <> struct MappingTraits<lld::DefinedAtom *> {
+ static void mapping(IO &io, lld::DefinedAtom *&atom) {
+   // Forward to the const-pointer traits, then hand the (possibly replaced)
+   // pointer back through the non-const reference.
+   const lld::DefinedAtom *constAtom = atom;
+   MappingTraits<const lld::DefinedAtom *>::mapping(io, constAtom);
+   atom = const_cast<lld::DefinedAtom *>(constAtom);
+ }
+// YAML conversion for const lld::UndefinedAtom*
+template <> struct MappingTraits<const lld::UndefinedAtom *> {
+ class NormalizedAtom : public lld::UndefinedAtom {
+ public:
+ NormalizedAtom(IO &io)
+ : _file(fileFromContext(io)), _canBeNull(canBeNullNever) {}
+ NormalizedAtom(IO &io, const lld::UndefinedAtom *atom)
+ : _file(fileFromContext(io)), _name(atom->name()),
+ _canBeNull(atom->canBeNull()) {}
+ ~NormalizedAtom() override = default;
+ const lld::UndefinedAtom *denormalize(IO &io) { // Finalizes a parsed atom: replaces its name with a copy from the file, then returns itself.
+ YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext());
+ assert(info != nullptr);
+ typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile;
+ NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); // the context's file is treated as a NormalizedFile here
+ if (!_name.empty())
+ _name = f->copyString(_name); // presumably copies into file-owned storage so the string outlives the YAML input -- TODO confirm
+ llvm::dbgs() << "created UndefinedAtom named: '" << _name // NOTE(review): the final ")" of this statement has no visible opener; a wrapping macro line appears to be missing from this listing
+ << "' (" << (const void *)_name.data() << ", "
+ << _name.size() << ")\n");
+ return this; // the normalized object itself serves as the resulting atom
+ }
+ // Returns the lld::File recorded in the YamlContext attached to this IO
+ // object. Both the context and its file pointer are asserted to be set.
+ const lld::File &fileFromContext(IO &io) {
+   auto *info = static_cast<YamlContext *>(io.getContext());
+   assert(info != nullptr);
+   assert(info->_file != nullptr);
+   const lld::File &current = *info->_file;
+   return current;
+ }
+ const lld::File &file() const override { return _file; }
+ StringRef name() const override { return _name; }
+ CanBeNull canBeNull() const override { return _canBeNull; }
+ const lld::File &_file;
+ StringRef _name;
+ CanBeNull _canBeNull;
+ };
+ static void mapping(IO &io, const lld::UndefinedAtom *&atom) {
+   // The YamlContext attached to the IO object supplies the file whose
+   // allocator is handed to the normalization helper.
+   auto *ctx = static_cast<YamlContext *>(io.getContext());
+   MappingNormalizationHeap<NormalizedAtom, const lld::UndefinedAtom *> norm(
+       io, atom, &ctx->_file->allocator());
+   // "name" is mandatory; "can-be-null" defaults to canBeNullNever.
+   io.mapRequired("name", norm->_name);
+   io.mapOptional("can-be-null", norm->_canBeNull,
+                  lld::UndefinedAtom::canBeNullNever);
+ }
+template <> struct MappingTraits<lld::UndefinedAtom *> {
+ static void mapping(IO &io, lld::UndefinedAtom *&atom) {
+   // Forward to the const-pointer traits, then hand the (possibly replaced)
+   // pointer back through the non-const reference.
+   const lld::UndefinedAtom *constAtom = atom;
+   MappingTraits<const lld::UndefinedAtom *>::mapping(io, constAtom);
+   atom = const_cast<lld::UndefinedAtom *>(constAtom);
+ }
+// YAML conversion for const lld::SharedLibraryAtom*
+template <> struct MappingTraits<const lld::SharedLibraryAtom *> {
+ class NormalizedAtom : public lld::SharedLibraryAtom {
+ public:
+ NormalizedAtom(IO &io)
+ : _file(fileFromContext(io)), _canBeNull(false),
+ _type(Type::Unknown), _size(0) {}
+ NormalizedAtom(IO &io, const lld::SharedLibraryAtom *atom)
+ : _file(fileFromContext(io)), _name(atom->name()),
+ _loadName(atom->loadName()), _canBeNull(atom->canBeNullAtRuntime()),
+ _type(atom->type()), _size(atom->size()) {}
+ ~NormalizedAtom() override = default;
+ const lld::SharedLibraryAtom *denormalize(IO &io) { // Finalizes a parsed atom: replaces its strings with copies from the file, then returns itself.
+ YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext());
+ assert(info != nullptr);
+ typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile;
+ NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); // the context's file is treated as a NormalizedFile here
+ if (!_name.empty())
+ _name = f->copyString(_name); // presumably copies into file-owned storage so the string outlives the YAML input -- TODO confirm
+ if (!_loadName.empty())
+ _loadName = f->copyString(_loadName);
+ llvm::dbgs() << "created SharedLibraryAtom named: '" // NOTE(review): the final ")" of this statement has no visible opener; a wrapping macro line appears to be missing from this listing
+ << _name << "' ("
+ << (const void *)_name.data()
+ << ", " << _name.size() << ")\n");
+ return this; // the normalized object itself serves as the resulting atom
+ }
+ // Returns the lld::File recorded in the YamlContext attached to this IO
+ // object. Both the context and its file pointer are asserted to be set.
+ const lld::File &fileFromContext(IO &io) {
+   auto *info = static_cast<YamlContext *>(io.getContext());
+   assert(info != nullptr);
+   assert(info->_file != nullptr);
+   const lld::File &current = *info->_file;
+   return current;
+ }
+ const lld::File &file() const override { return _file; }
+ StringRef name() const override { return _name; }
+ StringRef loadName() const override { return _loadName; }
+ bool canBeNullAtRuntime() const override { return _canBeNull; }
+ Type type() const override { return _type; }
+ uint64_t size() const override { return _size; }
+ const lld::File &_file;
+ StringRef _name;
+ StringRef _loadName;
+ ShlibCanBeNull _canBeNull;
+ Type _type;
+ uint64_t _size;
+ };
+ static void mapping(IO &io, const lld::SharedLibraryAtom *&atom) {
+   // The YamlContext attached to the IO object supplies the file whose
+   // allocator is handed to the normalization helper.
+   auto *ctx = static_cast<YamlContext *>(io.getContext());
+   MappingNormalizationHeap<NormalizedAtom, const lld::SharedLibraryAtom *>
+       norm(io, atom, &ctx->_file->allocator());
+   // Only "name" is mandatory. Note the "type" default here is Type::Code.
+   io.mapRequired("name", norm->_name);
+   io.mapOptional("load-name", norm->_loadName);
+   io.mapOptional("can-be-null", norm->_canBeNull, (ShlibCanBeNull) false);
+   io.mapOptional("type", norm->_type, SharedLibraryAtom::Type::Code);
+   io.mapOptional("size", norm->_size, uint64_t(0));
+ }
+template <> struct MappingTraits<lld::SharedLibraryAtom *> {
+ static void mapping(IO &io, lld::SharedLibraryAtom *&atom) {
+   // Forward to the const-pointer traits, then hand the (possibly replaced)
+   // pointer back through the non-const reference.
+   const lld::SharedLibraryAtom *constAtom = atom;
+   MappingTraits<const lld::SharedLibraryAtom *>::mapping(io, constAtom);
+   atom = const_cast<lld::SharedLibraryAtom *>(constAtom);
+ }
+// YAML conversion for const lld::AbsoluteAtom*
+template <> struct MappingTraits<const lld::AbsoluteAtom *> {
+ class NormalizedAtom : public lld::AbsoluteAtom {
+ public:
+ NormalizedAtom(IO &io)
+ : _file(fileFromContext(io)), _scope(), _value(0) {}
+ NormalizedAtom(IO &io, const lld::AbsoluteAtom *atom)
+ : _file(fileFromContext(io)), _name(atom->name()),
+ _scope(atom->scope()), _value(atom->value()) {}
+ ~NormalizedAtom() override = default;
+ const lld::AbsoluteAtom *denormalize(IO &io) { // Finalizes a parsed atom: replaces its name with a copy from the file, then returns itself.
+ YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext());
+ assert(info != nullptr);
+ typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile;
+ NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); // the context's file is treated as a NormalizedFile here
+ if (!_name.empty())
+ _name = f->copyString(_name); // NOTE(review): _refName is not copied here, unlike the DefinedAtom denormalize -- confirm this is intentional
+ llvm::dbgs() << "created AbsoluteAtom named: '" << _name // NOTE(review): the final ")" of this statement has no visible opener; a wrapping macro line appears to be missing from this listing
+ << "' (" << (const void *)_name.data()
+ << ", " << _name.size() << ")\n");
+ return this; // the normalized object itself serves as the resulting atom
+ }
+ // Returns the lld::File recorded in the YamlContext attached to this IO
+ // object. Both the context and its file pointer are asserted to be set.
+ const lld::File &fileFromContext(IO &io) {
+   auto *info = static_cast<YamlContext *>(io.getContext());
+   assert(info != nullptr);
+   assert(info->_file != nullptr);
+   const lld::File &current = *info->_file;
+   return current;
+ }
+ const lld::File &file() const override { return _file; }
+ StringRef name() const override { return _name; }
+ uint64_t value() const override { return _value; }
+ Scope scope() const override { return _scope; }
+ const lld::File &_file;
+ StringRef _name;
+ StringRef _refName;
+ Scope _scope;
+ Hex64 _value;
+ };
+ static void mapping(IO &io, const lld::AbsoluteAtom *&atom) {
+   // Maps an AbsoluteAtom to/from its YAML keys through a heap-allocated
+   // NormalizedAtom.
+   YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext());
+   // Validate the context before its first use; the allocator lookup below
+   // dereferences both pointers.
+   assert(info != nullptr);
+   assert(info->_file != nullptr);
+   MappingNormalizationHeap<NormalizedAtom, const lld::AbsoluteAtom *> keys(
+       io, atom, &info->_file->allocator());
+   if (io.outputting()) {
+     // When writing YAML, emit a ref-name if the builder assigned one. The
+     // context fetched above is reused rather than re-read into a shadowing
+     // local.
+     typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile;
+     NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file);
+     assert(f->_rnb);
+     if (f->_rnb->hasRefName(atom)) {
+       keys->_refName = f->_rnb->refName(atom);
+     }
+   }
+   io.mapRequired("name", keys->_name);
+   io.mapOptional("ref-name", keys->_refName, StringRef());
+   io.mapOptional("scope", keys->_scope);
+   io.mapRequired("value", keys->_value);
+ }
+template <> struct MappingTraits<lld::AbsoluteAtom *> {
+ static void mapping(IO &io, lld::AbsoluteAtom *&atom) {
+   // Forward to the const-pointer traits, then hand the (possibly replaced)
+   // pointer back through the non-const reference.
+   const lld::AbsoluteAtom *constAtom = atom;
+   MappingTraits<const lld::AbsoluteAtom *>::mapping(io, constAtom);
+   atom = const_cast<lld::AbsoluteAtom *>(constAtom);
+ }
+} // end namespace llvm
+} // end namespace yaml
+RefNameResolver::RefNameResolver(const lld::File *file, IO &io) : _io(io) {
+  // Registers every atom of `file` under the name references use to find it.
+  // The casts below assume the file's atoms were created by the YAML traits
+  // in this file -- TODO confirm against callers.
+  using NormDefAtom = MappingTraits<const lld::DefinedAtom *>::NormalizedAtom;
+  for (const lld::DefinedAtom *atom : file->defined()) {
+    const auto *norm = (const NormDefAtom *)atom;
+    // Prefer the ref-name; fall back to the plain name. A defined atom with
+    // neither is not registered.
+    if (!norm->_refName.empty()) {
+      add(norm->_refName, atom);
+      continue;
+    }
+    if (!norm->_name.empty())
+      add(norm->_name, atom);
+  }
+  for (const lld::UndefinedAtom *atom : file->undefined())
+    add(atom->name(), atom);
+  for (const lld::SharedLibraryAtom *atom : file->sharedLibrary())
+    add(atom->name(), atom);
+  using NormAbsAtom = MappingTraits<const lld::AbsoluteAtom *>::NormalizedAtom;
+  for (const lld::AbsoluteAtom *atom : file->absolute()) {
+    const auto *norm = (const NormAbsAtom *)atom;
+    // Absolute atoms are always registered: ref-name if set, else name.
+    add(norm->_refName.empty() ? norm->_name : norm->_refName, atom);
+  }
+inline const lld::File *
+MappingTraits<const lld::File *>::NormalizedFile::denormalize(IO &io) {
+  using NormAtom = MappingTraits<const lld::DefinedAtom *>::NormalizedAtom;
+  // Now that all atoms are parsed, references can be bound: build the
+  // name -> atom table once and let every defined atom resolve against it.
+  RefNameResolver resolver(this, io);
+  for (const lld::DefinedAtom *atom : this->defined())
+    ((NormAtom *)const_cast<DefinedAtom *>(atom))->bind(resolver);
+  return this;
+inline void MappingTraits<const lld::DefinedAtom *>::NormalizedAtom::bind(
+    const RefNameResolver &resolver) {
+  using NormRef = MappingTraits<const lld::Reference *>::NormalizedReference;
+  // Ask each reference held by this atom to resolve its target. The cast
+  // assumes the references were created by the YAML traits -- TODO confirm.
+  for (const lld::Reference *r : _references)
+    ((NormRef *)const_cast<Reference *>(r))->bind(resolver);
+inline void MappingTraits<const lld::Reference *>::NormalizedReference::bind(
+    const RefNameResolver &resolver) {
+  // Replace the textual target recorded in _targetName with whatever atom
+  // the resolver returns for it.
+  const lld::Atom *resolved = resolver.lookup(_targetName);
+  _target = resolved;
+inline StringRef
+MappingTraits<const lld::Reference *>::NormalizedReference::targetName(
+    IO &io, const lld::Reference *ref) {
+  // A reference without a target has no name to emit.
+  const lld::Atom *tgt = ref->target();
+  if (tgt == nullptr)
+    return StringRef();
+  YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext());
+  assert(info != nullptr);
+  using NormFile = MappingTraits<const lld::File *>::NormalizedFile;
+  RefNameBuilder &rnb = *reinterpret_cast<NormFile *>(info->_file)->_rnb;
+  // Prefer the ref-name the builder assigned; otherwise use the atom's name.
+  return rnb.hasRefName(tgt) ? rnb.refName(tgt) : tgt->name();
+namespace lld {
+namespace yaml {
+class Writer : public lld::Writer {
+ Writer(const LinkingContext &context) : _ctx(context) {}
+ llvm::Error writeFile(const lld::File &file, StringRef outPath) override {
+   // Open the destination as a text-mode stream; report failure to open.
+   std::error_code openErr;
+   llvm::raw_fd_ostream os(outPath, openErr, llvm::sys::fs::F_Text);
+   if (openErr)
+     return llvm::errorCodeToError(openErr);
+   // The YAML traits read the linking context and registry from this object.
+   YamlContext ctx;
+   ctx._ctx = &_ctx;
+   ctx._registry = &_ctx.registry();
+   llvm::yaml::Output yamlOut(os, &ctx);
+   // Serialization goes through the `const lld::File *` traits, so a named
+   // pointer to the file is streamed rather than the file itself.
+   const lld::File *filePtr = &file;
+   yamlOut << filePtr;
+   return llvm::Error::success();
+ }
+ const LinkingContext &_ctx;
+} // end namespace yaml
+namespace {
+/// Handles !native tagged yaml documents.
+class NativeYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler {
+ bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override {
+   // Claim the document only when it carries the !native tag.
+   if (!io.mapTag("!native"))
+     return false;
+   MappingTraits<const lld::File *>::mappingAtoms(io, file);
+   return true;
+ }
+/// Handles !archive tagged yaml documents.
+class ArchiveYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler {
+ bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override {
+   // Claim the document only when it carries the !archive tag.
+   if (!io.mapTag("!archive"))
+     return false;
+   MappingTraits<const lld::File *>::mappingArchive(io, file);
+   return true;
+ }
+class YAMLReader : public Reader {
+ YAMLReader(const Registry &registry) : _registry(registry) {}
+ bool canParse(file_magic magic, MemoryBufferRef mb) const override {
+   // Selection is purely by buffer name suffix; `magic` is not consulted.
+   const StringRef id = mb.getBufferIdentifier();
+   if (id.endswith(".objtxt"))
+     return true;
+   return id.endswith(".yaml");
+ }
+ ErrorOr<std::unique_ptr<File>>
+ loadFile(std::unique_ptr<MemoryBuffer> mb,
+          const class Registry &) const override {
+   // Parses the buffer as YAML and returns the single File it describes.
+   // Returns YamlReaderError::illegal_value when parsing fails or yields no
+   // file. On success the buffer is attached to the returned File.
+   // Create YAML Input Reader.
+   YamlContext yamlContext;
+   yamlContext._registry = &_registry;
+   yamlContext._path = mb->getBufferIdentifier();
+   llvm::yaml::Input yin(mb->getBuffer(), &yamlContext);
+   // Fill vector with File objects created by parsing yaml.
+   std::vector<const lld::File *> createdFiles;
+   yin >> createdFiles;
+   // Check for parse errors before inspecting the result, so that malformed
+   // input is reported as an error instead of tripping the size assertion.
+   if (yin.error())
+     return make_error_code(lld::YamlReaderError::illegal_value);
+   // Guard the createdFiles[0] access below for builds without assertions,
+   // in case parsing produced no file without flagging an error.
+   if (createdFiles.empty())
+     return make_error_code(lld::YamlReaderError::illegal_value);
+   assert(createdFiles.size() == 1);
+   std::shared_ptr<MemoryBuffer> smb(mb.release());
+   const File *file = createdFiles[0];
+   // Note: loadFile() should return vector of *const* File
+   File *f = const_cast<File *>(file);
+   f->setLastError(std::error_code());
+   // The File takes shared ownership of the buffer it was parsed from.
+   f->setSharedMemoryBuffer(smb);
+   return std::unique_ptr<File>(f);
+ }
+ const Registry &_registry;
+} // end anonymous namespace
+void Registry::addSupportYamlFiles() {
+  // Register the YAML reader, then the handlers for the !native and
+  // !archive document tags, in that order.
+  std::unique_ptr<Reader> yamlReader(new YAMLReader(*this));
+  add(std::move(yamlReader));
+  std::unique_ptr<YamlIOTaggedDocumentHandler> nativeHandler(
+      new NativeYamlIOTaggedDocumentHandler());
+  add(std::move(nativeHandler));
+  std::unique_ptr<YamlIOTaggedDocumentHandler> archiveHandler(
+      new ArchiveYamlIOTaggedDocumentHandler());
+  add(std::move(archiveHandler));
+std::unique_ptr<Writer> createWriterYAML(const LinkingContext &context) {
+  // Factory for the YAML writer, returned through the generic Writer
+  // interface.
+  std::unique_ptr<Writer> yamlWriter(new lld::yaml::Writer(context));
+  return yamlWriter;
+} // end namespace lld
diff --git a/contrib/llvm/tools/lld/tools/lld/CMakeLists.txt b/contrib/llvm/tools/lld/tools/lld/CMakeLists.txt
new file mode 100644
index 000000000000..d8829493fc22
--- /dev/null
+++ b/contrib/llvm/tools/lld/tools/lld/CMakeLists.txt
@@ -0,0 +1,27 @@
+ Support
+ )
+ lld.cpp
+ )
+ lldCOFF
+ lldDriver
+ lldELF
+ lldMinGW
+ lldWasm
+ )
+install(TARGETS lld
+ set(LLD_SYMLINKS_TO_CREATE lld-link ld.lld ld64.lld wasm-ld)
+foreach(link ${LLD_SYMLINKS_TO_CREATE})
+ add_lld_symlink(${link} lld)
diff --git a/contrib/llvm/tools/lld/tools/lld/lld.cpp b/contrib/llvm/tools/lld/tools/lld/lld.cpp
new file mode 100644
index 000000000000..4ba0b258e8d3
--- /dev/null
+++ b/contrib/llvm/tools/lld/tools/lld/lld.cpp
@@ -0,0 +1,148 @@
+//===- tools/lld/lld.cpp - Linker Driver Dispatcher -----------------------===//
+// The LLVM Linker
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file contains the main function of the lld executable. The main
+// function is a thin wrapper which dispatches to the platform specific
+// driver.
+// lld is a single executable that contains four different linkers for ELF,
+// COFF, WebAssembly and Mach-O. The main function dispatches according to
+// argv[0] (i.e. command name). The most common name for each target is shown
+// below:
+// - ld.lld: ELF (Unix)
+// - ld64.lld: Mach-O (macOS)
+// - lld-link: COFF (Windows)
+// - wasm-ld: WebAssembly
+// lld can be invoked as "lld" along with "-flavor" option. This is for
+// backward compatibility and not recommended.
+#include "lld/Common/Driver.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/Path.h"
+#include <cstdlib>
+using namespace lld;
+using namespace llvm;
+using namespace llvm::sys;
+enum Flavor { // Identifies which linker driver main() dispatches to.
+ Invalid, // first (zero) enumerator: parseProgname relies on this when it tests getFlavor()'s result in an if-condition
+ Gnu, // -flavor gnu
+ WinLink, // -flavor link
+ Darwin, // -flavor darwin
+ Wasm, // -flavor wasm
+LLVM_ATTRIBUTE_NORETURN static void die(const Twine &S) { // Reports a fatal driver error and never returns.
+ errs() << S << "\n"; // message followed by a newline on errs()
+ exit(1); // terminate with exit status 1
+static Flavor getFlavor(StringRef S) {
+  // Maps a flavor or tool name to its driver. Each accepted spelling
+  // belongs to exactly one driver, so the order of the cases is irrelevant.
+  // Anything unrecognised yields Invalid.
+  return StringSwitch<Flavor>(S)
+      .CasesLower("gnu", "ld", "ld.lld", Gnu)
+      .CasesLower("darwin", "ld64", "ld64.lld", Darwin)
+      .CaseLower("link", WinLink)
+      .CasesLower("ld-wasm", "wasm", Wasm)
+      .Default(Invalid);
+static bool isPETarget(const std::vector<const char *> &V) {
+  // Returns true when the first "-m" option in V is followed by one of the
+  // PE emulation names. Only the first "-m" is examined. Returns false if
+  // there is no "-m" with a following argument.
+  //
+  // The index bound `I + 1 < V.size()` is also safe for an empty vector,
+  // where comparing `begin() + 1` against `end()` would step past the end.
+  for (size_t I = 0; I + 1 < V.size(); ++I) {
+    if (StringRef(V[I]) != "-m")
+      continue;
+    StringRef S = V[I + 1];
+    return S == "i386pe" || S == "i386pep" || S == "thumb2pe" || S == "arm64pe";
+  }
+  return false;
+static Flavor parseProgname(StringRef Progname) { // Maps a program name to a Flavor; returns Invalid when no component of the name is recognised.
+#if __APPLE__ // NOTE(review): no matching #else/#endif is visible below; those lines appear to be missing from this listing
+ // Use Darwin driver for "ld" on Darwin.
+ if (Progname == "ld")
+ return Darwin;
+ // Use GNU driver for "ld" on other Unix-like system.
+ if (Progname == "ld")
+ return Gnu;
+ // Progname may be something like "lld-gnu". Parse it.
+ SmallVector<StringRef, 3> V;
+ Progname.split(V, "-"); // try each dash-separated component in order
+ for (StringRef S : V)
+ if (Flavor F = getFlavor(S)) // true for any result other than Invalid, the zero enumerator
+ return F;
+ return Invalid;
+static Flavor parseFlavor(std::vector<const char *> &V) {
+  // An explicit "-flavor <name>" in the first option position wins;
+  // otherwise the flavor is deduced from the program name.
+  const bool HasFlavorOpt = V.size() > 1 && V[1] == StringRef("-flavor");
+  if (!HasFlavorOpt) {
+    // Deduce the flavor from argv[0], ignoring any directory part and a
+    // trailing ".exe" (matched case-insensitively).
+    StringRef Arg0 = path::filename(V[0]);
+    if (Arg0.endswith_lower(".exe"))
+      Arg0 = Arg0.drop_back(4);
+    return parseProgname(Arg0);
+  }
+  // Parse -flavor option.
+  if (V.size() <= 2)
+    die("missing arg value for '-flavor'");
+  Flavor F = getFlavor(V[2]);
+  if (F == Invalid)
+    die("Unknown flavor: " + StringRef(V[2]));
+  // Remove "-flavor <name>" so the selected driver does not see it.
+  V.erase(V.begin() + 1, V.begin() + 3);
+  return F;
+// If this function returns true, lld calls _exit() so that it quickly
+// exits without invoking destructors of globally allocated objects.
+// We don't want to do that if we are running tests though, because
+// doing that breaks leak sanitizer. So, lit sets this environment variable,
+// and we use it to detect whether we are running tests or not.
+static bool canExitEarly() {
+  const StringRef InTest(getenv("LLD_IN_TEST"));
+  return InTest != "1";
+}
+/// Universal linker main(). This linker emulates the gnu, darwin, windows,
+/// or wasm linker based on the argv[0] or -flavor option.
+int main(int Argc, const char **Argv) {
+ InitLLVM X(Argc, Argv);
+ std::vector<const char *> Args(Argv, Argv + Argc);
+#ifdef __FreeBSD__
+ return !elf::link(Args, true);
+ switch (parseFlavor(Args)) {
+ case Gnu:
+ if (isPETarget(Args))
+ return !mingw::link(Args);
+ return !elf::link(Args, canExitEarly());
+ case WinLink:
+ return !coff::link(Args, canExitEarly());
+ case Darwin:
+ return !mach_o::link(Args, canExitEarly());
+ case Wasm:
+ return !wasm::link(Args, canExitEarly());
+ default:
+ die("lld is a generic driver.\n"
+ "Invoke ld.lld (Unix), ld64.lld (macOS), lld-link (Windows), wasm-ld"
+ " (WebAssembly) instead");
+ }