diff options
Diffstat (limited to 'llvm/tools/llvm-objcopy')
28 files changed, 1327 insertions, 323 deletions
diff --git a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp b/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp index b172fae527eb..43ec2b1fa82f 100644 --- a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp +++ b/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp @@ -89,6 +89,43 @@ static void addGnuDebugLink(Object &Obj, StringRef DebugLinkFile) { IMAGE_SCN_MEM_DISCARDABLE); } +static void setSectionFlags(Section &Sec, SectionFlag AllFlags) { + // Need to preserve alignment flags. + const uint32_t PreserveMask = + IMAGE_SCN_ALIGN_1BYTES | IMAGE_SCN_ALIGN_2BYTES | IMAGE_SCN_ALIGN_4BYTES | + IMAGE_SCN_ALIGN_8BYTES | IMAGE_SCN_ALIGN_16BYTES | + IMAGE_SCN_ALIGN_32BYTES | IMAGE_SCN_ALIGN_64BYTES | + IMAGE_SCN_ALIGN_128BYTES | IMAGE_SCN_ALIGN_256BYTES | + IMAGE_SCN_ALIGN_512BYTES | IMAGE_SCN_ALIGN_1024BYTES | + IMAGE_SCN_ALIGN_2048BYTES | IMAGE_SCN_ALIGN_4096BYTES | + IMAGE_SCN_ALIGN_8192BYTES; + + // Setup new section characteristics based on the flags provided in command + // line. + uint32_t NewCharacteristics = + (Sec.Header.Characteristics & PreserveMask) | IMAGE_SCN_MEM_READ; + + if ((AllFlags & SectionFlag::SecAlloc) && !(AllFlags & SectionFlag::SecLoad)) + NewCharacteristics |= IMAGE_SCN_CNT_UNINITIALIZED_DATA; + if (AllFlags & SectionFlag::SecNoload) + NewCharacteristics |= IMAGE_SCN_LNK_REMOVE; + if (!(AllFlags & SectionFlag::SecReadonly)) + NewCharacteristics |= IMAGE_SCN_MEM_WRITE; + if (AllFlags & SectionFlag::SecDebug) + NewCharacteristics |= + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_DISCARDABLE; + if (AllFlags & SectionFlag::SecCode) + NewCharacteristics |= IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE; + if (AllFlags & SectionFlag::SecData) + NewCharacteristics |= IMAGE_SCN_CNT_INITIALIZED_DATA; + if (AllFlags & SectionFlag::SecShare) + NewCharacteristics |= IMAGE_SCN_MEM_SHARED; + if (AllFlags & SectionFlag::SecExclude) + NewCharacteristics |= IMAGE_SCN_LNK_REMOVE; + + Sec.Header.Characteristics = NewCharacteristics; +} + static Error handleArgs(const CopyConfig 
&Config, Object &Obj) { // Perform the actual section removals. Obj.removeSections([&Config](const Section &Sec) { @@ -178,6 +215,13 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) { return false; }); + if (!Config.SetSectionFlags.empty()) + for (Section &Sec : Obj.getMutableSections()) { + const auto It = Config.SetSectionFlags.find(Sec.Name); + if (It != Config.SetSectionFlags.end()) + setSectionFlags(Sec, It->second.NewFlags); + } + for (const auto &Flag : Config.AddSection) { StringRef SecName, FileName; std::tie(SecName, FileName) = Flag.split("="); @@ -205,10 +249,11 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) { !Config.SymbolsToGlobalize.empty() || !Config.SymbolsToKeep.empty() || !Config.SymbolsToLocalize.empty() || !Config.SymbolsToWeaken.empty() || !Config.SymbolsToKeepGlobal.empty() || !Config.SectionsToRename.empty() || - !Config.SetSectionAlignment.empty() || !Config.SetSectionFlags.empty() || - Config.ExtractDWO || Config.KeepFileSymbols || Config.LocalizeHidden || - Config.PreserveDates || Config.StripDWO || Config.StripNonAlloc || - Config.StripSections || Config.Weaken || Config.DecompressDebugSections || + !Config.SetSectionAlignment.empty() || Config.ExtractDWO || + Config.LocalizeHidden || Config.PreserveDates || Config.StripDWO || + Config.StripNonAlloc || Config.StripSections || + Config.StripSwiftSymbols || Config.Weaken || + Config.DecompressDebugSections || Config.DiscardMode == DiscardType::Locals || !Config.SymbolsToAdd.empty() || Config.EntryExpr) { return createStringError(llvm::errc::invalid_argument, diff --git a/llvm/tools/llvm-objcopy/COFF/Reader.cpp b/llvm/tools/llvm-objcopy/COFF/Reader.cpp index 7be9cce2be3d..d1beacb3bd67 100644 --- a/llvm/tools/llvm-objcopy/COFF/Reader.cpp +++ b/llvm/tools/llvm-objcopy/COFF/Reader.cpp @@ -45,9 +45,9 @@ Error COFFReader::readExecutableHeaders(Object &Obj) const { } for (size_t I = 0; I < Obj.PeHeader.NumberOfRvaAndSize; I++) { - const data_directory *Dir; - 
if (auto EC = COFFObj.getDataDirectory(I, Dir)) - return errorCodeToError(EC); + const data_directory *Dir = COFFObj.getDataDirectory(I); + if (!Dir) + return errorCodeToError(object_error::parse_failed); Obj.DataDirectories.emplace_back(*Dir); } return Error::success(); @@ -57,9 +57,10 @@ Error COFFReader::readSections(Object &Obj) const { std::vector<Section> Sections; // Section indexing starts from 1. for (size_t I = 1, E = COFFObj.getNumberOfSections(); I <= E; I++) { - const coff_section *Sec; - if (auto EC = COFFObj.getSection(I, Sec)) - return errorCodeToError(EC); + Expected<const coff_section *> SecOrErr = COFFObj.getSection(I); + if (!SecOrErr) + return SecOrErr.takeError(); + const coff_section *Sec = *SecOrErr; Sections.push_back(Section()); Section &S = Sections.back(); S.Header = *Sec; @@ -99,8 +100,10 @@ Error COFFReader::readSymbols(Object &Obj, bool IsBigObj) const { else copySymbol(Sym.Sym, *reinterpret_cast<const coff_symbol16 *>(SymRef.getRawPtr())); - if (auto EC = COFFObj.getSymbolName(SymRef, Sym.Name)) - return errorCodeToError(EC); + auto NameOrErr = COFFObj.getSymbolName(SymRef); + if (!NameOrErr) + return NameOrErr.takeError(); + Sym.Name = *NameOrErr; ArrayRef<uint8_t> AuxData = COFFObj.getSymbolAuxData(SymRef); size_t SymSize = IsBigObj ? 
sizeof(coff_symbol32) : sizeof(coff_symbol16); diff --git a/llvm/tools/llvm-objcopy/COFF/Writer.cpp b/llvm/tools/llvm-objcopy/COFF/Writer.cpp index e35e0474a36d..6b560890a4c1 100644 --- a/llvm/tools/llvm-objcopy/COFF/Writer.cpp +++ b/llvm/tools/llvm-objcopy/COFF/Writer.cpp @@ -383,6 +383,16 @@ Error COFFWriter::write(bool IsBigObj) { return Buf.commit(); } +Expected<uint32_t> COFFWriter::virtualAddressToFileAddress(uint32_t RVA) { + for (const auto &S : Obj.getSections()) { + if (RVA >= S.Header.VirtualAddress && + RVA < S.Header.VirtualAddress + S.Header.SizeOfRawData) + return S.Header.PointerToRawData + RVA - S.Header.VirtualAddress; + } + return createStringError(object_error::parse_failed, + "debug directory payload not found"); +} + // Locate which sections contain the debug directories, iterate over all // the debug_directory structs in there, and set the PointerToRawData field // in all of them, according to their new physical location in the file. @@ -406,10 +416,17 @@ Error COFFWriter::patchDebugDirectory() { uint8_t *End = Ptr + Dir->Size; while (Ptr < End) { debug_directory *Debug = reinterpret_cast<debug_directory *>(Ptr); - Debug->PointerToRawData = - S.Header.PointerToRawData + Offset + sizeof(debug_directory); - Ptr += sizeof(debug_directory) + Debug->SizeOfData; - Offset += sizeof(debug_directory) + Debug->SizeOfData; + if (!Debug->AddressOfRawData) + return createStringError(object_error::parse_failed, + "debug directory payload outside of " + "mapped sections not supported"); + if (Expected<uint32_t> FilePosOrErr = + virtualAddressToFileAddress(Debug->AddressOfRawData)) + Debug->PointerToRawData = *FilePosOrErr; + else + return FilePosOrErr.takeError(); + Ptr += sizeof(debug_directory); + Offset += sizeof(debug_directory); } // Debug directory found and patched, all done. 
return Error::success(); diff --git a/llvm/tools/llvm-objcopy/COFF/Writer.h b/llvm/tools/llvm-objcopy/COFF/Writer.h index 681a8d5e4a66..3c0bdcbd5d6f 100644 --- a/llvm/tools/llvm-objcopy/COFF/Writer.h +++ b/llvm/tools/llvm-objcopy/COFF/Writer.h @@ -45,6 +45,7 @@ class COFFWriter { Error write(bool IsBigObj); Error patchDebugDirectory(); + Expected<uint32_t> virtualAddressToFileAddress(uint32_t RVA); public: virtual ~COFFWriter() {} diff --git a/llvm/tools/llvm-objcopy/CopyConfig.cpp b/llvm/tools/llvm-objcopy/CopyConfig.cpp index 73ed00b5cb2a..1fde54dd290a 100644 --- a/llvm/tools/llvm-objcopy/CopyConfig.cpp +++ b/llvm/tools/llvm-objcopy/CopyConfig.cpp @@ -146,6 +146,7 @@ static SectionFlag parseSectionRenameFlag(StringRef SectionName) { .CaseLower("strings", SectionFlag::SecStrings) .CaseLower("contents", SectionFlag::SecContents) .CaseLower("share", SectionFlag::SecShare) + .CaseLower("exclude", SectionFlag::SecExclude) .Default(SectionFlag::SecNone); } @@ -158,8 +159,8 @@ parseSectionFlagSet(ArrayRef<StringRef> SectionFlags) { return createStringError( errc::invalid_argument, "unrecognized section flag '%s'. 
Flags supported for GNU " - "compatibility: alloc, load, noload, readonly, debug, code, data, " - "rom, share, contents, merge, strings", + "compatibility: alloc, load, noload, readonly, exclude, debug, " + "code, data, rom, share, contents, merge, strings", Flag.str().c_str()); ParsedFlags |= ParsedFlag; } @@ -272,6 +273,7 @@ static const StringMap<MachineInfo> TargetMap{ // SPARC {"elf32-sparc", {ELF::EM_SPARC, false, false}}, {"elf32-sparcel", {ELF::EM_SPARC, false, true}}, + {"elf32-hexagon", {ELF::EM_HEXAGON, false, true}}, }; static Expected<TargetInfo> @@ -391,9 +393,30 @@ template <class T> static ErrorOr<T> getAsInteger(StringRef Val) { return Result; } +namespace { + +enum class ToolType { Objcopy, Strip, InstallNameTool }; + +} // anonymous namespace + static void printHelp(const opt::OptTable &OptTable, raw_ostream &OS, - StringRef ToolName) { - OptTable.PrintHelp(OS, (ToolName + " input [output]").str().c_str(), + ToolType Tool) { + StringRef HelpText, ToolName; + switch (Tool) { + case ToolType::Objcopy: + ToolName = "llvm-objcopy"; + HelpText = " [options] input [output]"; + break; + case ToolType::Strip: + ToolName = "llvm-strip"; + HelpText = " [options] inputs..."; + break; + case ToolType::InstallNameTool: + ToolName = "llvm-install-name-tool"; + HelpText = " [options] input"; + break; + } + OptTable.PrintHelp(OS, (ToolName + HelpText).str().c_str(), (ToolName + " tool").str().c_str()); // TODO: Replace this with libOption call once it adds extrahelp support. 
// The CommandLine library has a cl::extrahelp class to support this, @@ -414,12 +437,12 @@ parseObjcopyOptions(ArrayRef<const char *> ArgsArr, T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount); if (InputArgs.size() == 0) { - printHelp(T, errs(), "llvm-objcopy"); + printHelp(T, errs(), ToolType::Objcopy); exit(1); } if (InputArgs.hasArg(OBJCOPY_help)) { - printHelp(T, outs(), "llvm-objcopy"); + printHelp(T, outs(), ToolType::Objcopy); exit(0); } @@ -665,8 +688,10 @@ parseObjcopyOptions(ArrayRef<const char *> ArgsArr, Config.KeepFileSymbols = InputArgs.hasArg(OBJCOPY_keep_file_symbols); Config.DecompressDebugSections = InputArgs.hasArg(OBJCOPY_decompress_debug_sections); - if (Config.DiscardMode == DiscardType::All) + if (Config.DiscardMode == DiscardType::All) { Config.StripDebug = true; + Config.KeepFileSymbols = true; + } for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbol)) if (Error E = Config.SymbolsToLocalize.addMatcher(NameOrPattern::create( Arg->getValue(), SymbolMatchStyle, ErrorCallback))) @@ -802,13 +827,20 @@ parseInstallNameToolOptions(ArrayRef<const char *> ArgsArr) { llvm::opt::InputArgList InputArgs = T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount); + if (MissingArgumentCount) + return createStringError( + errc::invalid_argument, + "missing argument to " + + StringRef(InputArgs.getArgString(MissingArgumentIndex)) + + " option"); + if (InputArgs.size() == 0) { - printHelp(T, errs(), "llvm-install-name-tool"); + printHelp(T, errs(), ToolType::InstallNameTool); exit(1); } if (InputArgs.hasArg(INSTALL_NAME_TOOL_help)) { - printHelp(T, outs(), "llvm-install-name-tool"); + printHelp(T, outs(), ToolType::InstallNameTool); exit(0); } @@ -822,6 +854,61 @@ parseInstallNameToolOptions(ArrayRef<const char *> ArgsArr) { for (auto Arg : InputArgs.filtered(INSTALL_NAME_TOOL_add_rpath)) Config.RPathToAdd.push_back(Arg->getValue()); + for (auto Arg : InputArgs.filtered(INSTALL_NAME_TOOL_delete_rpath)) { + StringRef RPath = 
Arg->getValue(); + + // Cannot add and delete the same rpath at the same time. + if (is_contained(Config.RPathToAdd, RPath)) + return createStringError( + errc::invalid_argument, + "cannot specify both -add_rpath %s and -delete_rpath %s", + RPath.str().c_str(), RPath.str().c_str()); + + Config.RPathsToRemove.insert(RPath); + } + + for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_rpath)) { + StringRef Old = Arg->getValue(0); + StringRef New = Arg->getValue(1); + + auto Match = [=](StringRef RPath) { return RPath == Old || RPath == New; }; + + // Cannot specify duplicate -rpath entries + auto It1 = find_if( + Config.RPathsToUpdate, + [&Match](const DenseMap<StringRef, StringRef>::value_type &OldNew) { + return Match(OldNew.getFirst()) || Match(OldNew.getSecond()); + }); + if (It1 != Config.RPathsToUpdate.end()) + return createStringError(errc::invalid_argument, + "cannot specify both -rpath " + It1->getFirst() + + " " + It1->getSecond() + " and -rpath " + + Old + " " + New); + + // Cannot specify the same rpath under both -delete_rpath and -rpath + auto It2 = find_if(Config.RPathsToRemove, Match); + if (It2 != Config.RPathsToRemove.end()) + return createStringError(errc::invalid_argument, + "cannot specify both -delete_rpath " + *It2 + + " and -rpath " + Old + " " + New); + + // Cannot specify the same rpath under both -add_rpath and -rpath + auto It3 = find_if(Config.RPathToAdd, Match); + if (It3 != Config.RPathToAdd.end()) + return createStringError(errc::invalid_argument, + "cannot specify both -add_rpath " + *It3 + + " and -rpath " + Old + " " + New); + + Config.RPathsToUpdate.insert({Old, New}); + } + + if (auto *Arg = InputArgs.getLastArg(INSTALL_NAME_TOOL_id)) + Config.SharedLibId = Arg->getValue(); + + for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_change)) { + Config.InstallNamesToUpdate.insert({Arg->getValue(0), Arg->getValue(1)}); + } + SmallVector<StringRef, 2> Positional; for (auto Arg : InputArgs.filtered(INSTALL_NAME_TOOL_UNKNOWN)) return 
createStringError(errc::invalid_argument, "unknown argument '%s'", @@ -853,12 +940,12 @@ parseStripOptions(ArrayRef<const char *> ArgsArr, T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount); if (InputArgs.size() == 0) { - printHelp(T, errs(), "llvm-strip"); + printHelp(T, errs(), ToolType::Strip); exit(1); } if (InputArgs.hasArg(STRIP_help)) { - printHelp(T, outs(), "llvm-strip"); + printHelp(T, outs(), ToolType::Strip); exit(0); } @@ -908,6 +995,7 @@ parseStripOptions(ArrayRef<const char *> ArgsArr, if (auto Arg = InputArgs.getLastArg(STRIP_strip_all, STRIP_no_strip_all)) Config.StripAll = Arg->getOption().getID() == STRIP_strip_all; Config.StripAllGNU = InputArgs.hasArg(STRIP_strip_all_gnu); + Config.StripSwiftSymbols = InputArgs.hasArg(STRIP_strip_swift_symbols); Config.OnlyKeepDebug = InputArgs.hasArg(STRIP_only_keep_debug); Config.KeepFileSymbols = InputArgs.hasArg(STRIP_keep_file_symbols); @@ -936,8 +1024,10 @@ parseStripOptions(ArrayRef<const char *> ArgsArr, !Config.StripAllGNU && Config.SymbolsToRemove.empty()) Config.StripAll = true; - if (Config.DiscardMode == DiscardType::All) + if (Config.DiscardMode == DiscardType::All) { Config.StripDebug = true; + Config.KeepFileSymbols = true; + } Config.DeterministicArchives = InputArgs.hasFlag(STRIP_enable_deterministic_archives, diff --git a/llvm/tools/llvm-objcopy/CopyConfig.h b/llvm/tools/llvm-objcopy/CopyConfig.h index c262934b4a41..1341dd674c7b 100644 --- a/llvm/tools/llvm-objcopy/CopyConfig.h +++ b/llvm/tools/llvm-objcopy/CopyConfig.h @@ -12,6 +12,7 @@ #include "ELF/ELFConfig.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitmaskEnum.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" @@ -69,7 +70,8 @@ enum SectionFlag { SecStrings = 1 << 9, SecContents = 1 << 10, SecShare = 1 << 11, - LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ SecShare) + SecExclude = 1 << 12, + 
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/SecExclude) }; struct SectionRename { @@ -176,6 +178,12 @@ struct CopyConfig { std::vector<StringRef> DumpSection; std::vector<StringRef> SymbolsToAdd; std::vector<StringRef> RPathToAdd; + DenseMap<StringRef, StringRef> RPathsToUpdate; + DenseMap<StringRef, StringRef> InstallNamesToUpdate; + DenseSet<StringRef> RPathsToRemove; + + // install-name-tool's id option + Optional<StringRef> SharedLibId; // Section matchers NameMatcher KeepSection; @@ -218,6 +226,7 @@ struct CopyConfig { bool StripDebug = false; bool StripNonAlloc = false; bool StripSections = false; + bool StripSwiftSymbols = false; bool StripUnneeded = false; bool Weaken = false; bool DecompressDebugSections = false; diff --git a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp index a0cfd9a5ff86..66953f9ef0d5 100644 --- a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp +++ b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp @@ -11,7 +11,6 @@ #include "CopyConfig.h" #include "Object.h" #include "llvm-objcopy.h" - #include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" @@ -32,6 +31,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/Memory.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" @@ -83,6 +83,8 @@ uint64_t getNewShfFlags(SectionFlag AllFlags) { NewFlags |= ELF::SHF_MERGE; if (AllFlags & SectionFlag::SecStrings) NewFlags |= ELF::SHF_STRINGS; + if (AllFlags & SectionFlag::SecExclude) + NewFlags |= ELF::SHF_EXCLUDE; return NewFlags; } @@ -90,10 +92,11 @@ static uint64_t getSectionFlagsPreserveMask(uint64_t OldFlags, uint64_t NewFlags) { // Preserve some flags which should not be dropped when setting flags. // Also, preserve anything OS/processor dependant. 
- const uint64_t PreserveMask = ELF::SHF_COMPRESSED | ELF::SHF_EXCLUDE | - ELF::SHF_GROUP | ELF::SHF_LINK_ORDER | - ELF::SHF_MASKOS | ELF::SHF_MASKPROC | - ELF::SHF_TLS | ELF::SHF_INFO_LINK; + const uint64_t PreserveMask = + (ELF::SHF_COMPRESSED | ELF::SHF_GROUP | ELF::SHF_LINK_ORDER | + ELF::SHF_MASKOS | ELF::SHF_MASKPROC | ELF::SHF_TLS | + ELF::SHF_INFO_LINK) & + ~ELF::SHF_EXCLUDE; return (OldFlags & PreserveMask) | (NewFlags & ~PreserveMask); } @@ -267,7 +270,7 @@ static Error splitDWOToFile(const CopyConfig &Config, const Reader &Reader, auto OnlyKeepDWOPred = [&DWOFile](const SectionBase &Sec) { return onlyKeepDWOPred(*DWOFile, Sec); }; - if (Error E = DWOFile->removeSections(Config.AllowBrokenLinks, + if (Error E = DWOFile->removeSections(Config.AllowBrokenLinks, OnlyKeepDWOPred)) return E; if (Config.OutputArch) { @@ -285,7 +288,7 @@ static Error dumpSectionToFile(StringRef SecName, StringRef Filename, Object &Obj) { for (auto &Sec : Obj.sections()) { if (Sec.Name == SecName) { - if (Sec.OriginalData.empty()) + if (Sec.Type == SHT_NOBITS) return createStringError(object_error::parse_failed, "cannot dump section '%s': it has no contents", SecName.str().c_str()); @@ -387,7 +390,7 @@ static Error updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) { const auto I = Config.SymbolsToRename.find(Sym.Name); if (I != Config.SymbolsToRename.end()) - Sym.Name = I->getValue(); + Sym.Name = std::string(I->getValue()); if (!Config.SymbolsPrefix.empty() && Sym.Type != STT_SECTION) Sym.Name = (Config.SymbolsPrefix + Sym.Name).str(); @@ -417,6 +420,9 @@ static Error updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) { if (Config.StripAll || Config.StripAllGNU) return true; + if (Config.StripDebug && Sym.Type == STT_FILE) + return true; + if (Config.SymbolsToRemove.matches(Sym.Name)) return true; @@ -572,11 +578,11 @@ static Error replaceAndRemoveSections(const CopyConfig &Config, Object &Obj) { } if (Config.CompressionType != DebugCompressionType::None) 
- replaceDebugSections(Obj, RemovePred, isCompressable, + replaceDebugSections(Obj, RemovePred, isCompressable, [&Config, &Obj](const SectionBase *S) { return &Obj.addSection<CompressedSection>( *S, Config.CompressionType); - }); + }); else if (Config.DecompressDebugSections) replaceDebugSections( Obj, RemovePred, @@ -598,7 +604,9 @@ static Error replaceAndRemoveSections(const CopyConfig &Config, Object &Obj) { // system. The only priority is that keeps/copies overrule removes. static Error handleArgs(const CopyConfig &Config, Object &Obj, const Reader &Reader, ElfType OutputElfType) { - + if (Config.StripSwiftSymbols) + return createStringError(llvm::errc::invalid_argument, + "option not supported by llvm-objcopy for ELF"); if (!Config.SplitDWO.empty()) if (Error E = splitDWOToFile(Config, Reader, Config.SplitDWO, OutputElfType)) @@ -609,6 +617,15 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj, Obj.OSABI = Config.OutputArch.getValue().OSABI; } + // Dump sections before add/remove for compatibility with GNU objcopy. + for (StringRef Flag : Config.DumpSection) { + StringRef SectionName; + StringRef FileName; + std::tie(SectionName, FileName) = Flag.split('='); + if (Error E = dumpSectionToFile(SectionName, FileName, Obj)) + return E; + } + // It is important to remove the sections first. For example, we want to // remove the relocation sections before removing the symbols. 
That allows // us to avoid reporting the inappropriate errors about removing symbols @@ -624,7 +641,7 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj, const auto Iter = Config.SectionsToRename.find(Sec.Name); if (Iter != Config.SectionsToRename.end()) { const SectionRename &SR = Iter->second; - Sec.Name = SR.NewName; + Sec.Name = std::string(SR.NewName); if (SR.NewFlags.hasValue()) setSectionFlagsAndType(Sec, SR.NewFlags.getValue()); } @@ -717,18 +734,16 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj, NewSection.Type = SHT_NOTE; } - for (const auto &Flag : Config.DumpSection) { - std::pair<StringRef, StringRef> SecPair = Flag.split("="); - StringRef SecName = SecPair.first; - StringRef File = SecPair.second; - if (Error E = dumpSectionToFile(SecName, File, Obj)) - return E; - } - if (!Config.AddGnuDebugLink.empty()) Obj.addSection<GnuDebugLinkSection>(Config.AddGnuDebugLink, Config.GnuDebugLinkCRC32); + // If the symbol table was previously removed, we need to create a new one + // before adding new symbols. + if (!Obj.SymbolTable && !Config.ELF->SymbolsToAdd.empty()) { + Obj.addNewSymbolTable(); + } + for (const NewSymbolInfo &SI : Config.ELF->SymbolsToAdd) { SectionBase *Sec = Obj.findSection(SI.SectionName); uint64_t Value = Sec ? 
Sec->Addr + SI.Value : SI.Value; diff --git a/llvm/tools/llvm-objcopy/ELF/Object.cpp b/llvm/tools/llvm-objcopy/ELF/Object.cpp index ad53c75663ec..e15fb24f4c42 100644 --- a/llvm/tools/llvm-objcopy/ELF/Object.cpp +++ b/llvm/tools/llvm-objcopy/ELF/Object.cpp @@ -65,6 +65,7 @@ void SectionBase::finalize() {} void SectionBase::markSymbols() {} void SectionBase::replaceSectionReferences( const DenseMap<SectionBase *, SectionBase *> &) {} +void SectionBase::onRemove() {} template <class ELFT> void ELFWriter<ELFT>::writeShdr(const SectionBase &Sec) { uint8_t *B = Buf.getBufferStart() + Sec.HeaderOffset; @@ -111,7 +112,9 @@ void ELFSectionSizer<ELFT>::visit(RelocationSection &Sec) { template <class ELFT> void ELFSectionSizer<ELFT>::visit(GnuDebugLinkSection &Sec) {} -template <class ELFT> void ELFSectionSizer<ELFT>::visit(GroupSection &Sec) {} +template <class ELFT> void ELFSectionSizer<ELFT>::visit(GroupSection &Sec) { + Sec.Size = sizeof(Elf_Word) + Sec.GroupMembers.size() * sizeof(Elf_Word); +} template <class ELFT> void ELFSectionSizer<ELFT>::visit(SectionIndexSection &Sec) {} @@ -605,6 +608,7 @@ static bool isValidReservedSectionIndex(uint16_t Index, uint16_t Machine) { if (Machine == EM_HEXAGON) { switch (Index) { case SHN_HEXAGON_SCOMMON: + case SHN_HEXAGON_SCOMMON_1: case SHN_HEXAGON_SCOMMON_2: case SHN_HEXAGON_SCOMMON_4: case SHN_HEXAGON_SCOMMON_8: @@ -741,7 +745,7 @@ void SymbolTableSection::prepareForLayout() { // Reserve proper amount of space in section index table, so we can // layout sections correctly. We will fill the table with correct // indexes later in fillShdnxTable. - if (SectionIndexTable) + if (SectionIndexTable) SectionIndexTable->reserve(Symbols.size()); // Add all of our strings to SymbolNames so that SymbolNames has the right @@ -963,8 +967,24 @@ Error Section::removeSectionReferences( } void GroupSection::finalize() { - this->Info = Sym->Index; - this->Link = SymTab->Index; + this->Info = Sym ? Sym->Index : 0; + this->Link = SymTab ? 
SymTab->Index : 0; +} + +Error GroupSection::removeSectionReferences( + bool AllowBrokenLinks, function_ref<bool(const SectionBase *)> ToRemove) { + if (ToRemove(SymTab)) { + if (!AllowBrokenLinks) + return createStringError( + llvm::errc::invalid_argument, + "section '.symtab' cannot be removed because it is " + "referenced by the group section '%s'", + this->Name.data()); + SymTab = nullptr; + Sym = nullptr; + } + llvm::erase_if(GroupMembers, ToRemove); + return Error::success(); } Error GroupSection::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) { @@ -988,6 +1008,13 @@ void GroupSection::replaceSectionReferences( Sec = To; } +void GroupSection::onRemove() { + // As the header section of the group is removed, drop the Group flag in its + // former members. + for (SectionBase *Sec : GroupMembers) + Sec->Flags &= ~SHF_GROUP; +} + void Section::initialize(SectionTableRef SecTable) { if (Link == ELF::SHN_UNDEF) return; @@ -1101,14 +1128,6 @@ static bool compareSegmentsByOffset(const Segment *A, const Segment *B) { return A->Index < B->Index; } -static bool compareSegmentsByPAddr(const Segment *A, const Segment *B) { - if (A->PAddr < B->PAddr) - return true; - if (A->PAddr > B->PAddr) - return false; - return A->Index < B->Index; -} - void BasicELFBuilder::initFileHeader() { Obj->Flags = 0x0; Obj->Type = ET_REL; @@ -1241,7 +1260,7 @@ std::unique_ptr<Object> IHexELFBuilder::build() { template <class ELFT> void ELFBuilder<ELFT>::setParentSegment(Segment &Child) { for (Segment &Parent : Obj.segments()) { // Every segment will overlap with itself but we don't want a segment to - // be it's own parent so we avoid that situation. + // be its own parent so we avoid that situation. if (&Child != &Parent && segmentOverlapsSegment(Child, Parent)) { // We want a canonical "most parental" segment but this requires // inspecting the ParentSegment. 
@@ -1330,18 +1349,20 @@ void ELFBuilder<ELFT>::initGroupSection(GroupSection *GroupSec) { error("invalid alignment " + Twine(GroupSec->Align) + " of group section '" + GroupSec->Name + "'"); SectionTableRef SecTable = Obj.sections(); - auto SymTab = SecTable.template getSectionOfType<SymbolTableSection>( - GroupSec->Link, - "link field value '" + Twine(GroupSec->Link) + "' in section '" + - GroupSec->Name + "' is invalid", - "link field value '" + Twine(GroupSec->Link) + "' in section '" + - GroupSec->Name + "' is not a symbol table"); - Symbol *Sym = SymTab->getSymbolByIndex(GroupSec->Info); - if (!Sym) - error("info field value '" + Twine(GroupSec->Info) + "' in section '" + - GroupSec->Name + "' is not a valid symbol index"); - GroupSec->setSymTab(SymTab); - GroupSec->setSymbol(Sym); + if (GroupSec->Link != SHN_UNDEF) { + auto SymTab = SecTable.template getSectionOfType<SymbolTableSection>( + GroupSec->Link, + "link field value '" + Twine(GroupSec->Link) + "' in section '" + + GroupSec->Name + "' is invalid", + "link field value '" + Twine(GroupSec->Link) + "' in section '" + + GroupSec->Name + "' is not a symbol table"); + Symbol *Sym = SymTab->getSymbolByIndex(GroupSec->Info); + if (!Sym) + error("info field value '" + Twine(GroupSec->Info) + "' in section '" + + GroupSec->Name + "' is not a valid symbol index"); + GroupSec->setSymTab(SymTab); + GroupSec->setSymbol(Sym); + } if (GroupSec->Contents.size() % sizeof(ELF::Elf32_Word) || GroupSec->Contents.empty()) error("the content of the section " + GroupSec->Name + " is malformed"); @@ -1520,7 +1541,7 @@ template <class ELFT> void ELFBuilder<ELFT>::readSectionHeaders() { continue; } auto &Sec = makeSection(Shdr); - Sec.Name = unwrapOrError(ElfFile.getSectionName(&Shdr)); + Sec.Name = std::string(unwrapOrError(ElfFile.getSectionName(&Shdr))); Sec.Type = Sec.OriginalType = Shdr.sh_type; Sec.Flags = Sec.OriginalFlags = Shdr.sh_flags; Sec.Addr = Shdr.sh_addr; @@ -1567,27 +1588,7 @@ template <class ELFT> void 
ELFBuilder<ELFT>::readSections(bool EnsureSymtab) { Obj.SymbolTable->initialize(Obj.sections()); initSymbolTable(Obj.SymbolTable); } else if (EnsureSymtab) { - // Reuse an existing SHT_STRTAB section if it exists. - StringTableSection *StrTab = nullptr; - for (auto &Sec : Obj.sections()) { - if (Sec.Type == ELF::SHT_STRTAB && !(Sec.Flags & SHF_ALLOC)) { - StrTab = static_cast<StringTableSection *>(&Sec); - - // Prefer a string table that is not the section header string table, if - // such a table exists. - if (Obj.SectionNames != &Sec) - break; - } - } - if (!StrTab) - StrTab = &Obj.addSection<StringTableSection>(); - - SymbolTableSection &SymTab = Obj.addSection<SymbolTableSection>(); - SymTab.Name = ".symtab"; - SymTab.Link = StrTab->Index; - SymTab.initialize(Obj.sections()); - SymTab.addSymbol("", 0, 0, nullptr, 0, 0, 0, 0); - Obj.SymbolTable = &SymTab; + Obj.addNewSymbolTable(); } // Now that all sections and symbols have been added we can add @@ -1846,6 +1847,7 @@ Error Object::removeSections(bool AllowBrokenLinks, for (auto &RemoveSec : make_range(Iter, std::end(Sections))) { for (auto &Segment : Segments) Segment->removeSection(RemoveSec.get()); + RemoveSec->onRemove(); RemoveSections.insert(RemoveSec.get()); } @@ -1878,6 +1880,33 @@ Error Object::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) { return Error::success(); } +void Object::addNewSymbolTable() { + assert(!SymbolTable && "Object must not has a SymbolTable."); + + // Reuse an existing SHT_STRTAB section if it exists. + StringTableSection *StrTab = nullptr; + for (SectionBase &Sec : sections()) { + if (Sec.Type == ELF::SHT_STRTAB && !(Sec.Flags & SHF_ALLOC)) { + StrTab = static_cast<StringTableSection *>(&Sec); + + // Prefer a string table that is not the section header string table, if + // such a table exists. 
+ if (SectionNames != &Sec) + break; + } + } + if (!StrTab) + StrTab = &addSection<StringTableSection>(); + + SymbolTableSection &SymTab = addSection<SymbolTableSection>(); + SymTab.Name = ".symtab"; + SymTab.Link = StrTab->Index; + SymTab.initialize(sections()); + SymTab.addSymbol("", 0, 0, nullptr, 0, 0, 0, 0); + + SymbolTable = &SymTab; +} + void Object::sortSections() { // Use stable_sort to maintain the original ordering as closely as possible. llvm::stable_sort(Sections, [](const SecPtr &A, const SecPtr &B) { @@ -1902,8 +1931,7 @@ static void orderSegments(std::vector<Segment *> &Segments) { // returns an Offset one past the end of the last segment. static uint64_t layoutSegments(std::vector<Segment *> &Segments, uint64_t Offset) { - assert(std::is_sorted(std::begin(Segments), std::end(Segments), - compareSegmentsByOffset)); + assert(llvm::is_sorted(Segments, compareSegmentsByOffset)); // The only way a segment should move is if a section was between two // segments and that section was removed. If that section isn't in a segment // then it's acceptable, but not ideal, to simply move it to after the @@ -2225,56 +2253,29 @@ Error BinaryWriter::write() { } Error BinaryWriter::finalize() { - // We need a temporary list of segments that has a special order to it - // so that we know that anytime ->ParentSegment is set that segment has - // already had it's offset properly set. We only want to consider the segments - // that will affect layout of allocated sections so we only add those. - std::vector<Segment *> OrderedSegments; - for (const SectionBase &Sec : Obj.allocSections()) - if (Sec.ParentSegment != nullptr) - OrderedSegments.push_back(Sec.ParentSegment); - - // For binary output, we're going to use physical addresses instead of - // virtual addresses, since a binary output is used for cases like ROM - // loading and physical addresses are intended for ROM loading. 
- // However, if no segment has a physical address, we'll fallback to using - // virtual addresses for all. - if (all_of(OrderedSegments, - [](const Segment *Seg) { return Seg->PAddr == 0; })) - for (Segment *Seg : OrderedSegments) - Seg->PAddr = Seg->VAddr; - - llvm::stable_sort(OrderedSegments, compareSegmentsByPAddr); - - // Because we add a ParentSegment for each section we might have duplicate - // segments in OrderedSegments. If there were duplicates then layoutSegments - // would do very strange things. - auto End = - std::unique(std::begin(OrderedSegments), std::end(OrderedSegments)); - OrderedSegments.erase(End, std::end(OrderedSegments)); - // Compute the section LMA based on its sh_offset and the containing segment's - // p_offset and p_paddr. Also compute the minimum LMA of all sections as - // MinAddr. In the output, the contents between address 0 and MinAddr will be - // skipped. + // p_offset and p_paddr. Also compute the minimum LMA of all non-empty + // sections as MinAddr. In the output, the contents between address 0 and + // MinAddr will be skipped. uint64_t MinAddr = UINT64_MAX; for (SectionBase &Sec : Obj.allocSections()) { if (Sec.ParentSegment != nullptr) Sec.Addr = Sec.Offset - Sec.ParentSegment->Offset + Sec.ParentSegment->PAddr; - MinAddr = std::min(MinAddr, Sec.Addr); + if (Sec.Size > 0) + MinAddr = std::min(MinAddr, Sec.Addr); } // Now that every section has been laid out we just need to compute the total // file size. This might not be the same as the offset returned by // layoutSections, because we want to truncate the last segment to the end of - // its last section, to match GNU objcopy's behaviour. + // its last non-empty section, to match GNU objcopy's behaviour. 
TotalSize = 0; - for (SectionBase &Sec : Obj.allocSections()) { - Sec.Offset = Sec.Addr - MinAddr; - if (Sec.Type != SHT_NOBITS) + for (SectionBase &Sec : Obj.allocSections()) + if (Sec.Type != SHT_NOBITS && Sec.Size > 0) { + Sec.Offset = Sec.Addr - MinAddr; TotalSize = std::max(TotalSize, Sec.Offset + Sec.Size); - } + } if (Error E = Buf.allocate(TotalSize)) return E; diff --git a/llvm/tools/llvm-objcopy/ELF/Object.h b/llvm/tools/llvm-objcopy/ELF/Object.h index 97702a66bc47..ed89e916b838 100644 --- a/llvm/tools/llvm-objcopy/ELF/Object.h +++ b/llvm/tools/llvm-objcopy/ELF/Object.h @@ -424,6 +424,8 @@ public: virtual void markSymbols(); virtual void replaceSectionReferences(const DenseMap<SectionBase *, SectionBase *> &); + // Notify the section that it is subject to removal. + virtual void onRemove(); }; class Segment { @@ -799,10 +801,14 @@ public: void accept(SectionVisitor &) const override; void accept(MutableSectionVisitor &Visitor) override; void finalize() override; + Error removeSectionReferences( + bool AllowBrokenLinks, + function_ref<bool(const SectionBase *)> ToRemove) override; Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override; void markSymbols() override; void replaceSectionReferences( const DenseMap<SectionBase *, SectionBase *> &FromTo) override; + void onRemove() override; static bool classof(const SectionBase *S) { return S->OriginalType == ELF::SHT_GROUP; @@ -1066,6 +1072,7 @@ public: Ptr->Index = Sections.size(); return *Ptr; } + void addNewSymbolTable(); Segment &addSegment(ArrayRef<uint8_t> Data) { Segments.emplace_back(std::make_unique<Segment>(Data)); return *Segments.back(); diff --git a/llvm/tools/llvm-objcopy/InstallNameToolOpts.td b/llvm/tools/llvm-objcopy/InstallNameToolOpts.td index 35047a57994c..04ffe62c42fc 100644 --- a/llvm/tools/llvm-objcopy/InstallNameToolOpts.td +++ b/llvm/tools/llvm-objcopy/InstallNameToolOpts.td @@ -18,5 +18,17 @@ def h : Flag<["-"], "h">, Alias<help>; def add_rpath : Option<["-", "--"], 
"add_rpath", KIND_SEPARATE>, HelpText<"Add new rpath">; +def delete_rpath: Option<["-", "--"], "delete_rpath", KIND_SEPARATE>, + HelpText<"Delete specified rpath">; + +def rpath: MultiArg<["-", "--"], "rpath", 2>, + HelpText<"Change rpath path name">; + +def id : Option<["-","--"], "id", KIND_SEPARATE>, + HelpText<"Change dynamic shared library id">; + +def change: MultiArg<["-", "--"], "change", 2>, + HelpText<"Change dependent shared library install name">; + def version : Flag<["--"], "version">, HelpText<"Print the version and exit.">; diff --git a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp index 380f2e989fe4..256c830a44a4 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp @@ -17,7 +17,7 @@ namespace macho { uint32_t MachOLayoutBuilder::computeSizeOfCmds() const { uint32_t Size = 0; - for (const auto &LC : O.LoadCommands) { + for (const LoadCommand &LC : O.LoadCommands) { const MachO::macho_load_command &MLC = LC.MachOLoadCommand; auto cmd = MLC.load_command_data.cmd; switch (cmd) { @@ -61,15 +61,16 @@ void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) { assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB); // Make sure that nlist entries in the symbol table are sorted by the those // types. The order is: local < defined external < undefined external. 
- assert(std::is_sorted(O.SymTable.Symbols.begin(), O.SymTable.Symbols.end(), - [](const std::unique_ptr<SymbolEntry> &A, - const std::unique_ptr<SymbolEntry> &B) { - bool AL = A->isLocalSymbol(), BL = B->isLocalSymbol(); - if (AL != BL) - return AL; - return !AL && !A->isUndefinedSymbol() && - B->isUndefinedSymbol(); - }) && + assert(llvm::is_sorted(O.SymTable.Symbols, + [](const std::unique_ptr<SymbolEntry> &A, + const std::unique_ptr<SymbolEntry> &B) { + bool AL = A->isLocalSymbol(), + BL = B->isLocalSymbol(); + if (AL != BL) + return AL; + return !AL && !A->isUndefinedSymbol() && + B->isUndefinedSymbol(); + }) && "Symbols are not sorted by their types."); uint32_t NumLocalSymbols = 0; @@ -107,7 +108,7 @@ uint64_t MachOLayoutBuilder::layoutSegments() { const bool IsObjectFile = O.Header.FileType == MachO::HeaderFileType::MH_OBJECT; uint64_t Offset = IsObjectFile ? (HeaderSize + O.Header.SizeOfCmds) : 0; - for (auto &LC : O.LoadCommands) { + for (LoadCommand &LC : O.LoadCommands) { auto &MLC = LC.MachOLoadCommand; StringRef Segname; uint64_t SegmentVmAddr; @@ -142,30 +143,30 @@ uint64_t MachOLayoutBuilder::layoutSegments() { uint64_t SegOffset = Offset; uint64_t SegFileSize = 0; uint64_t VMSize = 0; - for (auto &Sec : LC.Sections) { + for (std::unique_ptr<Section> &Sec : LC.Sections) { + assert(SegmentVmAddr <= Sec->Addr && + "Section's address cannot be smaller than Segment's one"); + uint32_t SectOffset = Sec->Addr - SegmentVmAddr; if (IsObjectFile) { - if (Sec.isVirtualSection()) { - Sec.Offset = 0; + if (Sec->isVirtualSection()) { + Sec->Offset = 0; } else { uint64_t PaddingSize = - offsetToAlignment(SegFileSize, Align(1ull << Sec.Align)); - Sec.Offset = SegOffset + SegFileSize + PaddingSize; - Sec.Size = Sec.Content.size(); - SegFileSize += PaddingSize + Sec.Size; + offsetToAlignment(SegFileSize, Align(1ull << Sec->Align)); + Sec->Offset = SegOffset + SegFileSize + PaddingSize; + Sec->Size = Sec->Content.size(); + SegFileSize += PaddingSize + Sec->Size; } - 
VMSize = std::max(VMSize, Sec.Addr + Sec.Size); } else { - if (Sec.isVirtualSection()) { - Sec.Offset = 0; - VMSize += Sec.Size; + if (Sec->isVirtualSection()) { + Sec->Offset = 0; } else { - uint32_t SectOffset = Sec.Addr - SegmentVmAddr; - Sec.Offset = SegOffset + SectOffset; - Sec.Size = Sec.Content.size(); - SegFileSize = std::max(SegFileSize, SectOffset + Sec.Size); - VMSize = std::max(VMSize, SegFileSize); + Sec->Offset = SegOffset + SectOffset; + Sec->Size = Sec->Content.size(); + SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size); } } + VMSize = std::max(VMSize, SectOffset + Sec->Size); } if (IsObjectFile) { @@ -204,21 +205,33 @@ uint64_t MachOLayoutBuilder::layoutSegments() { } uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) { - for (auto &LC : O.LoadCommands) - for (auto &Sec : LC.Sections) { - Sec.RelOff = Sec.Relocations.empty() ? 0 : Offset; - Sec.NReloc = Sec.Relocations.size(); - Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc; + for (LoadCommand &LC : O.LoadCommands) + for (std::unique_ptr<Section> &Sec : LC.Sections) { + Sec->RelOff = Sec->Relocations.empty() ? 0 : Offset; + Sec->NReloc = Sec->Relocations.size(); + Offset += sizeof(MachO::any_relocation_info) * Sec->NReloc; } return Offset; } Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { + // If we are building the layout of an executable or dynamic library + // which does not have any segments other than __LINKEDIT, + // the Offset can be equal to zero by this time. It happens because of the + // convention that in such cases the file offsets specified by LC_SEGMENT + // start with zero (unlike the case of a relocatable object file). + const uint64_t HeaderSize = + Is64Bit ? 
sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); + assert((!(O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) || + Offset >= HeaderSize + O.Header.SizeOfCmds) && + "Incorrect tail offset"); + Offset = std::max(Offset, HeaderSize + O.Header.SizeOfCmds); + // The order of LINKEDIT elements is as follows: // rebase info, binding info, weak binding info, lazy binding info, export // trie, data-in-code, symbol table, indirect symbol table, symbol table - // strings. + // strings, code signature. uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); uint64_t StartOfLinkEdit = Offset; uint64_t StartOfRebaseInfo = StartOfLinkEdit; @@ -237,8 +250,10 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { uint64_t StartOfSymbolStrings = StartOfIndirectSymbols + sizeof(uint32_t) * O.IndirectSymTable.Symbols.size(); + uint64_t StartOfCodeSignature = + StartOfSymbolStrings + StrTableBuilder.getSize(); uint64_t LinkEditSize = - (StartOfSymbolStrings + StrTableBuilder.getSize()) - StartOfLinkEdit; + (StartOfCodeSignature + O.CodeSignature.Data.size()) - StartOfLinkEdit; // Now we have determined the layout of the contents of the __LINKEDIT // segment. Update its load command. @@ -260,10 +275,14 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { } } - for (auto &LC : O.LoadCommands) { + for (LoadCommand &LC : O.LoadCommands) { auto &MLC = LC.MachOLoadCommand; auto cmd = MLC.load_command_data.cmd; switch (cmd) { + case MachO::LC_CODE_SIGNATURE: + MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature; + MLC.linkedit_data_command_data.datasize = O.CodeSignature.Data.size(); + break; case MachO::LC_SYMTAB: MLC.symtab_command_data.symoff = StartOfSymbols; MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size(); @@ -314,6 +333,19 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { O.Exports.Trie.empty() ? 
0 : StartOfExportTrie; MLC.dyld_info_command_data.export_size = O.Exports.Trie.size(); break; + // Note that LC_ENCRYPTION_INFO.cryptoff despite its name and the comment in + // <mach-o/loader.h> is not an offset in the binary file, instead, it is a + // relative virtual address. At the moment modification of the __TEXT + // segment of executables isn't supported anyway (e.g. data in code entries + // are not recalculated). Moreover, in general + // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because + // without making additional assumptions (e.g. that the entire __TEXT + // segment should be encrypted) we do not know how to recalculate the + // boundaries of the encrypted part. For now just copy over these load + // commands until we encounter a real world usecase where + // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted. + case MachO::LC_ENCRYPTION_INFO: + case MachO::LC_ENCRYPTION_INFO_64: case MachO::LC_LOAD_DYLINKER: case MachO::LC_MAIN: case MachO::LC_RPATH: @@ -326,6 +358,7 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { case MachO::LC_BUILD_VERSION: case MachO::LC_ID_DYLIB: case MachO::LC_LOAD_DYLIB: + case MachO::LC_LOAD_WEAK_DYLIB: case MachO::LC_UUID: case MachO::LC_SOURCE_VERSION: // Nothing to update. 
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp index 4578d0bb75d4..5ca5b133572b 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp @@ -10,6 +10,7 @@ #include "../CopyConfig.h" #include "MachOReader.h" #include "MachOWriter.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" @@ -18,21 +19,44 @@ namespace objcopy { namespace macho { using namespace object; -using SectionPred = std::function<bool(const Section &Sec)>; +using SectionPred = std::function<bool(const std::unique_ptr<Section> &Sec)>; +using LoadCommandPred = std::function<bool(const LoadCommand &LC)>; + +#ifndef NDEBUG +static bool isLoadCommandWithPayloadString(const LoadCommand &LC) { + // TODO: Add support for LC_REEXPORT_DYLIB, LC_LOAD_UPWARD_DYLIB and + // LC_LAZY_LOAD_DYLIB + return LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH || + LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_ID_DYLIB || + LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_DYLIB || + LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_WEAK_DYLIB; +} +#endif + +static StringRef getPayloadString(const LoadCommand &LC) { + assert(isLoadCommandWithPayloadString(LC) && + "unsupported load command encountered"); + + return StringRef(reinterpret_cast<const char *>(LC.Payload.data()), + LC.Payload.size()) + .rtrim('\0'); +} -static void removeSections(const CopyConfig &Config, Object &Obj) { - SectionPred RemovePred = [](const Section &) { return false; }; +static Error removeSections(const CopyConfig &Config, Object &Obj) { + SectionPred RemovePred = [](const std::unique_ptr<Section> &) { + return false; + }; if (!Config.ToRemove.empty()) { - RemovePred = [&Config, RemovePred](const Section &Sec) { - return Config.ToRemove.matches(Sec.CanonicalName); + RemovePred = [&Config, RemovePred](const std::unique_ptr<Section> &Sec) { + 
return Config.ToRemove.matches(Sec->CanonicalName); }; } if (Config.StripAll || Config.StripDebug) { // Remove all debug sections. - RemovePred = [RemovePred](const Section &Sec) { - if (Sec.Segname == "__DWARF") + RemovePred = [RemovePred](const std::unique_ptr<Section> &Sec) { + if (Sec->Segname == "__DWARF") return true; return RemovePred(Sec); @@ -41,8 +65,8 @@ static void removeSections(const CopyConfig &Config, Object &Obj) { if (!Config.OnlySection.empty()) { // Overwrite RemovePred because --only-section takes priority. - RemovePred = [&Config](const Section &Sec) { - return !Config.OnlySection.matches(Sec.CanonicalName); + RemovePred = [&Config](const std::unique_ptr<Section> &Sec) { + return !Config.OnlySection.matches(Sec->CanonicalName); }; } @@ -60,41 +84,158 @@ static void updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) { for (SymbolEntry &Sym : Obj.SymTable) { auto I = Config.SymbolsToRename.find(Sym.Name); if (I != Config.SymbolsToRename.end()) - Sym.Name = I->getValue(); + Sym.Name = std::string(I->getValue()); } - auto RemovePred = [Config](const std::unique_ptr<SymbolEntry> &N) { + auto RemovePred = [Config, &Obj](const std::unique_ptr<SymbolEntry> &N) { if (N->Referenced) return false; - return Config.StripAll; + if (Config.StripAll) + return true; + if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT)) + return true; + // This behavior is consistent with cctools' strip. 
+ if (Config.StripSwiftSymbols && (Obj.Header.Flags & MachO::MH_DYLDLINK) && + Obj.SwiftVersion && *Obj.SwiftVersion && N->isSwiftSymbol()) + return true; + return false; }; Obj.SymTable.removeSymbols(RemovePred); } +template <typename LCType> +static void updateLoadCommandPayloadString(LoadCommand &LC, StringRef S) { + assert(isLoadCommandWithPayloadString(LC) && + "unsupported load command encountered"); + + uint32_t NewCmdsize = alignTo(sizeof(LCType) + S.size() + 1, 8); + + LC.MachOLoadCommand.load_command_data.cmdsize = NewCmdsize; + LC.Payload.assign(NewCmdsize - sizeof(LCType), 0); + std::copy(S.begin(), S.end(), LC.Payload.begin()); +} + static LoadCommand buildRPathLoadCommand(StringRef Path) { LoadCommand LC; MachO::rpath_command RPathLC; RPathLC.cmd = MachO::LC_RPATH; RPathLC.path = sizeof(MachO::rpath_command); - RPathLC.cmdsize = alignTo(sizeof(MachO::rpath_command) + Path.size(), 8); + RPathLC.cmdsize = alignTo(sizeof(MachO::rpath_command) + Path.size() + 1, 8); LC.MachOLoadCommand.rpath_command_data = RPathLC; LC.Payload.assign(RPathLC.cmdsize - sizeof(MachO::rpath_command), 0); std::copy(Path.begin(), Path.end(), LC.Payload.begin()); return LC; } +static Error processLoadCommands(const CopyConfig &Config, Object &Obj) { + // Remove RPaths. + DenseSet<StringRef> RPathsToRemove(Config.RPathsToRemove.begin(), + Config.RPathsToRemove.end()); + + LoadCommandPred RemovePred = [&RPathsToRemove](const LoadCommand &LC) { + if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) { + StringRef RPath = getPayloadString(LC); + if (RPathsToRemove.count(RPath)) { + RPathsToRemove.erase(RPath); + return true; + } + } + return false; + }; + + if (Error E = Obj.removeLoadCommands(RemovePred)) + return E; + + // Emit an error if the Mach-O binary does not contain an rpath path name + // specified in -delete_rpath. 
+ for (StringRef RPath : Config.RPathsToRemove) { + if (RPathsToRemove.count(RPath)) + return createStringError(errc::invalid_argument, + "no LC_RPATH load command with path: %s", + RPath.str().c_str()); + } + + DenseSet<StringRef> RPaths; + + // Get all existing RPaths. + for (LoadCommand &LC : Obj.LoadCommands) { + if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) + RPaths.insert(getPayloadString(LC)); + } + + // Throw errors for invalid RPaths. + for (const auto &OldNew : Config.RPathsToUpdate) { + StringRef Old = OldNew.getFirst(); + StringRef New = OldNew.getSecond(); + if (RPaths.count(Old) == 0) + return createStringError(errc::invalid_argument, + "no LC_RPATH load command with path: " + Old); + if (RPaths.count(New) != 0) + return createStringError(errc::invalid_argument, + "rpath " + New + + " would create a duplicate load command"); + } + + // Update load commands. + for (LoadCommand &LC : Obj.LoadCommands) { + switch (LC.MachOLoadCommand.load_command_data.cmd) { + case MachO::LC_ID_DYLIB: + if (Config.SharedLibId) { + StringRef Id = Config.SharedLibId.getValue(); + if (Id.empty()) + return createStringError(errc::invalid_argument, + "cannot specify an empty id"); + updateLoadCommandPayloadString<MachO::dylib_command>(LC, Id); + } + break; + + case MachO::LC_RPATH: { + StringRef RPath = getPayloadString(LC); + StringRef NewRPath = Config.RPathsToUpdate.lookup(RPath); + if (!NewRPath.empty()) + updateLoadCommandPayloadString<MachO::rpath_command>(LC, NewRPath); + break; + } + + // TODO: Add LC_REEXPORT_DYLIB, LC_LAZY_LOAD_DYLIB, and LC_LOAD_UPWARD_DYLIB + // here once llvm-objcopy supports them. + case MachO::LC_LOAD_DYLIB: + case MachO::LC_LOAD_WEAK_DYLIB: + StringRef InstallName = getPayloadString(LC); + StringRef NewInstallName = + Config.InstallNamesToUpdate.lookup(InstallName); + if (!NewInstallName.empty()) + updateLoadCommandPayloadString<MachO::dylib_command>(LC, + NewInstallName); + break; + } + } + + // Add new RPaths. 
+ for (StringRef RPath : Config.RPathToAdd) { + if (RPaths.count(RPath) != 0) + return createStringError(errc::invalid_argument, + "rpath " + RPath + + " would create a duplicate load command"); + RPaths.insert(RPath); + Obj.addLoadCommand(buildRPathLoadCommand(RPath)); + } + + return Error::success(); +} + static Error dumpSectionToFile(StringRef SecName, StringRef Filename, Object &Obj) { for (LoadCommand &LC : Obj.LoadCommands) - for (Section &Sec : LC.Sections) { - if (Sec.CanonicalName == SecName) { + for (const std::unique_ptr<Section> &Sec : LC.Sections) { + if (Sec->CanonicalName == SecName) { Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr = - FileOutputBuffer::create(Filename, Sec.Content.size()); + FileOutputBuffer::create(Filename, Sec->Content.size()); if (!BufferOrErr) return BufferOrErr.takeError(); std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr); - llvm::copy(Sec.Content, Buf->getBufferStart()); + llvm::copy(Sec->Content, Buf->getBufferStart()); if (Error E = Buf->commit()) return E; @@ -122,7 +263,7 @@ static Error addSection(StringRef SecName, StringRef Filename, Object &Obj) { for (LoadCommand &LC : Obj.LoadCommands) { Optional<StringRef> SegName = LC.getSegmentName(); if (SegName && SegName == TargetSegName) { - LC.Sections.push_back(Sec); + LC.Sections.push_back(std::make_unique<Section>(Sec)); return Error::success(); } } @@ -130,7 +271,7 @@ static Error addSection(StringRef SecName, StringRef Filename, Object &Obj) { // There's no segment named TargetSegName. Create a new load command and // Insert a new section into it. 
LoadCommand &NewSegment = Obj.addSegment(TargetSegName); - NewSegment.Sections.push_back(Sec); + NewSegment.Sections.push_back(std::make_unique<Section>(Sec)); return Error::success(); } @@ -167,17 +308,27 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) { !Config.SectionsToRename.empty() || !Config.UnneededSymbolsToRemove.empty() || !Config.SetSectionAlignment.empty() || !Config.SetSectionFlags.empty() || - Config.ExtractDWO || Config.KeepFileSymbols || Config.LocalizeHidden || - Config.PreserveDates || Config.StripAllGNU || Config.StripDWO || - Config.StripNonAlloc || Config.StripSections || Config.Weaken || - Config.DecompressDebugSections || Config.StripNonAlloc || - Config.StripSections || Config.StripUnneeded || - Config.DiscardMode != DiscardType::None || !Config.SymbolsToAdd.empty() || - Config.EntryExpr) { + Config.ExtractDWO || Config.LocalizeHidden || Config.PreserveDates || + Config.StripAllGNU || Config.StripDWO || Config.StripNonAlloc || + Config.StripSections || Config.Weaken || Config.DecompressDebugSections || + Config.StripNonAlloc || Config.StripSections || Config.StripUnneeded || + Config.DiscardMode == DiscardType::Locals || + !Config.SymbolsToAdd.empty() || Config.EntryExpr) { return createStringError(llvm::errc::invalid_argument, "option not supported by llvm-objcopy for MachO"); } - removeSections(Config, Obj); + + // Dump sections before add/remove for compatibility with GNU objcopy. + for (StringRef Flag : Config.DumpSection) { + StringRef SectionName; + StringRef FileName; + std::tie(SectionName, FileName) = Flag.split('='); + if (Error E = dumpSectionToFile(SectionName, FileName, Obj)) + return E; + } + + if (Error E = removeSections(Config, Obj)) + return E; // Mark symbols to determine which symbols are still needed. 
if (Config.StripAll) @@ -187,16 +338,8 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) { if (Config.StripAll) for (LoadCommand &LC : Obj.LoadCommands) - for (Section &Sec : LC.Sections) - Sec.Relocations.clear(); - - for (const StringRef &Flag : Config.DumpSection) { - std::pair<StringRef, StringRef> SecPair = Flag.split("="); - StringRef SecName = SecPair.first; - StringRef File = SecPair.second; - if (Error E = dumpSectionToFile(SecName, File, Obj)) - return E; - } + for (std::unique_ptr<Section> &Sec : LC.Sections) + Sec->Relocations.clear(); for (const auto &Flag : Config.AddSection) { std::pair<StringRef, StringRef> SecPair = Flag.split("="); @@ -208,19 +351,9 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) { return E; } - for (StringRef RPath : Config.RPathToAdd) { - for (LoadCommand &LC : Obj.LoadCommands) { - if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH && - RPath == StringRef(reinterpret_cast<char *>(LC.Payload.data()), - LC.Payload.size()) - .trim(0)) { - return createStringError(errc::invalid_argument, - "rpath " + RPath + - " would create a duplicate load command"); - } - } - Obj.addLoadCommand(buildRPathLoadCommand(RPath)); - } + if (Error E = processLoadCommands(Config, Obj)) + return E; + return Error::success(); } @@ -237,9 +370,18 @@ Error executeObjcopyOnBinary(const CopyConfig &Config, if (Error E = handleArgs(Config, *O)) return createFileError(Config.InputFilename, std::move(E)); - // TODO: Support 16KB pages which are employed in iOS arm64 binaries: - // https://github.com/llvm/llvm-project/commit/1bebb2832ee312d3b0316dacff457a7a29435edb - const uint64_t PageSize = 4096; + // Page size used for alignment of segment sizes in Mach-O executables and + // dynamic libraries. 
+ uint64_t PageSize; + switch (In.getArch()) { + case Triple::ArchType::arm: + case Triple::ArchType::aarch64: + case Triple::ArchType::aarch64_32: + PageSize = 16384; + break; + default: + PageSize = 4096; + } MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), PageSize, Out); if (auto E = Writer.finalize()) diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp index 46bb11727322..99bcec7f6b51 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp @@ -28,10 +28,11 @@ void MachOReader::readHeader(Object &O) const { } template <typename SectionType> -Section constructSectionCommon(SectionType Sec) { +Section constructSectionCommon(SectionType Sec, uint32_t Index) { StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname))); StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname))); Section S(SegName, SectName); + S.Index = Index; S.Addr = Sec.addr; S.Size = Sec.size; S.Offset = Sec.offset; @@ -45,39 +46,42 @@ Section constructSectionCommon(SectionType Sec) { return S; } -template <typename SectionType> Section constructSection(SectionType Sec); +template <typename SectionType> +Section constructSection(SectionType Sec, uint32_t Index); -template <> Section constructSection(MachO::section Sec) { - return constructSectionCommon(Sec); +template <> Section constructSection(MachO::section Sec, uint32_t Index) { + return constructSectionCommon(Sec, Index); } -template <> Section constructSection(MachO::section_64 Sec) { - Section S = constructSectionCommon(Sec); +template <> Section constructSection(MachO::section_64 Sec, uint32_t Index) { + Section S = constructSectionCommon(Sec, Index); S.Reserved3 = Sec.reserved3; return S; } // TODO: get rid of reportError and make MachOReader return Expected<> instead. 
template <typename SectionType, typename SegmentType> -std::vector<Section> +std::vector<std::unique_ptr<Section>> extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd, const object::MachOObjectFile &MachOObj, - size_t &NextSectionIndex) { + uint32_t &NextSectionIndex) { auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize; const SectionType *Curr = reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType)); - std::vector<Section> Sections; + std::vector<std::unique_ptr<Section>> Sections; for (; reinterpret_cast<const void *>(Curr) < End; Curr++) { if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) { SectionType Sec; memcpy((void *)&Sec, Curr, sizeof(SectionType)); MachO::swapStruct(Sec); - Sections.push_back(constructSection(Sec)); + Sections.push_back( + std::make_unique<Section>(constructSection(Sec, NextSectionIndex))); } else { - Sections.push_back(constructSection(*Curr)); + Sections.push_back( + std::make_unique<Section>(constructSection(*Curr, NextSectionIndex))); } - Section &S = Sections.back(); + Section &S = *Sections.back(); Expected<object::SectionRef> SecRef = MachOObj.getSection(NextSectionIndex++); @@ -99,6 +103,7 @@ extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd, R.Symbol = nullptr; // We'll fill this field later. R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl()); R.Scattered = MachOObj.isRelocationScattered(R.Info); + R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(R.Info); S.Relocations.push_back(R); } @@ -110,10 +115,13 @@ extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd, void MachOReader::readLoadCommands(Object &O) const { // For MachO sections indices start from 1. 
- size_t NextSectionIndex = 1; + uint32_t NextSectionIndex = 1; for (auto LoadCmd : MachOObj.load_commands()) { LoadCommand LC; switch (LoadCmd.C.cmd) { + case MachO::LC_CODE_SIGNATURE: + O.CodeSignatureCommandIndex = O.LoadCommands.size(); + break; case MachO::LC_SEGMENT: LC.Sections = extractSections<MachO::section, MachO::segment_command>( LoadCmd, MachOObj, NextSectionIndex); @@ -189,24 +197,36 @@ void MachOReader::readSymbolTable(Object &O) const { for (auto Symbol : MachOObj.symbols()) { SymbolEntry SE = (MachOObj.is64Bit() - ? constructSymbolEntry( - StrTable, - MachOObj.getSymbol64TableEntry(Symbol.getRawDataRefImpl())) - : constructSymbolEntry( - StrTable, - MachOObj.getSymbolTableEntry(Symbol.getRawDataRefImpl()))); + ? constructSymbolEntry(StrTable, MachOObj.getSymbol64TableEntry( + Symbol.getRawDataRefImpl())) + : constructSymbolEntry(StrTable, MachOObj.getSymbolTableEntry( + Symbol.getRawDataRefImpl()))); O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE)); } } void MachOReader::setSymbolInRelocationInfo(Object &O) const { + std::vector<const Section *> Sections; for (auto &LC : O.LoadCommands) - for (auto &Sec : LC.Sections) - for (auto &Reloc : Sec.Relocations) + for (std::unique_ptr<Section> &Sec : LC.Sections) + Sections.push_back(Sec.get()); + + for (LoadCommand &LC : O.LoadCommands) + for (std::unique_ptr<Section> &Sec : LC.Sections) + for (auto &Reloc : Sec->Relocations) if (!Reloc.Scattered) { - auto *Info = reinterpret_cast<MachO::relocation_info *>(&Reloc.Info); - Reloc.Symbol = O.SymTable.getSymbolByIndex(Info->r_symbolnum); + const uint32_t SymbolNum = + Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian()); + if (Reloc.Extern) { + Reloc.Symbol = O.SymTable.getSymbolByIndex(SymbolNum); + } else { + // FIXME: Refactor error handling in MachOReader and report an error + // if we encounter an invalid relocation. 
+ assert(SymbolNum >= 1 && SymbolNum <= Sections.size() && + "Invalid section index."); + Reloc.Sec = Sections[SymbolNum - 1]; + } } } @@ -230,26 +250,26 @@ void MachOReader::readExportInfo(Object &O) const { O.Exports.Trie = MachOObj.getDyldInfoExportsTrie(); } -void MachOReader::readDataInCodeData(Object &O) const { - if (!O.DataInCodeCommandIndex) +void MachOReader::readLinkData(Object &O, Optional<size_t> LCIndex, + LinkData &LD) const { + if (!LCIndex) return; - const MachO::linkedit_data_command &LDC = - O.LoadCommands[*O.DataInCodeCommandIndex] - .MachOLoadCommand.linkedit_data_command_data; + const MachO::linkedit_data_command &LC = + O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data; + LD.Data = + arrayRefFromStringRef(MachOObj.getData().substr(LC.dataoff, LC.datasize)); +} - O.DataInCode.Data = arrayRefFromStringRef( - MachOObj.getData().substr(LDC.dataoff, LDC.datasize)); +void MachOReader::readCodeSignature(Object &O) const { + return readLinkData(O, O.CodeSignatureCommandIndex, O.CodeSignature); } -void MachOReader::readFunctionStartsData(Object &O) const { - if (!O.FunctionStartsCommandIndex) - return; - const MachO::linkedit_data_command &LDC = - O.LoadCommands[*O.FunctionStartsCommandIndex] - .MachOLoadCommand.linkedit_data_command_data; +void MachOReader::readDataInCodeData(Object &O) const { + return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode); +} - O.FunctionStarts.Data = arrayRefFromStringRef( - MachOObj.getData().substr(LDC.dataoff, LDC.datasize)); +void MachOReader::readFunctionStartsData(Object &O) const { + return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts); } void MachOReader::readIndirectSymbolTable(Object &O) const { @@ -266,6 +286,28 @@ void MachOReader::readIndirectSymbolTable(Object &O) const { } } +void MachOReader::readSwiftVersion(Object &O) const { + struct ObjCImageInfo { + uint32_t Version; + uint32_t Flags; + } ImageInfo; + + for (const LoadCommand &LC : O.LoadCommands) + for 
(const std::unique_ptr<Section> &Sec : LC.Sections) + if (Sec->Sectname == "__objc_imageinfo" && + (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" || + Sec->Segname == "__DATA_DIRTY") && + Sec->Content.size() >= sizeof(ObjCImageInfo)) { + memcpy(&ImageInfo, Sec->Content.data(), sizeof(ObjCImageInfo)); + if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) { + sys::swapByteOrder(ImageInfo.Version); + sys::swapByteOrder(ImageInfo.Flags); + } + O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff; + return; + } +} + std::unique_ptr<Object> MachOReader::create() const { auto Obj = std::make_unique<Object>(); readHeader(*Obj); @@ -277,9 +319,11 @@ std::unique_ptr<Object> MachOReader::create() const { readWeakBindInfo(*Obj); readLazyBindInfo(*Obj); readExportInfo(*Obj); + readCodeSignature(*Obj); readDataInCodeData(*Obj); readFunctionStartsData(*Obj); readIndirectSymbolTable(*Obj); + readSwiftVersion(*Obj); return Obj; } diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.h b/llvm/tools/llvm-objcopy/MachO/MachOReader.h index 00c8f0d55f61..65824b6eb389 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOReader.h +++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.h @@ -36,9 +36,12 @@ class MachOReader : public Reader { void readWeakBindInfo(Object &O) const; void readLazyBindInfo(Object &O) const; void readExportInfo(Object &O) const; + void readLinkData(Object &O, Optional<size_t> LCIndex, LinkData &LD) const; + void readCodeSignature(Object &O) const; void readDataInCodeData(Object &O) const; void readFunctionStartsData(Object &O) const; void readIndirectSymbolTable(Object &O) const; + void readSwiftVersion(Object &O) const; public: explicit MachOReader(const object::MachOObjectFile &Obj) : MachOObj(Obj) {} diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp index 0d9590612eca..3c41e73b2b01 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp @@ -89,6 
+89,15 @@ size_t MachOWriter::totalSize() const { sizeof(uint32_t) * O.IndirectSymTable.Symbols.size()); } + if (O.CodeSignatureCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.CodeSignatureCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + if (LinkEditDataCommand.dataoff) + Ends.push_back(LinkEditDataCommand.dataoff + + LinkEditDataCommand.datasize); + } + if (O.DataInCodeCommandIndex) { const MachO::linkedit_data_command &LinkEditDataCommand = O.LoadCommands[*O.DataInCodeCommandIndex] @@ -110,12 +119,12 @@ size_t MachOWriter::totalSize() const { } // Otherwise, use the last section / relocation. - for (const auto &LC : O.LoadCommands) - for (const auto &S : LC.Sections) { - Ends.push_back(S.Offset + S.Size); - if (S.RelOff) - Ends.push_back(S.RelOff + - S.NReloc * sizeof(MachO::any_relocation_info)); + for (const LoadCommand &LC : O.LoadCommands) + for (const std::unique_ptr<Section> &S : LC.Sections) { + Ends.push_back(S->Offset + S->Size); + if (S->RelOff) + Ends.push_back(S->RelOff + + S->NReloc * sizeof(MachO::any_relocation_info)); } if (!Ends.empty()) @@ -147,7 +156,7 @@ void MachOWriter::writeHeader() { void MachOWriter::writeLoadCommands() { uint8_t *Begin = B.getBufferStart() + headerSize(); - for (const auto &LC : O.LoadCommands) { + for (const LoadCommand &LC : O.LoadCommands) { // Construct a load command. 
MachO::macho_load_command MLC = LC.MachOLoadCommand; switch (MLC.load_command_data.cmd) { @@ -157,8 +166,8 @@ void MachOWriter::writeLoadCommands() { memcpy(Begin, &MLC.segment_command_data, sizeof(MachO::segment_command)); Begin += sizeof(MachO::segment_command); - for (const auto &Sec : LC.Sections) - writeSectionInLoadCommand<MachO::section>(Sec, Begin); + for (const std::unique_ptr<Section> &Sec : LC.Sections) + writeSectionInLoadCommand<MachO::section>(*Sec, Begin); continue; case MachO::LC_SEGMENT_64: if (IsLittleEndian != sys::IsLittleEndianHost) @@ -167,8 +176,8 @@ void MachOWriter::writeLoadCommands() { sizeof(MachO::segment_command_64)); Begin += sizeof(MachO::segment_command_64); - for (const auto &Sec : LC.Sections) - writeSectionInLoadCommand<MachO::section_64>(Sec, Begin); + for (const std::unique_ptr<Section> &Sec : LC.Sections) + writeSectionInLoadCommand<MachO::section_64>(*Sec, Begin); continue; } @@ -229,27 +238,27 @@ void MachOWriter::writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out) { } void MachOWriter::writeSections() { - for (const auto &LC : O.LoadCommands) - for (const auto &Sec : LC.Sections) { - if (Sec.isVirtualSection()) + for (const LoadCommand &LC : O.LoadCommands) + for (const std::unique_ptr<Section> &Sec : LC.Sections) { + if (Sec->isVirtualSection()) continue; - assert(Sec.Offset && "Section offset can not be zero"); - assert((Sec.Size == Sec.Content.size()) && "Incorrect section size"); - memcpy(B.getBufferStart() + Sec.Offset, Sec.Content.data(), - Sec.Content.size()); - for (size_t Index = 0; Index < Sec.Relocations.size(); ++Index) { - auto RelocInfo = Sec.Relocations[Index]; + assert(Sec->Offset && "Section offset can not be zero"); + assert((Sec->Size == Sec->Content.size()) && "Incorrect section size"); + memcpy(B.getBufferStart() + Sec->Offset, Sec->Content.data(), + Sec->Content.size()); + for (size_t Index = 0; Index < Sec->Relocations.size(); ++Index) { + RelocationInfo RelocInfo = Sec->Relocations[Index]; 
if (!RelocInfo.Scattered) { - auto *Info = - reinterpret_cast<MachO::relocation_info *>(&RelocInfo.Info); - Info->r_symbolnum = RelocInfo.Symbol->Index; + const uint32_t SymbolNum = RelocInfo.Extern + ? (*RelocInfo.Symbol)->Index + : (*RelocInfo.Sec)->Index; + RelocInfo.setPlainRelocationSymbolNum(SymbolNum, IsLittleEndian); } - if (IsLittleEndian != sys::IsLittleEndianHost) MachO::swapStruct( reinterpret_cast<MachO::any_relocation_info &>(RelocInfo.Info)); - memcpy(B.getBufferStart() + Sec.RelOff + + memcpy(B.getBufferStart() + Sec->RelOff + Index * sizeof(MachO::any_relocation_info), &RelocInfo.Info, sizeof(RelocInfo.Info)); } @@ -381,28 +390,27 @@ void MachOWriter::writeIndirectSymbolTable() { } } -void MachOWriter::writeDataInCodeData() { - if (!O.DataInCodeCommandIndex) +void MachOWriter::writeLinkData(Optional<size_t> LCIndex, const LinkData &LD) { + if (!LCIndex) return; const MachO::linkedit_data_command &LinkEditDataCommand = - O.LoadCommands[*O.DataInCodeCommandIndex] - .MachOLoadCommand.linkedit_data_command_data; + O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data; char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff; - assert((LinkEditDataCommand.datasize == O.DataInCode.Data.size()) && - "Incorrect data in code data size"); - memcpy(Out, O.DataInCode.Data.data(), O.DataInCode.Data.size()); + assert((LinkEditDataCommand.datasize == LD.Data.size()) && + "Incorrect data size"); + memcpy(Out, LD.Data.data(), LD.Data.size()); +} + +void MachOWriter::writeCodeSignatureData() { + return writeLinkData(O.CodeSignatureCommandIndex, O.CodeSignature); +} + +void MachOWriter::writeDataInCodeData() { + return writeLinkData(O.DataInCodeCommandIndex, O.DataInCode); } void MachOWriter::writeFunctionStartsData() { - if (!O.FunctionStartsCommandIndex) - return; - const MachO::linkedit_data_command &LinkEditDataCommand = - O.LoadCommands[*O.FunctionStartsCommandIndex] - .MachOLoadCommand.linkedit_data_command_data; - char *Out = (char 
*)B.getBufferStart() + LinkEditDataCommand.dataoff; - assert((LinkEditDataCommand.datasize == O.FunctionStarts.Data.size()) && - "Incorrect function starts data size"); - memcpy(Out, O.FunctionStarts.Data.data(), O.FunctionStarts.Data.size()); + return writeLinkData(O.FunctionStartsCommandIndex, O.FunctionStarts); } void MachOWriter::writeTail() { @@ -450,6 +458,16 @@ void MachOWriter::writeTail() { &MachOWriter::writeIndirectSymbolTable); } + if (O.CodeSignatureCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.CodeSignatureCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + if (LinkEditDataCommand.dataoff) + Queue.emplace_back(LinkEditDataCommand.dataoff, + &MachOWriter::writeCodeSignatureData); + } + if (O.DataInCodeCommandIndex) { const MachO::linkedit_data_command &LinkEditDataCommand = O.LoadCommands[*O.DataInCodeCommandIndex] diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.h b/llvm/tools/llvm-objcopy/MachO/MachOWriter.h index 22abbad56f41..c2c6f5a55e9a 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.h +++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.h @@ -45,6 +45,8 @@ class MachOWriter { void writeLazyBindInfo(); void writeExportInfo(); void writeIndirectSymbolTable(); + void writeLinkData(Optional<size_t> LCIndex, const LinkData &LD); + void writeCodeSignatureData(); void writeDataInCodeData(); void writeFunctionStartsData(); void writeTail(); diff --git a/llvm/tools/llvm-objcopy/MachO/Object.cpp b/llvm/tools/llvm-objcopy/MachO/Object.cpp index d3b4fdc2f633..de8cb0af108d 100644 --- a/llvm/tools/llvm-objcopy/MachO/Object.cpp +++ b/llvm/tools/llvm-objcopy/MachO/Object.cpp @@ -1,5 +1,15 @@ +//===- Object.cpp - Mach-O object file model --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + #include "Object.h" #include "../llvm-objcopy.h" +#include "llvm/ADT/SmallPtrSet.h" +#include <unordered_set> namespace llvm { namespace objcopy { @@ -22,11 +32,83 @@ void SymbolTable::removeSymbols( std::end(Symbols)); } -void Object::removeSections(function_ref<bool(const Section &)> ToRemove) { - for (LoadCommand &LC : LoadCommands) - LC.Sections.erase(std::remove_if(std::begin(LC.Sections), - std::end(LC.Sections), ToRemove), - std::end(LC.Sections)); +void Object::updateLoadCommandIndexes() { + // Update indices of special load commands + for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) { + LoadCommand &LC = LoadCommands[Index]; + switch (LC.MachOLoadCommand.load_command_data.cmd) { + case MachO::LC_SYMTAB: + SymTabCommandIndex = Index; + break; + case MachO::LC_DYSYMTAB: + DySymTabCommandIndex = Index; + break; + case MachO::LC_DYLD_INFO: + case MachO::LC_DYLD_INFO_ONLY: + DyLdInfoCommandIndex = Index; + break; + case MachO::LC_DATA_IN_CODE: + DataInCodeCommandIndex = Index; + break; + case MachO::LC_FUNCTION_STARTS: + FunctionStartsCommandIndex = Index; + break; + } + } +} + +Error Object::removeLoadCommands( + function_ref<bool(const LoadCommand &)> ToRemove) { + auto It = std::stable_partition( + LoadCommands.begin(), LoadCommands.end(), + [&](const LoadCommand &LC) { return !ToRemove(LC); }); + LoadCommands.erase(It, LoadCommands.end()); + + updateLoadCommandIndexes(); + return Error::success(); +} + +Error Object::removeSections( + function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) { + DenseMap<uint32_t, const Section *> OldIndexToSection; + uint32_t NextSectionIndex = 1; + for (LoadCommand &LC : LoadCommands) { + auto It = std::stable_partition( + std::begin(LC.Sections), std::end(LC.Sections), + [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); }); + for 
(auto I = LC.Sections.begin(), End = It; I != End; ++I) { + OldIndexToSection[(*I)->Index] = I->get(); + (*I)->Index = NextSectionIndex++; + } + LC.Sections.erase(It, LC.Sections.end()); + } + + auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool { + Optional<uint32_t> Section = S->section(); + return (Section && !OldIndexToSection.count(*Section)); + }; + + SmallPtrSet<const SymbolEntry *, 2> DeadSymbols; + for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols) + if (IsDead(Sym)) + DeadSymbols.insert(Sym.get()); + + for (const LoadCommand &LC : LoadCommands) + for (const std::unique_ptr<Section> &Sec : LC.Sections) + for (const RelocationInfo &R : Sec->Relocations) + if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol)) + return createStringError(std::errc::invalid_argument, + "symbol '%s' defined in section with index " + "'%u' cannot be removed because it is " + "referenced by a relocation in section '%s'", + (*R.Symbol)->Name.c_str(), + *((*R.Symbol)->section()), + Sec->CanonicalName.c_str()); + SymTable.removeSymbols(IsDead); + for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols) + if (S->section()) + S->n_sect = OldIndexToSection[S->n_sect]->Index; + return Error::success(); } void Object::addLoadCommand(LoadCommand LC) { @@ -52,7 +134,7 @@ LoadCommand &Object::addSegment(StringRef SegName) { constructSegment(LC.MachOLoadCommand.segment_command_data, MachO::LC_SEGMENT, SegName); - LoadCommands.push_back(LC); + LoadCommands.push_back(std::move(LC)); return LoadCommands.back(); } diff --git a/llvm/tools/llvm-objcopy/MachO/Object.h b/llvm/tools/llvm-objcopy/MachO/Object.h index dc2606eefa4a..e825d1867b09 100644 --- a/llvm/tools/llvm-objcopy/MachO/Object.h +++ b/llvm/tools/llvm-objcopy/MachO/Object.h @@ -37,6 +37,7 @@ struct MachHeader { struct RelocationInfo; struct Section { + uint32_t Index; std::string Segname; std::string Sectname; // CanonicalName is a string formatted as "<Segname>,<Sectname>". 
@@ -55,11 +56,11 @@ struct Section { std::vector<RelocationInfo> Relocations; Section(StringRef SegName, StringRef SectName) - : Segname(SegName), Sectname(SectName), + : Segname(std::string(SegName)), Sectname(std::string(SectName)), CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {} Section(StringRef SegName, StringRef SectName, StringRef Content) - : Segname(SegName), Sectname(SectName), + : Segname(std::string(SegName)), Sectname(std::string(SectName)), CanonicalName((Twine(SegName) + Twine(',') + SectName).str()), Content(Content) {} @@ -83,13 +84,13 @@ struct LoadCommand { // The raw content of the payload of the load command (located right after the // corresponding struct). In some cases it is either empty or can be // copied-over without digging into its structure. - std::vector<uint8_t> Payload; + std::vector<uint8_t> Payload; // Some load commands can contain (inside the payload) an array of sections, // though the contents of the sections are stored separately. The struct // Section describes only sections' metadata and where to find the // corresponding content inside the binary. - std::vector<Section> Sections; + std::vector<std::unique_ptr<Section>> Sections; // Returns the segment name if the load command is a segment command. Optional<StringRef> getSegmentName() const; @@ -106,15 +107,22 @@ struct SymbolEntry { uint16_t n_desc; uint64_t n_value; - bool isExternalSymbol() const { - return n_type & ((MachO::N_EXT | MachO::N_PEXT)); - } + bool isExternalSymbol() const { return n_type & MachO::N_EXT; } bool isLocalSymbol() const { return !isExternalSymbol(); } bool isUndefinedSymbol() const { return (n_type & MachO::N_TYPE) == MachO::N_UNDF; } + + bool isSwiftSymbol() const { + return StringRef(Name).startswith("_$s") || + StringRef(Name).startswith("_$S"); + } + + Optional<uint32_t> section() const { + return n_sect == MachO::NO_SECT ? 
None : Optional<uint32_t>(n_sect); + } }; /// The location of the symbol table inside the binary is described by LC_SYMTAB @@ -157,10 +165,29 @@ struct StringTable { }; struct RelocationInfo { - const SymbolEntry *Symbol; + // The referenced symbol entry. Set if !Scattered && Extern. + Optional<const SymbolEntry *> Symbol; + // The referenced section. Set if !Scattered && !Extern. + Optional<const Section *> Sec; // True if Info is a scattered_relocation_info. bool Scattered; + // True if the r_symbolnum is a symbol table index (i.e. r_extern=1). + bool Extern; MachO::any_relocation_info Info; + + unsigned getPlainRelocationSymbolNum(bool IsLittleEndian) { + if (IsLittleEndian) + return Info.r_word1 & 0xffffff; + return Info.r_word1 >> 8; + } + + void setPlainRelocationSymbolNum(unsigned SymbolNum, bool IsLittleEndian) { + assert(SymbolNum < (1 << 24) && "SymbolNum out of range"); + if (IsLittleEndian) + Info.r_word1 = (Info.r_word1 & ~0x00ffffff) | SymbolNum; + else + Info.r_word1 = (Info.r_word1 & ~0xffffff00) | (SymbolNum << 8); + } }; /// The location of the rebase info inside the binary is described by @@ -275,7 +302,12 @@ struct Object { IndirectSymbolTable IndirectSymTable; LinkData DataInCode; LinkData FunctionStarts; + LinkData CodeSignature; + + Optional<uint32_t> SwiftVersion; + /// The index of LC_CODE_SIGNATURE load command if present. + Optional<size_t> CodeSignatureCommandIndex; /// The index of LC_SYMTAB load command if present. Optional<size_t> SymTabCommandIndex; /// The index of LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if present. 
@@ -292,7 +324,13 @@ struct Object { Object() : NewSectionsContents(Alloc) {} - void removeSections(function_ref<bool(const Section &)> ToRemove); + Error + removeSections(function_ref<bool(const std::unique_ptr<Section> &)> ToRemove); + + Error removeLoadCommands(function_ref<bool(const LoadCommand &)> ToRemove); + + void updateLoadCommandIndexes(); + void addLoadCommand(LoadCommand LC); /// Creates a new segment load command in the object and returns a reference diff --git a/llvm/tools/llvm-objcopy/StripOpts.td b/llvm/tools/llvm-objcopy/StripOpts.td index cd02cffae673..001da23528d7 100644 --- a/llvm/tools/llvm-objcopy/StripOpts.td +++ b/llvm/tools/llvm-objcopy/StripOpts.td @@ -15,3 +15,6 @@ def d : Flag<["-"], "d">, def S : Flag<["-"], "S">, Alias<strip_debug>, HelpText<"Alias for --strip-debug">; + +def strip_swift_symbols : Flag<["-"], "T">, + HelpText<"Remove Swift symbols">; diff --git a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp index e662f35f4b08..69b23b6cf975 100644 --- a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp +++ b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp @@ -8,10 +8,11 @@ #include "llvm-objcopy.h" #include "Buffer.h" +#include "COFF/COFFObjcopy.h" #include "CopyConfig.h" #include "ELF/ELFObjcopy.h" -#include "COFF/COFFObjcopy.h" #include "MachO/MachOObjcopy.h" +#include "wasm/WasmObjcopy.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -25,6 +26,7 @@ #include "llvm/Object/ELFTypes.h" #include "llvm/Object/Error.h" #include "llvm/Object/MachO.h" +#include "llvm/Object/Wasm.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" @@ -33,6 +35,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" +#include "llvm/Support/Host.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/Memory.h" #include "llvm/Support/Path.h" @@ -172,6 +175,8 @@ static Error executeObjcopyOnBinary(CopyConfig 
&Config, object::Binary &In, return coff::executeObjcopyOnBinary(Config, *COFFBinary, Out); else if (auto *MachOBinary = dyn_cast<object::MachOObjectFile>(&In)) return macho::executeObjcopyOnBinary(Config, *MachOBinary, Out); + else if (auto *WasmBinary = dyn_cast<object::WasmObjectFile>(&In)) + return objcopy::wasm::executeObjcopyOnBinary(Config, *WasmBinary, Out); else return createStringError(object_error::invalid_file_type, "unsupported object file format"); @@ -322,11 +327,25 @@ enum class ToolType { Objcopy, Strip, InstallNameTool }; int main(int argc, char **argv) { InitLLVM X(argc, argv); ToolName = argv[0]; - ToolType Tool = StringSwitch<ToolType>(sys::path::stem(ToolName)) - .EndsWith("strip", ToolType::Strip) - .EndsWith("install-name-tool", ToolType::InstallNameTool) - .EndsWith("install_name_tool", ToolType::InstallNameTool) - .Default(ToolType::Objcopy); + + StringRef Stem = sys::path::stem(ToolName); + auto Is = [=](StringRef Tool) { + // We need to recognize the following filenames: + // + // llvm-objcopy -> objcopy + // strip-10.exe -> strip + // powerpc64-unknown-freebsd13-objcopy -> objcopy + // llvm-install-name-tool -> install-name-tool + auto I = Stem.rfind_lower(Tool); + return I != StringRef::npos && + (I + Tool.size() == Stem.size() || !isAlnum(Stem[I + Tool.size()])); + }; + ToolType Tool = ToolType::Objcopy; + if (Is("strip")) + Tool = ToolType::Strip; + else if (Is("install-name-tool") || Is("install_name_tool")) + Tool = ToolType::InstallNameTool; + // Expand response files. 
// TODO: Move these lines, which are copied from lib/Support/CommandLine.cpp, // into a separate function in the CommandLine library and call that function diff --git a/llvm/tools/llvm-objcopy/wasm/Object.cpp b/llvm/tools/llvm-objcopy/wasm/Object.cpp new file mode 100644 index 000000000000..0c416483663f --- /dev/null +++ b/llvm/tools/llvm-objcopy/wasm/Object.cpp @@ -0,0 +1,36 @@ +//===- Object.cpp ---------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Object.h" + +#include "llvm/Support/LEB128.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace objcopy { +namespace wasm { + +using namespace object; +using namespace llvm::wasm; + +void Object::addSectionWithOwnedContents( + Section NewSection, std::unique_ptr<MemoryBuffer> &&Content) { + Sections.push_back(NewSection); + OwnedContents.emplace_back(std::move(Content)); +} + +void Object::removeSections(function_ref<bool(const Section &)> ToRemove) { + // TODO: remove reloc sections for the removed section, handle symbols, etc. + Sections.erase( + std::remove_if(std::begin(Sections), std::end(Sections), ToRemove), + std::end(Sections)); +} + +} // end namespace wasm +} // end namespace objcopy +} // end namespace llvm diff --git a/llvm/tools/llvm-objcopy/wasm/Object.h b/llvm/tools/llvm-objcopy/wasm/Object.h new file mode 100644 index 000000000000..9db91c41e2e2 --- /dev/null +++ b/llvm/tools/llvm-objcopy/wasm/Object.h @@ -0,0 +1,47 @@ +//===- Object.h -------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_OBJCOPY_WASM_OBJECT_H +#define LLVM_TOOLS_LLVM_OBJCOPY_WASM_OBJECT_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/Wasm.h" +#include "llvm/Support/MemoryBuffer.h" +#include <vector> + +namespace llvm { +namespace objcopy { +namespace wasm { + +struct Section { + // For now, each section is only an opaque binary blob with no distinction + // between custom and known sections. + uint8_t SectionType; + StringRef Name; + ArrayRef<uint8_t> Contents; +}; + +struct Object { + llvm::wasm::WasmObjectHeader Header; + // For now don't discriminate between kinds of sections. + std::vector<Section> Sections; + + void addSectionWithOwnedContents(Section NewSection, + std::unique_ptr<MemoryBuffer> &&Content); + void removeSections(function_ref<bool(const Section &)> ToRemove); + +private: + std::vector<std::unique_ptr<MemoryBuffer>> OwnedContents; +}; + +} // end namespace wasm +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_TOOLS_LLVM_OBJCOPY_WASM_OBJECT_H diff --git a/llvm/tools/llvm-objcopy/wasm/Reader.cpp b/llvm/tools/llvm-objcopy/wasm/Reader.cpp new file mode 100644 index 000000000000..13fa84ad8020 --- /dev/null +++ b/llvm/tools/llvm-objcopy/wasm/Reader.cpp @@ -0,0 +1,33 @@ +//===- Reader.cpp ---------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Reader.h" + +namespace llvm { +namespace objcopy { +namespace wasm { + +using namespace object; +using namespace llvm::wasm; + +Expected<std::unique_ptr<Object>> Reader::create() const { + auto Obj = std::make_unique<Object>(); + Obj->Header = WasmObj.getHeader(); + std::vector<Section> Sections; + Obj->Sections.reserve(WasmObj.getNumSections()); + for (const SectionRef &Sec : WasmObj.sections()) { + const WasmSection &WS = WasmObj.getWasmSection(Sec); + Obj->Sections.push_back( + {static_cast<uint8_t>(WS.Type), WS.Name, WS.Content}); + } + return std::move(Obj); +} + +} // end namespace wasm +} // end namespace objcopy +} // end namespace llvm diff --git a/llvm/tools/llvm-objcopy/wasm/Reader.h b/llvm/tools/llvm-objcopy/wasm/Reader.h new file mode 100644 index 000000000000..2dcf7dde029a --- /dev/null +++ b/llvm/tools/llvm-objcopy/wasm/Reader.h @@ -0,0 +1,31 @@ +//===- Reader.h -------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_OBJCOPY_WASM_READER_H +#define LLVM_TOOLS_LLVM_OBJCOPY_WASM_READER_H + +#include "Object.h" + +namespace llvm { +namespace objcopy { +namespace wasm { + +class Reader { +public: + explicit Reader(const object::WasmObjectFile &O) : WasmObj(O) {} + Expected<std::unique_ptr<Object>> create() const; + +private: + const object::WasmObjectFile &WasmObj; +}; + +} // end namespace wasm +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_TOOLS_LLVM_OBJCOPY_WASM_READER_H diff --git a/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.cpp b/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.cpp new file mode 100644 index 000000000000..20781cef2d33 --- /dev/null +++ b/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.cpp @@ -0,0 +1,114 @@ +//===- WasmObjcopy.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "WasmObjcopy.h"
+#include "Buffer.h"
+#include "CopyConfig.h"
+#include "Object.h"
+#include "Reader.h"
+#include "Writer.h"
+#include "llvm-objcopy.h"
+#include "llvm/Support/Errc.h"
+
+namespace llvm {
+namespace objcopy {
+namespace wasm {
+
+using namespace object;
+
+/// Write the contents of the first section of \p Obj whose name equals
+/// \p SecName to the file \p Filename.
+///
+/// Returns an error if the output file cannot be created or committed, or an
+/// invalid_argument error if no section has that name.
+static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
+                               Object &Obj) {
+  for (const Section &Sec : Obj.Sections) {
+    if (Sec.Name == SecName) {
+      ArrayRef<uint8_t> Contents = Sec.Contents;
+      Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
+          FileOutputBuffer::create(Filename, Contents.size());
+      if (!BufferOrErr)
+        return BufferOrErr.takeError();
+      std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
+      std::copy(Contents.begin(), Contents.end(), Buf->getBufferStart());
+      // commit() already yields success or the failure to report.
+      return Buf->commit();
+    }
+  }
+  return createStringError(errc::invalid_argument, "section '%s' not found",
+                           SecName.str().c_str());
+}
+
+/// Apply the command-line options in \p Config to \p Obj.
+///
+/// Only AddSection, DumpSection and RemoveSection are supported for now; any
+/// other option listed in the check below makes the call fail with
+/// invalid_argument. Sections are dumped first, then removed, then added.
+/// Returns the first error encountered, or success.
+static Error handleArgs(const CopyConfig &Config, Object &Obj) {
+  // Reject unsupported options before doing any work, so that a failing
+  // invocation neither writes dump files nor modifies Obj.
+  if (!Config.AddGnuDebugLink.empty() || !Config.BuildIdLinkDir.empty() ||
+      Config.BuildIdLinkInput || Config.BuildIdLinkOutput ||
+      Config.ExtractPartition || !Config.SplitDWO.empty() ||
+      !Config.SymbolsPrefix.empty() || !Config.AllocSectionsPrefix.empty() ||
+      Config.DiscardMode != DiscardType::None || Config.NewSymbolVisibility ||
+      !Config.SymbolsToAdd.empty() || !Config.RPathToAdd.empty() ||
+      !Config.OnlySection.empty() || !Config.SymbolsToGlobalize.empty() ||
+      !Config.SymbolsToKeep.empty() || !Config.SymbolsToLocalize.empty() ||
+      !Config.SymbolsToRemove.empty() ||
+      !Config.UnneededSymbolsToRemove.empty() ||
+      !Config.SymbolsToWeaken.empty() || !Config.SymbolsToKeepGlobal.empty() ||
+      !Config.SectionsToRename.empty() || !Config.SetSectionAlignment.empty() ||
+      !Config.SetSectionFlags.empty() || !Config.SymbolsToRename.empty()) {
+    return createStringError(
+        llvm::errc::invalid_argument,
+        "only add-section, dump-section, and remove-section are supported");
+  }
+
+  // Each DumpSection entry has the form "<section>=<file>".
+  for (StringRef Flag : Config.DumpSection) {
+    StringRef SecName;
+    StringRef FileName;
+    std::tie(SecName, FileName) = Flag.split("=");
+    if (Error E = dumpSectionToFile(SecName, FileName, Obj))
+      return createFileError(FileName, std::move(E));
+  }
+
+  Obj.removeSections([&Config](const Section &Sec) {
+    if (Config.ToRemove.matches(Sec.Name))
+      return true;
+    return false;
+  });
+
+  // Each AddSection entry has the form "<section>=<file>"; the file contents
+  // become a new custom section.
+  for (StringRef Flag : Config.AddSection) {
+    StringRef SecName, FileName;
+    std::tie(SecName, FileName) = Flag.split("=");
+    ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
+        MemoryBuffer::getFile(FileName);
+    if (!BufOrErr)
+      return createFileError(FileName, errorCodeToError(BufOrErr.getError()));
+    Section Sec;
+    Sec.SectionType = llvm::wasm::WASM_SEC_CUSTOM;
+    Sec.Name = SecName;
+    std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
+    // Sec.Contents only points into Buf, so Buf is moved into Obj together
+    // with the section.
+    Sec.Contents = makeArrayRef<uint8_t>(
+        reinterpret_cast<const uint8_t *>(Buf->getBufferStart()),
+        Buf->getBufferSize());
+    Obj.addSectionWithOwnedContents(Sec, std::move(Buf));
+  }
+
+  return Error::success();
+}
+
+/// Run objcopy on the wasm object file \p In: read it into an Object, apply
+/// the options in \p Config, and write the result to \p Out.
+///
+/// Read failures are reported against Config.InputFilename and write failures
+/// against Config.OutputFilename; errors from option handling are returned
+/// unchanged.
+Error executeObjcopyOnBinary(const CopyConfig &Config,
+                             object::WasmObjectFile &In, Buffer &Out) {
+  Reader TheReader(In);
+  Expected<std::unique_ptr<Object>> ObjOrErr = TheReader.create();
+  if (!ObjOrErr)
+    return createFileError(Config.InputFilename, ObjOrErr.takeError());
+  Object *Obj = ObjOrErr->get();
+  assert(Obj && "Unable to deserialize Wasm object");
+  if (Error E = handleArgs(Config, *Obj))
+    return E;
+  Writer TheWriter(*Obj, Out);
+  if (Error E = TheWriter.write())
+    return createFileError(Config.OutputFilename, std::move(E));
+  return Error::success();
+}
+
+} // end namespace wasm
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.h b/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.h
new file mode 100644
index 000000000000..3557d5c0a50d
--- /dev/null
+++ b/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.h
@@ -0,0 +1,31 @@
+//===- WasmObjcopy.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_OBJCOPY_WASM_WASMOBJCOPY_H
+#define LLVM_TOOLS_LLVM_OBJCOPY_WASM_WASMOBJCOPY_H
+
+// Only forward declarations are needed here: every type below is used by
+// reference or as a return type in a declaration.
+namespace llvm {
+class Error;
+
+namespace object {
+class WasmObjectFile;
+} // end namespace object
+
+namespace objcopy {
+struct CopyConfig;
+class Buffer;
+
+namespace wasm {
+/// Apply the objcopy operations requested in \p Config to the wasm object
+/// file \p In and write the resulting file to \p Out. Returns success, or an
+/// Error describing what failed.
+Error executeObjcopyOnBinary(const CopyConfig &Config,
+                             object::WasmObjectFile &In, Buffer &Out);
+
+} // end namespace wasm
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_TOOLS_LLVM_OBJCOPY_WASM_WASMOBJCOPY_H
diff --git a/llvm/tools/llvm-objcopy/wasm/Writer.cpp b/llvm/tools/llvm-objcopy/wasm/Writer.cpp
new file mode 100644
index 000000000000..50d26507b498
--- /dev/null
+++ b/llvm/tools/llvm-objcopy/wasm/Writer.cpp
@@ -0,0 +1,78 @@
+//===- Writer.cpp ---------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Writer.h"
+#include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace objcopy {
+namespace wasm {
+
+using namespace object;
+using namespace llvm::wasm;
+
+/// Build the header bytes for section \p S. On return \p SectionSize holds
+/// the total number of bytes the section occupies in the output: the header
+/// produced here plus S.Contents.
+Writer::SectionHeader Writer::createSectionHeader(const Section &S,
+                                                  size_t &SectionSize) {
+  SectionHeader Header;
+  raw_svector_ostream OS(Header);
+  OS << S.SectionType;
+  bool HasName = S.SectionType == WASM_SEC_CUSTOM;
+  // The encoded size covers the contents and, for custom sections, the
+  // length-prefixed name.
+  SectionSize = S.Contents.size();
+  if (HasName)
+    SectionSize += getULEB128Size(S.Name.size()) + S.Name.size();
+  // Pad the LEB value out to 5 bytes to make it a predictable size, and
+  // match the behavior of clang.
+  encodeULEB128(SectionSize, OS, 5);
+  if (HasName) {
+    encodeULEB128(S.Name.size(), OS);
+    OS << S.Name;
+  }
+  // Total section size is the content size plus 1 for the section type and
+  // 5 for the LEB-encoded size.
+  SectionSize = SectionSize + 1 + 5;
+  return Header;
+}
+
+/// Generate the header of every section in Obj, in section order, and return
+/// the size in bytes of the complete output file.
+size_t Writer::finalize() {
+  size_t ObjectSize = sizeof(WasmMagic) + sizeof(WasmVersion);
+  // Start from an empty list so that running finalize() again (for example
+  // through a second write()) cannot leave earlier headers behind; write()
+  // relies on SectionHeaders[I] belonging to Obj.Sections[I].
+  SectionHeaders.clear();
+  SectionHeaders.reserve(Obj.Sections.size());
+  // Finalize the headers of each section so we know the total size.
+  for (const Section &S : Obj.Sections) {
+    size_t SectionSize;
+    SectionHeaders.push_back(createSectionHeader(S, SectionSize));
+    ObjectSize += SectionSize;
+  }
+  return ObjectSize;
+}
+
+/// Serialize Obj into Buf: the magic and version, then each section's header
+/// followed by its contents. Returns the error from allocating or committing
+/// the buffer, if any.
+Error Writer::write() {
+  size_t FileSize = finalize();
+  if (Error E = Buf.allocate(FileSize))
+    return E;
+
+  // Write the header.
+  uint8_t *Ptr = Buf.getBufferStart();
+  Ptr = std::copy(Obj.Header.Magic.begin(), Obj.Header.Magic.end(), Ptr);
+  support::endian::write32le(Ptr, Obj.Header.Version);
+  Ptr += sizeof(Obj.Header.Version);
+
+  // Write each section.
+  for (size_t I = 0, S = SectionHeaders.size(); I < S; ++I) {
+    Ptr = std::copy(SectionHeaders[I].begin(), SectionHeaders[I].end(), Ptr);
+    ArrayRef<uint8_t> Contents = Obj.Sections[I].Contents;
+    Ptr = std::copy(Contents.begin(), Contents.end(), Ptr);
+  }
+  return Buf.commit();
+}
+
+} // end namespace wasm
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/llvm/tools/llvm-objcopy/wasm/Writer.h b/llvm/tools/llvm-objcopy/wasm/Writer.h
new file mode 100644
index 000000000000..da48ee730c3b
--- /dev/null
+++ b/llvm/tools/llvm-objcopy/wasm/Writer.h
@@ -0,0 +1,50 @@
+//===- Writer.h -------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_OBJCOPY_WASM_WRITER_H
+#define LLVM_TOOLS_LLVM_OBJCOPY_WASM_WRITER_H
+
+#include "Buffer.h"
+#include "Object.h"
+#include <cstdint>
+#include <vector>
+
+namespace llvm {
+namespace objcopy {
+namespace wasm {
+
+/// Serializes an Object into a Buffer as a wasm file.
+///
+/// Both the Object and the Buffer are held by reference and must outlive the
+/// Writer.
+class Writer {
+public:
+  Writer(Object &Obj, Buffer &Buf) : Obj(Obj), Buf(Buf) {}
+  /// Write the file header and every section of Obj to Buf, then commit Buf.
+  Error write();
+
+private:
+  using SectionHeader = SmallVector<char, 8>;
+  Object &Obj;
+  Buffer &Buf;
+  /// One generated header per entry of Obj.Sections, in the same order.
+  std::vector<SectionHeader> SectionHeaders;
+
+  /// Generate a wasm section header for S.
+  /// The header consists of
+  /// * A one-byte section ID (aka the section type).
+  /// * The size of the section contents, encoded as ULEB128.
+  /// * If the section is a custom section (type 0) it also has a name, which is
+  ///   encoded as a length-prefixed string. The encoded section size *includes*
+  ///   this string.
+  /// See https://webassembly.github.io/spec/core/binary/modules.html#sections
+  /// Return the header and store the total size in SectionSize.
+  static SectionHeader createSectionHeader(const Section &S,
+                                           size_t &SectionSize);
+  /// Rebuild SectionHeaders from Obj and return the total output size in bytes.
+  size_t finalize();
+};
+
+} // end namespace wasm
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_TOOLS_LLVM_OBJCOPY_WASM_WRITER_H |