1 files changed, 67 insertions, 29 deletions
diff --git a/contrib/llvm/tools/lld/COFF/ICF.cpp b/contrib/llvm/tools/lld/COFF/ICF.cpp
index 48895c34886c..7feb3c4e0b0c 100644
--- a/contrib/llvm/tools/lld/COFF/ICF.cpp
+++ b/contrib/llvm/tools/lld/COFF/ICF.cpp
@@ -18,13 +18,16 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "ICF.h"
 #include "Chunks.h"
 #include "Symbols.h"
 #include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Timer.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Parallel.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/xxhash.h"
 #include <algorithm>
 #include <atomic>
 #include <vector>
@@ -34,6 +37,8 @@ using namespace llvm;
 namespace lld {
 namespace coff {
 
+static Timer ICFTimer("ICF", Timer::root());
+
 class ICF {
 public:
   void run(ArrayRef<Chunk *> V);
@@ -41,6 +46,8 @@ public:
 private:
   void segregate(size_t Begin, size_t End, bool Constant);
 
+  bool assocEquals(const SectionChunk *A, const SectionChunk *B);
+
   bool equalsConstant(const SectionChunk *A, const SectionChunk *B);
   bool equalsVariable(const SectionChunk *A, const SectionChunk *B);
 
@@ -59,13 +66,6 @@ private:
   std::atomic<bool> Repeat = {false};
 };
 
-// Returns a hash value for S.
-uint32_t ICF::getHash(SectionChunk *C) {
-  return hash_combine(C->getPermissions(), C->SectionName, C->NumRelocs,
-                      C->Alignment, uint32_t(C->Header->SizeOfRawData),
-                      C->Checksum, C->getContents());
-}
-
 // Returns true if section S is subject of ICF.
 //
 // Microsoft's documentation
@@ -73,21 +73,27 @@ uint32_t ICF::getHash(SectionChunk *C) {
 // 2017) says that /opt:icf folds both functions and read-only data.
 // Despite that, the MSVC linker folds only functions. We found
 // a few instances of programs that are not safe for data merging.
-// Therefore, we merge only functions just like the MSVC tool. However, we merge
-// identical .xdata sections, because the address of unwind information is
-// insignificant to the user program and the Visual C++ linker does this.
+// Therefore, we merge only functions just like the MSVC tool. However, we also
+// merge read-only sections in a couple of cases where the address of the
+// section is insignificant to the user program and the behaviour matches that
+// of the Visual C++ linker.
 bool ICF::isEligible(SectionChunk *C) {
   // Non-comdat chunks, dead chunks, and writable chunks are not elegible.
-  bool Writable = C->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_WRITE;
+  bool Writable = C->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_WRITE;
   if (!C->isCOMDAT() || !C->isLive() || Writable)
     return false;
 
   // Code sections are eligible.
-  if (C->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE)
+  if (C->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE)
     return true;
 
-  // .xdata unwind info sections are eligble.
-  return C->getSectionName().split('$').first == ".xdata";
+  // .pdata and .xdata unwind info sections are eligible.
+  StringRef OutSecName = C->getSectionName().split('$').first;
+  if (OutSecName == ".pdata" || OutSecName == ".xdata")
+    return true;
+
+  // So are vtables.
+  return C->Sym && C->Sym->getName().startswith("??_7");
 }
 
 // Split an equivalence class into smaller classes.
@@ -116,10 +122,23 @@ void ICF::segregate(size_t Begin, size_t End, bool Constant) {
   }
 }
 
+// Returns true if two sections' associative children are equal.
+bool ICF::assocEquals(const SectionChunk *A, const SectionChunk *B) {
+  auto ChildClasses = [&](const SectionChunk *SC) {
+    std::vector<uint32_t> Classes;
+    for (const SectionChunk *C : SC->children())
+      if (!C->SectionName.startswith(".debug") &&
+          C->SectionName != ".gfids$y" && C->SectionName != ".gljmp$y")
+        Classes.push_back(C->Class[Cnt % 2]);
+    return Classes;
+  };
+  return ChildClasses(A) == ChildClasses(B);
+}
+
 // Compare "non-moving" part of two sections, namely everything
 // except relocation targets.
 bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) {
-  if (A->NumRelocs != B->NumRelocs)
+  if (A->Relocs.size() != B->Relocs.size())
     return false;
 
   // Compare relocations.
@@ -142,10 +161,11 @@ bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) {
     return false;
 
   // Compare section attributes and contents.
-  return A->getPermissions() == B->getPermissions() &&
-         A->SectionName == B->SectionName && A->Alignment == B->Alignment &&
+  return A->getOutputCharacteristics() == B->getOutputCharacteristics() &&
+         A->SectionName == B->SectionName &&
          A->Header->SizeOfRawData == B->Header->SizeOfRawData &&
-         A->Checksum == B->Checksum && A->getContents() == B->getContents();
+         A->Checksum == B->Checksum && A->getContents() == B->getContents() &&
+         assocEquals(A, B);
 }
 
 // Compare "moving" part of two sections, namely relocation targets.
@@ -161,9 +181,12 @@ bool ICF::equalsVariable(const SectionChunk *A, const SectionChunk *B) {
         return D1->getChunk()->Class[Cnt % 2] == D2->getChunk()->Class[Cnt % 2];
     return false;
   };
-  return std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq);
+  return std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(),
+                    Eq) &&
+         assocEquals(A, B);
 }
 
+// Find the first Chunk after Begin that has a different class from Begin.
 size_t ICF::findBoundary(size_t Begin, size_t End) {
   for (size_t I = Begin + 1; I < End; ++I)
     if (Chunks[Begin]->Class[Cnt % 2] != Chunks[I]->Class[Cnt % 2])
@@ -173,11 +196,8 @@ size_t ICF::findBoundary(size_t Begin, size_t End) {
 
 void ICF::forEachClassRange(size_t Begin, size_t End,
                             std::function<void(size_t, size_t)> Fn) {
-  if (Begin > 0)
-    Begin = findBoundary(Begin - 1, End);
-
   while (Begin < End) {
-    size_t Mid = findBoundary(Begin, Chunks.size());
+    size_t Mid = findBoundary(Begin, End);
     Fn(Begin, Mid);
     Begin = Mid;
   }
@@ -193,12 +213,22 @@ void ICF::forEachClass(std::function<void(size_t, size_t)> Fn) {
     return;
   }
 
-  // Split sections into 256 shards and call Fn in parallel.
-  size_t NumShards = 256;
+  // Shard into non-overlapping intervals, and call Fn in parallel.
+  // The sharding must be completed before any calls to Fn are made
+  // so that Fn can modify the Chunks in its shard without causing data
+  // races.
+  const size_t NumShards = 256;
   size_t Step = Chunks.size() / NumShards;
-  for_each_n(parallel::par, size_t(0), NumShards, [&](size_t I) {
-    size_t End = (I == NumShards - 1) ? Chunks.size() : (I + 1) * Step;
-    forEachClassRange(I * Step, End, Fn);
+  size_t Boundaries[NumShards + 1];
+  Boundaries[0] = 0;
+  Boundaries[NumShards] = Chunks.size();
+  for_each_n(parallel::par, size_t(1), NumShards, [&](size_t I) {
+    Boundaries[I] = findBoundary((I - 1) * Step, Chunks.size());
+  });
+  for_each_n(parallel::par, size_t(1), NumShards + 1, [&](size_t I) {
+    if (Boundaries[I - 1] < Boundaries[I]) {
+      forEachClassRange(Boundaries[I - 1], Boundaries[I], Fn);
+    }
   });
   ++Cnt;
 }
@@ -207,6 +237,8 @@ void ICF::forEachClass(std::function<void(size_t, size_t)> Fn) {
 // Two sections are considered the same if their section headers,
 // contents and relocations are all the same.
 void ICF::run(ArrayRef<Chunk *> Vec) {
+  ScopedTimer T(ICFTimer);
+
   // Collect only mergeable sections and group by hash value.
   uint32_t NextId = 1;
   for (Chunk *C : Vec) {
@@ -218,10 +250,16 @@ void ICF::run(ArrayRef<Chunk *> Vec) {
     }
   }
 
+  // Make sure that ICF doesn't merge sections that are being handled by string
+  // tail merging.
+  for (auto &P : MergeChunk::Instances)
+    for (SectionChunk *SC : P.second->Sections)
+      SC->Class[0] = NextId++;
+
   // Initially, we use hash values to partition sections.
   for_each(parallel::par, Chunks.begin(), Chunks.end(), [&](SectionChunk *SC) {
     // Set MSB to 1 to avoid collisions with non-hash classs.
-    SC->Class[0] = getHash(SC) | (1 << 31);
+    SC->Class[0] = xxHash64(SC->getContents()) | (1 << 31);
   });
 
   // From now on, sections in Chunks are ordered so that sections in