From 11ec7c544e60944146e7e51b82c60bbc9d169af2 Mon Sep 17 00:00:00 2001
From: Alon Zakai <azakai@google.com>
Date: Tue, 18 Aug 2020 12:56:59 -0700
Subject: DWARF: Optimize abbreviation index/offset computation (#3033)

---
 third_party/llvm-project/DWARFVisitor.cpp | 40 ++++++++++++++++++++++++-------
 1 file changed, 32 insertions(+), 8 deletions(-)

(limited to 'third_party/llvm-project/DWARFVisitor.cpp')
diff --git a/third_party/llvm-project/DWARFVisitor.cpp b/third_party/llvm-project/DWARFVisitor.cpp
index 1f4403563..4b6ed5ada 100644
--- a/third_party/llvm-project/DWARFVisitor.cpp
+++ b/third_party/llvm-project/DWARFVisitor.cpp
@@ -8,6 +8,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <unordered_map>
+
 #include "DWARFVisitor.h"
 #include "llvm/ObjectYAML/DWARFYAML.h"
 
@@ -49,19 +51,41 @@ template <typename T> void DWARFYAML::VisitorImpl<T>::traverseDebugInfo() {
   // TODO: This code appears to assume that abbreviation codes increment by 1
   // so that lookups are linear. In LLVM output that is true, but it might not
   // be in general.
+  // Create a map of [byte offset into the abbreviation section] => [index in
+  // DebugInfo.AbbrevDecls]. This avoids linear search for each CU.
+  std::unordered_map<size_t, size_t> abbrByteOffsetToDeclsIndex;
+  for (size_t i = 0; i < DebugInfo.AbbrevDecls.size(); i++) {
+    auto offset = DebugInfo.AbbrevDecls[i].ListOffset;
+    // The offset is the same for all entries for the same CU, so only note the
+    // first as that is where the list for the CU (that LLVM DeclSet) begins.
+    // That is, DebugInfo.AbbrevDecls looks like this:
+    //
+    //  i  CU   Abbrev  ListOffset
+    // ============================
+    //  0   X     X1      150
+    //  1   X     X2      150
+    //  2   X     X3      150
+    //           ..
+    //  6   Y     Y1      260
+    //  7   Y     Y2      260
+    //
+    // Note how multiple rows i have the same CU. All those abbrevs have the
+    // same ListOffset, which is the byte offset into the abbreviation section
+    // for that set of abbreviations.
+    if (abbrByteOffsetToDeclsIndex.count(offset)) {
+      continue;
+    }
+    abbrByteOffsetToDeclsIndex[offset] = i;
+  }
   for (auto &Unit : DebugInfo.CompileUnits) {
     // AbbrOffset is the byte offset into the abbreviation section, which we
     // need to find among the Abbrev's ListOffsets (which are the byte offsets
     // of where that abbreviation list begins).
     // TODO: Optimize this to not be O(#CUs * #abbrevs).
-    size_t AbbrevStart = -1;
-    for (size_t i = 0; i < DebugInfo.AbbrevDecls.size(); i++) {
-      if (DebugInfo.AbbrevDecls[i].ListOffset == Unit.AbbrOffset) {
-        AbbrevStart = i;
-        break;
-      }
-    }
-    assert(AbbrevStart != -1); // must find the list
+    auto offset = Unit.AbbrOffset;
+    assert(abbrByteOffsetToDeclsIndex.count(offset));
+    size_t AbbrevStart = abbrByteOffsetToDeclsIndex[offset];
+    assert(DebugInfo.AbbrevDecls[AbbrevStart].ListOffset == offset);
     // Find the last entry in this abbreviation list.
     size_t AbbrevEnd = AbbrevStart;
     while (AbbrevEnd < DebugInfo.AbbrevDecls.size() &&
-- 
cgit v1.2.3