DWARF: Track sequences so that we can handle reordering within one (#2932)

Previously we tracked sequence ends, so if an instruction was marked as the end, we'd keep marking it that way in the output. However, if X, Y, Z form a sequence that is then reordered into Z, Y, X then we need to emit the end on X now. To do that, give a "sequence number" to each debug line. Then when emitting, we can tell if two adjacent lines are in a sequence or not, and emit the end properly. This fixes a large partner testcase, allowing llvm-dwarfdump --verify --debug-line to pass on it. With this change it is easier to remove the hackish handling of prologueEnd that we had before, where we reset it. Instead, just emit it when it is set, and that's all. In particular we can get rid of the // Reset the state and resetAfterLine() calls in emitDiff. That function now just emits a diff, with no side effects, and is marked const. This refactoring moves the needToEmit() check to an earlier place. Instead of noting lines we'll never emit, don't even note them at all. The test diff seems large, but it is all due to one small change that then changes all the later offsets: - 0x00000831: 01 DW_LNS_copy - 0x000000000000086e 43 4 1 0 0 is_stmt + 0x00000831: 00 DW_LNE_end_sequence + 0x000000000000086e 43 4 1 0 0 is_stmt end_sequence Note how we add end_sequence there. We used to have an entry right after it with line 0 that was marked as the end of the sequence. In the new code, we don't emit that unnecessary line (which was previously only emitted for the end sequence!) and instead emit the end sequence on the last valid line.
author: Alon Zakai <azakai@google.com> 2020-06-25 16:58:04 -0700
committer: GitHub <noreply@github.com> 2020-06-25 16:58:04 -0700
commit: 0d5eca6cf4f61798d0854226d2c78d2655e07c30 (patch)
tree: e25cad47d636a4b301983a5edb34addda582f719 /src
parent: eae2ee3dff0c68cfa783f45ee3ced06f4b5aa47c (diff)
download: binaryen-0d5eca6cf4f61798d0854226d2c78d2655e07c30.tar.gz
binaryen-0d5eca6cf4f61798d0854226d2c78d2655e07c30.tar.bz2
binaryen-0d5eca6cf4f61798d0854226d2c78d2655e07c30.zip
1 files changed, 43 insertions, 27 deletions
diff --git a/src/wasm/wasm-debug.cpp b/src/wasm/wasm-debug.cpp
index 20c5dfb62..4181b06f2 100644
--- a/src/wasm/wasm-debug.cpp
+++ b/src/wasm/wasm-debug.cpp
@@ -123,11 +123,17 @@ struct LineState {
   bool basicBlock = false;
   bool prologueEnd = false;
   bool epilogueBegin = false;
-  bool endSequence = false;
+  // Each instruction is part of a sequence, all of which get the same ID. The
+  // order within a sequence may change if binaryen reorders things, which means
+  // that we can't track the end_sequence location and assume it is at the end -
+  // we must track sequences and then emit an end for each one.
+  // -1 is an invalid marker value (note that this assumes we can fit all ids
+  // into just under 32 bits).
+  uint32_t sequenceId = -1;
 
   LineState(const LineState& other) = default;
-  LineState(const llvm::DWARFYAML::LineTable& table)
-    : isStmt(table.DefaultIsStmt) {}
+  LineState(const llvm::DWARFYAML::LineTable& table, uint32_t sequenceId)
+    : isStmt(table.DefaultIsStmt), sequenceId(sequenceId) {}
 
   LineState& operator=(const LineState& other) = default;
 
@@ -143,7 +149,6 @@ struct LineState {
             break;
           }
           case llvm::dwarf::DW_LNE_end_sequence: {
-            endSequence = true;
             return true;
           }
           case llvm::dwarf::DW_LNE_set_discriminator: {
@@ -241,17 +246,17 @@ struct LineState {
   }
 
   bool needToEmit() {
-    // Zero values imply we can ignore this line, unless it has something
-    // special we must emit.
+    // Zero values imply we can ignore this line.
     // https://github.com/WebAssembly/debugging/issues/9#issuecomment-567720872
-    return (line != 0 && addr != 0) || endSequence;
+    return line != 0 && addr != 0;
   }
 
   // Given an old state, emit the diff from it to this state into a new line
   // table entry (that will be emitted in the updated DWARF debug line section).
   void emitDiff(const LineState& old,
                 std::vector<llvm::DWARFYAML::LineTableOpcode>& newOpcodes,
-                const llvm::DWARFYAML::LineTable& table) {
+                const llvm::DWARFYAML::LineTable& table,
+                bool endSequence) const {
     bool useSpecial = false;
     if (addr != old.addr || line != old.line) {
       // Try to use a special opcode TODO
@@ -299,8 +304,7 @@ struct LineState {
       assert(basicBlock);
       newOpcodes.push_back(makeItem(llvm::dwarf::DW_LNS_set_basic_block));
     }
-    if (prologueEnd != old.prologueEnd) {
-      assert(prologueEnd);
+    if (prologueEnd) {
       newOpcodes.push_back(makeItem(llvm::dwarf::DW_LNS_set_prologue_end));
     }
     if (epilogueBegin != old.epilogueBegin) {
@@ -314,27 +318,25 @@ struct LineState {
       if (endSequence) {
         // len = 1 (subopcode)
         newOpcodes.push_back(makeItem(llvm::dwarf::DW_LNE_end_sequence, 1));
-        // Reset the state.
-        *this = LineState(table);
       } else {
         newOpcodes.push_back(makeItem(llvm::dwarf::DW_LNS_copy));
       }
     }
-    resetAfterLine();
   }
 
   // Some flags are automatically reset after each debug line.
   void resetAfterLine() { prologueEnd = false; }
 
 private:
-  llvm::DWARFYAML::LineTableOpcode makeItem(llvm::dwarf::LineNumberOps opcode) {
+  llvm::DWARFYAML::LineTableOpcode
+  makeItem(llvm::dwarf::LineNumberOps opcode) const {
     llvm::DWARFYAML::LineTableOpcode item = {};
     item.Opcode = opcode;
     return item;
   }
 
   llvm::DWARFYAML::LineTableOpcode
-  makeItem(llvm::dwarf::LineNumberExtendedOps opcode, uint64_t len) {
+  makeItem(llvm::dwarf::LineNumberExtendedOps opcode, uint64_t len) const {
     auto item = makeItem(llvm::dwarf::LineNumberOps(0));
     // All the length after the len field itself, including the subopcode
     // (1 byte).
@@ -626,8 +628,9 @@ struct LocationUpdater {
 static void updateDebugLines(llvm::DWARFYAML::Data& data,
                              LocationUpdater& locationUpdater) {
   for (auto& table : data.DebugLines) {
+    uint32_t sequenceId = 0;
     // Parse the original opcodes and emit new ones.
-    LineState state(table);
+    LineState state(table, sequenceId);
     // All the addresses we need to write out.
     std::vector<BinaryLocation> newAddrs;
     std::unordered_map<BinaryLocation, LineState> newAddrInfo;
@@ -645,7 +648,7 @@ static void updateDebugLines(llvm::DWARFYAML::Data& data,
           omittingRange = true;
         }
         if (omittingRange) {
-          state = LineState(table);
+          state = LineState(table, sequenceId);
           continue;
         }
         // An expression may not exist for this line table item, if we optimized
@@ -666,7 +669,7 @@ static void updateDebugLines(llvm::DWARFYAML::Data& data,
         } else if (locationUpdater.hasOldDelimiter(oldAddr)) {
           newAddr = locationUpdater.getNewDelimiter(oldAddr);
         }
-        if (newAddr) {
+        if (newAddr && state.needToEmit()) {
           // LLVM sometimes emits the same address more than once. We should
           // probably investigate that.
           if (newAddrInfo.count(newAddr)) {
@@ -682,7 +685,11 @@ static void updateDebugLines(llvm::DWARFYAML::Data& data,
         }
         if (opcode.Opcode == 0 &&
             opcode.SubOpcode == llvm::dwarf::DW_LNE_end_sequence) {
-          state = LineState(table);
+          sequenceId++;
+          // We assume the number of sequences can fit in 32 bits, and -1 is
+          // an invalid value.
+          assert(sequenceId != uint32_t(-1));
+          state = LineState(table, sequenceId);
         }
       }
     }
@@ -692,15 +699,24 @@ static void updateDebugLines(llvm::DWARFYAML::Data& data,
     // Emit a new line table.
     {
       std::vector<llvm::DWARFYAML::LineTableOpcode> newOpcodes;
-      LineState state(table);
-      for (BinaryLocation addr : newAddrs) {
-        LineState oldState(state);
-        state = newAddrInfo.at(addr);
-        if (state.needToEmit()) {
-          state.emitDiff(oldState, newOpcodes, table);
-        } else {
-          state = oldState;
+      for (size_t i = 0; i < newAddrs.size(); i++) {
+        LineState state = newAddrInfo.at(newAddrs[i]);
+        assert(state.needToEmit());
+        LineState lastState(table, -1);
+        if (i != 0) {
+          lastState = newAddrInfo.at(newAddrs[i - 1]);
+          // If the last line is in another sequence, clear the old state, as
+          // there is nothing to diff to.
+          if (lastState.sequenceId != state.sequenceId) {
+            lastState = LineState(table, -1);
+          }
         }
+        // This line ends a sequence if there is no next line after it, or if
+        // the next line is in a different sequence.
+        bool endSequence =
+          i + 1 == newAddrs.size() ||
+          newAddrInfo.at(newAddrs[i + 1]).sequenceId != state.sequenceId;
+        state.emitDiff(lastState, newOpcodes, table, endSequence);
       }
       table.Opcodes.swap(newOpcodes);
     }
author	Alon Zakai <azakai@google.com>	2020-06-25 16:58:04 -0700
committer	GitHub <noreply@github.com>	2020-06-25 16:58:04 -0700
commit	0d5eca6cf4f61798d0854226d2c78d2655e07c30 (patch)
tree	e25cad47d636a4b301983a5edb34addda582f719 /src
parent	eae2ee3dff0c68cfa783f45ee3ced06f4b5aa47c (diff)
download	binaryen-0d5eca6cf4f61798d0854226d2c78d2655e07c30.tar.gz binaryen-0d5eca6cf4f61798d0854226d2c78d2655e07c30.tar.bz2 binaryen-0d5eca6cf4f61798d0854226d2c78d2655e07c30.zip