DWARF: Fix handling of the end of control flow instructions (#3288)

Previously when we processed a block for example, we'd do this: ;; start is here (block (result type) ;; end is here .. contents .. ) ;; end delimiter is here Not how this represents the block's start and end as the "header", and uses an extra delimiter to mark the end. I think this is wrong, and was an attempt to handle some offsets from LLVM that otherwise made no sense, ones at the end of the "header". But it turns out that this makes us completely incorrect on some things where there is a low/high pc pair, and we need to understand that the end of a block is at the end opcode at the very end, and not the end of the header. This PR changes us to do that, i.e. ;; start is here (block (result type) .. contents .. ) ;; end is here This fixes a testcase already in the test suite, test/passes/fib_nonzero-low-pc_dwarf.bin.txt where you can see that lexical block now has a valid value for the end, and not a 0 (the proper scope extends all the way to the end of the big block in that function, and is now the same in the DWARF before and after we process it). test/passes/fannkuch3_dwarf.bin.txt is also improved by this. To implement this, this removes the BinaryLocations::End delimeter. After this we just need one type of delimiter actually, but I didn't refactor that any more to keep this PR small (see TODO). This removes an assertion in writeDebugLocationEnd() that is no longer valid: the assert ensures that we wrote an end only if there was a 0 for the end, but for a control flow structure, we write the end of the "header" automatically like for any expression, and then overwrite it later when we finish writing the children and the end marker. We could in theory special-case control flow structures to avoid the first write, but it would add more complexity. This uncovered what appears to be a possible bug in our debug_line handling, see test/passes/fannkuch3_manyopts_dwarf.bin.txt. That needs to be looked into more, but I suspect that was invalid info from when we looked at the end of the "header" of control flow structures. Note that there was one definite bug uncovered here, fixed by the extra } else if (locationUpdater.hasOldExprEnd(oldAddr)) { that is added here, which was definitely a bug.
author: Alon Zakai <azakai@google.com> 2020-10-27 10:42:07 -0700
committer: GitHub <noreply@github.com> 2020-10-27 10:42:07 -0700
commit: 6c28516678b12fb04715c046b39f256cd26d0726 (patch)
tree: 2ea7a194d362f94e3cc4949966ab89360dad57df /src
parent: 9a174f0ce9dc44b74db0e04728de62e6556176f9 (diff)
download: binaryen-6c28516678b12fb04715c046b39f256cd26d0726.tar.gz
binaryen-6c28516678b12fb04715c046b39f256cd26d0726.tar.bz2
binaryen-6c28516678b12fb04715c046b39f256cd26d0726.zip
4 files changed, 15 insertions, 26 deletions
diff --git a/src/wasm.h b/src/wasm.h
index 04e2faeb3..295088780 100644
--- a/src/wasm.h
+++ b/src/wasm.h
@@ -1413,10 +1413,10 @@ struct BinaryLocations {
   // control flow, have, like 'end' for loop and block. We keep these in a
   // separate map because they are rare and we optimize for the storage space
   // for the common type of instruction which just needs a Span. We implement
-  // this as a simple struct with two elements (as two extra elements is the
-  // maximum currently needed; due to 'catch' and 'end' for try-catch). The
-  // second value may be 0, indicating it is not used.
-  struct DelimiterLocations : public std::array<BinaryLocation, 2> {
+  // this as a simple array with one element at the moment (more elements may
+  // be necessary in the future).
+  // TODO: If we are sure we won't need more, make this a single value?
+  struct DelimiterLocations : public std::array<BinaryLocation, 1> {
     DelimiterLocations() {
       // Ensure zero-initialization.
       for (auto& item : *this) {
@@ -1425,14 +1425,7 @@ struct BinaryLocations {
     }
   };
 
-  enum DelimiterId {
-    // All control flow structures have an end, so use index 0 for that.
-    End = 0,
-    // Use index 1 for all other current things.
-    Else = 1,
-    Catch = 1,
-    Invalid = -1
-  };
+  enum DelimiterId { Else = 0, Catch = 0, Invalid = -1 };
   std::unordered_map<Expression*, DelimiterLocations> delimiters;
 
   // DWARF debug info can refer to multiple interesting positions in a function.
diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index 5c25fd0d2..0c56af8e0 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -865,7 +865,6 @@ void WasmBinaryWriter::writeDebugLocation(Expression* curr, Function* func) {
 void WasmBinaryWriter::writeDebugLocationEnd(Expression* curr, Function* func) {
   if (func && !func->expressionLocations.empty()) {
     auto& span = binaryLocations.expressions.at(curr);
-    assert(span.end == 0);
     span.end = o.size();
   }
 }
@@ -2580,7 +2579,12 @@ BinaryConsts::ASTNodes WasmBinaryBuilder::readExpression(Expression*& curr) {
       break;
     case BinaryConsts::End:
       curr = nullptr;
-      continueControlFlow(BinaryLocations::End, startPos);
+      // Pop the current control flow structure off the stack. If there is none
+      // then this is the "end" of the function itself, which also emits an
+      // "end" byte.
+      if (!controlFlowStack.empty()) {
+        controlFlowStack.pop_back();
+      }
       break;
     case BinaryConsts::Else:
       curr = nullptr;
@@ -2808,22 +2812,12 @@ void WasmBinaryBuilder::startControlFlow(Expression* curr) {
 void WasmBinaryBuilder::continueControlFlow(BinaryLocations::DelimiterId id,
                                             BinaryLocation pos) {
   if (DWARF && currFunction) {
-    if (controlFlowStack.empty()) {
-      // We reached the end of the function, which is also marked with an
-      // "end", like a control flow structure.
-      assert(id == BinaryLocations::End);
-      assert(pos + 1 == endOfFunction);
-      return;
-    }
     assert(!controlFlowStack.empty());
     auto currControlFlow = controlFlowStack.back();
     // We are called after parsing the byte, so we need to subtract one to
     // get its position.
     currFunction->delimiterLocations[currControlFlow][id] =
       pos - codeSectionLocation;
-    if (id == BinaryLocations::End) {
-      controlFlowStack.pop_back();
-    }
   }
 }
 
diff --git a/src/wasm/wasm-debug.cpp b/src/wasm/wasm-debug.cpp
index 0f37374b5..448bcaaa9 100644
--- a/src/wasm/wasm-debug.cpp
+++ b/src/wasm/wasm-debug.cpp
@@ -680,6 +680,8 @@ static void updateDebugLines(llvm::DWARFYAML::Data& data,
           newAddr = locationUpdater.getNewFuncStart(oldAddr);
         } else if (locationUpdater.hasOldDelimiter(oldAddr)) {
           newAddr = locationUpdater.getNewDelimiter(oldAddr);
+        } else if (locationUpdater.hasOldExprEnd(oldAddr)) {
+          newAddr = locationUpdater.getNewExprEnd(oldAddr);
         }
         if (newAddr && state.needToEmit()) {
           // LLVM sometimes emits the same address more than once. We should
diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp
index 2ec156eea..1d290f53c 100644
--- a/src/wasm/wasm-stack.cpp
+++ b/src/wasm/wasm-stack.cpp
@@ -1880,10 +1880,10 @@ void BinaryInstWriter::visitArrayLen(ArrayLen* curr) {
 void BinaryInstWriter::emitScopeEnd(Expression* curr) {
   assert(!breakStack.empty());
   breakStack.pop_back();
+  o << int8_t(BinaryConsts::End);
   if (func && !sourceMap) {
-    parent.writeExtraDebugLocation(curr, func, BinaryLocations::End);
+    parent.writeDebugLocationEnd(curr, func);
   }
-  o << int8_t(BinaryConsts::End);
 }
 
 void BinaryInstWriter::emitFunctionEnd() { o << int8_t(BinaryConsts::End); }
author	Alon Zakai <azakai@google.com>	2020-10-27 10:42:07 -0700
committer	GitHub <noreply@github.com>	2020-10-27 10:42:07 -0700
commit	6c28516678b12fb04715c046b39f256cd26d0726 (patch)
tree	2ea7a194d362f94e3cc4949966ab89360dad57df /src
parent	9a174f0ce9dc44b74db0e04728de62e6556176f9 (diff)
download	binaryen-6c28516678b12fb04715c046b39f256cd26d0726.tar.gz binaryen-6c28516678b12fb04715c046b39f256cd26d0726.tar.bz2 binaryen-6c28516678b12fb04715c046b39f256cd26d0726.zip