summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlon Zakai <azakai@google.com>2019-12-20 19:13:19 -0800
committerGitHub <noreply@github.com>2019-12-20 19:13:19 -0800
commit3f6fd583ca283c12f05c1258962cebdd2202e949 (patch)
tree72b7c4f75c0763cf43379ff571b0710cdda1b543 /src
parentc97d6e4f529f181d81f1fc8c76f0ce28c16782b8 (diff)
downloadbinaryen-3f6fd583ca283c12f05c1258962cebdd2202e949.tar.gz
binaryen-3f6fd583ca283c12f05c1258962cebdd2202e949.tar.bz2
binaryen-3f6fd583ca283c12f05c1258962cebdd2202e949.zip
DWARF debug line updating (#2545)
With this, we can update DWARF debug line info properly as we write a new binary. To do that we track binary locations as we write. Each instruction is mapped to the location it is written to. We must also adjust them as we move code around because of LEB optimization (we emit a function or a section with a 5-byte LEB placeholder, the maximal size; later we shrink it, which is almost always possible). writeDWARFSections() now takes a second param, the new locations of instructions. It then maps debug line info from the original offsets in the binary to the new offsets in the binary being written. The core logic for updating the debug line section is in wasm-debug.cpp. It basically tracks state machine logic both to read the existing debug lines and to emit the new ones. I couldn't find a way to reuse LLVM code for this, but reading LLVM's code was very useful here. A final tricky thing we need to do is to update the DWARF section's internal size annotation. The LLVM YAML writing code doesn't do that for us. Luckily it's pretty easy: in fixEmittedSection we just update the first 4 bytes in place to have the section size, after we've emitted it and know the size. This ignores debug lines with a 0 in the line, col, or addr, see WebAssembly/debugging#9 (comment). This ignores debug line offsets into the middle of instructions, which LLVM sometimes emits for some reason, see WebAssembly/debugging#9 (comment). Handling that would likely at least double our memory usage, which is unfortunate - we are run in an LTO manner, where the entire app's DWARF is present, and it may be massive. I think we should see if such odd offsets are a bug in LLVM, and if we can fix or prevent that. This does not emit "special" opcodes for debug lines. Those are purely an optimization, which I wanted to leave for later. (Even without them we decrease the size quite a lot, btw, as many lines have 0s in them...) 
This adds some testing that shows we can load and save fib2.c and fannkuch.cpp properly. The latter includes more than one function and has nontrivial code. To actually emit correct offsets, a few minor fixes are done here: * Fix the code section location tracking during reading - the correct offset we care about is the body of the code section, not including the section declaration and size. * Fix wasm-stack debug line emitting. We need to update in BinaryInstWriter::visit(), that is, right before writing bytes for the instruction. That differs from BinaryenIRWriter::visit, which is a recursive function that also calls the children - so the offset there would be of the first child. For some reason that is correct with source maps, I don't understand why, but it's wrong for DWARF... * Print code section offsets in hex, to match other tools. Remove DWARFUpdate pass, which was useful for testing temporarily, but doesn't make sense now (it just updates without writing a binary). cc @yurydelendik
Diffstat (limited to 'src')
-rw-r--r--src/passes/DWARF.cpp8
-rw-r--r--src/passes/Print.cpp3
-rw-r--r--src/passes/RoundTrip.cpp2
-rw-r--r--src/passes/pass.cpp2
-rw-r--r--src/passes/passes.h1
-rw-r--r--src/wasm-binary.h13
-rw-r--r--src/wasm-debug.h2
-rw-r--r--src/wasm-stack.h18
-rw-r--r--src/wasm.h7
-rw-r--r--src/wasm/wasm-binary.cpp80
-rw-r--r--src/wasm/wasm-debug.cpp316
11 files changed, 406 insertions, 46 deletions
diff --git a/src/passes/DWARF.cpp b/src/passes/DWARF.cpp
index fe595adae..bc2af3292 100644
--- a/src/passes/DWARF.cpp
+++ b/src/passes/DWARF.cpp
@@ -35,14 +35,6 @@ struct DWARFDump : public Pass {
}
};
-struct DWARFUpdate : public Pass {
- void run(PassRunner* runner, Module* module) override {
- Debug::writeDWARFSections(*module);
- }
-};
-
Pass* createDWARFDumpPass() { return new DWARFDump(); }
-Pass* createDWARFUpdatePass() { return new DWARFUpdate(); }
-
} // namespace wasm
diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp
index cbf3c5a24..3bad8f960 100644
--- a/src/passes/Print.cpp
+++ b/src/passes/Print.cpp
@@ -1427,7 +1427,8 @@ struct PrintSExpression : public OverriddenVisitor<PrintSExpression> {
auto iter = currFunction->binaryLocations.find(curr);
if (iter != currFunction->binaryLocations.end()) {
Colors::grey(o);
- o << ";; code offset: 0x" << iter->second << '\n';
+ o << ";; code offset: 0x" << std::hex << iter->second << std::dec
+ << '\n';
restoreNormalColor(o);
doIndent(o, indent);
}
diff --git a/src/passes/RoundTrip.cpp b/src/passes/RoundTrip.cpp
index 0afabed9e..5410ad87b 100644
--- a/src/passes/RoundTrip.cpp
+++ b/src/passes/RoundTrip.cpp
@@ -57,7 +57,7 @@ struct RoundTrip : public Pass {
// Read
ModuleUtils::clearModule(*module);
ModuleReader reader;
- // TODO: enable debug info when relevant
+ reader.setDWARF(runner->options.debugInfo);
reader.read(tempName, *module);
// Clean up
std::remove(tempName.c_str());
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index cbc8e3be9..ac4f6b661 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -110,8 +110,6 @@ void PassRegistry::registerPasses() {
registerPass("dwarfdump",
"dump DWARF debug info sections from the read binary",
createDWARFDumpPass);
- registerPass(
- "dwarfupdate", "update DWARF debug info sections", createDWARFUpdatePass);
registerPass("duplicate-import-elimination",
"removes duplicate imports",
createDuplicateImportEliminationPass);
diff --git a/src/passes/passes.h b/src/passes/passes.h
index 2f30441db..df5601d2f 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -36,7 +36,6 @@ Pass* createDataFlowOptsPass();
Pass* createDeadCodeEliminationPass();
Pass* createDirectizePass();
Pass* createDWARFDumpPass();
-Pass* createDWARFUpdatePass();
Pass* createDuplicateImportEliminationPass();
Pass* createDuplicateFunctionEliminationPass();
Pass* createEmitTargetFeaturesPass();
diff --git a/src/wasm-binary.h b/src/wasm-binary.h
index 1c3a430fc..4206defdf 100644
--- a/src/wasm-binary.h
+++ b/src/wasm-binary.h
@@ -1044,6 +1044,19 @@ private:
std::unique_ptr<ImportInfo> importInfo;
+ // General debugging info: map every instruction to its original position in
+ // the binary, relative to the beginning of the code section. This is similar
+ // to binaryLocations on Function objects, which are filled as we load the
+ // functions from the binary. Here we track them as we write, and then
+ // the combination of the two can be used to update DWARF info for the new
+ // locations of things.
+ BinaryLocationsMap binaryLocations;
+ size_t binaryLocationsSizeAtSectionStart;
+ // Track the expressions that we added for the current function being
+ // written, so that we can update those specific binary locations when
+ // the function is written out.
+ std::vector<Expression*> binaryLocationTrackedExpressionsForFunc;
+
void prepare();
};
diff --git a/src/wasm-debug.h b/src/wasm-debug.h
index d2e4840f1..1020eee85 100644
--- a/src/wasm-debug.h
+++ b/src/wasm-debug.h
@@ -37,7 +37,7 @@ bool hasDWARFSections(const Module& wasm);
void dumpDWARF(const Module& wasm);
// Update the DWARF sections.
-void writeDWARFSections(Module& wasm);
+void writeDWARFSections(Module& wasm, const BinaryLocationsMap& newLocations);
} // namespace Debug
diff --git a/src/wasm-stack.h b/src/wasm-stack.h
index 7564fbcdb..fbd28b0d5 100644
--- a/src/wasm-stack.h
+++ b/src/wasm-stack.h
@@ -83,8 +83,16 @@ class BinaryInstWriter : public OverriddenVisitor<BinaryInstWriter> {
public:
BinaryInstWriter(WasmBinaryWriter& parent,
BufferWithRandomAccess& o,
- Function* func)
- : parent(parent), o(o), func(func) {}
+ Function* func,
+ bool sourceMap)
+ : parent(parent), o(o), func(func), sourceMap(sourceMap) {}
+
+ void visit(Expression* curr) {
+ if (func && !sourceMap) {
+ parent.writeDebugLocation(curr, func);
+ }
+ OverriddenVisitor<BinaryInstWriter>::visit(curr);
+ }
void visitBlock(Block* curr);
void visitIf(If* curr);
@@ -144,6 +152,8 @@ private:
WasmBinaryWriter& parent;
BufferWithRandomAccess& o;
Function* func = nullptr;
+ bool sourceMap;
+
std::vector<Name> breakStack;
// type => number of locals of that type in the compact form
@@ -758,7 +768,7 @@ public:
Function* func = nullptr,
bool sourceMap = false)
: BinaryenIRWriter<BinaryenIRToBinaryWriter>(func), parent(parent),
- writer(parent, o, func), sourceMap(sourceMap) {}
+ writer(parent, o, func, sourceMap), sourceMap(sourceMap) {}
void visit(Expression* curr) {
BinaryenIRWriter<BinaryenIRToBinaryWriter>::visit(curr);
@@ -833,7 +843,7 @@ public:
StackIRToBinaryWriter(WasmBinaryWriter& parent,
BufferWithRandomAccess& o,
Function* func)
- : writer(parent, o, func), func(func) {}
+ : writer(parent, o, func, false /* sourceMap */), func(func) {}
void write();
diff --git a/src/wasm.h b/src/wasm.h
index bfa0c8947..4c8c4d444 100644
--- a/src/wasm.h
+++ b/src/wasm.h
@@ -1129,7 +1129,10 @@ struct Importable {
// Stack IR is a secondary IR to the main IR defined in this file (Binaryen
// IR). See wasm-stack.h.
class StackInst;
-typedef std::vector<StackInst*> StackIR;
+
+using StackIR = std::vector<StackInst*>;
+
+using BinaryLocationsMap = std::unordered_map<Expression*, uint32_t>;
class Function : public Importable {
public:
@@ -1178,7 +1181,7 @@ public:
// General debugging info: map every instruction to its original position in
// the binary, relative to the beginning of the code section.
- std::unordered_map<Expression*, uint32_t> binaryLocations;
+ BinaryLocationsMap binaryLocations;
size_t getNumParams();
size_t getNumVars();
diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index 71dd0fc21..fa90532d9 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -72,6 +72,14 @@ void WasmBinaryWriter::write() {
writeSourceMapEpilog();
}
+#ifdef BUILD_LLVM_DWARF
+ // Update DWARF user sections after writing the data referred to by them
+ // (function bodies), and before writing the user sections themselves.
+ if (Debug::hasDWARFSections(*wasm)) {
+ Debug::writeDWARFSections(*wasm, binaryLocations);
+ }
+#endif
+
writeLateUserSections();
writeFeaturesSection();
@@ -109,6 +117,7 @@ template<typename T> int32_t WasmBinaryWriter::startSection(T code) {
if (sourceMap) {
sourceMapLocationsSizeAtSectionStart = sourceMapLocations.size();
}
+ binaryLocationsSizeAtSectionStart = binaryLocations.size();
return writeU32LEBPlaceholder(); // section size to be filled in later
}
@@ -116,22 +125,44 @@ void WasmBinaryWriter::finishSection(int32_t start) {
// section size does not include the reserved bytes of the size field itself
int32_t size = o.size() - start - MaxLEB32Bytes;
auto sizeFieldSize = o.writeAt(start, U32LEB(size));
- if (sizeFieldSize != MaxLEB32Bytes) {
+ // We can move things back if the actual LEB for the size doesn't use the
+ // maximum 5 bytes. In that case we need to adjust offsets after we move
+ // things backwards.
+ auto adjustmentForLEBShrinking = MaxLEB32Bytes - sizeFieldSize;
+ if (adjustmentForLEBShrinking) {
// we can save some room, nice
assert(sizeFieldSize < MaxLEB32Bytes);
std::move(&o[start] + MaxLEB32Bytes,
&o[start] + MaxLEB32Bytes + size,
&o[start] + sizeFieldSize);
- auto adjustment = MaxLEB32Bytes - sizeFieldSize;
- o.resize(o.size() - adjustment);
+ o.resize(o.size() - adjustmentForLEBShrinking);
if (sourceMap) {
for (auto i = sourceMapLocationsSizeAtSectionStart;
i < sourceMapLocations.size();
++i) {
- sourceMapLocations[i].first -= adjustment;
+ sourceMapLocations[i].first -= adjustmentForLEBShrinking;
}
}
}
+
+ if (binaryLocationsSizeAtSectionStart != binaryLocations.size()) {
+ // We added the binary locations, adjust them: they must be relative
+ // to the code section.
+ assert(binaryLocationsSizeAtSectionStart == 0);
+ // The section type byte is right before the LEB for the size; we want
+ // offsets that are relative to the body, which is after that section type
+ // byte and the size LEB.
+ auto body = start + sizeFieldSize;
+ for (auto& pair : binaryLocations) {
+ // Offsets are relative to the body of the code section: after the
+ // section type byte and the size.
+ // Everything was moved by the adjustment, track that. After this,
+ // we are at the right absolute address.
+ pair.second -= adjustmentForLEBShrinking;
+ // We are relative to the section start.
+ pair.second -= body;
+ }
+ }
}
int32_t
@@ -266,6 +297,7 @@ void WasmBinaryWriter::writeFunctions() {
auto start = startSection(BinaryConsts::Section::Code);
o << U32LEB(importInfo->getNumDefinedFunctions());
ModuleUtils::iterDefinedFunctions(*wasm, [&](Function* func) {
+ assert(binaryLocationTrackedExpressionsForFunc.empty());
size_t sourceMapLocationsSizeAtFunctionStart = sourceMapLocations.size();
BYN_TRACE("write one at" << o.size() << std::endl);
size_t sizePos = writeU32LEBPlaceholder();
@@ -284,22 +316,31 @@ void WasmBinaryWriter::writeFunctions() {
BYN_TRACE("body size: " << size << ", writing at " << sizePos
<< ", next starts at " << o.size() << "\n");
auto sizeFieldSize = o.writeAt(sizePos, U32LEB(size));
- if (sizeFieldSize != MaxLEB32Bytes) {
+ // We can move things back if the actual LEB for the size doesn't use the
+ // maximum 5 bytes. In that case we need to adjust offsets after we move
+ // things backwards.
+ auto adjustmentForLEBShrinking = MaxLEB32Bytes - sizeFieldSize;
+ if (adjustmentForLEBShrinking) {
// we can save some room, nice
assert(sizeFieldSize < MaxLEB32Bytes);
std::move(&o[start], &o[start] + size, &o[sizePos] + sizeFieldSize);
- auto adjustment = MaxLEB32Bytes - sizeFieldSize;
- o.resize(o.size() - adjustment);
+ o.resize(o.size() - adjustmentForLEBShrinking);
if (sourceMap) {
for (auto i = sourceMapLocationsSizeAtFunctionStart;
i < sourceMapLocations.size();
++i) {
- sourceMapLocations[i].first -= adjustment;
+ sourceMapLocations[i].first -= adjustmentForLEBShrinking;
}
}
+ for (auto* curr : binaryLocationTrackedExpressionsForFunc) {
+ // We added the binary locations, adjust them: they must be relative
+ // to the code section.
+ binaryLocations[curr] -= adjustmentForLEBShrinking;
+ }
}
tableOfContents.functionBodies.emplace_back(
func->name, sizePos + sizeFieldSize, size);
+ binaryLocationTrackedExpressionsForFunc.clear();
});
finishSection(start);
}
@@ -649,10 +690,19 @@ void WasmBinaryWriter::writeDebugLocation(const Function::DebugLocation& loc) {
}
void WasmBinaryWriter::writeDebugLocation(Expression* curr, Function* func) {
- auto& debugLocations = func->debugLocations;
- auto iter = debugLocations.find(curr);
- if (iter != debugLocations.end()) {
- writeDebugLocation(iter->second);
+ if (sourceMap) {
+ auto& debugLocations = func->debugLocations;
+ auto iter = debugLocations.find(curr);
+ if (iter != debugLocations.end()) {
+ writeDebugLocation(iter->second);
+ }
+ }
+ // TODO: remove source map debugging support and refactor this method
+ // to something that directly thinks about DWARF, instead of indirectly
+ // looking at func->binaryLocations as a proxy for that etc.
+ if (func && !func->binaryLocations.empty()) {
+ binaryLocations[curr] = o.size();
+ binaryLocationTrackedExpressionsForFunc.push_back(curr);
}
}
@@ -809,6 +859,9 @@ void WasmBinaryBuilder::read() {
readFunctionSignatures();
break;
case BinaryConsts::Section::Code:
+ if (DWARF) {
+ codeSectionLocation = pos;
+ }
readFunctions();
break;
case BinaryConsts::Section::Export:
@@ -1288,9 +1341,6 @@ void WasmBinaryBuilder::readFunctionSignatures() {
void WasmBinaryBuilder::readFunctions() {
BYN_TRACE("== readFunctions\n");
- if (DWARF) {
- codeSectionLocation = pos;
- }
size_t total = getU32LEB();
if (total != functionSignatures.size()) {
throwError("invalid function section size, must equal types");
diff --git a/src/wasm/wasm-debug.cpp b/src/wasm/wasm-debug.cpp
index 595c7f50b..04dd4e9d3 100644
--- a/src/wasm/wasm-debug.cpp
+++ b/src/wasm/wasm-debug.cpp
@@ -25,6 +25,10 @@
std::error_code dwarf2yaml(llvm::DWARFContext& DCtx, llvm::DWARFYAML::Data& Y);
#endif
+#include "wasm-binary.h"
+#include "wasm-debug.h"
+#include "wasm.h"
+
namespace wasm {
namespace Debug {
@@ -99,26 +103,315 @@ void dumpDWARF(const Module& wasm) {
// StringMap<std::unique_ptr<MemoryBuffer>>
// EmitDebugSections(llvm::DWARFYAML::Data &DI, bool ApplyFixups);
//
-// For modifying data, like line numberes, we can in theory do that either on
-// the DWARFContext or DWARFYAML::Data; unclear which is best, but modifying
-// the DWARFContext may save us doing fixups in EmitDebugSections.
-//
-void writeDWARFSections(Module& wasm) {
+// Represents the state when parsing a line table.
+struct LineState {
+ uint32_t addr = 0;
+ // TODO sectionIndex?
+ uint32_t line = 1;
+ uint32_t col = 0;
+ uint32_t file = 1;
+ // TODO uint32_t isa = 0;
+ // TODO Discriminator = 0;
+ bool isStmt;
+ bool basicBlock = false;
+ // XXX these two should be just prologue, epilogue?
+ bool prologueEnd = false;
+ bool epilogueBegin = false;
+
+ LineState(const LineState& other) = default;
+ LineState(const llvm::DWARFYAML::LineTable& table)
+ : isStmt(table.DefaultIsStmt) {}
+
+ LineState& operator=(const LineState& other) = default;
+
+ // Updates the state, and returns whether a new row is ready to be emitted.
+ bool update(llvm::DWARFYAML::LineTableOpcode& opcode,
+ const llvm::DWARFYAML::LineTable& table) {
+ switch (opcode.Opcode) {
+ case 0: {
+ // Extended opcodes
+ switch (opcode.SubOpcode) {
+ case llvm::dwarf::DW_LNE_set_address: {
+ addr = opcode.Data;
+ break;
+ }
+ case llvm::dwarf::DW_LNE_end_sequence: {
+ return true;
+ }
+ default: {
+ Fatal() << "unknown debug line sub-opcode: " << std::hex
+ << opcode.SubOpcode;
+ }
+ }
+ break;
+ }
+ case llvm::dwarf::DW_LNS_set_column: {
+ col = opcode.Data;
+ break;
+ }
+ case llvm::dwarf::DW_LNS_set_prologue_end: {
+ prologueEnd = true;
+ break;
+ }
+ case llvm::dwarf::DW_LNS_copy: {
+ return true;
+ }
+ case llvm::dwarf::DW_LNS_advance_pc: {
+ assert(table.MinInstLength == 1);
+ addr += opcode.Data;
+ break;
+ }
+ case llvm::dwarf::DW_LNS_advance_line: {
+ line += opcode.SData;
+ break;
+ }
+ case llvm::dwarf::DW_LNS_set_file: {
+ file = opcode.Data;
+ break;
+ }
+ case llvm::dwarf::DW_LNS_negate_stmt: {
+ isStmt = !isStmt;
+ break;
+ }
+ case llvm::dwarf::DW_LNS_const_add_pc: {
+ uint8_t AdjustOpcode = 255 - table.OpcodeBase;
+ uint64_t AddrOffset =
+ (AdjustOpcode / table.LineRange) * table.MinInstLength;
+ addr += AddrOffset;
+ break;
+ }
+ default: {
+ if (opcode.Opcode >= table.OpcodeBase) {
+ // Special opcode: adjust line and addr, using some math.
+ uint8_t AdjustOpcode =
+ opcode.Opcode - table.OpcodeBase; // 20 - 13 = 7
+ uint64_t AddrOffset = (AdjustOpcode / table.LineRange) *
+ table.MinInstLength; // (7 / 14) * 1 = 0
+ int32_t LineOffset =
+ table.LineBase +
+ (AdjustOpcode % table.LineRange); // -5 + (7 % 14) = 2
+ line += LineOffset;
+ addr += AddrOffset;
+ return true;
+ } else {
+ Fatal() << "unknown debug line opcode: " << std::hex << opcode.Opcode;
+ }
+ }
+ }
+ return false;
+ }
+
+ bool needToEmit() {
+ // If any value is 0, can ignore it
+ // https://github.com/WebAssembly/debugging/issues/9#issuecomment-567720872
+ return line != 0 && col != 0 && addr != 0;
+ }
+
+ // Given an old state, emit the diff from it to this state into a new line
+ // table entry (that will be emitted in the updated DWARF debug line section).
+ void emitDiff(const LineState& old,
+ std::vector<llvm::DWARFYAML::LineTableOpcode>& newOpcodes,
+ const llvm::DWARFYAML::LineTable& table) {
+ bool useSpecial = false;
+ if (addr != old.addr || line != old.line) {
+ // Try to use a special opcode TODO
+ }
+ if (addr != old.addr && !useSpecial) {
+ // len = 1 (subopcode) + 4 (wasm32 address)
+ // FIXME: look at AddrSize on the Unit.
+ auto item = makeItem(llvm::dwarf::DW_LNE_set_address, 5);
+ item.Data = addr;
+ newOpcodes.push_back(item);
+ }
+ if (line != old.line && !useSpecial) {
+ auto item = makeItem(llvm::dwarf::DW_LNS_advance_line);
+ item.SData = line - old.line;
+ newOpcodes.push_back(item);
+ }
+ if (col != old.col) {
+ auto item = makeItem(llvm::dwarf::DW_LNS_set_column);
+ item.Data = col;
+ newOpcodes.push_back(item);
+ }
+ if (file != old.file) {
+ auto item = makeItem(llvm::dwarf::DW_LNS_set_file);
+ item.Data = file;
+ newOpcodes.push_back(item);
+ }
+ if (isStmt != old.isStmt) {
+ newOpcodes.push_back(makeItem(llvm::dwarf::DW_LNS_negate_stmt));
+ }
+ if (basicBlock != old.basicBlock) {
+ Fatal() << "bb";
+ }
+ if (prologueEnd != old.prologueEnd) {
+ assert(prologueEnd);
+ newOpcodes.push_back(makeItem(llvm::dwarf::DW_LNS_set_prologue_end));
+ }
+ if (epilogueBegin != old.epilogueBegin) {
+ Fatal() << "eb";
+ }
+ if (useSpecial) {
+ // Emit a special, which ends a sequence automatically.
+ // TODO
+ } else {
+ // End the sequence manually.
+ // len = 1 (subopcode)
+ newOpcodes.push_back(makeItem(llvm::dwarf::DW_LNE_end_sequence, 1));
+ // Reset the state.
+ *this = LineState(table);
+ }
+ }
+
+private:
+ llvm::DWARFYAML::LineTableOpcode makeItem(llvm::dwarf::LineNumberOps opcode) {
+ llvm::DWARFYAML::LineTableOpcode item = {};
+ item.Opcode = opcode;
+ return item;
+ }
+
+ llvm::DWARFYAML::LineTableOpcode
+ makeItem(llvm::dwarf::LineNumberExtendedOps opcode, uint64_t len) {
+ auto item = makeItem(llvm::dwarf::LineNumberOps(0));
+ // All the length after the len field itself, including the subopcode
+ // (1 byte).
+ item.ExtLen = len;
+ item.SubOpcode = opcode;
+ return item;
+ }
+};
+
+// Represents a mapping of addresses to expressions.
+struct AddrExprMap {
+ std::unordered_map<uint32_t, Expression*> map;
+
+ // Construct the map from the binaryLocations loaded from the wasm.
+ AddrExprMap(const Module& wasm) {
+ for (auto& func : wasm.functions) {
+ for (auto pair : func->binaryLocations) {
+ assert(map.count(pair.second) == 0);
+ map[pair.second] = pair.first;
+ }
+ }
+ }
+
+ // Construct the map from new binaryLocations just written
+ AddrExprMap(const BinaryLocationsMap& newLocations) {
+ for (auto pair : newLocations) {
+ assert(map.count(pair.second) == 0);
+ map[pair.second] = pair.first;
+ }
+ }
+
+ Expression* get(uint32_t addr) {
+ auto iter = map.find(addr);
+ if (iter != map.end()) {
+ return iter->second;
+ }
+ return nullptr;
+ }
+
+ void dump() {
+ std::cout << " (size: " << map.size() << ")\n";
+ for (auto pair : map) {
+ std::cout << " " << pair.first << " => " << pair.second << '\n';
+ }
+ }
+};
+
+static void updateDebugLines(const Module& wasm,
+ llvm::DWARFYAML::Data& data,
+ const BinaryLocationsMap& newLocations) {
+ // TODO: for memory efficiency, we may want to do this in a streaming manner,
+ // binary to binary, without YAML IR.
+
+ // TODO: apparently DWARF offsets may be into the middle of instructions...
+ // we may need to track their spans too
+ // https://github.com/WebAssembly/debugging/issues/9#issuecomment-567720872
+
+ AddrExprMap oldAddrMap(wasm);
+ AddrExprMap newAddrMap(newLocations);
+
+ for (auto& table : data.DebugLines) {
+ // Parse the original opcodes and emit new ones.
+ LineState state(table);
+ // All the addresses we need to write out.
+ std::vector<uint32_t> newAddrs;
+ std::unordered_map<uint32_t, LineState> newAddrInfo;
+ for (auto& opcode : table.Opcodes) {
+ // Update the state, and check if we have a new row to emit.
+ if (state.update(opcode, table)) {
+ // An expression may not exist for this line table item, if we optimized
+ // it away.
+ if (auto* expr = oldAddrMap.get(state.addr)) {
+ auto iter = newLocations.find(expr);
+ if (iter != newLocations.end()) {
+ uint32_t newAddr = iter->second;
+ newAddrs.push_back(newAddr);
+ newAddrInfo.emplace(newAddr, state);
+ auto& updatedState = newAddrInfo.at(newAddr);
+ // The only difference is the address TODO other stuff?
+ updatedState.addr = newAddr;
+ }
+ }
+ if (opcode.Opcode == 0 &&
+ opcode.SubOpcode == llvm::dwarf::DW_LNE_end_sequence) {
+ state = LineState(table);
+ }
+ }
+ }
+ // Sort the new addresses (which may be substantially different from the
+ // original layout after optimization).
+ std::sort(newAddrs.begin(), newAddrs.end());
+ // Emit a new line table.
+ {
+ std::vector<llvm::DWARFYAML::LineTableOpcode> newOpcodes;
+ LineState state(table);
+ for (uint32_t addr : newAddrs) {
+ LineState oldState(state);
+ state = newAddrInfo.at(addr);
+ if (state.needToEmit()) {
+ state.emitDiff(oldState, newOpcodes, table);
+ } else {
+ state = oldState;
+ }
+ }
+ table.Opcodes.swap(newOpcodes);
+ }
+ }
+}
+
+static void fixEmittedSection(const std::string& name,
+ std::vector<char>& data) {
+ if (name == ".debug_line") {
+ // The YAML code does not update the line section size. However, it is
+ // trivial to do so after the fact, as the wasm section's additional size is
+ // easy to compute: it is the emitted size - the 4 bytes of the size itself.
+ uint32_t size = data.size() - 4;
+ BufferWithRandomAccess buf;
+ buf << size;
+ for (int i = 0; i < 4; i++) {
+ data[i] = buf[i];
+ }
+ }
+}
+
+void writeDWARFSections(Module& wasm, const BinaryLocationsMap& newLocations) {
BinaryenDWARFInfo info(wasm);
// Convert to Data representation, which YAML can use to write.
- llvm::DWARFYAML::Data Data;
- if (dwarf2yaml(*info.context, Data)) {
+ llvm::DWARFYAML::Data data;
+ if (dwarf2yaml(*info.context, data)) {
Fatal() << "Failed to parse DWARF to YAML";
}
+ updateDebugLines(wasm, data, newLocations);
+
// TODO: Actually update, and remove sections we don't know how to update yet?
// Convert to binary sections.
- auto newSections = EmitDebugSections(
- Data,
- false /* ApplyFixups, should be true if we modify Data, presumably? */);
+ auto newSections = EmitDebugSections(data, true);
// Update the custom sections in the wasm.
// TODO: efficiency
@@ -129,6 +422,7 @@ void writeDWARFSections(Module& wasm) {
auto llvmData = newSections[llvmName]->getBuffer();
section.data.resize(llvmData.size());
std::copy(llvmData.begin(), llvmData.end(), section.data.data());
+ fixEmittedSection(section.name, section.data);
}
}
}
@@ -140,7 +434,7 @@ void dumpDWARF(const Module& wasm) {
std::cerr << "warning: no DWARF dumping support present\n";
}
-void writeDWARFSections(Module& wasm) {
+void writeDWARFSections(Module& wasm, const BinaryLocationsMap& newLocations) {
std::cerr << "warning: no DWARF updating support present\n";
}