diff options
author | Alon Zakai <azakai@google.com> | 2020-01-16 15:51:32 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-01-16 15:51:32 -0800 |
commit | 0ec999db121197d7da242dd4a1136997e02c67cc (patch) | |
tree | 603292a1441e3cd39157eb258f233f44f1a31a21 /src | |
parent | 0848a27fdecf3ffd5170986dceec7ba04c4e50a0 (diff) | |
download | binaryen-0ec999db121197d7da242dd4a1136997e02c67cc.tar.gz binaryen-0ec999db121197d7da242dd4a1136997e02c67cc.tar.bz2 binaryen-0ec999db121197d7da242dd4a1136997e02c67cc.zip |
DWARF: high_pc computation (#2595)
Update high_pc values. These need special handling because a high_pc
may be encoded as an offset relative to its low_pc rather than as an absolute address.
Functions were already tracked with both a start and an end location. Add
the same start/end tracking for instructions (expressions) as well.
Diffstat (limited to 'src')
-rw-r--r-- | src/passes/Print.cpp | 4 | ||||
-rw-r--r-- | src/wasm-binary.h | 1 | ||||
-rw-r--r-- | src/wasm-stack.h | 3 | ||||
-rw-r--r-- | src/wasm.h | 4 | ||||
-rw-r--r-- | src/wasm/wasm-binary.cpp | 22 | ||||
-rw-r--r-- | src/wasm/wasm-debug.cpp | 159 |
6 files changed, 139 insertions, 54 deletions
diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 866b0d8f8..5504373f5 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -1443,8 +1443,8 @@ struct PrintSExpression : public OverriddenVisitor<PrintSExpression> { auto iter = currFunction->expressionLocations.find(curr); if (iter != currFunction->expressionLocations.end()) { Colors::grey(o); - o << ";; code offset: 0x" << std::hex << iter->second << std::dec - << '\n'; + o << ";; code offset: 0x" << std::hex << iter->second.start + << std::dec << '\n'; restoreNormalColor(o); doIndent(o, indent); } diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 66e823e9e..5d601c6e9 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1014,6 +1014,7 @@ public: void writeSourceMapEpilog(); void writeDebugLocation(const Function::DebugLocation& loc); void writeDebugLocation(Expression* curr, Function* func); + void writeDebugLocationEnd(Expression* curr, Function* func); // helpers void writeInlineString(const char* name); diff --git a/src/wasm-stack.h b/src/wasm-stack.h index eeb675a3d..7ca30f369 100644 --- a/src/wasm-stack.h +++ b/src/wasm-stack.h @@ -92,6 +92,9 @@ public: parent.writeDebugLocation(curr, func); } OverriddenVisitor<BinaryInstWriter>::visit(curr); + if (func && !sourceMap) { + parent.writeDebugLocationEnd(curr, func); + } } void visitBlock(Block* curr); diff --git a/src/wasm.h b/src/wasm.h index db1742d8c..99ea59728 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -1167,7 +1167,7 @@ struct BinaryLocations { struct Span { uint32_t start, end; }; - std::unordered_map<Expression*, uint32_t> expressions; + std::unordered_map<Expression*, Span> expressions; std::unordered_map<Function*, Span> functions; }; @@ -1225,7 +1225,7 @@ public: std::set<DebugLocation> epilogLocation; // General debugging info support: track instructions and the function itself. 
- std::unordered_map<Expression*, uint32_t> expressionLocations; + std::unordered_map<Expression*, BinaryLocations::Span> expressionLocations; BinaryLocations::Span funcLocation; size_t getNumParams(); diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 9b1f24a0f..8d42fb9f4 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -160,7 +160,8 @@ void WasmBinaryWriter::finishSection(int32_t start) { // We are relative to the section start. auto totalAdjustment = adjustmentForLEBShrinking + body; for (auto& pair : binaryLocations.expressions) { - pair.second -= totalAdjustment; + pair.second.start -= totalAdjustment; + pair.second.end -= totalAdjustment; } for (auto& pair : binaryLocations.functions) { pair.second.start -= totalAdjustment; @@ -339,7 +340,9 @@ void WasmBinaryWriter::writeFunctions() { for (auto* curr : binaryLocationTrackedExpressionsForFunc) { // We added the binary locations, adjust them: they must be relative // to the code section. - binaryLocations.expressions[curr] -= adjustmentForLEBShrinking; + auto& span = binaryLocations.expressions[curr]; + span.start -= adjustmentForLEBShrinking; + span.end -= adjustmentForLEBShrinking; } } if (!binaryLocationTrackedExpressionsForFunc.empty()) { @@ -708,11 +711,20 @@ void WasmBinaryWriter::writeDebugLocation(Expression* curr, Function* func) { // If this is an instruction in a function, and if the original wasm had // binary locations tracked, then track it in the output as well. 
if (func && !func->expressionLocations.empty()) { - binaryLocations.expressions[curr] = o.size(); + binaryLocations.expressions[curr] = + BinaryLocations::Span{uint32_t(o.size()), 0}; binaryLocationTrackedExpressionsForFunc.push_back(curr); } } +void WasmBinaryWriter::writeDebugLocationEnd(Expression* curr, Function* func) { + if (func && !func->expressionLocations.empty()) { + auto& span = binaryLocations.expressions.at(curr); + assert(span.end == 0); + span.end = o.size(); + } +} + void WasmBinaryWriter::writeInlineString(const char* name) { int32_t size = strlen(name); o << U32LEB(size); @@ -2293,7 +2305,9 @@ BinaryConsts::ASTNodes WasmBinaryBuilder::readExpression(Expression*& curr) { currFunction->debugLocations[curr] = *currDebugLocation.begin(); } if (DWARF && currFunction) { - currFunction->expressionLocations[curr] = startPos - codeSectionLocation; + currFunction->expressionLocations[curr] = + BinaryLocations::Span{uint32_t(startPos - codeSectionLocation), + uint32_t(pos - codeSectionLocation)}; } } BYN_TRACE("zz recurse from " << depth-- << " at " << pos << std::endl); diff --git a/src/wasm/wasm-debug.cpp b/src/wasm/wasm-debug.cpp index 23bc83374..89082e8dd 100644 --- a/src/wasm/wasm-debug.cpp +++ b/src/wasm/wasm-debug.cpp @@ -328,16 +328,20 @@ private: } }; -// Represents a mapping of addresses to expressions. +// Represents a mapping of addresses to expressions. We track beginnings and +// endings of expressions separately, since the end of one (which is one past +// the end in DWARF notation) overlaps with the beginning of the next, and also +// to let us use contextual information (we may know we are looking up the end +// of an instruction). struct AddrExprMap { - std::unordered_map<uint32_t, Expression*> map; + std::unordered_map<uint32_t, Expression*> startMap; + std::unordered_map<uint32_t, Expression*> endMap; // Construct the map from the binaryLocations loaded from the wasm. 
AddrExprMap(const Module& wasm) { for (auto& func : wasm.functions) { for (auto pair : func->expressionLocations) { - assert(map.count(pair.second) == 0); - map[pair.second] = pair.first; + add(pair.first, pair.second); } } } @@ -345,28 +349,38 @@ struct AddrExprMap { // Construct the map from new binaryLocations just written AddrExprMap(const BinaryLocations& newLocations) { for (auto pair : newLocations.expressions) { - assert(map.count(pair.second) == 0); - map[pair.second] = pair.first; + add(pair.first, pair.second); } } - Expression* get(uint32_t addr) const { - auto iter = map.find(addr); - if (iter != map.end()) { + Expression* getStart(uint32_t addr) const { + auto iter = startMap.find(addr); + if (iter != startMap.end()) { return iter->second; } return nullptr; } - void dump() const { - std::cout << " (size: " << map.size() << ")\n"; - for (auto pair : map) { - std::cout << " " << pair.first << " => " << pair.second << '\n'; + Expression* getEnd(uint32_t addr) const { + auto iter = endMap.find(addr); + if (iter != endMap.end()) { + return iter->second; } + return nullptr; + } + +private: + void add(Expression* expr, BinaryLocations::Span span) { + assert(startMap.count(span.start) == 0); + startMap[span.start] = expr; + assert(endMap.count(span.end) == 0); + endMap[span.end] = expr; } }; -// Represents a mapping of addresses to expressions. +// Represents a mapping of addresses to expressions. Note that we use a single +// map for the start and end addresses, since there is no chance of a function's +// start overlapping with another's end (there is the size LEB in the middle). struct FuncAddrMap { std::unordered_map<uint32_t, Function*> map; @@ -415,10 +429,6 @@ struct LocationUpdater { // TODO: for memory efficiency, we may want to do this in a streaming manner, // binary to binary, without YAML IR. - // TODO: apparently DWARF offsets may be into the middle of instructions... 
- // we may need to track their spans too - // https://github.com/WebAssembly/debugging/issues/9#issuecomment-567720872 - LocationUpdater(Module& wasm, const BinaryLocations& newLocations) : wasm(wasm), newLocations(newLocations), oldExprAddrMap(wasm), newExprAddrMap(newLocations), oldFuncAddrMap(wasm) {} @@ -427,10 +437,21 @@ struct LocationUpdater { // address, or if there was but if that instruction no longer exists, return // 0. Otherwise, return the new updated location. uint32_t getNewExprAddr(uint32_t oldAddr) const { - if (auto* expr = oldExprAddrMap.get(oldAddr)) { + if (auto* expr = oldExprAddrMap.getStart(oldAddr)) { auto iter = newLocations.expressions.find(expr); if (iter != newLocations.expressions.end()) { - uint32_t newAddr = iter->second; + uint32_t newAddr = iter->second.start; + return newAddr; + } + } + return 0; + } + + uint32_t getNewExprEndAddr(uint32_t oldAddr) const { + if (auto* expr = oldExprAddrMap.getEnd(oldAddr)) { + auto iter = newLocations.expressions.find(expr); + if (iter != newLocations.expressions.end()) { + uint32_t newAddr = iter->second.end; return newAddr; } } @@ -529,6 +550,76 @@ static void iterContextAndYAML(const T& contextList, U& yamlList, W func) { assert(yamlValue == yamlList.end()); } +static void updateDIE(const llvm::DWARFDebugInfoEntry& DIE, + llvm::DWARFYAML::Entry& yamlEntry, + const llvm::DWARFAbbreviationDeclaration* abbrevDecl, + const LocationUpdater& locationUpdater) { + auto tag = DIE.getTag(); + // Pairs of low/high_pc require some special handling, as the high + // may be an offset relative to the low. First, process the low_pcs. 
+ uint32_t oldLowPC = 0, newLowPC = 0; + iterContextAndYAML( + abbrevDecl->attributes(), + yamlEntry.Values, + [&](const llvm::DWARFAbbreviationDeclaration::AttributeSpec& attrSpec, + llvm::DWARFYAML::FormValue& yamlValue) { + auto attr = attrSpec.Attr; + if (attr != llvm::dwarf::DW_AT_low_pc) { + return; + } + uint32_t oldValue = yamlValue.Value, newValue = 0; + if (tag == llvm::dwarf::DW_TAG_GNU_call_site || + tag == llvm::dwarf::DW_TAG_inlined_subroutine || + tag == llvm::dwarf::DW_TAG_lexical_block || + tag == llvm::dwarf::DW_TAG_label) { + newValue = locationUpdater.getNewExprAddr(oldValue); + } else if (tag == llvm::dwarf::DW_TAG_compile_unit || + tag == llvm::dwarf::DW_TAG_subprogram) { + newValue = locationUpdater.getNewFuncAddr(oldValue); + } else { + Fatal() << "unknown tag with low_pc " + << llvm::dwarf::TagString(tag).str(); + } + oldLowPC = oldValue; + newLowPC = newValue; + yamlValue.Value = newValue; + }); + // Next, process the high_pcs. + // TODO: do this more efficiently, without a second traversal (but that's a + // little tricky given the special double-traversal we have). 
+ iterContextAndYAML( + abbrevDecl->attributes(), + yamlEntry.Values, + [&](const llvm::DWARFAbbreviationDeclaration::AttributeSpec& attrSpec, + llvm::DWARFYAML::FormValue& yamlValue) { + auto attr = attrSpec.Attr; + if (attr != llvm::dwarf::DW_AT_high_pc) { + return; + } + uint32_t oldValue = yamlValue.Value, newValue = 0; + bool isRelative = attrSpec.Form == llvm::dwarf::DW_FORM_data4; + if (isRelative) { + oldValue += oldLowPC; + } + if (tag == llvm::dwarf::DW_TAG_GNU_call_site || + tag == llvm::dwarf::DW_TAG_inlined_subroutine || + tag == llvm::dwarf::DW_TAG_lexical_block || + tag == llvm::dwarf::DW_TAG_label) { + newValue = locationUpdater.getNewExprEndAddr(oldValue); + } else if (tag == llvm::dwarf::DW_TAG_compile_unit || + tag == llvm::dwarf::DW_TAG_subprogram) { + newValue = locationUpdater.getNewFuncAddr(oldValue); + } else { + Fatal() << "unknown tag with low_pc " + << llvm::dwarf::TagString(tag).str(); + } + if (isRelative) { + newValue -= newLowPC; + } + yamlValue.Value = newValue; + }); +} + static void updateCompileUnits(const BinaryenDWARFInfo& info, llvm::DWARFYAML::Data& yaml, const LocationUpdater& locationUpdater) { @@ -545,36 +636,12 @@ static void updateCompileUnits(const BinaryenDWARFInfo& info, yamlUnit.Entries, [&](const llvm::DWARFDebugInfoEntry& DIE, llvm::DWARFYAML::Entry& yamlEntry) { - auto tag = DIE.getTag(); // Process the entries in each relevant DIE, looking for attributes to // change. auto abbrevDecl = DIE.getAbbreviationDeclarationPtr(); if (abbrevDecl) { - iterContextAndYAML( - abbrevDecl->attributes(), - yamlEntry.Values, - [&](const llvm::DWARFAbbreviationDeclaration::AttributeSpec& - attrSpec, - llvm::DWARFYAML::FormValue& yamlValue) { - if (attrSpec.Attr == llvm::dwarf::DW_AT_low_pc) { - if (tag == llvm::dwarf::DW_TAG_GNU_call_site || - tag == llvm::dwarf::DW_TAG_inlined_subroutine || - tag == llvm::dwarf::DW_TAG_lexical_block || - tag == llvm::dwarf::DW_TAG_label) { - // low_pc in certain tags represent expressions. 
- yamlValue.Value = - locationUpdater.getNewExprAddr(yamlValue.Value); - } else if (tag == llvm::dwarf::DW_TAG_compile_unit || - tag == llvm::dwarf::DW_TAG_subprogram) { - // low_pc in certain tags represent function. - yamlValue.Value = - locationUpdater.getNewFuncAddr(yamlValue.Value); - } else { - Fatal() << "unknown tag with low_pc " - << llvm::dwarf::TagString(tag).str(); - } - } - }); + // This is relevant; look for things to update. + updateDIE(DIE, yamlEntry, abbrevDecl, locationUpdater); } }); }); |