diff options
-rw-r--r-- | scripts/test/shared.py | 2 | ||||
-rw-r--r-- | src/passes/Print.cpp | 4 | ||||
-rw-r--r-- | src/wasm-binary.h | 9 | ||||
-rw-r--r-- | src/wasm-debug.h | 2 | ||||
-rw-r--r-- | src/wasm.h | 21 | ||||
-rw-r--r-- | src/wasm/wasm-binary.cpp | 49 | ||||
-rw-r--r-- | src/wasm/wasm-debug.cpp | 122 | ||||
-rw-r--r-- | test/passes/dwarfdump_roundtrip_dwarfdump.bin.txt | 4 | ||||
-rw-r--r-- | test/passes/fannkuch3.bin.txt | 2 | ||||
-rw-r--r-- | test/passes/fannkuch3_manyopts.bin.txt | 2 | ||||
-rw-r--r-- | test/passes/ignore_missing_func.bin.txt | 2 |
11 files changed, 148 insertions, 71 deletions
diff --git a/scripts/test/shared.py b/scripts/test/shared.py index f15424885..7c581d66c 100644 --- a/scripts/test/shared.py +++ b/scripts/test/shared.py @@ -371,7 +371,7 @@ def fail_if_not_contained(actual, expected): def fail_if_not_identical_to_file(actual, expected_file): binary = expected_file.endswith(".wasm") or type(actual) == bytes with open(expected_file, 'rb' if binary else 'r') as f: - fail_if_not_identical(actual, f.read(), fromfile=expected_file) + fail_if_not_identical(f.read(), actual, fromfile=expected_file) def get_test_dir(name): diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 106966ef1..866b0d8f8 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -1440,8 +1440,8 @@ struct PrintSExpression : public OverriddenVisitor<PrintSExpression> { } // show a binary position, if there is one if (debugInfo) { - auto iter = currFunction->binaryLocations.find(curr); - if (iter != currFunction->binaryLocations.end()) { + auto iter = currFunction->expressionLocations.find(curr); + if (iter != currFunction->expressionLocations.end()) { Colors::grey(o); o << ";; code offset: 0x" << std::hex << iter->second << std::dec << '\n'; diff --git a/src/wasm-binary.h b/src/wasm-binary.h index b36f5c548..66e823e9e 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1059,13 +1059,8 @@ private: std::unique_ptr<ImportInfo> importInfo; - // General debugging info: map every instruction to its original position in - // the binary, relative to the beginning of the code section. This is similar - // to binaryLocations on Function objects, which are filled as we load the - // functions from the binary. Here we track them as we write, and then - // the combination of the two can be used to update DWARF info for the new - // locations of things. - BinaryLocationsMap binaryLocations; + // General debugging info: track locations as we write. + BinaryLocations binaryLocations; size_t binaryLocationsSizeAtSectionStart; // Track the expressions that we added for the current function being // written, so that we can update those specific binary locations when diff --git a/src/wasm-debug.h b/src/wasm-debug.h index 1020eee85..009edb555 100644 --- a/src/wasm-debug.h +++ b/src/wasm-debug.h @@ -37,7 +37,7 @@ bool hasDWARFSections(const Module& wasm); void dumpDWARF(const Module& wasm); // Update the DWARF sections. -void writeDWARFSections(Module& wasm, const BinaryLocationsMap& newLocations); +void writeDWARFSections(Module& wasm, const BinaryLocations& newLocations); } // namespace Debug diff --git a/src/wasm.h b/src/wasm.h index 77bf75545..db1742d8c 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -1158,6 +1158,19 @@ struct Importable { bool imported() { return module.is(); } }; +class Function; + +// Represents a mapping of wasm module elements to their location in the +// binary representation. This is used for general debugging info support. +// Offsets are relative to the beginning of the code section, as in DWARF. +struct BinaryLocations { + struct Span { + uint32_t start, end; + }; + std::unordered_map<Expression*, uint32_t> expressions; + std::unordered_map<Function*, Span> functions; +}; + // Forward declarations of Stack IR, as functions can contain it, see // the stackIR property. // Stack IR is a secondary IR to the main IR defined in this file (Binaryen @@ -1166,8 +1179,6 @@ class StackInst; using StackIR = std::vector<StackInst*>; -using BinaryLocationsMap = std::unordered_map<Expression*, uint32_t>; - class Function : public Importable { public: Name name; @@ -1213,9 +1224,9 @@ public: std::set<DebugLocation> prologLocation; std::set<DebugLocation> epilogLocation; - // General debugging info: map every instruction to its original position in - // the binary, relative to the beginning of the code section. - BinaryLocationsMap binaryLocations; + // General debugging info support: track instructions and the function itself. + std::unordered_map<Expression*, uint32_t> expressionLocations; + BinaryLocations::Span funcLocation; size_t getNumParams(); size_t getNumVars(); diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 4b748a5af..9b1f24a0f 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -117,7 +117,7 @@ template<typename T> int32_t WasmBinaryWriter::startSection(T code) { if (sourceMap) { sourceMapLocationsSizeAtSectionStart = sourceMapLocations.size(); } - binaryLocationsSizeAtSectionStart = binaryLocations.size(); + binaryLocationsSizeAtSectionStart = binaryLocations.expressions.size(); return writeU32LEBPlaceholder(); // section size to be filled in later } @@ -145,7 +145,7 @@ void WasmBinaryWriter::finishSection(int32_t start) { } } - if (binaryLocationsSizeAtSectionStart != binaryLocations.size()) { + if (binaryLocationsSizeAtSectionStart != binaryLocations.expressions.size()) { // We added the binary locations, adjust them: they must be relative // to the code section. assert(binaryLocationsSizeAtSectionStart == 0); @@ -153,14 +153,18 @@ void WasmBinaryWriter::finishSection(int32_t start) { // offsets that are relative to the body, which is after that section type // byte and the the size LEB. auto body = start + sizeFieldSize; - for (auto& pair : binaryLocations) { - // Offsets are relative to the body of the code section: after the - // section type byte and the size. - // Everything was moved by the adjustment, track that. After this, - // we are at the right absolute address. - pair.second -= adjustmentForLEBShrinking; - // We are relative to the section start. - pair.second -= body; + // Offsets are relative to the body of the code section: after the + // section type byte and the size. + // Everything was moved by the adjustment, track that. After this, + // we are at the right absolute address. + // We are relative to the section start. + auto totalAdjustment = adjustmentForLEBShrinking + body; + for (auto& pair : binaryLocations.expressions) { + pair.second -= totalAdjustment; + } + for (auto& pair : binaryLocations.functions) { + pair.second.start -= totalAdjustment; + pair.second.end -= totalAdjustment; } } } @@ -335,9 +339,13 @@ void WasmBinaryWriter::writeFunctions() { for (auto* curr : binaryLocationTrackedExpressionsForFunc) { // We added the binary locations, adjust them: they must be relative // to the code section. - binaryLocations[curr] -= adjustmentForLEBShrinking; + binaryLocations.expressions[curr] -= adjustmentForLEBShrinking; } } + if (!binaryLocationTrackedExpressionsForFunc.empty()) { + binaryLocations.functions[func] = BinaryLocations::Span{ + uint32_t(start - adjustmentForLEBShrinking), uint32_t(o.size())}; + } tableOfContents.functionBodies.emplace_back( func->name, sizePos + sizeFieldSize, size); binaryLocationTrackedExpressionsForFunc.clear(); @@ -697,11 +705,10 @@ void WasmBinaryWriter::writeDebugLocation(Expression* curr, Function* func) { writeDebugLocation(iter->second); } } - // TODO: remove source map debugging support and refactor this method - // to something that directly thinks about DWARF, instead of indirectly - // looking at func->binaryLocations as a proxy for that etc. - if (func && !func->binaryLocations.empty()) { - binaryLocations[curr] = o.size(); + // If this is an instruction in a function, and if the original wasm had + // binary locations tracked, then track it in the output as well. + if (func && !func->expressionLocations.empty()) { + binaryLocations.expressions[curr] = o.size(); binaryLocationTrackedExpressionsForFunc.push_back(curr); } } @@ -1357,11 +1364,17 @@ void WasmBinaryBuilder::readFunctions() { } endOfFunction = pos + size; - Function* func = new Function; + auto* func = new Function; func->name = Name::fromInt(i); func->sig = functionSignatures[i]; currFunction = func; + if (DWARF) { + func->funcLocation = + BinaryLocations::Span{uint32_t(pos - codeSectionLocation), + uint32_t(pos - codeSectionLocation + size)}; + } + readNextDebugLocation(); BYN_TRACE("reading " << i << std::endl); @@ -2280,7 +2293,7 @@ BinaryConsts::ASTNodes WasmBinaryBuilder::readExpression(Expression*& curr) { currFunction->debugLocations[curr] = *currDebugLocation.begin(); } if (DWARF && currFunction) { - currFunction->binaryLocations[curr] = startPos - codeSectionLocation; + currFunction->expressionLocations[curr] = startPos - codeSectionLocation; } } BYN_TRACE("zz recurse from " << depth-- << " at " << pos << std::endl); diff --git a/src/wasm/wasm-debug.cpp b/src/wasm/wasm-debug.cpp index a244e550d..23bc83374 100644 --- a/src/wasm/wasm-debug.cpp +++ b/src/wasm/wasm-debug.cpp @@ -335,7 +335,7 @@ struct AddrExprMap { // Construct the map from the binaryLocations loaded from the wasm. AddrExprMap(const Module& wasm) { for (auto& func : wasm.functions) { - for (auto pair : func->binaryLocations) { + for (auto pair : func->expressionLocations) { assert(map.count(pair.second) == 0); map[pair.second] = pair.first; } @@ -343,8 +343,8 @@ struct AddrExprMap { } // Construct the map from new binaryLocations just written - AddrExprMap(const BinaryLocationsMap& newLocations) { - for (auto pair : newLocations) { + AddrExprMap(const BinaryLocations& newLocations) { + for (auto pair : newLocations.expressions) { assert(map.count(pair.second) == 0); map[pair.second] = pair.first; } @@ -366,12 +366,51 @@ struct AddrExprMap { } }; +// Represents a mapping of addresses to expressions. +struct FuncAddrMap { + std::unordered_map<uint32_t, Function*> map; + + // Construct the map from the binaryLocations loaded from the wasm. + FuncAddrMap(const Module& wasm) { + for (auto& func : wasm.functions) { + map[func->funcLocation.start] = func.get(); + map[func->funcLocation.end] = func.get(); + } + } + + Function* get(uint32_t addr) const { + auto iter = map.find(addr); + if (iter != map.end()) { + return iter->second; + } + return nullptr; + } + + void dump() const { + std::cout << " (size: " << map.size() << ")\n"; + for (auto pair : map) { + std::cout << " " << pair.first << " => " << pair.second->name << '\n'; + } + } +}; + +// Track locations from the original binary and the new one we wrote, so that +// we can update debug positions. +// We track expressions and functions separately, instead of having a single +// big map of (oldAddr) => (newAddr) because of the potentially ambiguous case +// of the final expression in a function: it's end might be identical in offset +// to the end of the function. So we have two different things that map to the +// same offset. However, if the context is "the end of the function" then the +// updated address is the new end of the function, even if the function ends +// with a different instruction now, as the old last instruction might have +// moved or been optimized out. struct LocationUpdater { Module& wasm; - const BinaryLocationsMap& newLocations; + const BinaryLocations& newLocations; - AddrExprMap oldAddrMap; - AddrExprMap newAddrMap; + AddrExprMap oldExprAddrMap; + AddrExprMap newExprAddrMap; + FuncAddrMap oldFuncAddrMap; // TODO: for memory efficiency, we may want to do this in a streaming manner, // binary to binary, without YAML IR. @@ -380,17 +419,17 @@ struct LocationUpdater { // we may need to track their spans too // https://github.com/WebAssembly/debugging/issues/9#issuecomment-567720872 - LocationUpdater(Module& wasm, const BinaryLocationsMap& newLocations) - : wasm(wasm), newLocations(newLocations), oldAddrMap(wasm), - newAddrMap(newLocations) {} - - // Updates an address. If there was never an instruction at that address, - // or if there was but if that instruction no longer exists, return 0. - // Otherwise, return the new updated location. - uint32_t getNewAddr(uint32_t oldAddr) const { - if (auto* expr = oldAddrMap.get(oldAddr)) { - auto iter = newLocations.find(expr); - if (iter != newLocations.end()) { + LocationUpdater(Module& wasm, const BinaryLocations& newLocations) + : wasm(wasm), newLocations(newLocations), oldExprAddrMap(wasm), + newExprAddrMap(newLocations), oldFuncAddrMap(wasm) {} + + // Updates an expression's address. If there was never an instruction at that + // address, or if there was but if that instruction no longer exists, return + // 0. Otherwise, return the new updated location. + uint32_t getNewExprAddr(uint32_t oldAddr) const { + if (auto* expr = oldExprAddrMap.get(oldAddr)) { + auto iter = newLocations.expressions.find(expr); + if (iter != newLocations.expressions.end()) { uint32_t newAddr = iter->second; return newAddr; } @@ -398,7 +437,22 @@ struct LocationUpdater { return 0; } - bool hasOldAddr(uint32_t oldAddr) const { return oldAddrMap.get(oldAddr); } + uint32_t getNewFuncAddr(uint32_t oldAddr) const { + if (auto* func = oldFuncAddrMap.get(oldAddr)) { + // The function might have been optimized away, check. + auto iter = newLocations.functions.find(func); + if (iter != newLocations.functions.end()) { + auto oldSpan = func->funcLocation; + auto newSpan = iter->second; + if (oldAddr == oldSpan.start) { + return newSpan.start; + } else if (oldAddr == oldSpan.end) { + return newSpan.end; + } + } + } + return 0; + } }; static void updateDebugLines(llvm::DWARFYAML::Data& data, @@ -428,7 +482,7 @@ static void updateDebugLines(llvm::DWARFYAML::Data& data, } // An expression may not exist for this line table item, if we optimized // it away. - if (auto newAddr = locationUpdater.getNewAddr(state.addr)) { + if (auto newAddr = locationUpdater.getNewExprAddr(state.addr)) { newAddrs.push_back(newAddr); newAddrInfo.emplace(newAddr, state); auto& updatedState = newAddrInfo.at(newAddr); @@ -491,6 +545,7 @@ static void updateCompileUnits(const BinaryenDWARFInfo& info, yamlUnit.Entries, [&](const llvm::DWARFDebugInfoEntry& DIE, llvm::DWARFYAML::Entry& yamlEntry) { + auto tag = DIE.getTag(); // Process the entries in each relevant DIE, looking for attributes to // change. auto abbrevDecl = DIE.getAbbreviationDeclarationPtr(); @@ -502,18 +557,21 @@ static void updateCompileUnits(const BinaryenDWARFInfo& info, attrSpec, llvm::DWARFYAML::FormValue& yamlValue) { if (attrSpec.Attr == llvm::dwarf::DW_AT_low_pc) { - // If the old address did not refer to an instruction, then - // this is not something we understand and can update. - if (locationUpdater.hasOldAddr(yamlValue.Value)) { - // The addresses of compile units and functions are not - // instructions. - assert(DIE.getTag() != llvm::dwarf::DW_TAG_compile_unit && - DIE.getTag() != llvm::dwarf::DW_TAG_subprogram); - // Note that the new value may be 0, which is the correct - // way to indicate that this is no longer a valid wasm - // value, the same as wasm-ld would do. + if (tag == llvm::dwarf::DW_TAG_GNU_call_site || + tag == llvm::dwarf::DW_TAG_inlined_subroutine || + tag == llvm::dwarf::DW_TAG_lexical_block || + tag == llvm::dwarf::DW_TAG_label) { + // low_pc in certain tags represent expressions. + yamlValue.Value = + locationUpdater.getNewExprAddr(yamlValue.Value); + } else if (tag == llvm::dwarf::DW_TAG_compile_unit || + tag == llvm::dwarf::DW_TAG_subprogram) { + // low_pc in certain tags represent function. yamlValue.Value = - locationUpdater.getNewAddr(yamlValue.Value); + locationUpdater.getNewFuncAddr(yamlValue.Value); + } else { + Fatal() << "unknown tag with low_pc " + << llvm::dwarf::TagString(tag).str(); } } }); @@ -522,7 +580,7 @@ static void updateCompileUnits(const BinaryenDWARFInfo& info, }); } -void writeDWARFSections(Module& wasm, const BinaryLocationsMap& newLocations) { +void writeDWARFSections(Module& wasm, const BinaryLocations& newLocations) { BinaryenDWARFInfo info(wasm); // Convert to Data representation, which YAML can use to write. @@ -563,7 +621,7 @@ void dumpDWARF(const Module& wasm) { std::cerr << "warning: no DWARF dumping support present\n"; } -void writeDWARFSections(Module& wasm, const BinaryLocationsMap& newLocations) { +void writeDWARFSections(Module& wasm, const BinaryLocations& newLocations) { std::cerr << "warning: no DWARF updating support present\n"; } diff --git a/test/passes/dwarfdump_roundtrip_dwarfdump.bin.txt b/test/passes/dwarfdump_roundtrip_dwarfdump.bin.txt index 6e7a91a06..b20e0cf35 100644 --- a/test/passes/dwarfdump_roundtrip_dwarfdump.bin.txt +++ b/test/passes/dwarfdump_roundtrip_dwarfdump.bin.txt @@ -137,11 +137,11 @@ Abbrev table for offset: 0x00000000 DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000095] = "a.cpp") DW_AT_stmt_list [DW_FORM_sec_offset] (0x00000000) DW_AT_comp_dir [DW_FORM_strp] ( .debug_str[0x0000009b] = "/usr/local/google/home/azakai/Dev/emscripten") - DW_AT_low_pc [DW_FORM_addr] (0x0000000000000005) + DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000) DW_AT_high_pc [DW_FORM_data4] (0x00000002) 0x00000026: DW_TAG_subprogram [2] - DW_AT_low_pc [DW_FORM_addr] (0x0000000000000005) + DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000) DW_AT_high_pc [DW_FORM_data4] (0x00000002) DW_AT_linkage_name [DW_FORM_strp] ( .debug_str[0x000000c8] = "_Z3foov") DW_AT_name [DW_FORM_strp] ( .debug_str[0x000000d0] = "foo") diff --git a/test/passes/fannkuch3.bin.txt b/test/passes/fannkuch3.bin.txt index 9ff0e44c3..d07570c59 100644 --- a/test/passes/fannkuch3.bin.txt +++ b/test/passes/fannkuch3.bin.txt @@ -2685,7 +2685,7 @@ Abbrev table for offset: 0x00000000 0x00000237: NULL 0x00000238: DW_TAG_subprogram [25] * - DW_AT_low_pc [DW_FORM_addr] (0x000000000000039c) + DW_AT_low_pc [DW_FORM_addr] (0x0000000000000388) DW_AT_high_pc [DW_FORM_data4] (0x00000346) DW_AT_GNU_all_call_sites [DW_FORM_flag_present] (true) DW_AT_name [DW_FORM_strp] ( .debug_str[0x000001ba] = "main") diff --git a/test/passes/fannkuch3_manyopts.bin.txt b/test/passes/fannkuch3_manyopts.bin.txt index a9061c853..42f6d6897 100644 --- a/test/passes/fannkuch3_manyopts.bin.txt +++ b/test/passes/fannkuch3_manyopts.bin.txt @@ -2685,7 +2685,7 @@ Abbrev table for offset: 0x00000000 0x00000237: NULL 0x00000238: DW_TAG_subprogram [25] * - DW_AT_low_pc [DW_FORM_addr] (0x000000000000039c) + DW_AT_low_pc [DW_FORM_addr] (0x0000000000000358) DW_AT_high_pc [DW_FORM_data4] (0x00000346) DW_AT_GNU_all_call_sites [DW_FORM_flag_present] (true) DW_AT_name [DW_FORM_strp] ( .debug_str[0x000001ba] = "main") diff --git a/test/passes/ignore_missing_func.bin.txt b/test/passes/ignore_missing_func.bin.txt index c64f22a0d..8fc468ea8 100644 --- a/test/passes/ignore_missing_func.bin.txt +++ b/test/passes/ignore_missing_func.bin.txt @@ -687,7 +687,7 @@ Abbrev table for offset: 0x00000000 0x0000009a: NULL 0x0000009b: DW_TAG_subprogram [8] - DW_AT_low_pc [DW_FORM_addr] (0x0000000000000060) + DW_AT_low_pc [DW_FORM_addr] (0x000000000000005c) DW_AT_high_pc [DW_FORM_data4] (0x00000064) DW_AT_name [DW_FORM_strp] ( .debug_str[0x000000e4] = "main") DW_AT_decl_file [DW_FORM_data1] ("/home/alon/Dev/emscripten/a.cpp") |