summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlon Zakai <azakai@google.com>2020-01-16 09:37:26 -0800
committerGitHub <noreply@github.com>2020-01-16 09:37:26 -0800
commit5d2c760fa086b9964ec22156a4cdf3847b019b76 (patch)
tree8521ca3c1d4207c48a8111947b83ab80b90387d9 /src
parent719b4d7d7897e25e5868989f0c708565e12d3295 (diff)
downloadbinaryen-5d2c760fa086b9964ec22156a4cdf3847b019b76.tar.gz
binaryen-5d2c760fa086b9964ec22156a4cdf3847b019b76.tar.bz2
binaryen-5d2c760fa086b9964ec22156a4cdf3847b019b76.zip
DWARF: Function location tracking (#2592)
Track the beginning and end of each function, both when reading and writing. We track expressions and functions separately, instead of having a single big map of (oldAddr) => (newAddr) because of the potentially ambiguous case of the final expression in a function: it's end might be identical in offset to the end of the function. So we have two different things that map to the same offset. However, if the context is "the end of the function" then the updated address is the new end of the function, even if the function ends with a different instruction now, as the old last instruction might have moved or been optimized out. Concretely, we have getNewExprAddr and getNewFuncAddr, so we can ask to update the location of either an expression or a function, and use that contextual information. This checks for the DIE tag in order to know what we are looking for. To be safe, if we hit an unknown tag, we halt, so that we don't silently miss things. As the test updates show, the new things we can do thanks to this PR are to update compile unit and subprogram low_pc locations. Note btw that in the first test (dwarfdump_roundtrip_dwarfdump.bin.txt) we change 5 to 0: that is correct since that test does not write out DWARF (it intentionally has no -g), so we do not track binary locations while writing, and so we have nothing to update to (the other tests show actual updating). Also fix the order in the python test runner code to show a diff of expected to encountered, and not the reverse, which confused me.
Diffstat (limited to 'src')
-rw-r--r--src/passes/Print.cpp4
-rw-r--r--src/wasm-binary.h9
-rw-r--r--src/wasm-debug.h2
-rw-r--r--src/wasm.h21
-rw-r--r--src/wasm/wasm-binary.cpp49
-rw-r--r--src/wasm/wasm-debug.cpp122
6 files changed, 142 insertions, 65 deletions
diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp
index 106966ef1..866b0d8f8 100644
--- a/src/passes/Print.cpp
+++ b/src/passes/Print.cpp
@@ -1440,8 +1440,8 @@ struct PrintSExpression : public OverriddenVisitor<PrintSExpression> {
}
// show a binary position, if there is one
if (debugInfo) {
- auto iter = currFunction->binaryLocations.find(curr);
- if (iter != currFunction->binaryLocations.end()) {
+ auto iter = currFunction->expressionLocations.find(curr);
+ if (iter != currFunction->expressionLocations.end()) {
Colors::grey(o);
o << ";; code offset: 0x" << std::hex << iter->second << std::dec
<< '\n';
diff --git a/src/wasm-binary.h b/src/wasm-binary.h
index b36f5c548..66e823e9e 100644
--- a/src/wasm-binary.h
+++ b/src/wasm-binary.h
@@ -1059,13 +1059,8 @@ private:
std::unique_ptr<ImportInfo> importInfo;
- // General debugging info: map every instruction to its original position in
- // the binary, relative to the beginning of the code section. This is similar
- // to binaryLocations on Function objects, which are filled as we load the
- // functions from the binary. Here we track them as we write, and then
- // the combination of the two can be used to update DWARF info for the new
- // locations of things.
- BinaryLocationsMap binaryLocations;
+ // General debugging info: track locations as we write.
+ BinaryLocations binaryLocations;
size_t binaryLocationsSizeAtSectionStart;
// Track the expressions that we added for the current function being
// written, so that we can update those specific binary locations when
diff --git a/src/wasm-debug.h b/src/wasm-debug.h
index 1020eee85..009edb555 100644
--- a/src/wasm-debug.h
+++ b/src/wasm-debug.h
@@ -37,7 +37,7 @@ bool hasDWARFSections(const Module& wasm);
void dumpDWARF(const Module& wasm);
// Update the DWARF sections.
-void writeDWARFSections(Module& wasm, const BinaryLocationsMap& newLocations);
+void writeDWARFSections(Module& wasm, const BinaryLocations& newLocations);
} // namespace Debug
diff --git a/src/wasm.h b/src/wasm.h
index 77bf75545..db1742d8c 100644
--- a/src/wasm.h
+++ b/src/wasm.h
@@ -1158,6 +1158,19 @@ struct Importable {
bool imported() { return module.is(); }
};
+class Function;
+
+// Represents a mapping of wasm module elements to their location in the
+// binary representation. This is used for general debugging info support.
+// Offsets are relative to the beginning of the code section, as in DWARF.
+struct BinaryLocations {
+ struct Span {
+ uint32_t start, end;
+ };
+ std::unordered_map<Expression*, uint32_t> expressions;
+ std::unordered_map<Function*, Span> functions;
+};
+
// Forward declarations of Stack IR, as functions can contain it, see
// the stackIR property.
// Stack IR is a secondary IR to the main IR defined in this file (Binaryen
@@ -1166,8 +1179,6 @@ class StackInst;
using StackIR = std::vector<StackInst*>;
-using BinaryLocationsMap = std::unordered_map<Expression*, uint32_t>;
-
class Function : public Importable {
public:
Name name;
@@ -1213,9 +1224,9 @@ public:
std::set<DebugLocation> prologLocation;
std::set<DebugLocation> epilogLocation;
- // General debugging info: map every instruction to its original position in
- // the binary, relative to the beginning of the code section.
- BinaryLocationsMap binaryLocations;
+ // General debugging info support: track instructions and the function itself.
+ std::unordered_map<Expression*, uint32_t> expressionLocations;
+ BinaryLocations::Span funcLocation;
size_t getNumParams();
size_t getNumVars();
diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index 4b748a5af..9b1f24a0f 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -117,7 +117,7 @@ template<typename T> int32_t WasmBinaryWriter::startSection(T code) {
if (sourceMap) {
sourceMapLocationsSizeAtSectionStart = sourceMapLocations.size();
}
- binaryLocationsSizeAtSectionStart = binaryLocations.size();
+ binaryLocationsSizeAtSectionStart = binaryLocations.expressions.size();
return writeU32LEBPlaceholder(); // section size to be filled in later
}
@@ -145,7 +145,7 @@ void WasmBinaryWriter::finishSection(int32_t start) {
}
}
- if (binaryLocationsSizeAtSectionStart != binaryLocations.size()) {
+ if (binaryLocationsSizeAtSectionStart != binaryLocations.expressions.size()) {
// We added the binary locations, adjust them: they must be relative
// to the code section.
assert(binaryLocationsSizeAtSectionStart == 0);
@@ -153,14 +153,18 @@ void WasmBinaryWriter::finishSection(int32_t start) {
// offsets that are relative to the body, which is after that section type
// byte and the the size LEB.
auto body = start + sizeFieldSize;
- for (auto& pair : binaryLocations) {
- // Offsets are relative to the body of the code section: after the
- // section type byte and the size.
- // Everything was moved by the adjustment, track that. After this,
- // we are at the right absolute address.
- pair.second -= adjustmentForLEBShrinking;
- // We are relative to the section start.
- pair.second -= body;
+ // Offsets are relative to the body of the code section: after the
+ // section type byte and the size.
+ // Everything was moved by the adjustment, track that. After this,
+ // we are at the right absolute address.
+ // We are relative to the section start.
+ auto totalAdjustment = adjustmentForLEBShrinking + body;
+ for (auto& pair : binaryLocations.expressions) {
+ pair.second -= totalAdjustment;
+ }
+ for (auto& pair : binaryLocations.functions) {
+ pair.second.start -= totalAdjustment;
+ pair.second.end -= totalAdjustment;
}
}
}
@@ -335,9 +339,13 @@ void WasmBinaryWriter::writeFunctions() {
for (auto* curr : binaryLocationTrackedExpressionsForFunc) {
// We added the binary locations, adjust them: they must be relative
// to the code section.
- binaryLocations[curr] -= adjustmentForLEBShrinking;
+ binaryLocations.expressions[curr] -= adjustmentForLEBShrinking;
}
}
+ if (!binaryLocationTrackedExpressionsForFunc.empty()) {
+ binaryLocations.functions[func] = BinaryLocations::Span{
+ uint32_t(start - adjustmentForLEBShrinking), uint32_t(o.size())};
+ }
tableOfContents.functionBodies.emplace_back(
func->name, sizePos + sizeFieldSize, size);
binaryLocationTrackedExpressionsForFunc.clear();
@@ -697,11 +705,10 @@ void WasmBinaryWriter::writeDebugLocation(Expression* curr, Function* func) {
writeDebugLocation(iter->second);
}
}
- // TODO: remove source map debugging support and refactor this method
- // to something that directly thinks about DWARF, instead of indirectly
- // looking at func->binaryLocations as a proxy for that etc.
- if (func && !func->binaryLocations.empty()) {
- binaryLocations[curr] = o.size();
+ // If this is an instruction in a function, and if the original wasm had
+ // binary locations tracked, then track it in the output as well.
+ if (func && !func->expressionLocations.empty()) {
+ binaryLocations.expressions[curr] = o.size();
binaryLocationTrackedExpressionsForFunc.push_back(curr);
}
}
@@ -1357,11 +1364,17 @@ void WasmBinaryBuilder::readFunctions() {
}
endOfFunction = pos + size;
- Function* func = new Function;
+ auto* func = new Function;
func->name = Name::fromInt(i);
func->sig = functionSignatures[i];
currFunction = func;
+ if (DWARF) {
+ func->funcLocation =
+ BinaryLocations::Span{uint32_t(pos - codeSectionLocation),
+ uint32_t(pos - codeSectionLocation + size)};
+ }
+
readNextDebugLocation();
BYN_TRACE("reading " << i << std::endl);
@@ -2280,7 +2293,7 @@ BinaryConsts::ASTNodes WasmBinaryBuilder::readExpression(Expression*& curr) {
currFunction->debugLocations[curr] = *currDebugLocation.begin();
}
if (DWARF && currFunction) {
- currFunction->binaryLocations[curr] = startPos - codeSectionLocation;
+ currFunction->expressionLocations[curr] = startPos - codeSectionLocation;
}
}
BYN_TRACE("zz recurse from " << depth-- << " at " << pos << std::endl);
diff --git a/src/wasm/wasm-debug.cpp b/src/wasm/wasm-debug.cpp
index a244e550d..23bc83374 100644
--- a/src/wasm/wasm-debug.cpp
+++ b/src/wasm/wasm-debug.cpp
@@ -335,7 +335,7 @@ struct AddrExprMap {
// Construct the map from the binaryLocations loaded from the wasm.
AddrExprMap(const Module& wasm) {
for (auto& func : wasm.functions) {
- for (auto pair : func->binaryLocations) {
+ for (auto pair : func->expressionLocations) {
assert(map.count(pair.second) == 0);
map[pair.second] = pair.first;
}
@@ -343,8 +343,8 @@ struct AddrExprMap {
}
// Construct the map from new binaryLocations just written
- AddrExprMap(const BinaryLocationsMap& newLocations) {
- for (auto pair : newLocations) {
+ AddrExprMap(const BinaryLocations& newLocations) {
+ for (auto pair : newLocations.expressions) {
assert(map.count(pair.second) == 0);
map[pair.second] = pair.first;
}
@@ -366,12 +366,51 @@ struct AddrExprMap {
}
};
+// Represents a mapping of addresses to expressions.
+struct FuncAddrMap {
+ std::unordered_map<uint32_t, Function*> map;
+
+ // Construct the map from the binaryLocations loaded from the wasm.
+ FuncAddrMap(const Module& wasm) {
+ for (auto& func : wasm.functions) {
+ map[func->funcLocation.start] = func.get();
+ map[func->funcLocation.end] = func.get();
+ }
+ }
+
+ Function* get(uint32_t addr) const {
+ auto iter = map.find(addr);
+ if (iter != map.end()) {
+ return iter->second;
+ }
+ return nullptr;
+ }
+
+ void dump() const {
+ std::cout << " (size: " << map.size() << ")\n";
+ for (auto pair : map) {
+ std::cout << " " << pair.first << " => " << pair.second->name << '\n';
+ }
+ }
+};
+
+// Track locations from the original binary and the new one we wrote, so that
+// we can update debug positions.
+// We track expressions and functions separately, instead of having a single
+// big map of (oldAddr) => (newAddr) because of the potentially ambiguous case
+// of the final expression in a function: it's end might be identical in offset
+// to the end of the function. So we have two different things that map to the
+// same offset. However, if the context is "the end of the function" then the
+// updated address is the new end of the function, even if the function ends
+// with a different instruction now, as the old last instruction might have
+// moved or been optimized out.
struct LocationUpdater {
Module& wasm;
- const BinaryLocationsMap& newLocations;
+ const BinaryLocations& newLocations;
- AddrExprMap oldAddrMap;
- AddrExprMap newAddrMap;
+ AddrExprMap oldExprAddrMap;
+ AddrExprMap newExprAddrMap;
+ FuncAddrMap oldFuncAddrMap;
// TODO: for memory efficiency, we may want to do this in a streaming manner,
// binary to binary, without YAML IR.
@@ -380,17 +419,17 @@ struct LocationUpdater {
// we may need to track their spans too
// https://github.com/WebAssembly/debugging/issues/9#issuecomment-567720872
- LocationUpdater(Module& wasm, const BinaryLocationsMap& newLocations)
- : wasm(wasm), newLocations(newLocations), oldAddrMap(wasm),
- newAddrMap(newLocations) {}
-
- // Updates an address. If there was never an instruction at that address,
- // or if there was but if that instruction no longer exists, return 0.
- // Otherwise, return the new updated location.
- uint32_t getNewAddr(uint32_t oldAddr) const {
- if (auto* expr = oldAddrMap.get(oldAddr)) {
- auto iter = newLocations.find(expr);
- if (iter != newLocations.end()) {
+ LocationUpdater(Module& wasm, const BinaryLocations& newLocations)
+ : wasm(wasm), newLocations(newLocations), oldExprAddrMap(wasm),
+ newExprAddrMap(newLocations), oldFuncAddrMap(wasm) {}
+
+ // Updates an expression's address. If there was never an instruction at that
+ // address, or if there was but if that instruction no longer exists, return
+ // 0. Otherwise, return the new updated location.
+ uint32_t getNewExprAddr(uint32_t oldAddr) const {
+ if (auto* expr = oldExprAddrMap.get(oldAddr)) {
+ auto iter = newLocations.expressions.find(expr);
+ if (iter != newLocations.expressions.end()) {
uint32_t newAddr = iter->second;
return newAddr;
}
@@ -398,7 +437,22 @@ struct LocationUpdater {
return 0;
}
- bool hasOldAddr(uint32_t oldAddr) const { return oldAddrMap.get(oldAddr); }
+ uint32_t getNewFuncAddr(uint32_t oldAddr) const {
+ if (auto* func = oldFuncAddrMap.get(oldAddr)) {
+ // The function might have been optimized away, check.
+ auto iter = newLocations.functions.find(func);
+ if (iter != newLocations.functions.end()) {
+ auto oldSpan = func->funcLocation;
+ auto newSpan = iter->second;
+ if (oldAddr == oldSpan.start) {
+ return newSpan.start;
+ } else if (oldAddr == oldSpan.end) {
+ return newSpan.end;
+ }
+ }
+ }
+ return 0;
+ }
};
static void updateDebugLines(llvm::DWARFYAML::Data& data,
@@ -428,7 +482,7 @@ static void updateDebugLines(llvm::DWARFYAML::Data& data,
}
// An expression may not exist for this line table item, if we optimized
// it away.
- if (auto newAddr = locationUpdater.getNewAddr(state.addr)) {
+ if (auto newAddr = locationUpdater.getNewExprAddr(state.addr)) {
newAddrs.push_back(newAddr);
newAddrInfo.emplace(newAddr, state);
auto& updatedState = newAddrInfo.at(newAddr);
@@ -491,6 +545,7 @@ static void updateCompileUnits(const BinaryenDWARFInfo& info,
yamlUnit.Entries,
[&](const llvm::DWARFDebugInfoEntry& DIE,
llvm::DWARFYAML::Entry& yamlEntry) {
+ auto tag = DIE.getTag();
// Process the entries in each relevant DIE, looking for attributes to
// change.
auto abbrevDecl = DIE.getAbbreviationDeclarationPtr();
@@ -502,18 +557,21 @@ static void updateCompileUnits(const BinaryenDWARFInfo& info,
attrSpec,
llvm::DWARFYAML::FormValue& yamlValue) {
if (attrSpec.Attr == llvm::dwarf::DW_AT_low_pc) {
- // If the old address did not refer to an instruction, then
- // this is not something we understand and can update.
- if (locationUpdater.hasOldAddr(yamlValue.Value)) {
- // The addresses of compile units and functions are not
- // instructions.
- assert(DIE.getTag() != llvm::dwarf::DW_TAG_compile_unit &&
- DIE.getTag() != llvm::dwarf::DW_TAG_subprogram);
- // Note that the new value may be 0, which is the correct
- // way to indicate that this is no longer a valid wasm
- // value, the same as wasm-ld would do.
+ if (tag == llvm::dwarf::DW_TAG_GNU_call_site ||
+ tag == llvm::dwarf::DW_TAG_inlined_subroutine ||
+ tag == llvm::dwarf::DW_TAG_lexical_block ||
+ tag == llvm::dwarf::DW_TAG_label) {
+ // low_pc in certain tags represent expressions.
+ yamlValue.Value =
+ locationUpdater.getNewExprAddr(yamlValue.Value);
+ } else if (tag == llvm::dwarf::DW_TAG_compile_unit ||
+ tag == llvm::dwarf::DW_TAG_subprogram) {
+ // low_pc in certain tags represent function.
yamlValue.Value =
- locationUpdater.getNewAddr(yamlValue.Value);
+ locationUpdater.getNewFuncAddr(yamlValue.Value);
+ } else {
+ Fatal() << "unknown tag with low_pc "
+ << llvm::dwarf::TagString(tag).str();
}
}
});
@@ -522,7 +580,7 @@ static void updateCompileUnits(const BinaryenDWARFInfo& info,
});
}
-void writeDWARFSections(Module& wasm, const BinaryLocationsMap& newLocations) {
+void writeDWARFSections(Module& wasm, const BinaryLocations& newLocations) {
BinaryenDWARFInfo info(wasm);
// Convert to Data representation, which YAML can use to write.
@@ -563,7 +621,7 @@ void dumpDWARF(const Module& wasm) {
std::cerr << "warning: no DWARF dumping support present\n";
}
-void writeDWARFSections(Module& wasm, const BinaryLocationsMap& newLocations) {
+void writeDWARFSections(Module& wasm, const BinaryLocations& newLocations) {
std::cerr << "warning: no DWARF updating support present\n";
}