summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Alon Zakai <azakai@google.com> 2020-01-22 08:48:53 -0800
committer: GitHub <noreply@github.com> 2020-01-22 08:48:53 -0800
commit: 474269867c54c7f6031bd6797f4fe96db5783c49 (patch)
tree: 891228ae2340de609264c2bd072d6c0d7e7d2c59 /src
parent: d6ce516017f6ea809babb6d81e5bb791ea94659c (diff)
download: binaryen-474269867c54c7f6031bd6797f4fe96db5783c49.tar.gz
binaryen-474269867c54c7f6031bd6797f4fe96db5783c49.tar.bz2
binaryen-474269867c54c7f6031bd6797f4fe96db5783c49.zip
DWARF: Track more function locations (#2604)
DWARF from LLVM can refer to several positions in a function: the first byte belonging to the function, where the size LEB is; the first byte after the size LEB, where the local declarations are; the final "end" opcode; or one byte past that, which is the first byte past the bytes that belong to the function. We aren't sure why LLVM does this, but we track all of them for now. After this change, all debug line positions are identified. However, in some cases a debug line refers to one past the end of the function, which may be an LLVM bug. That location is ambiguous, as it could also be the first byte of the next function. (This was discovered because it happened to the last function, which is followed by another section rather than by another function.)
Diffstat (limited to 'src')
-rw-r--r-- src/wasm.h 14
-rw-r--r-- src/wasm/wasm-binary.cpp 20
-rw-r--r-- src/wasm/wasm-debug.cpp 95
3 files changed, 92 insertions, 37 deletions
diff --git a/src/wasm.h b/src/wasm.h
index f30a24d6b..29844d855 100644
--- a/src/wasm.h
+++ b/src/wasm.h
@@ -1205,7 +1205,17 @@ struct BinaryLocations {
};
std::unordered_map<Expression*, DelimiterLocations> delimiters;
- std::unordered_map<Function*, Span> functions;
+ // DWARF debug info can refer to multiple interesting positions in a function.
+ struct FunctionLocations {
+ // The very start of the function, where the binary has a size LEB.
+ BinaryLocation start = 0;
+ // The area where we declare locals, which is right after the size LEB.
+ BinaryLocation declarations = 0;
+ // The end, which is one past the final "end" instruction byte.
+ BinaryLocation end = 0;
+ };
+
+ std::unordered_map<Function*, FunctionLocations> functions;
};
// Forward declarations of Stack IR, as functions can contain it, see
@@ -1265,7 +1275,7 @@ public:
std::unordered_map<Expression*, BinaryLocations::Span> expressionLocations;
std::unordered_map<Expression*, BinaryLocations::DelimiterLocations>
delimiterLocations;
- BinaryLocations::Span funcLocation;
+ BinaryLocations::FunctionLocations funcLocation;
size_t getNumParams();
size_t getNumVars();
diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index f834666b4..7dc904cd9 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -165,6 +165,7 @@ void WasmBinaryWriter::finishSection(int32_t start) {
}
for (auto& pair : binaryLocations.functions) {
pair.second.start -= totalAdjustment;
+ pair.second.declarations -= totalAdjustment;
pair.second.end -= totalAdjustment;
}
for (auto& pair : binaryLocations.delimiters) {
@@ -304,7 +305,7 @@ void WasmBinaryWriter::writeFunctions() {
return;
}
BYN_TRACE("== writeFunctions\n");
- auto start = startSection(BinaryConsts::Section::Code);
+ auto sectionStart = startSection(BinaryConsts::Section::Code);
o << U32LEB(importInfo->getNumDefinedFunctions());
ModuleUtils::iterDefinedFunctions(*wasm, [&](Function* func) {
assert(binaryLocationTrackedExpressionsForFunc.empty());
@@ -357,15 +358,16 @@ void WasmBinaryWriter::writeFunctions() {
}
}
if (!binaryLocationTrackedExpressionsForFunc.empty()) {
- binaryLocations.functions[func] =
- BinaryLocations::Span{BinaryLocation(start - adjustmentForLEBShrinking),
- BinaryLocation(o.size())};
+ binaryLocations.functions[func] = BinaryLocations::FunctionLocations{
+ BinaryLocation(sizePos),
+ BinaryLocation(start - adjustmentForLEBShrinking),
+ BinaryLocation(o.size())};
}
tableOfContents.functionBodies.emplace_back(
func->name, sizePos + sizeFieldSize, size);
binaryLocationTrackedExpressionsForFunc.clear();
});
- finishSection(start);
+ finishSection(sectionStart);
}
void WasmBinaryWriter::writeGlobals() {
@@ -1389,6 +1391,7 @@ void WasmBinaryBuilder::readFunctions() {
}
for (size_t i = 0; i < total; i++) {
BYN_TRACE("read one at " << pos << std::endl);
+ auto sizePos = pos;
size_t size = getU32LEB();
if (size == 0) {
throwError("empty function size");
@@ -1401,9 +1404,10 @@ void WasmBinaryBuilder::readFunctions() {
currFunction = func;
if (DWARF) {
- func->funcLocation =
- BinaryLocations::Span{BinaryLocation(pos - codeSectionLocation),
- BinaryLocation(pos - codeSectionLocation + size)};
+ func->funcLocation = BinaryLocations::FunctionLocations{
+ BinaryLocation(sizePos - codeSectionLocation),
+ BinaryLocation(pos - codeSectionLocation),
+ BinaryLocation(pos - codeSectionLocation + size)};
}
readNextDebugLocation();
diff --git a/src/wasm/wasm-debug.cpp b/src/wasm/wasm-debug.cpp
index aa596aa61..a79f7f9e3 100644
--- a/src/wasm/wasm-debug.cpp
+++ b/src/wasm/wasm-debug.cpp
@@ -402,33 +402,38 @@ private:
}
};
-// Represents a mapping of addresses to expressions. Note that we use a single
-// map for the start and end addresses, since there is no chance of a function's
-// start overlapping with another's end (there is the size LEB in the middle).
+// Represents a mapping of addresses to expressions. As with expressions, we
+// track both start and end; here, however, "start" means the "start" and
+// "declarations" fields in FunctionLocations, and "end" means the two locations
+// of one past the end, and one before it which is the "end" opcode that is
+// emitted.
struct FuncAddrMap {
- std::unordered_map<BinaryLocation, Function*> map;
+ std::unordered_map<BinaryLocation, Function*> startMap, endMap;
// Construct the map from the binaryLocations loaded from the wasm.
FuncAddrMap(const Module& wasm) {
for (auto& func : wasm.functions) {
- map[func->funcLocation.start] = func.get();
- map[func->funcLocation.end] = func.get();
+ startMap[func->funcLocation.start] = func.get();
+ startMap[func->funcLocation.declarations] = func.get();
+ endMap[func->funcLocation.end - 1] = func.get();
+ endMap[func->funcLocation.end] = func.get();
}
}
- Function* get(BinaryLocation addr) const {
- auto iter = map.find(addr);
- if (iter != map.end()) {
+ Function* getStart(BinaryLocation addr) const {
+ auto iter = startMap.find(addr);
+ if (iter != startMap.end()) {
return iter->second;
}
return nullptr;
}
- void dump() const {
- std::cout << " (size: " << map.size() << ")\n";
- for (auto pair : map) {
- std::cout << " " << pair.first << " => " << pair.second->name << '\n';
+ Function* getEnd(BinaryLocation addr) const {
+ auto iter = endMap.find(addr);
+ if (iter != endMap.end()) {
+ return iter->second;
}
+ return nullptr;
}
};
@@ -484,25 +489,59 @@ struct LocationUpdater {
return 0;
}
- BinaryLocation getNewFuncAddr(BinaryLocation oldAddr) const {
- if (auto* func = oldFuncAddrMap.get(oldAddr)) {
+ BinaryLocation getNewFuncStartAddr(BinaryLocation oldAddr) const {
+ if (auto* func = oldFuncAddrMap.getStart(oldAddr)) {
// The function might have been optimized away, check.
auto iter = newLocations.functions.find(func);
if (iter != newLocations.functions.end()) {
- auto oldSpan = func->funcLocation;
- auto newSpan = iter->second;
- if (oldAddr == oldSpan.start) {
- return newSpan.start;
- } else if (oldAddr == oldSpan.end) {
- return newSpan.end;
+ auto oldLocations = func->funcLocation;
+ auto newLocations = iter->second;
+ if (oldAddr == oldLocations.start) {
+ return newLocations.start;
+ } else if (oldAddr == oldLocations.declarations) {
+ return newLocations.declarations;
+ } else {
+ WASM_UNREACHABLE("invalid func start");
}
}
}
return 0;
}
- bool hasOldFuncAddr(BinaryLocation oldAddr) const {
- return oldFuncAddrMap.get(oldAddr);
+ bool hasOldFuncStartAddr(BinaryLocation oldAddr) const {
+ return oldFuncAddrMap.getStart(oldAddr);
+ }
+
+ BinaryLocation getNewFuncEndAddr(BinaryLocation oldAddr) const {
+ if (auto* func = oldFuncAddrMap.getEnd(oldAddr)) {
+ // The function might have been optimized away, check.
+ auto iter = newLocations.functions.find(func);
+ if (iter != newLocations.functions.end()) {
+ auto oldLocations = func->funcLocation;
+ auto newLocations = iter->second;
+ if (oldAddr == oldLocations.end) {
+ return newLocations.end;
+ } else if (oldAddr == oldLocations.end - 1) {
+ return newLocations.end - 1;
+ } else {
+ WASM_UNREACHABLE("invalid func end");
+ }
+ }
+ }
+ return 0;
+ }
+
+ // Check for either the end opcode, or one past the end.
+ bool hasOldFuncEndAddr(BinaryLocation oldAddr) const {
+ return oldFuncAddrMap.getEnd(oldAddr);
+ }
+
+ // Check specifically for the end opcode.
+ bool hasOldFuncEndOpcodeAddr(BinaryLocation oldAddr) const {
+ if (auto* func = oldFuncAddrMap.getEnd(oldAddr)) {
+ return oldAddr == func->funcLocation.end - 1;
+ }
+ return false;
}
BinaryLocation getNewExtraAddr(BinaryLocation oldAddr) const {
@@ -552,8 +591,10 @@ static void updateDebugLines(llvm::DWARFYAML::Data& data,
BinaryLocation newAddr = 0;
if (locationUpdater.hasOldExprAddr(oldAddr)) {
newAddr = locationUpdater.getNewExprAddr(oldAddr);
- } else if (locationUpdater.hasOldFuncAddr(oldAddr)) {
- newAddr = locationUpdater.getNewFuncAddr(oldAddr);
+ } else if (locationUpdater.hasOldFuncStartAddr(oldAddr)) {
+ newAddr = locationUpdater.getNewFuncStartAddr(oldAddr);
+ } else if (locationUpdater.hasOldFuncEndAddr(oldAddr)) {
+ newAddr = locationUpdater.getNewFuncEndAddr(oldAddr);
} else if (locationUpdater.hasOldExtraAddr(oldAddr)) {
newAddr = locationUpdater.getNewExtraAddr(oldAddr);
}
@@ -631,7 +672,7 @@ static void updateDIE(const llvm::DWARFDebugInfoEntry& DIE,
newValue = locationUpdater.getNewExprAddr(oldValue);
} else if (tag == llvm::dwarf::DW_TAG_compile_unit ||
tag == llvm::dwarf::DW_TAG_subprogram) {
- newValue = locationUpdater.getNewFuncAddr(oldValue);
+ newValue = locationUpdater.getNewFuncStartAddr(oldValue);
} else {
Fatal() << "unknown tag with low_pc "
<< llvm::dwarf::TagString(tag).str();
@@ -664,7 +705,7 @@ static void updateDIE(const llvm::DWARFDebugInfoEntry& DIE,
newValue = locationUpdater.getNewExprEndAddr(oldValue);
} else if (tag == llvm::dwarf::DW_TAG_compile_unit ||
tag == llvm::dwarf::DW_TAG_subprogram) {
- newValue = locationUpdater.getNewFuncAddr(oldValue);
+ newValue = locationUpdater.getNewFuncEndAddr(oldValue);
} else {
Fatal() << "unknown tag with low_pc "
<< llvm::dwarf::TagString(tag).str();