diff options
author | Alon Zakai <azakai@google.com> | 2020-01-21 19:03:22 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-01-21 19:03:22 -0800 |
commit | d6ce516017f6ea809babb6d81e5bb791ea94659c (patch) | |
tree | d8eb77d3b7791010d73b12bccc803a429de66709 /src | |
parent | 51fff211d7aae002ff28e1c6245e3f9349493f11 (diff) | |
download | binaryen-d6ce516017f6ea809babb6d81e5bb791ea94659c.tar.gz binaryen-d6ce516017f6ea809babb6d81e5bb791ea94659c.tar.bz2 binaryen-d6ce516017f6ea809babb6d81e5bb791ea94659c.zip |
DWARF: Track the positions of 'end', 'else', 'catch' binary locations (#2603)
Control flow structures have those in addition to the normal span of
(start, end), and we need to track them too.
Tracking them during reading requires us to track control flow
structures while parsing, so that we can know to which structure
an end/else/catch refers to.
We track these locations using a map on the side of instruction
to its "extra" locations. That avoids increasing the size of the
tracking info for the much more common non-control flow
instructions.
Note that there is one more 'end' location, that of the function
(not referring to any instruction). I left that to a later PR to
not increase this one too much.
Diffstat (limited to 'src')
-rw-r--r-- | src/wasm-binary.h | 15 | ||||
-rw-r--r-- | src/wasm-stack.h | 16 | ||||
-rw-r--r-- | src/wasm.h | 34 | ||||
-rw-r--r-- | src/wasm/wasm-binary.cpp | 69 | ||||
-rw-r--r-- | src/wasm/wasm-debug.cpp | 71 | ||||
-rw-r--r-- | src/wasm/wasm-stack.cpp | 23 |
6 files changed, 196 insertions, 32 deletions
diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 5d601c6e9..5cfafb509 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1015,6 +1015,9 @@ public: void writeDebugLocation(const Function::DebugLocation& loc); void writeDebugLocation(Expression* curr, Function* func); void writeDebugLocationEnd(Expression* curr, Function* func); + void writeExtraDebugLocation(Expression* curr, + Function* func, + BinaryLocations::DelimiterId id); // helpers void writeInlineString(const char* name); @@ -1185,6 +1188,18 @@ public: std::vector<Expression*> expressionStack; + // Control flow structure parsing: these have not just the normal binary + // data for an instruction, but also some bytes later on like "end" or "else". + // We must be aware of the connection between those things, for debug info. + std::vector<Expression*> controlFlowStack; + + // Called when we parse the beginning of a control flow structure. + void startControlFlow(Expression* curr); + + // Called when we parse a later part of a control flow structure, like "end" + // or "else". + void continueControlFlow(BinaryLocations::DelimiterId id, BinaryLocation pos); + // set when we know code is unreachable in the sense of the wasm spec: we are // in a block and after an unreachable element. this helps parse stacky wasm // code, which can be unsuitable for our IR when unreachable. diff --git a/src/wasm-stack.h b/src/wasm-stack.h index 7ca30f369..863bbbf51 100644 --- a/src/wasm-stack.h +++ b/src/wasm-stack.h @@ -144,10 +144,12 @@ public: void visitPush(Push* curr); void visitPop(Pop* curr); - void emitIfElse(); - void emitCatch(); - void emitScopeEnd(); // emit an end at the end of a block/loop/if/try - void emitFunctionEnd(); // emit an end at the end of a function + void emitIfElse(If* curr); + void emitCatch(Try* curr); + // emit an end at the end of a block/loop/if/try + void emitScopeEnd(Expression* curr); + // emit an end at the end of a function + void emitFunctionEnd(); void emitUnreachable(); void mapLocalsAndEmitHeader(); @@ -813,9 +815,9 @@ public: } writer.mapLocalsAndEmitHeader(); } - void emitIfElse(If* curr) { writer.emitIfElse(); } - void emitCatch(Try* curr) { writer.emitCatch(); } - void emitScopeEnd(Expression* curr) { writer.emitScopeEnd(); } + void emitIfElse(If* curr) { writer.emitIfElse(curr); } + void emitCatch(Try* curr) { writer.emitCatch(curr); } + void emitScopeEnd(Expression* curr) { writer.emitScopeEnd(curr); } void emitFunctionEnd() { if (func->epilogLocation.size()) { parent.writeDebugLocation(*func->epilogLocation.begin()); diff --git a/src/wasm.h b/src/wasm.h index 8b60885e3..f30a24d6b 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -1172,7 +1172,39 @@ struct BinaryLocations { struct Span { BinaryLocation start = 0, end = 0; }; + + // Track the range of addresses an expressions appears at. This is the + // contiguous range that all instructions have - control flow instructions + // have additional opcodes later (like an end for a block or loop), see + // just after this. std::unordered_map<Expression*, Span> expressions; + + // Track the extra delimiter positions that some instructions, in particular + // control flow, have, like 'end' for loop and block. We keep these in a + // separate map because they are rare and we optimize for the storage space + // for the common type of instruction which just needs a Span. We implement + // this as a simple struct with two elements (as two extra elements is the + // maximum currently needed; due to 'catch' and 'end' for try-catch). The + // second value may be 0, indicating it is not used. + struct DelimiterLocations : public std::array<BinaryLocation, 2> { + DelimiterLocations() { + // Ensure zero-initialization. + for (auto& item : *this) { + item = 0; + } + } + }; + + enum DelimiterId { + // All control flow structures have an end, so use index 0 for that. + End = 0, + // Use index 1 for all other current things. + Else = 1, + Catch = 1, + Invalid = -1 + }; + std::unordered_map<Expression*, DelimiterLocations> delimiters; + std::unordered_map<Function*, Span> functions; }; @@ -1231,6 +1263,8 @@ public: // General debugging info support: track instructions and the function itself. std::unordered_map<Expression*, BinaryLocations::Span> expressionLocations; + std::unordered_map<Expression*, BinaryLocations::DelimiterLocations> + delimiterLocations; BinaryLocations::Span funcLocation; size_t getNumParams(); diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 9c7eb55f2..f834666b4 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -167,6 +167,11 @@ void WasmBinaryWriter::finishSection(int32_t start) { pair.second.start -= totalAdjustment; pair.second.end -= totalAdjustment; } + for (auto& pair : binaryLocations.delimiters) { + for (auto& item : pair.second) { + item -= totalAdjustment; + } + } } } @@ -343,6 +348,12 @@ void WasmBinaryWriter::writeFunctions() { auto& span = binaryLocations.expressions[curr]; span.start -= adjustmentForLEBShrinking; span.end -= adjustmentForLEBShrinking; + auto iter = binaryLocations.delimiters.find(curr); + if (iter != binaryLocations.delimiters.end()) { + for (auto& item : iter->second) { + item -= adjustmentForLEBShrinking; + } + } } } if (!binaryLocationTrackedExpressionsForFunc.empty()) { @@ -726,6 +737,13 @@ void WasmBinaryWriter::writeDebugLocationEnd(Expression* curr, Function* func) { } } +void WasmBinaryWriter::writeExtraDebugLocation( + Expression* curr, Function* func, BinaryLocations::DelimiterId id) { + if (func && !func->expressionLocations.empty()) { + binaryLocations.delimiters[curr][id] = o.size(); + } +} + void WasmBinaryWriter::writeInlineString(const char* name) { int32_t size = strlen(name); o << U32LEB(size); @@ -1411,6 +1429,7 @@ void WasmBinaryBuilder::readFunctions() { assert(breakTargetNames.size() == 0); assert(breakStack.empty()); assert(expressionStack.empty()); + assert(controlFlowStack.empty()); assert(depth == 0); func->body = getBlockOrSingleton(func->sig.results); assert(depth == 0); @@ -1419,6 +1438,7 @@ void WasmBinaryBuilder::readFunctions() { if (!expressionStack.empty()) { throwError("stack not empty on function exit"); } + assert(controlFlowStack.empty()); if (pos != endOfFunction) { throwError("binary offset at function exit not at expected location"); } @@ -1675,11 +1695,11 @@ void WasmBinaryBuilder::processExpressions() { } expressionStack.push_back(curr); if (curr->type == Type::unreachable) { - // once we see something unreachable, we don't want to add anything else + // Once we see something unreachable, we don't want to add anything else // to the stack, as it could be stacky code that is non-representable in - // our AST. but we do need to skip it - // if there is nothing else here, just stop. otherwise, go into - // unreachable mode. peek to see what to do + // our AST. but we do need to skip it. + // If there is nothing else here, just stop. Otherwise, go into + // unreachable mode. peek to see what to do. if (pos == endOfFunction) { throwError("Reached function end without seeing End opcode"); } @@ -2172,9 +2192,16 @@ BinaryConsts::ASTNodes WasmBinaryBuilder::readExpression(Expression*& curr) { visitDrop((curr = allocator.alloc<Drop>())->cast<Drop>()); break; case BinaryConsts::End: + curr = nullptr; + continueControlFlow(BinaryLocations::End, startPos); + break; case BinaryConsts::Else: + curr = nullptr; + continueControlFlow(BinaryLocations::Else, startPos); + break; case BinaryConsts::Catch: curr = nullptr; + continueControlFlow(BinaryLocations::Catch, startPos); break; case BinaryConsts::RefNull: visitRefNull((curr = allocator.alloc<RefNull>())->cast<RefNull>()); @@ -2317,6 +2344,34 @@ BinaryConsts::ASTNodes WasmBinaryBuilder::readExpression(Expression*& curr) { return BinaryConsts::ASTNodes(code); } +void WasmBinaryBuilder::startControlFlow(Expression* curr) { + if (DWARF && currFunction) { + controlFlowStack.push_back(curr); + } +} + +void WasmBinaryBuilder::continueControlFlow(BinaryLocations::DelimiterId id, + BinaryLocation pos) { + if (DWARF && currFunction) { + if (controlFlowStack.empty()) { + // We reached the end of the function, which is also marked with an + // "end", like a control flow structure. + assert(id == BinaryLocations::End); + assert(pos + 1 == endOfFunction); + return; + } + assert(!controlFlowStack.empty()); + auto currControlFlow = controlFlowStack.back(); + // We are called after parsing the byte, so we need to subtract one to + // get its position. + currFunction->delimiterLocations[currControlFlow][id] = + pos - codeSectionLocation; + if (id == BinaryLocations::End) { + controlFlowStack.pop_back(); + } + } +} + void WasmBinaryBuilder::pushBlockElements(Block* curr, size_t start, size_t end) { @@ -2361,7 +2416,7 @@ void WasmBinaryBuilder::pushBlockElements(Block* curr, void WasmBinaryBuilder::visitBlock(Block* curr) { BYN_TRACE("zz node: Block\n"); - + startControlFlow(curr); // special-case Block and de-recurse nested blocks in their first position, as // that is a common pattern that can be very highly nested. std::vector<Block*> stack; @@ -2374,6 +2429,7 @@ void WasmBinaryBuilder::visitBlock(Block* curr) { // a recursion readNextDebugLocation(); curr = allocator.alloc<Block>(); + startControlFlow(curr); pos++; if (debugLocation.size()) { currFunction->debugLocations[curr] = *debugLocation.begin(); @@ -2449,6 +2505,7 @@ Expression* WasmBinaryBuilder::getBlockOrSingleton(Type type, void WasmBinaryBuilder::visitIf(If* curr) { BYN_TRACE("zz node: If\n"); + startControlFlow(curr); curr->type = getType(); curr->condition = popNonVoidExpression(); curr->ifTrue = getBlockOrSingleton(curr->type); @@ -2463,6 +2520,7 @@ void WasmBinaryBuilder::visitIf(If* curr) { void WasmBinaryBuilder::visitLoop(Loop* curr) { BYN_TRACE("zz node: Loop\n"); + startControlFlow(curr); curr->type = getType(); curr->name = getNextLabel(); breakStack.push_back({curr->name, 0}); @@ -4469,6 +4527,7 @@ void WasmBinaryBuilder::visitRefFunc(RefFunc* curr) { void WasmBinaryBuilder::visitTry(Try* curr) { BYN_TRACE("zz node: Try\n"); + startControlFlow(curr); // For simplicity of implementation, like if scopes, we create a hidden block // within each try-body and catch-body, and let branches target those inner // blocks instead. diff --git a/src/wasm/wasm-debug.cpp b/src/wasm/wasm-debug.cpp index a8b70247f..aa596aa61 100644 --- a/src/wasm/wasm-debug.cpp +++ b/src/wasm/wasm-debug.cpp @@ -337,21 +337,28 @@ struct AddrExprMap { std::unordered_map<BinaryLocation, Expression*> startMap; std::unordered_map<BinaryLocation, Expression*> endMap; + // Some instructions have extra binary locations, like the else and end in + // and if. Track those separately, including their expression and their id + // ("else", "end", etc.), as they are rare, and we don't want to + // bloat the common case which is represented in the earlier maps. + struct ExtraInfo { + Expression* expr; + BinaryLocations::DelimiterId id; + }; + std::unordered_map<BinaryLocation, ExtraInfo> extraMap; + // Construct the map from the binaryLocations loaded from the wasm. AddrExprMap(const Module& wasm) { for (auto& func : wasm.functions) { for (auto pair : func->expressionLocations) { add(pair.first, pair.second); } + for (auto pair : func->delimiterLocations) { + add(pair.first, pair.second); + } } } - // Construct the map from new binaryLocations just written - AddrExprMap(const BinaryLocations& newLocations) { - for (auto pair : newLocations.expressions) { - add(pair.first, pair.second); - } - } Expression* getStart(BinaryLocation addr) const { auto iter = startMap.find(addr); @@ -369,13 +376,30 @@ struct AddrExprMap { return nullptr; } + ExtraInfo getExtra(BinaryLocation addr) const { + auto iter = extraMap.find(addr); + if (iter != extraMap.end()) { + return iter->second; + } + return ExtraInfo{nullptr, BinaryLocations::Invalid}; + } + private: - void add(Expression* expr, BinaryLocations::Span span) { + void add(Expression* expr, const BinaryLocations::Span span) { assert(startMap.count(span.start) == 0); startMap[span.start] = expr; assert(endMap.count(span.end) == 0); endMap[span.end] = expr; } + + void add(Expression* expr, const BinaryLocations::DelimiterLocations& extra) { + for (Index i = 0; i < extra.size(); i++) { + if (extra[i] != 0) { + assert(extraMap.count(extra[i]) == 0); + extraMap[extra[i]] = ExtraInfo{expr, BinaryLocations::DelimiterId(i)}; + } + } + } }; // Represents a mapping of addresses to expressions. Note that we use a single @@ -423,7 +447,6 @@ struct LocationUpdater { const BinaryLocations& newLocations; AddrExprMap oldExprAddrMap; - AddrExprMap newExprAddrMap; FuncAddrMap oldFuncAddrMap; // TODO: for memory efficiency, we may want to do this in a streaming manner, @@ -431,7 +454,7 @@ struct LocationUpdater { LocationUpdater(Module& wasm, const BinaryLocations& newLocations) : wasm(wasm), newLocations(newLocations), oldExprAddrMap(wasm), - newExprAddrMap(newLocations), oldFuncAddrMap(wasm) {} + oldFuncAddrMap(wasm) {} // Updates an expression's address. If there was never an instruction at that // address, or if there was but if that instruction no longer exists, return @@ -448,15 +471,14 @@ struct LocationUpdater { } bool hasOldExprAddr(BinaryLocation oldAddr) const { - return oldExprAddrMap.getStart(oldAddr) != nullptr; + return oldExprAddrMap.getStart(oldAddr); } BinaryLocation getNewExprEndAddr(BinaryLocation oldAddr) const { if (auto* expr = oldExprAddrMap.getEnd(oldAddr)) { auto iter = newLocations.expressions.find(expr); if (iter != newLocations.expressions.end()) { - BinaryLocation newAddr = iter->second.end; - return newAddr; + return iter->second.end; } } return 0; @@ -478,6 +500,25 @@ struct LocationUpdater { } return 0; } + + bool hasOldFuncAddr(BinaryLocation oldAddr) const { + return oldFuncAddrMap.get(oldAddr); + } + + BinaryLocation getNewExtraAddr(BinaryLocation oldAddr) const { + auto info = oldExprAddrMap.getExtra(oldAddr); + if (info.expr) { + auto iter = newLocations.delimiters.find(info.expr); + if (iter != newLocations.delimiters.end()) { + return iter->second[info.id]; + } + } + return 0; + } + + bool hasOldExtraAddr(BinaryLocation oldAddr) const { + return oldExprAddrMap.getExtra(oldAddr).expr; + } }; static void updateDebugLines(llvm::DWARFYAML::Data& data, @@ -511,11 +552,15 @@ static void updateDebugLines(llvm::DWARFYAML::Data& data, BinaryLocation newAddr = 0; if (locationUpdater.hasOldExprAddr(oldAddr)) { newAddr = locationUpdater.getNewExprAddr(oldAddr); - } else { + } else if (locationUpdater.hasOldFuncAddr(oldAddr)) { newAddr = locationUpdater.getNewFuncAddr(oldAddr); + } else if (locationUpdater.hasOldExtraAddr(oldAddr)) { + newAddr = locationUpdater.getNewExtraAddr(oldAddr); } + // TODO: last 'end' of a function if (newAddr) { newAddrs.push_back(newAddr); + assert(newAddrInfo.count(newAddr) == 0); newAddrInfo.emplace(newAddr, state); auto& updatedState = newAddrInfo.at(newAddr); // The only difference is the address TODO other stuff? diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp index a96dbb153..dee0caa82 100644 --- a/src/wasm/wasm-stack.cpp +++ b/src/wasm/wasm-stack.cpp @@ -33,10 +33,13 @@ void BinaryInstWriter::visitIf(If* curr) { o << binaryType(curr->type != Type::unreachable ? curr->type : Type::none); } -void BinaryInstWriter::emitIfElse() { +void BinaryInstWriter::emitIfElse(If* curr) { assert(!breakStack.empty()); breakStack.pop_back(); - breakStack.emplace_back(IMPOSSIBLE_CONTINUE); // TODO dito + breakStack.emplace_back(IMPOSSIBLE_CONTINUE); // TODO same as If + if (func && !sourceMap) { + parent.writeExtraDebugLocation(curr, func, BinaryLocations::Else); + } o << int8_t(BinaryConsts::Else); } @@ -1595,10 +1598,13 @@ void BinaryInstWriter::visitTry(Try* curr) { o << binaryType(curr->type != Type::unreachable ? curr->type : Type::none); } -void BinaryInstWriter::emitCatch() { +void BinaryInstWriter::emitCatch(Try* curr) { assert(!breakStack.empty()); breakStack.pop_back(); breakStack.emplace_back(IMPOSSIBLE_CONTINUE); + if (func && !sourceMap) { + parent.writeExtraDebugLocation(curr, func, BinaryLocations::Catch); + } o << int8_t(BinaryConsts::Catch); } @@ -1633,9 +1639,12 @@ void BinaryInstWriter::visitPop(Pop* curr) { // Turns into nothing in the binary format } -void BinaryInstWriter::emitScopeEnd() { +void BinaryInstWriter::emitScopeEnd(Expression* curr) { assert(!breakStack.empty()); breakStack.pop_back(); + if (func && !sourceMap) { + parent.writeExtraDebugLocation(curr, func, BinaryLocations::End); + } o << int8_t(BinaryConsts::End); } @@ -1838,15 +1847,15 @@ void StackIRToBinaryWriter::write() { case StackInst::IfEnd: case StackInst::LoopEnd: case StackInst::TryEnd: { - writer.emitScopeEnd(); + writer.emitScopeEnd(inst->origin); break; } case StackInst::IfElse: { - writer.emitIfElse(); + writer.emitIfElse(inst->origin->cast<If>()); break; } case StackInst::Catch: { - writer.emitCatch(); + writer.emitCatch(inst->origin->cast<Try>()); break; } default: |