diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/wasm-binary.h | 15 | ||||
-rw-r--r-- | src/wasm-stack.h | 16 | ||||
-rw-r--r-- | src/wasm.h | 34 | ||||
-rw-r--r-- | src/wasm/wasm-binary.cpp | 69 | ||||
-rw-r--r-- | src/wasm/wasm-debug.cpp | 71 | ||||
-rw-r--r-- | src/wasm/wasm-stack.cpp | 23 |
6 files changed, 196 insertions, 32 deletions
diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 5d601c6e9..5cfafb509 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1015,6 +1015,9 @@ public: void writeDebugLocation(const Function::DebugLocation& loc); void writeDebugLocation(Expression* curr, Function* func); void writeDebugLocationEnd(Expression* curr, Function* func); + void writeExtraDebugLocation(Expression* curr, + Function* func, + BinaryLocations::DelimiterId id); // helpers void writeInlineString(const char* name); @@ -1185,6 +1188,18 @@ public: std::vector<Expression*> expressionStack; + // Control flow structure parsing: these have not just the normal binary + // data for an instruction, but also some bytes later on like "end" or "else". + // We must be aware of the connection between those things, for debug info. + std::vector<Expression*> controlFlowStack; + + // Called when we parse the beginning of a control flow structure. + void startControlFlow(Expression* curr); + + // Called when we parse a later part of a control flow structure, like "end" + // or "else". + void continueControlFlow(BinaryLocations::DelimiterId id, BinaryLocation pos); + // set when we know code is unreachable in the sense of the wasm spec: we are // in a block and after an unreachable element. this helps parse stacky wasm // code, which can be unsuitable for our IR when unreachable. diff --git a/src/wasm-stack.h b/src/wasm-stack.h index 7ca30f369..863bbbf51 100644 --- a/src/wasm-stack.h +++ b/src/wasm-stack.h @@ -144,10 +144,12 @@ public: void visitPush(Push* curr); void visitPop(Pop* curr); - void emitIfElse(); - void emitCatch(); - void emitScopeEnd(); // emit an end at the end of a block/loop/if/try - void emitFunctionEnd(); // emit an end at the end of a function + void emitIfElse(If* curr); + void emitCatch(Try* curr); + // emit an end at the end of a block/loop/if/try + void emitScopeEnd(Expression* curr); + // emit an end at the end of a function + void emitFunctionEnd(); void emitUnreachable(); void mapLocalsAndEmitHeader(); @@ -813,9 +815,9 @@ public: } writer.mapLocalsAndEmitHeader(); } - void emitIfElse(If* curr) { writer.emitIfElse(); } - void emitCatch(Try* curr) { writer.emitCatch(); } - void emitScopeEnd(Expression* curr) { writer.emitScopeEnd(); } + void emitIfElse(If* curr) { writer.emitIfElse(curr); } + void emitCatch(Try* curr) { writer.emitCatch(curr); } + void emitScopeEnd(Expression* curr) { writer.emitScopeEnd(curr); } void emitFunctionEnd() { if (func->epilogLocation.size()) { parent.writeDebugLocation(*func->epilogLocation.begin()); diff --git a/src/wasm.h b/src/wasm.h index 8b60885e3..f30a24d6b 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -1172,7 +1172,39 @@ struct BinaryLocations { struct Span { BinaryLocation start = 0, end = 0; }; + + // Track the range of addresses an expressions appears at. This is the + // contiguous range that all instructions have - control flow instructions + // have additional opcodes later (like an end for a block or loop), see + // just after this. std::unordered_map<Expression*, Span> expressions; + + // Track the extra delimiter positions that some instructions, in particular + // control flow, have, like 'end' for loop and block. We keep these in a + // separate map because they are rare and we optimize for the storage space + // for the common type of instruction which just needs a Span. We implement + // this as a simple struct with two elements (as two extra elements is the + // maximum currently needed; due to 'catch' and 'end' for try-catch). The + // second value may be 0, indicating it is not used. + struct DelimiterLocations : public std::array<BinaryLocation, 2> { + DelimiterLocations() { + // Ensure zero-initialization. + for (auto& item : *this) { + item = 0; + } + } + }; + + enum DelimiterId { + // All control flow structures have an end, so use index 0 for that. + End = 0, + // Use index 1 for all other current things. + Else = 1, + Catch = 1, + Invalid = -1 + }; + std::unordered_map<Expression*, DelimiterLocations> delimiters; + std::unordered_map<Function*, Span> functions; }; @@ -1231,6 +1263,8 @@ public: // General debugging info support: track instructions and the function itself. std::unordered_map<Expression*, BinaryLocations::Span> expressionLocations; + std::unordered_map<Expression*, BinaryLocations::DelimiterLocations> + delimiterLocations; BinaryLocations::Span funcLocation; size_t getNumParams(); diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 9c7eb55f2..f834666b4 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -167,6 +167,11 @@ void WasmBinaryWriter::finishSection(int32_t start) { pair.second.start -= totalAdjustment; pair.second.end -= totalAdjustment; } + for (auto& pair : binaryLocations.delimiters) { + for (auto& item : pair.second) { + item -= totalAdjustment; + } + } } } @@ -343,6 +348,12 @@ void WasmBinaryWriter::writeFunctions() { auto& span = binaryLocations.expressions[curr]; span.start -= adjustmentForLEBShrinking; span.end -= adjustmentForLEBShrinking; + auto iter = binaryLocations.delimiters.find(curr); + if (iter != binaryLocations.delimiters.end()) { + for (auto& item : iter->second) { + item -= adjustmentForLEBShrinking; + } + } } } if (!binaryLocationTrackedExpressionsForFunc.empty()) { @@ -726,6 +737,13 @@ void WasmBinaryWriter::writeDebugLocationEnd(Expression* curr, Function* func) { } } +void WasmBinaryWriter::writeExtraDebugLocation( + Expression* curr, Function* func, BinaryLocations::DelimiterId id) { + if (func && !func->expressionLocations.empty()) { + binaryLocations.delimiters[curr][id] = o.size(); + } +} + void WasmBinaryWriter::writeInlineString(const char* name) { int32_t size = strlen(name); o << U32LEB(size); @@ -1411,6 +1429,7 @@ void WasmBinaryBuilder::readFunctions() { assert(breakTargetNames.size() == 0); assert(breakStack.empty()); assert(expressionStack.empty()); + assert(controlFlowStack.empty()); assert(depth == 0); func->body = getBlockOrSingleton(func->sig.results); assert(depth == 0); @@ -1419,6 +1438,7 @@ void WasmBinaryBuilder::readFunctions() { if (!expressionStack.empty()) { throwError("stack not empty on function exit"); } + assert(controlFlowStack.empty()); if (pos != endOfFunction) { throwError("binary offset at function exit not at expected location"); } @@ -1675,11 +1695,11 @@ void WasmBinaryBuilder::processExpressions() { } expressionStack.push_back(curr); if (curr->type == Type::unreachable) { - // once we see something unreachable, we don't want to add anything else + // Once we see something unreachable, we don't want to add anything else // to the stack, as it could be stacky code that is non-representable in - // our AST. but we do need to skip it - // if there is nothing else here, just stop. otherwise, go into - // unreachable mode. peek to see what to do + // our AST. but we do need to skip it. + // If there is nothing else here, just stop. Otherwise, go into + // unreachable mode. peek to see what to do. if (pos == endOfFunction) { throwError("Reached function end without seeing End opcode"); } @@ -2172,9 +2192,16 @@ BinaryConsts::ASTNodes WasmBinaryBuilder::readExpression(Expression*& curr) { visitDrop((curr = allocator.alloc<Drop>())->cast<Drop>()); break; case BinaryConsts::End: + curr = nullptr; + continueControlFlow(BinaryLocations::End, startPos); + break; case BinaryConsts::Else: + curr = nullptr; + continueControlFlow(BinaryLocations::Else, startPos); + break; case BinaryConsts::Catch: curr = nullptr; + continueControlFlow(BinaryLocations::Catch, startPos); break; case BinaryConsts::RefNull: visitRefNull((curr = allocator.alloc<RefNull>())->cast<RefNull>()); @@ -2317,6 +2344,34 @@ BinaryConsts::ASTNodes WasmBinaryBuilder::readExpression(Expression*& curr) { return BinaryConsts::ASTNodes(code); } +void WasmBinaryBuilder::startControlFlow(Expression* curr) { + if (DWARF && currFunction) { + controlFlowStack.push_back(curr); + } +} + +void WasmBinaryBuilder::continueControlFlow(BinaryLocations::DelimiterId id, + BinaryLocation pos) { + if (DWARF && currFunction) { + if (controlFlowStack.empty()) { + // We reached the end of the function, which is also marked with an + // "end", like a control flow structure. + assert(id == BinaryLocations::End); + assert(pos + 1 == endOfFunction); + return; + } + assert(!controlFlowStack.empty()); + auto currControlFlow = controlFlowStack.back(); + // We are called after parsing the byte, so we need to subtract one to + // get its position. + currFunction->delimiterLocations[currControlFlow][id] = + pos - codeSectionLocation; + if (id == BinaryLocations::End) { + controlFlowStack.pop_back(); + } + } +} + void WasmBinaryBuilder::pushBlockElements(Block* curr, size_t start, size_t end) { @@ -2361,7 +2416,7 @@ void WasmBinaryBuilder::pushBlockElements(Block* curr, void WasmBinaryBuilder::visitBlock(Block* curr) { BYN_TRACE("zz node: Block\n"); - + startControlFlow(curr); // special-case Block and de-recurse nested blocks in their first position, as // that is a common pattern that can be very highly nested. std::vector<Block*> stack; @@ -2374,6 +2429,7 @@ void WasmBinaryBuilder::visitBlock(Block* curr) { // a recursion readNextDebugLocation(); curr = allocator.alloc<Block>(); + startControlFlow(curr); pos++; if (debugLocation.size()) { currFunction->debugLocations[curr] = *debugLocation.begin(); @@ -2449,6 +2505,7 @@ Expression* WasmBinaryBuilder::getBlockOrSingleton(Type type, void WasmBinaryBuilder::visitIf(If* curr) { BYN_TRACE("zz node: If\n"); + startControlFlow(curr); curr->type = getType(); curr->condition = popNonVoidExpression(); curr->ifTrue = getBlockOrSingleton(curr->type); @@ -2463,6 +2520,7 @@ void WasmBinaryBuilder::visitIf(If* curr) { void WasmBinaryBuilder::visitLoop(Loop* curr) { BYN_TRACE("zz node: Loop\n"); + startControlFlow(curr); curr->type = getType(); curr->name = getNextLabel(); breakStack.push_back({curr->name, 0}); @@ -4469,6 +4527,7 @@ void WasmBinaryBuilder::visitRefFunc(RefFunc* curr) { void WasmBinaryBuilder::visitTry(Try* curr) { BYN_TRACE("zz node: Try\n"); + startControlFlow(curr); // For simplicity of implementation, like if scopes, we create a hidden block // within each try-body and catch-body, and let branches target those inner // blocks instead. diff --git a/src/wasm/wasm-debug.cpp b/src/wasm/wasm-debug.cpp index a8b70247f..aa596aa61 100644 --- a/src/wasm/wasm-debug.cpp +++ b/src/wasm/wasm-debug.cpp @@ -337,21 +337,28 @@ struct AddrExprMap { std::unordered_map<BinaryLocation, Expression*> startMap; std::unordered_map<BinaryLocation, Expression*> endMap; + // Some instructions have extra binary locations, like the else and end in + // and if. Track those separately, including their expression and their id + // ("else", "end", etc.), as they are rare, and we don't want to + // bloat the common case which is represented in the earlier maps. + struct ExtraInfo { + Expression* expr; + BinaryLocations::DelimiterId id; + }; + std::unordered_map<BinaryLocation, ExtraInfo> extraMap; + // Construct the map from the binaryLocations loaded from the wasm. AddrExprMap(const Module& wasm) { for (auto& func : wasm.functions) { for (auto pair : func->expressionLocations) { add(pair.first, pair.second); } + for (auto pair : func->delimiterLocations) { + add(pair.first, pair.second); + } } } - // Construct the map from new binaryLocations just written - AddrExprMap(const BinaryLocations& newLocations) { - for (auto pair : newLocations.expressions) { - add(pair.first, pair.second); - } - } Expression* getStart(BinaryLocation addr) const { auto iter = startMap.find(addr); @@ -369,13 +376,30 @@ struct AddrExprMap { return nullptr; } + ExtraInfo getExtra(BinaryLocation addr) const { + auto iter = extraMap.find(addr); + if (iter != extraMap.end()) { + return iter->second; + } + return ExtraInfo{nullptr, BinaryLocations::Invalid}; + } + private: - void add(Expression* expr, BinaryLocations::Span span) { + void add(Expression* expr, const BinaryLocations::Span span) { assert(startMap.count(span.start) == 0); startMap[span.start] = expr; assert(endMap.count(span.end) == 0); endMap[span.end] = expr; } + + void add(Expression* expr, const BinaryLocations::DelimiterLocations& extra) { + for (Index i = 0; i < extra.size(); i++) { + if (extra[i] != 0) { + assert(extraMap.count(extra[i]) == 0); + extraMap[extra[i]] = ExtraInfo{expr, BinaryLocations::DelimiterId(i)}; + } + } + } }; // Represents a mapping of addresses to expressions. Note that we use a single @@ -423,7 +447,6 @@ struct LocationUpdater { const BinaryLocations& newLocations; AddrExprMap oldExprAddrMap; - AddrExprMap newExprAddrMap; FuncAddrMap oldFuncAddrMap; // TODO: for memory efficiency, we may want to do this in a streaming manner, @@ -431,7 +454,7 @@ struct LocationUpdater { LocationUpdater(Module& wasm, const BinaryLocations& newLocations) : wasm(wasm), newLocations(newLocations), oldExprAddrMap(wasm), - newExprAddrMap(newLocations), oldFuncAddrMap(wasm) {} + oldFuncAddrMap(wasm) {} // Updates an expression's address. If there was never an instruction at that // address, or if there was but if that instruction no longer exists, return @@ -448,15 +471,14 @@ struct LocationUpdater { } bool hasOldExprAddr(BinaryLocation oldAddr) const { - return oldExprAddrMap.getStart(oldAddr) != nullptr; + return oldExprAddrMap.getStart(oldAddr); } BinaryLocation getNewExprEndAddr(BinaryLocation oldAddr) const { if (auto* expr = oldExprAddrMap.getEnd(oldAddr)) { auto iter = newLocations.expressions.find(expr); if (iter != newLocations.expressions.end()) { - BinaryLocation newAddr = iter->second.end; - return newAddr; + return iter->second.end; } } return 0; @@ -478,6 +500,25 @@ struct LocationUpdater { } return 0; } + + bool hasOldFuncAddr(BinaryLocation oldAddr) const { + return oldFuncAddrMap.get(oldAddr); + } + + BinaryLocation getNewExtraAddr(BinaryLocation oldAddr) const { + auto info = oldExprAddrMap.getExtra(oldAddr); + if (info.expr) { + auto iter = newLocations.delimiters.find(info.expr); + if (iter != newLocations.delimiters.end()) { + return iter->second[info.id]; + } + } + return 0; + } + + bool hasOldExtraAddr(BinaryLocation oldAddr) const { + return oldExprAddrMap.getExtra(oldAddr).expr; + } }; static void updateDebugLines(llvm::DWARFYAML::Data& data, @@ -511,11 +552,15 @@ static void updateDebugLines(llvm::DWARFYAML::Data& data, BinaryLocation newAddr = 0; if (locationUpdater.hasOldExprAddr(oldAddr)) { newAddr = locationUpdater.getNewExprAddr(oldAddr); - } else { + } else if (locationUpdater.hasOldFuncAddr(oldAddr)) { newAddr = locationUpdater.getNewFuncAddr(oldAddr); + } else if (locationUpdater.hasOldExtraAddr(oldAddr)) { + newAddr = locationUpdater.getNewExtraAddr(oldAddr); } + // TODO: last 'end' of a function if (newAddr) { newAddrs.push_back(newAddr); + assert(newAddrInfo.count(newAddr) == 0); newAddrInfo.emplace(newAddr, state); auto& updatedState = newAddrInfo.at(newAddr); // The only difference is the address TODO other stuff? diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp index a96dbb153..dee0caa82 100644 --- a/src/wasm/wasm-stack.cpp +++ b/src/wasm/wasm-stack.cpp @@ -33,10 +33,13 @@ void BinaryInstWriter::visitIf(If* curr) { o << binaryType(curr->type != Type::unreachable ? curr->type : Type::none); } -void BinaryInstWriter::emitIfElse() { +void BinaryInstWriter::emitIfElse(If* curr) { assert(!breakStack.empty()); breakStack.pop_back(); - breakStack.emplace_back(IMPOSSIBLE_CONTINUE); // TODO dito + breakStack.emplace_back(IMPOSSIBLE_CONTINUE); // TODO same as If + if (func && !sourceMap) { + parent.writeExtraDebugLocation(curr, func, BinaryLocations::Else); + } o << int8_t(BinaryConsts::Else); } @@ -1595,10 +1598,13 @@ void BinaryInstWriter::visitTry(Try* curr) { o << binaryType(curr->type != Type::unreachable ? curr->type : Type::none); } -void BinaryInstWriter::emitCatch() { +void BinaryInstWriter::emitCatch(Try* curr) { assert(!breakStack.empty()); breakStack.pop_back(); breakStack.emplace_back(IMPOSSIBLE_CONTINUE); + if (func && !sourceMap) { + parent.writeExtraDebugLocation(curr, func, BinaryLocations::Catch); + } o << int8_t(BinaryConsts::Catch); } @@ -1633,9 +1639,12 @@ void BinaryInstWriter::visitPop(Pop* curr) { // Turns into nothing in the binary format } -void BinaryInstWriter::emitScopeEnd() { +void BinaryInstWriter::emitScopeEnd(Expression* curr) { assert(!breakStack.empty()); breakStack.pop_back(); + if (func && !sourceMap) { + parent.writeExtraDebugLocation(curr, func, BinaryLocations::End); + } o << int8_t(BinaryConsts::End); } @@ -1838,15 +1847,15 @@ void StackIRToBinaryWriter::write() { case StackInst::IfEnd: case StackInst::LoopEnd: case StackInst::TryEnd: { - writer.emitScopeEnd(); + writer.emitScopeEnd(inst->origin); break; } case StackInst::IfElse: { - writer.emitIfElse(); + writer.emitIfElse(inst->origin->cast<If>()); break; } case StackInst::Catch: { - writer.emitCatch(); + writer.emitCatch(inst->origin->cast<Try>()); break; } default: |