From f826df6e053e0541e16cc19ded8083cf8de7c59d Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Fri, 12 Feb 2021 14:41:57 +0900 Subject: [EH] Support reading/writing of delegate (#3561) This adds support for reading/writing of the new `delegate` instruction in the folded wast format, the stack IR format, the poppy IR format, and the binary format in Binaryen. We don't have a formal spec written down yet, but please refer to WebAssembly/exception-handling#137 and WebAssembly/exception-handling#146 for the informal semantics. In the current version of the spec, `delegate` is basically a rethrow, but with a branch-like immediate argument so that it can bypass other catches/delegates in between. `delegate` is not represented as a new `Expression`, but it is rather an option within a `Try` class, like `catch`/`catch_all`. One special thing about `delegate` is that, even though it is written _within_ a `try` in the folded wat format, like ```wasm (try (do ... ) (delegate $l) ) ``` in the unfolded wat format or in the binary format, `delegate` serves as a scope end instruction, so there is no separate `end`: ```wasm try ... delegate $l ``` `delegate` semantically targets an outer `catch` or `delegate`, but we write the `delegate` target as a `try` label because we only give labels to block-like scoping expressions. So far we have not given `Try` a label and used inner blocks or a wrapping block in case a branch targets the `try`. But in the case of `delegate`, it can syntactically only target a `try`, and if it targets blocks or loops it is a validation failure. So after discussions in #3497, we give `Try` a label, but this label can only be targeted by `delegate`s. Unfortunately this makes parsing and writing of the `Try` expression somewhat complicated. Also there is one special case: if the immediate argument of `delegate` is the same as the depth of the control flow stack, this means the `delegate` delegates to the caller. 
To handle this case, this adds a fake label `DELEGATE_CALLER_TARGET`, and when writing it back to the wast format writes it as an immediate value, unlike other cases in which we write labels. This uses `DELEGATE_FIELD_SCOPE_NAME_DEF/USE` to represent `try`'s label and `delegate`'s target. There are many cases in which `try` and `delegate`'s labels need to be treated in the same way as block and branch labels, such as for hashing or comparing. But there are routines in which we automatically assume all label uses are branches. I thought about adding a new kind of define such as `DELEGATE_FIELD_TRY_NAME_DEF/USE`, but I think it will also involve some duplication of existing routines or classes. So at the moment this PR chooses to use the existing `DELEGATE_FIELD_SCOPE_NAME_DEF/USE` for `try` and `delegate` labels and makes only the necessary amount of changes in branch-utils. We can revisit this decision later if necessary. Many of the changes to the existing test cases are because now all `try`s are automatically assigned a label. They will be removed in the `RemoveUnusedNames` pass in the same way as block labels if not targeted by any delegates. This only supports reading and writing and has not been tested against any optimization passes yet. 
--- Original unfolded wat file to generate test/try-delegate.wasm: ```wasm (module (event $e) (func try try delegate 0 catch $e end) (func try try catch $e i32.const 0 drop try delegate 1 end catch $e end ) ) ``` --- src/wasm/wasm-binary.cpp | 173 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 119 insertions(+), 54 deletions(-) (limited to 'src/wasm/wasm-binary.cpp') diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index f3ec376d1..878293caa 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1918,7 +1918,9 @@ void WasmBinaryBuilder::readFunctions() { debugLocation.clear(); willBeIgnored = false; // process body - assert(breakTargetNames.size() == 0); + assert(breakStack.empty()); + assert(breakTargetNames.empty()); + assert(delegateTargetNames.empty()); assert(breakStack.empty()); assert(expressionStack.empty()); assert(controlFlowStack.empty()); @@ -1926,8 +1928,9 @@ void WasmBinaryBuilder::readFunctions() { assert(depth == 0); func->body = getBlockOrSingleton(func->sig.results); assert(depth == 0); - assert(breakStack.size() == 0); - assert(breakTargetNames.size() == 0); + assert(breakStack.empty()); + assert(breakTargetNames.empty()); + assert(delegateTargetNames.empty()); if (!expressionStack.empty()) { throwError("stack not empty on function exit"); } @@ -2210,7 +2213,8 @@ void WasmBinaryBuilder::processExpressions() { } auto peek = input[pos]; if (peek == BinaryConsts::End || peek == BinaryConsts::Else || - peek == BinaryConsts::Catch || peek == BinaryConsts::CatchAll) { + peek == BinaryConsts::Catch || peek == BinaryConsts::CatchAll || + peek == BinaryConsts::Delegate) { BYN_TRACE("== processExpressions finished with unreachable" << std::endl); lastSeparator = BinaryConsts::ASTNodes(peek); @@ -2987,6 +2991,14 @@ BinaryConsts::ASTNodes WasmBinaryBuilder::readExpression(Expression*& curr) { } break; } + case BinaryConsts::Delegate: { + curr = nullptr; + if (DWARF && currFunction) { + 
assert(!controlFlowStack.empty()); + controlFlowStack.pop_back(); + } + break; + } case BinaryConsts::RefNull: visitRefNull((curr = allocator.alloc())->cast()); break; @@ -3376,7 +3388,8 @@ Expression* WasmBinaryBuilder::getBlockOrSingleton(Type type) { block->name = label; block->finalize(type); // maybe we don't need a block here? - if (breakTargetNames.find(block->name) == breakTargetNames.end()) { + if (breakTargetNames.find(block->name) == breakTargetNames.end() && + delegateTargetNames.find(block->name) == delegateTargetNames.end()) { block->name = Name(); if (block->list.size() == 1) { return block->list[0]; @@ -3452,6 +3465,28 @@ WasmBinaryBuilder::getBreakTarget(int32_t offset) { return ret; } +Name WasmBinaryBuilder::getDelegateTargetName(int32_t offset) { + BYN_TRACE("getDelegateTarget " << offset << std::endl); + // We always start parsing a function by creating a block label and pushing it + // in breakStack in getBlockOrSingleton, so if a 'delegate''s target is that + // block, it does not mean it targets that block; it throws to the caller. 
+ if (breakStack.size() - 1 == size_t(offset)) { + return DELEGATE_CALLER_TARGET; + } + size_t index = breakStack.size() - 1 - offset; + if (index > breakStack.size()) { + throwError("bad delegate index (high)"); + } + BYN_TRACE("delegate target " << breakStack[index].name << std::endl); + auto& ret = breakStack[index]; + // if the delegate is in literally unreachable code, then we will not emit it + // anyhow, so do not note that the target has delegate to it + if (!willBeIgnored) { + delegateTargetNames.insert(ret.name); + } + return ret.name; +} + void WasmBinaryBuilder::visitBreak(Break* curr, uint8_t code) { BYN_TRACE("zz node: Break, code " << int32_t(code) << std::endl); BreakTarget target = getBreakTarget(getU32LEB()); @@ -5746,58 +5781,13 @@ void WasmBinaryBuilder::visitTryOrTryInBlock(Expression*& out) { curr->type = getType(); curr->body = getBlockOrSingleton(curr->type); if (lastSeparator != BinaryConsts::Catch && - lastSeparator != BinaryConsts::CatchAll) { + lastSeparator != BinaryConsts::CatchAll && + lastSeparator != BinaryConsts::Delegate) { throwError("No catch instruction within a try scope"); } - // For simplicity, we create an inner block within the catch body too, but the - // one within the 'catch' *must* be omitted when we write out the binary back - // later, because the 'catch' instruction pushes a value onto the stack and - // the inner block does not support block input parameters without multivalue - // support. - // try - // ... - // catch ;; Pushes a value onto the stack - // block ;; Inner block. Should be deleted when writing binary! - // use the pushed value - // end - // end - // - // But when input binary code is like - // try - // ... - // catch - // br 0 - // end - // - // 'br 0' accidentally happens to target the inner block, creating code like - // this in Binaryen IR, making the inner block not deletable, resulting in a - // validation error: - // (try - // ... 
- // (catch - // (block $label0 ;; Cannot be deleted, because there's a branch to this - // ... - // (br $label0) - // ) - // ) - // ) - // - // When this happens, we fix this by creating a block that wraps the whole - // try-catch, and making the branches target that block instead, like this: - // (block $label ;; New enclosing block, new target for the branch - // (try - // ... - // (catch - // (block ;; Now this can be deleted when writing binary - // ... - // (br $label0) - // ) - // ) - // ) - // ) - Builder builder(wasm); + // A nameless label shared by all catch body blocks Name catchLabel = getNextLabel(); breakStack.push_back({catchLabel, curr->type}); @@ -5839,8 +5829,84 @@ void WasmBinaryBuilder::visitTryOrTryInBlock(Expression*& out) { readCatchBody(Type::none); } } + breakStack.pop_back(); + + if (lastSeparator == BinaryConsts::Delegate) { + curr->delegateTarget = getDelegateTargetName(getU32LEB()); + } + + // For simplicity, we make try's labels only can be targeted by delegates, and + // delegates can only target try's labels. (If they target blocks or loops, it + // is a validation failure.) Because we create an inner block within each try + // and catch body, if any delegate targets those inner blocks, we should make + // them target the try's label instead. + curr->name = getNextLabel(); + if (auto* block = curr->body->dynCast()) { + if (block->name.is()) { + if (delegateTargetNames.find(block->name) != delegateTargetNames.end()) { + BranchUtils::replaceDelegateTargets(block, block->name, curr->name); + delegateTargetNames.erase(block->name); + } + // maybe we don't need a block here? 
+ if (block->list.size() == 1) { + curr->body = block->list[0]; + } + } + } + if (delegateTargetNames.find(catchLabel) != delegateTargetNames.end()) { + for (auto* catchBody : curr->catchBodies) { + BranchUtils::replaceDelegateTargets(catchBody, catchLabel, curr->name); + } + delegateTargetNames.erase(catchLabel); + } curr->finalize(curr->type); + // For simplicity, we create an inner block within the catch body too, but the + // one within the 'catch' *must* be omitted when we write out the binary back + // later, because the 'catch' instruction pushes a value onto the stack and + // the inner block does not support block input parameters without multivalue + // support. + // try + // ... + // catch $e ;; Pushes value(s) onto the stack + // block ;; Inner block. Should be deleted when writing binary! + // use the pushed value + // end + // end + // + // But when input binary code is like + // try + // ... + // catch $e + // br 0 + // end + // + // 'br 0' accidentally happens to target the inner block, creating code like + // this in Binaryen IR, making the inner block not deletable, resulting in a + // validation error: + // (try + // ... + // (catch $e + // (block $label0 ;; Cannot be deleted, because there's a branch to this + // ... + // (br $label0) + // ) + // ) + // ) + // + // When this happens, we fix this by creating a block that wraps the whole + // try-catch, and making the branches target that block instead, like this: + // (block $label ;; New enclosing block, new target for the branch + // (try + // ... + // (catch $e + // (block ;; Now this can be deleted when writing binary + // ... + // (br $label0) + // ) + // ) + // ) + // ) if (breakTargetNames.find(catchLabel) == breakTargetNames.end()) { out = curr; } else { @@ -5848,7 +5914,6 @@ void WasmBinaryBuilder::visitTryOrTryInBlock(Expression*& out) { auto* block = builder.makeBlock(catchLabel, curr); out = block; } - breakStack.pop_back(); breakTargetNames.erase(catchLabel); } -- cgit v1.2.3