diff options
author | Thomas Lively <7121787+tlively@users.noreply.github.com> | 2020-09-10 17:49:56 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-09-10 17:49:56 -0700 |
commit | cd6f0d908f0e4c68d72fd476a6e0e7cfb7ae8595 (patch) | |
tree | f5fdb821fffa3dc54c49daa753ae33686553f3dc /src/wasm-stack.h | |
parent | 739144d96d162b430d5fd54e4fe11b8ce2d34d47 (diff) | |
download | binaryen-cd6f0d908f0e4c68d72fd476a6e0e7cfb7ae8595.tar.gz binaryen-cd6f0d908f0e4c68d72fd476a6e0e7cfb7ae8595.tar.bz2 binaryen-cd6f0d908f0e4c68d72fd476a6e0e7cfb7ae8595.zip |
Simplify BinaryenIRWriter (#3110)
BinaryenIRWriter was previously inconsistent about whether or not it
emitted an instruction if that instruction was not reachable.
Instructions that produced values were not emitted if they were
unreachable, but instructions that did not produce values were always
emitted. Additionally, blocks continued to emit their children even
after emitting an unreachable child.
Since it was not possible to tell whether an unreachable instruction's
parent would be emitted, BinaryenIRWriter had to be very defensive and
emit many extra `unreachable` instructions around unreachable code to
avoid type errors.
This PR unifies the logic for emitting all non-control flow
instructions and changes the behavior of BinaryenIRWriter so that it
never emits instructions that cannot be reached due to having
unreachable children. This means that extra `unreachable` instructions
now only need to be emitted after unreachable control flow
constructs. BinaryenIRWriter now also stops emitting instructions
inside blocks after the first unreachable instruction as an extra
optimization.
This change will also simplify Poppy IR stackification (see #3059) by
guaranteeing that instructions with unreachable children will not be
emitted into the stackifier. This makes satisfying the Poppy IR rule
against unreachable Pops trivial, whereas previously satisfying this
rule would have required about about 700 additional lines of code to
recompute the types of all unreachable children for any instruction.
Diffstat (limited to 'src/wasm-stack.h')
-rw-r--r-- | src/wasm-stack.h | 554 |
1 files changed, 57 insertions, 497 deletions
diff --git a/src/wasm-stack.h b/src/wasm-stack.h index d80618225..ed9245d4f 100644 --- a/src/wasm-stack.h +++ b/src/wasm-stack.h @@ -18,6 +18,7 @@ #define wasm_stack_h #include "ir/branch-utils.h" +#include "ir/properties.h" #include "pass.h" #include "wasm-binary.h" #include "wasm-traversal.h" @@ -182,7 +183,7 @@ private: // Takes binaryen IR and converts it to something else (binary or stack IR) template<typename SubType> -class BinaryenIRWriter : public OverriddenVisitor<BinaryenIRWriter<SubType>> { +class BinaryenIRWriter : public Visitor<BinaryenIRWriter<SubType>> { public: BinaryenIRWriter(Function* func) : func(func) {} @@ -194,50 +195,7 @@ public: void visitBlock(Block* curr); void visitIf(If* curr); void visitLoop(Loop* curr); - void visitBreak(Break* curr); - void visitSwitch(Switch* curr); - void visitCall(Call* curr); - void visitCallIndirect(CallIndirect* curr); - void visitLocalGet(LocalGet* curr); - void visitLocalSet(LocalSet* curr); - void visitGlobalGet(GlobalGet* curr); - void visitGlobalSet(GlobalSet* curr); - void visitLoad(Load* curr); - void visitStore(Store* curr); - void visitAtomicRMW(AtomicRMW* curr); - void visitAtomicCmpxchg(AtomicCmpxchg* curr); - void visitAtomicWait(AtomicWait* curr); - void visitAtomicNotify(AtomicNotify* curr); - void visitAtomicFence(AtomicFence* curr); - void visitSIMDExtract(SIMDExtract* curr); - void visitSIMDReplace(SIMDReplace* curr); - void visitSIMDShuffle(SIMDShuffle* curr); - void visitSIMDTernary(SIMDTernary* curr); - void visitSIMDShift(SIMDShift* curr); - void visitSIMDLoad(SIMDLoad* curr); - void visitMemoryInit(MemoryInit* curr); - void visitDataDrop(DataDrop* curr); - void visitMemoryCopy(MemoryCopy* curr); - void visitMemoryFill(MemoryFill* curr); - void visitConst(Const* curr); - void visitUnary(Unary* curr); - void visitBinary(Binary* curr); - void visitSelect(Select* curr); - void visitReturn(Return* curr); - void visitHost(Host* curr); - void visitRefNull(RefNull* curr); - void visitRefIsNull(RefIsNull* curr); - void visitRefFunc(RefFunc* curr); void visitTry(Try* curr); - void visitThrow(Throw* curr); - void visitRethrow(Rethrow* curr); - void visitBrOnExn(BrOnExn* curr); - void visitNop(Nop* curr); - void visitUnreachable(Unreachable* curr); - void visitDrop(Drop* curr); - void visitPop(Pop* curr); - void visitTupleMake(TupleMake* curr); - void visitTupleExtract(TupleExtract* curr); protected: Function* func = nullptr; @@ -275,19 +233,44 @@ void BinaryenIRWriter<SubType>::visitPossibleBlockContents(Expression* curr) { } for (auto* child : block->list) { visit(child); - } - if (block->type == Type::unreachable && - block->list.back()->type != Type::unreachable) { - // similar to in visitBlock, here we could skip emitting the block itself, - // but must still end the 'block' (the contents, really) with an unreachable - emitUnreachable(); + // Since this child was unreachable, either this child or one of its + // descendants was a source of unreachability that was actually + // emitted. Subsequent children won't be reachable, so skip them. + if (child->type == Type::unreachable) { + break; + } } } template<typename SubType> void BinaryenIRWriter<SubType>::visit(Expression* curr) { emitDebugLocation(curr); - OverriddenVisitor<BinaryenIRWriter>::visit(curr); + // We emit unreachable instructions that create unreachability, but not + // unreachable instructions that just inherit unreachability from their + // children, since the latter won't be reached. This (together with logic in + // the control flow visitors) also ensures that the final instruction in each + // unreachable block is a source of unreachability, which means we don't need + // to emit an extra `unreachable` before the end of the block to prevent type + // errors. + bool hasUnreachableChild = false; + for (auto* child : ValueChildIterator(curr)) { + visit(child); + if (child->type == Type::unreachable) { + hasUnreachableChild = true; + break; + } + } + if (hasUnreachableChild) { + // `curr` is not reachable, so don't emit it. + return; + } + // Control flow requires special handling, but most instructions can be + // emitted directly after their children. + if (Properties::isControlFlowStructure(curr)) { + Visitor<BinaryenIRWriter>::visit(curr); + } else { + emit(curr); + } } template<typename SubType> @@ -295,22 +278,26 @@ void BinaryenIRWriter<SubType>::visitBlock(Block* curr) { auto visitChildren = [this](Block* curr, Index from) { auto& list = curr->list; while (from < list.size()) { - visit(list[from++]); + auto* child = list[from]; + visit(child); + if (child->type == Type::unreachable) { + break; + } + ++from; } }; auto afterChildren = [this](Block* curr) { - if (curr->type == Type::unreachable) { - // an unreachable block is one that cannot be exited. We cannot encode - // this directly in wasm, where blocks must be none,i32,i64,f32,f64. Since - // the block cannot be exited, we can emit an unreachable at the end, and - // that will always be valid, and then the block is ok as a none - emitUnreachable(); - } emitScopeEnd(curr); if (curr->type == Type::unreachable) { - // and emit an unreachable *outside* the block too, so later things can - // pop anything + // Since this block is unreachable, no instructions will be emitted after + // it in its enclosing scope. That means that this block will be the last + // instruction before the end of its parent scope, so its type must match + // the type of its parent. But we don't have a concrete type for this + // block and we don't know what type its parent expects, so we can't + // ensure the types match. To work around this, we insert an `unreachable` + // instruction after every unreachable control flow structure and depend + // on its polymorphic behavior to paper over any type mismatches. emitUnreachable(); } }; @@ -331,12 +318,16 @@ void BinaryenIRWriter<SubType>::visitBlock(Block* curr) { emit(curr); visitChildren(curr, 0); afterChildren(curr); + bool childUnreachable = curr->type == Type::unreachable; // Finish the later parts of all the parent blocks. while (!parents.empty()) { auto* parent = parents.back(); parents.pop_back(); - visitChildren(parent, 1); + if (!childUnreachable) { + visitChildren(parent, 1); + } afterChildren(parent); + childUnreachable = parent->type == Type::unreachable; } return; } @@ -347,13 +338,6 @@ void BinaryenIRWriter<SubType>::visitBlock(Block* curr) { } template<typename SubType> void BinaryenIRWriter<SubType>::visitIf(If* curr) { - visit(curr->condition); - if (curr->condition->type == Type::unreachable) { - // this if-else is unreachable because of the condition, i.e., the condition - // does not exit. So don't emit the if (but do consume the condition) - emitUnreachable(); - return; - } emit(curr); visitPossibleBlockContents(curr->ifTrue); @@ -364,11 +348,10 @@ template<typename SubType> void BinaryenIRWriter<SubType>::visitIf(If* curr) { emitScopeEnd(curr); if (curr->type == Type::unreachable) { - // we already handled the case of the condition being unreachable. - // otherwise, we may still be unreachable, if we are an if-else with both - // sides unreachable. wasm does not allow this to be emitted directly, so we - // must do something more. we could do better, but for now we emit an extra - // unreachable instruction after the if, so it is not consumed itself, + // We already handled the case of the condition being unreachable in + // `visit`. Otherwise, we may still be unreachable, if we are an if-else + // with both sides unreachable. Just like with blocks, we emit an extra + // `unreachable` to work around potential type mismatches. assert(curr->ifFalse); emitUnreachable(); } @@ -378,11 +361,6 @@ template<typename SubType> void BinaryenIRWriter<SubType>::visitLoop(Loop* curr) { emit(curr); visitPossibleBlockContents(curr->body); - if (curr->type == Type::unreachable) { - // we emitted a loop without a return type, and the body might be block - // contents, so ensure it is not consumed - emitUnreachable(); - } emitScopeEnd(curr); if (curr->type == Type::unreachable) { // we emitted a loop without a return type, so it must not be consumed @@ -390,361 +368,6 @@ void BinaryenIRWriter<SubType>::visitLoop(Loop* curr) { } } -template<typename SubType> -void BinaryenIRWriter<SubType>::visitBreak(Break* curr) { - if (curr->value) { - visit(curr->value); - } - if (curr->condition) { - visit(curr->condition); - } - emit(curr); - if (curr->condition && curr->type == Type::unreachable) { - // a br_if is normally none or emits a value. if it is unreachable, then - // either the condition or the value is unreachable, which is extremely - // rare, and may require us to make the stack polymorphic (if the block we - // branch to has a value, we may lack one as we are not a reachable branch; - // the wasm spec on the other hand does presume the br_if emits a value of - // the right type, even if it popped unreachable) - emitUnreachable(); - } -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitSwitch(Switch* curr) { - if (curr->value) { - visit(curr->value); - } - visit(curr->condition); - if (!BranchUtils::isBranchReachable(curr)) { - // if the branch is not reachable, then it's dangerous to emit it, as wasm - // type checking rules are different, especially in unreachable code. so - // just don't emit that unreachable code. - emitUnreachable(); - return; - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitCall(Call* curr) { - for (auto* operand : curr->operands) { - visit(operand); - } - - // For non-control-flow value-returning instructions, if the type of an - // expression is unreachable, we emit an unreachable and don't emit the - // instruction itself. If we don't emit an unreachable, instructions that - // follow can have a validation failure in wasm binary format. For example: - // [unreachable] (f32.add - // [unreachable] (i32.eqz - // [unreachable] (unreachable) - // ) - // ... - // ) - // This is a valid prgram in binaryen IR, because the unreachable type - // propagates out of an expression, making both i32.eqz and f32.add - // unreachable. But in binary format, this becomes: - // unreachable - // i32.eqz - // f32.add ;; validation failure; it takes an i32! - // And here f32.add causes validation failure in wasm validation. So in this - // case we add an unreachable to prevent following instructions to consume - // the current value (here i32.eqz). - // - // The same applies for other expressions. - if (curr->type == Type::unreachable && !curr->isReturn) { - emitUnreachable(); - return; - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitCallIndirect(CallIndirect* curr) { - for (auto* operand : curr->operands) { - visit(operand); - } - visit(curr->target); - if (curr->type == Type::unreachable && !curr->isReturn) { - emitUnreachable(); - return; - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitLocalGet(LocalGet* curr) { - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitLocalSet(LocalSet* curr) { - visit(curr->value); - if (curr->isTee() && curr->type == Type::unreachable) { - emitUnreachable(); - return; - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitGlobalGet(GlobalGet* curr) { - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitGlobalSet(GlobalSet* curr) { - visit(curr->value); - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitLoad(Load* curr) { - visit(curr->ptr); - if (curr->type == Type::unreachable) { - emitUnreachable(); - return; - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitStore(Store* curr) { - visit(curr->ptr); - visit(curr->value); - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitAtomicRMW(AtomicRMW* curr) { - visit(curr->ptr); - visit(curr->value); - if (curr->type == Type::unreachable) { - emitUnreachable(); - return; - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitAtomicCmpxchg(AtomicCmpxchg* curr) { - visit(curr->ptr); - visit(curr->expected); - visit(curr->replacement); - if (curr->type == Type::unreachable) { - emitUnreachable(); - return; - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitAtomicWait(AtomicWait* curr) { - visit(curr->ptr); - visit(curr->expected); - visit(curr->timeout); - if (curr->type == Type::unreachable) { - emitUnreachable(); - return; - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitAtomicNotify(AtomicNotify* curr) { - visit(curr->ptr); - visit(curr->notifyCount); - if (curr->type == Type::unreachable) { - emitUnreachable(); - return; - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitAtomicFence(AtomicFence* curr) { - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitSIMDExtract(SIMDExtract* curr) { - visit(curr->vec); - if (curr->type == Type::unreachable) { - emitUnreachable(); - return; - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitSIMDReplace(SIMDReplace* curr) { - visit(curr->vec); - visit(curr->value); - if (curr->type == Type::unreachable) { - emitUnreachable(); - return; - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitSIMDShuffle(SIMDShuffle* curr) { - visit(curr->left); - visit(curr->right); - if (curr->type == Type::unreachable) { - emitUnreachable(); - return; - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitSIMDTernary(SIMDTernary* curr) { - visit(curr->a); - visit(curr->b); - visit(curr->c); - if (curr->type == Type::unreachable) { - emitUnreachable(); - return; - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitSIMDShift(SIMDShift* curr) { - visit(curr->vec); - visit(curr->shift); - if (curr->type == Type::unreachable) { - emitUnreachable(); - return; - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitSIMDLoad(SIMDLoad* curr) { - visit(curr->ptr); - if (curr->type == Type::unreachable) { - emitUnreachable(); - return; - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitMemoryInit(MemoryInit* curr) { - visit(curr->dest); - visit(curr->offset); - visit(curr->size); - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitDataDrop(DataDrop* curr) { - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitMemoryCopy(MemoryCopy* curr) { - visit(curr->dest); - visit(curr->source); - visit(curr->size); - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitMemoryFill(MemoryFill* curr) { - visit(curr->dest); - visit(curr->value); - visit(curr->size); - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitConst(Const* curr) { - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitUnary(Unary* curr) { - visit(curr->value); - if (curr->type == Type::unreachable) { - emitUnreachable(); - return; - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitBinary(Binary* curr) { - visit(curr->left); - visit(curr->right); - if (curr->type == Type::unreachable) { - emitUnreachable(); - return; - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitSelect(Select* curr) { - visit(curr->ifTrue); - visit(curr->ifFalse); - visit(curr->condition); - if (curr->type == Type::unreachable) { - emitUnreachable(); - return; - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitReturn(Return* curr) { - if (curr->value) { - visit(curr->value); - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitHost(Host* curr) { - switch (curr->op) { - case MemorySize: { - break; - } - case MemoryGrow: { - visit(curr->operands[0]); - break; - } - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitRefNull(RefNull* curr) { - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitRefIsNull(RefIsNull* curr) { - visit(curr->value); - if (curr->type == Type::unreachable) { - emitUnreachable(); - return; - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitRefFunc(RefFunc* curr) { - if (curr->type == Type::unreachable) { - emitUnreachable(); - return; - } - emit(curr); -} - template<typename SubType> void BinaryenIRWriter<SubType>::visitTry(Try* curr) { emit(curr); visitPossibleBlockContents(curr->body); @@ -756,69 +379,6 @@ template<typename SubType> void BinaryenIRWriter<SubType>::visitTry(Try* curr) { } } -template<typename SubType> -void BinaryenIRWriter<SubType>::visitThrow(Throw* curr) { - for (auto* operand : curr->operands) { - visit(operand); - } - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitRethrow(Rethrow* curr) { - visit(curr->exnref); - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitBrOnExn(BrOnExn* curr) { - visit(curr->exnref); - emit(curr); - if (curr->type == Type::unreachable) { - emitUnreachable(); - } -} - -template<typename SubType> void BinaryenIRWriter<SubType>::visitNop(Nop* curr) { - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitUnreachable(Unreachable* curr) { - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitDrop(Drop* curr) { - visit(curr->value); - emit(curr); -} - -template<typename SubType> void BinaryenIRWriter<SubType>::visitPop(Pop* curr) { - emit(curr); -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitTupleMake(TupleMake* curr) { - for (auto* operand : curr->operands) { - visit(operand); - } - emit(curr); - if (curr->type == Type::unreachable) { - emitUnreachable(); - } -} - -template<typename SubType> -void BinaryenIRWriter<SubType>::visitTupleExtract(TupleExtract* curr) { - visit(curr->tuple); - if (curr->type == Type::unreachable) { - emitUnreachable(); - return; - } - emit(curr); -} - // Binaryen IR to binary writer class BinaryenIRToBinaryWriter : public BinaryenIRWriter<BinaryenIRToBinaryWriter> { |