diff options
author | Heejin Ahn <aheejin@gmail.com> | 2021-01-21 21:10:43 +0900 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-21 21:10:43 +0900 |
commit | b77d0af3fc369dd78f51d61d0499571a74366762 (patch) | |
tree | 7f1c47b0d41a6c3190ba6aee0ec398f3c1370b86 /src | |
parent | 85cfc36f0fccdf700045caa15218cef6cbc3e267 (diff) | |
download | binaryen-b77d0af3fc369dd78f51d61d0499571a74366762.tar.gz binaryen-b77d0af3fc369dd78f51d61d0499571a74366762.tar.bz2 binaryen-b77d0af3fc369dd78f51d61d0499571a74366762.zip |
CFG traversal for the new EH spec (#3494)
This updates CFG traversal to match the new spec. Previously there was
only a single `catch` block that caught all exceptions, so all throwing
instructions needed to have a link to its innermost catch BB. But now we
can have multiple catches per try, requiring all throwing instrutions to
have an edge to all of those innermost catch BBs. Furthermore, if there
are only `catch`es and not a `catch_all` in a try, throwing instructions
can further unwind to outer catches until they find a `catch_all`.
`unwindCatchStack` and `unwindExprStack` are necessary to track and make
correct links between throwing instructions and their unwind destination
BBs.
`processCatchStack` is used to remember the catch BBs currently being
processed, so that after processing all of them, we can make a link from
each of those catch's last block to the continuation block after the
try-catch.
RSE test cases are updated because they use the CFG traversal. The tests
there mainly test that if all possible CFG edge to a `local.set` sets
the same value to a local, the `local.set` is redundant and thus can be
removed.
Diffstat (limited to 'src')
-rw-r--r-- | src/cfg/cfg-traversal.h | 161 | ||||
-rw-r--r-- | src/wasm-traversal.h | 2 |
2 files changed, 120 insertions, 43 deletions
diff --git a/src/cfg/cfg-traversal.h b/src/cfg/cfg-traversal.h index 479c09e4b..267fb9589 100644 --- a/src/cfg/cfg-traversal.h +++ b/src/cfg/cfg-traversal.h @@ -69,14 +69,27 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> { std::vector<BasicBlock*> ifStack; // stack of the first blocks of loops std::vector<BasicBlock*> loopStack; + // stack of the last blocks of try bodies std::vector<BasicBlock*> tryStack; - // stack of the first blocks of catch bodies - std::vector<BasicBlock*> catchStack; - - void startBasicBlock() { + // stack of the first blocks of catches that throwing instructions should + // unwind to at any moment. Because there can be multiple catch blocks for a + // try, we maintain a vector of first blocks of catches. + std::vector<std::vector<BasicBlock*>> unwindCatchStack; + // stack of 'Try' expressions corresponding to unwindCatchStack. + std::vector<Expression*> unwindExprStack; + // A stack for each try, where each entry is a list of blocks, one for each + // catch, used during processing. We start by assigning the start blocks to + // here, and then read those at the appropriate time; when we finish a catch + // we write to here the end block, so that when we finish with them all we can + // connect the ends to the outside. In principle two vectors could be used, + // but their usage does not overlap in time, and this is more efficient. + std::vector<std::vector<BasicBlock*>> processCatchStack; + + BasicBlock* startBasicBlock() { currBasicBlock = ((SubType*)this)->makeBasicBlock(); basicBlocks.push_back(std::unique_ptr<BasicBlock>(currBasicBlock)); + return currBasicBlock; } void startUnreachableBlock() { currBasicBlock = nullptr; } @@ -119,16 +132,14 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> { static void doStartIfTrue(SubType* self, Expression** currp) { auto* last = self->currBasicBlock; - self->startBasicBlock(); - self->link(last, self->currBasicBlock); // ifTrue + self->link(last, self->startBasicBlock()); // ifTrue self->ifStack.push_back(last); // the block before the ifTrue } static void doStartIfFalse(SubType* self, Expression** currp) { self->ifStack.push_back(self->currBasicBlock); // the ifTrue fallthrough - self->startBasicBlock(); self->link(self->ifStack[self->ifStack.size() - 2], - self->currBasicBlock); // before if -> ifFalse + self->startBasicBlock()); // before if -> ifFalse } static void doEndIf(SubType* self, Expression** currp) { @@ -159,8 +170,7 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> { static void doEndLoop(SubType* self, Expression** currp) { auto* last = self->currBasicBlock; - self->startBasicBlock(); - self->link(last, self->currBasicBlock); // fallthrough + self->link(last, self->startBasicBlock()); // fallthrough auto* curr = (*currp)->cast<Loop>(); // branches to the top of the loop if (curr->name.is()) { @@ -180,8 +190,7 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> { self->currBasicBlock); // branch to the target if (curr->condition) { auto* last = self->currBasicBlock; - self->startBasicBlock(); - self->link(last, self->currBasicBlock); // we might fall through + self->link(last, self->startBasicBlock()); // we might fall through } else { self->startUnreachableBlock(); } @@ -205,47 +214,106 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> { self->startUnreachableBlock(); } + static void doEndThrowingInst(SubType* self, Expression** currp) { + // Even if the instruction can possibly throw, we don't end the current + // basic block unless the instruction is within a try-catch, because the CFG + // will have too many blocks that way, and if an exception is thrown, the + // function will be exited anyway. + if (self->unwindCatchStack.empty()) { + return; + } + + // Exception thrown. Create a link to each catch within the innermost try. + for (auto* block : self->unwindCatchStack.back()) { + self->link(self->currBasicBlock, block); + } + // If the innermost try does not have a catch_all clause, an exception + // thrown can be caught by any of its outer catch block. And if that outer + // try-catch also does not have a catch_all, this continues until we + // encounter a try-catch_all. Create a link to all those possible catch + // unwind destinations. + // TODO This can be more precise for `throw`s if we compare event types + // and create links to outer catch BBs only when the exception is not + // caught. + // TODO This can also be more precise if we analyze the structure of nested + // try-catches. For example, in the example below, 'call $foo' doesn't need + // a link to the BB of outer 'catch $e1', because if the exception thrown by + // the call is of event $e1, it would've already been caught by the inner + // 'catch $e1'. Optimize these cases later. + // try + // try + // call $foo + // catch $e1 + // ... + // catch $e2 + // ... + // end + // catch $e1 + // ... + // catch $e3 + // ... + // end + for (int i = self->unwindCatchStack.size() - 1; i > 0; i--) { + if (self->unwindExprStack[i]->template cast<Try>()->hasCatchAll()) { + break; + } + for (auto* block : self->unwindCatchStack[i - 1]) { + self->link(self->currBasicBlock, block); + } + } + } + static void doEndCall(SubType* self, Expression** currp) { - // Every call can possibly throw, but we don't end the current basic block - // unless the call is within a try-catch, because the CFG will have too many - // blocks that way, and if an exception is thrown, the function will be - // exited anyway. - if (!self->catchStack.empty()) { - auto* last = self->currBasicBlock; - self->startBasicBlock(); - self->link(last, self->currBasicBlock); // exception not thrown - self->link(last, self->catchStack.back()); // exception thrown + doEndThrowingInst(self, currp); + if (!self->unwindCatchStack.empty()) { + // exception not thrown. link to the continuation BB + self->link(self->currBasicBlock, self->startBasicBlock()); } } static void doStartTry(SubType* self, Expression** currp) { + auto* curr = (*currp)->cast<Try>(); auto* last = self->currBasicBlock; - self->startBasicBlock(); // catch body's first block - self->catchStack.push_back(self->currBasicBlock); + self->unwindCatchStack.emplace_back(); + self->unwindExprStack.push_back(curr); + for (Index i = 0; i < curr->catchBodies.size(); i++) { + // Create the catch body's first block + self->unwindCatchStack.back().push_back(self->startBasicBlock()); + } self->currBasicBlock = last; // reset to the current block } - static void doStartCatch(SubType* self, Expression** currp) { + static void doStartCatches(SubType* self, Expression** currp) { self->tryStack.push_back(self->currBasicBlock); // last block of try body - self->currBasicBlock = self->catchStack.back(); - self->catchStack.pop_back(); + self->processCatchStack.push_back(self->unwindCatchStack.back()); + self->unwindCatchStack.pop_back(); + self->unwindExprStack.pop_back(); + } + + static void doStartCatch(SubType* self, Expression** currp, Index i) { + // Get the block that starts this catch + self->currBasicBlock = self->processCatchStack.back()[i]; + } + + static void doEndCatch(SubType* self, Expression** currp, Index i) { + // We are done with this catch; set the block that ends it + self->processCatchStack.back()[i] = self->currBasicBlock; } static void doEndTry(SubType* self, Expression** currp) { - auto* last = self->currBasicBlock; self->startBasicBlock(); // continuation block after try-catch - // catch body's last block -> continuation block - self->link(last, self->currBasicBlock); + // each catch body's last block -> continuation block + for (auto* last : self->processCatchStack.back()) { + self->link(last, self->currBasicBlock); + } // try body's last block -> continuation block self->link(self->tryStack.back(), self->currBasicBlock); self->tryStack.pop_back(); + self->processCatchStack.pop_back(); } static void doEndThrow(SubType* self, Expression** currp) { - // We unwind to the innermost catch, if any - if (!self->catchStack.empty()) { - self->link(self->currBasicBlock, self->catchStack.back()); - } + doEndThrowingInst(self, currp); self->startUnreachableBlock(); } @@ -254,8 +322,7 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> { self->branches[self->findBreakTarget(curr->name)].push_back( self->currBasicBlock); // branch to the target auto* last = self->currBasicBlock; - self->startBasicBlock(); - self->link(last, self->currBasicBlock); // we might fall through + self->link(last, self->startBasicBlock()); // we might fall through } static void scan(SubType* self, Expression** currp) { @@ -304,16 +371,24 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> { break; } case Expression::Id::TryId: { - // FIXME Update the implementation to match the new spec - WASM_UNREACHABLE("unimp"); - /* self->pushTask(SubType::doEndTry, currp); - self->pushTask(SubType::scan, &curr->cast<Try>()->catchBody); - self->pushTask(SubType::doStartCatch, currp); + auto& catchBodies = curr->cast<Try>()->catchBodies; + using namespace std::placeholders; + for (Index i = 0; i < catchBodies.size(); i++) { + auto doEndCatchI = [i](SubType* self, Expression** currp) { + doEndCatch(self, currp, i); + }; + self->pushTask(doEndCatchI, currp); + self->pushTask(SubType::scan, &catchBodies[i]); + auto doStartCatchI = [i](SubType* self, Expression** currp) { + doStartCatch(self, currp, i); + }; + self->pushTask(doStartCatchI, currp); + } + self->pushTask(SubType::doStartCatches, currp); self->pushTask(SubType::scan, &curr->cast<Try>()->body); self->pushTask(SubType::doStartTry, currp); return; // don't do anything else - */ } case Expression::Id::ThrowId: case Expression::Id::RethrowId: { @@ -350,7 +425,9 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> { assert(ifStack.size() == 0); assert(loopStack.size() == 0); assert(tryStack.size() == 0); - assert(catchStack.size() == 0); + assert(unwindCatchStack.size() == 0); + assert(unwindExprStack.size() == 0); + assert(processCatchStack.size() == 0); } std::unordered_set<BasicBlock*> findLiveBlocks() { diff --git a/src/wasm-traversal.h b/src/wasm-traversal.h index 28470c614..53c0e4ac7 100644 --- a/src/wasm-traversal.h +++ b/src/wasm-traversal.h @@ -250,7 +250,7 @@ struct Walker : public VisitorType { // nested. // Tasks receive the this pointer and a pointer to the pointer to operate on - typedef void (*TaskFunc)(SubType*, Expression**); + using TaskFunc = std::function<void(SubType*, Expression**)>; struct Task { TaskFunc func; |