Ensure cfg-traversal emits blocks in reverse postorder, refactoring try. NFC (#4101)

Reverse postorder basically just means that a block's immediate dominator must precede it in the list. That is useful because then algorithms that look at dominance can simply process the list in order, and the immediate dominator will have already been seen before each block. Another way to put it is that in reverse postorder a block's dominators appear before it in the list, as do all non-loop predecessors. At least in reducible graphs that is the case, and our IR, like wasm, is reducible. It is pretty natural to emit reverse postorder on wasm given the reducibility: simply process the wasm in postorder, and make sure to create new basic blocks only when reaching their code - that is, do not create them "ahead of time". We were doing that in a single place, for try-catch, so this PR refactors that. Specifically it makes us create the basic blocks for catches right when we reach them, and not earlier. So the data structure that used to store them becomes a list of things to connect to them. This is useful for #4100 , see more details there.
author: Alon Zakai <azakai@google.com> 2021-08-24 08:25:49 -0700
committer: GitHub <noreply@github.com> 2021-08-24 15:25:49 +0000
commit: a2323f2cfd90089c54100ab98c439b9438cc4dc1 (patch)
tree: 61dd41386ca0e17367d7e336ebd546b4282d1318 /src
parent: 3d88d11176a26b1156b83a71bbc033c71772dabe (diff)
download: binaryen-a2323f2cfd90089c54100ab98c439b9438cc4dc1.tar.gz
binaryen-a2323f2cfd90089c54100ab98c439b9438cc4dc1.tar.bz2
binaryen-a2323f2cfd90089c54100ab98c439b9438cc4dc1.zip
1 files changed, 48 insertions, 26 deletions
diff --git a/src/cfg/cfg-traversal.h b/src/cfg/cfg-traversal.h
index b9d9c8f1b..04dccf1eb 100644
--- a/src/cfg/cfg-traversal.h
+++ b/src/cfg/cfg-traversal.h
@@ -68,7 +68,15 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> {
 
   // internal details
 
-  std::vector<std::unique_ptr<BasicBlock>> basicBlocks; // all the blocks
+  // The list of basic blocks in the function.
+  //
+  // This is populated in reverse postorder, that is, a block appears after all
+  // those that dominate it. This is trivial to do given wasm's structured
+  // control flow: we simply create blocks only after the things that can reach
+  // them (the only nontrivial things are loops, but if the dominator was before
+  // the loop, then again, we would have created it before the loop body).
+  std::vector<std::unique_ptr<BasicBlock>> basicBlocks;
+
   // blocks that are the tops of loops, i.e., have backedges to them
   std::vector<BasicBlock*> loopTops;
 
@@ -88,11 +96,14 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> {
 
   // stack of the last blocks of try bodies
   std::vector<BasicBlock*> tryStack;
-  // stack of the first blocks of catches that throwing instructions should
-  // unwind to at any moment. Because there can be multiple catch blocks for a
-  // try, we maintain a vector of first blocks of catches.
-  std::vector<std::vector<BasicBlock*>> unwindCatchStack;
-  // stack of 'Try' expressions corresponding to unwindCatchStack.
+  // Stack of the blocks that contain a throwing instruction, and therefore they
+  // can reach the first blocks of catches that throwing instructions should
+  // unwind to at any moment. That is, the topmost item in this vector relates
+  // to the current try-catch scope, and the vector there is a list of the items
+  // that can reach catch blocks (each item is assumed to be able to reach any
+  // of the catches, although that could be improved perhaps).
+  std::vector<std::vector<BasicBlock*>> throwingInstsStack;
+  // stack of 'Try' expressions corresponding to throwingInstsStack.
   std::vector<Expression*> unwindExprStack;
   // A stack for each try, where each entry is a list of blocks, one for each
   // catch, used during processing. We start by assigning the start blocks to
@@ -225,14 +236,14 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> {
     // basic block unless the instruction is within a try-catch, because the CFG
     // will have too many blocks that way, and if an exception is thrown, the
     // function will be exited anyway.
-    if (self->unwindCatchStack.empty()) {
+    if (self->throwingInstsStack.empty()) {
       return;
     }
 
-    // Exception thrown. Create a link to each catch within the innermost try.
-    for (auto* block : self->unwindCatchStack.back()) {
-      self->link(self->currBasicBlock, block);
-    }
+    // Exception thrown. Note outselves so that we will create a link to each
+    // catch within the innermost try when we get there.
+    self->throwingInstsStack.back().push_back(self->currBasicBlock);
+
     // If the innermost try does not have a catch_all clause, an exception
     // thrown can be caught by any of its outer catch block. And if that outer
     // try-catch also does not have a catch_all, this continues until we
@@ -258,19 +269,17 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> {
     // catch $e3
     //   ...
     // end
-    for (int i = self->unwindCatchStack.size() - 1; i > 0; i--) {
+    for (int i = self->throwingInstsStack.size() - 1; i > 0; i--) {
       if (self->unwindExprStack[i]->template cast<Try>()->hasCatchAll()) {
         break;
       }
-      for (auto* block : self->unwindCatchStack[i - 1]) {
-        self->link(self->currBasicBlock, block);
-      }
+      self->throwingInstsStack[i - 1].push_back(self->currBasicBlock);
     }
   }
 
   static void doEndCall(SubType* self, Expression** currp) {
     doEndThrowingInst(self, currp);
-    if (!self->unwindCatchStack.empty()) {
+    if (!self->throwingInstsStack.empty()) {
       // exception not thrown. link to the continuation BB
       auto* last = self->currBasicBlock;
       self->link(last, self->startBasicBlock());
@@ -279,20 +288,33 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> {
 
   static void doStartTry(SubType* self, Expression** currp) {
     auto* curr = (*currp)->cast<Try>();
-    auto* last = self->currBasicBlock;
-    self->unwindCatchStack.emplace_back();
+    self->throwingInstsStack.emplace_back();
     self->unwindExprStack.push_back(curr);
-    for (Index i = 0; i < curr->catchBodies.size(); i++) {
-      // Create the catch body's first block
-      self->unwindCatchStack.back().push_back(self->startBasicBlock());
-    }
-    self->currBasicBlock = last; // reset to the current block
   }
 
   static void doStartCatches(SubType* self, Expression** currp) {
     self->tryStack.push_back(self->currBasicBlock); // last block of try body
-    self->processCatchStack.push_back(self->unwindCatchStack.back());
-    self->unwindCatchStack.pop_back();
+
+    // Now that we are starting the catches, create the basic blocks that they
+    // begin with.
+    auto* last = self->currBasicBlock;
+    auto* tryy = (*currp)->cast<Try>();
+    self->processCatchStack.emplace_back();
+    auto& entries = self->processCatchStack.back();
+    for (Index i = 0; i < tryy->catchBodies.size(); i++) {
+      entries.push_back(self->startBasicBlock());
+    }
+    self->currBasicBlock = last; // reset to the current block
+
+    // Create links from things that reach those new basic blocks.
+    auto& preds = self->throwingInstsStack.back();
+    for (auto* pred : preds) {
+      for (Index i = 0; i < entries.size(); i++) {
+        self->link(pred, entries[i]);
+      }
+    }
+
+    self->throwingInstsStack.pop_back();
     self->unwindExprStack.pop_back();
     self->catchIndexStack.push_back(0);
   }
@@ -408,7 +430,7 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> {
     assert(ifStack.size() == 0);
     assert(loopStack.size() == 0);
     assert(tryStack.size() == 0);
-    assert(unwindCatchStack.size() == 0);
+    assert(throwingInstsStack.size() == 0);
     assert(unwindExprStack.size() == 0);
     assert(processCatchStack.size() == 0);
   }
author	Alon Zakai <azakai@google.com>	2021-08-24 08:25:49 -0700
committer	GitHub <noreply@github.com>	2021-08-24 15:25:49 +0000
commit	a2323f2cfd90089c54100ab98c439b9438cc4dc1 (patch)
tree	61dd41386ca0e17367d7e336ebd546b4282d1318 /src
parent	3d88d11176a26b1156b83a71bbc033c71772dabe (diff)
download	binaryen-a2323f2cfd90089c54100ab98c439b9438cc4dc1.tar.gz binaryen-a2323f2cfd90089c54100ab98c439b9438cc4dc1.tar.bz2 binaryen-a2323f2cfd90089c54100ab98c439b9438cc4dc1.zip