1 files changed, 264 insertions, 0 deletions
diff --git a/src/passes/GUFA.cpp b/src/passes/GUFA.cpp
new file mode 100644
index 000000000..1ac8ed820
--- /dev/null
+++ b/src/passes/GUFA.cpp
@@ -0,0 +1,264 @@
+/*
+ * Copyright 2022 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Grand Unified Flow Analysis (GUFA)
+//
+// Optimize based on information about what content can appear in each location
+// in the program. This does a whole-program analysis to find that out and
+// hopefully learn more than the type system does - for example, a type might be
+// $A, which means $A or any subtype can appear there, but perhaps the analysis
+// can find that only $A', a particular subtype, can appear there in practice,
+// and not $A or any subtypes of $A', etc. Or, we may find that no type is
+// actually possible at a particular location, say if we can prove that the
+// casts on the way to that location allow nothing through. We can also find
+// that only a particular value is possible of that type.
+//
+// GUFA will infer constants and unreachability, and add those to the code. This
+// can increase code size if further optimizations are not run later like dead
+// code elimination and vacuum. The "optimizing" variant of this pass will run
+// such followup opts automatically in functions where we make changes, and so
+// it is useful if GUFA is run near the end of the optimization pipeline.
+//
+// TODO: GUFA + polymorphic devirtualization + traps-never-happen. If we see
+//       that the possible call targets are {A, B, C}, and GUFA info lets us
+//       prove that A, C will trap if called - say, if they cast the first
+//       parameter to something GUFA proved it cannot be - then we can ignore
+//       them, and devirtualize to a call to B.
+//
+
+#include "ir/drop.h"
+#include "ir/eh-utils.h"
+#include "ir/possible-contents.h"
+#include "ir/properties.h"
+#include "ir/utils.h"
+#include "pass.h"
+#include "wasm.h"
+
+namespace wasm {
+
+namespace {
+
+struct GUFAOptimizer
+  : public WalkerPass<
+      PostWalker<GUFAOptimizer, UnifiedExpressionVisitor<GUFAOptimizer>>> {
+  bool isFunctionParallel() override { return true; }
+
+  ContentOracle& oracle;
+  bool optimizing;
+
+  GUFAOptimizer(ContentOracle& oracle, bool optimizing)
+    : oracle(oracle), optimizing(optimizing) {}
+
+  GUFAOptimizer* create() override {
+    return new GUFAOptimizer(oracle, optimizing);
+  }
+
+  bool optimized = false;
+
+  void visitExpression(Expression* curr) {
+    // Skip things we can't improve in any way.
+    auto type = curr->type;
+    if (type == Type::unreachable || type == Type::none ||
+        Properties::isConstantExpression(curr)) {
+      return;
+    }
+
+    if (type.isTuple()) {
+      // TODO: tuple types.
+      return;
+    }
+
+    if (type.isRef() && (getTypeSystem() != TypeSystem::Nominal &&
+                         getTypeSystem() != TypeSystem::Isorecursive)) {
+      // Without type info we can't analyze subtypes, so we cannot infer
+      // anything about refs.
+      return;
+    }
+
+    // Ok, this is an interesting location that we might optimize. See what the
+    // oracle says is possible there.
+    auto contents = oracle.getContents(ExpressionLocation{curr, 0});
+
+    auto& options = getPassOptions();
+    auto& wasm = *getModule();
+    Builder builder(wasm);
+
+    if (contents.getType() == Type::unreachable) {
+      // This cannot contain any possible value at all. It must be unreachable
+      // code.
+      replaceCurrent(getDroppedUnconditionalChildrenAndAppend(
+        curr, wasm, options, builder.makeUnreachable()));
+      optimized = true;
+      return;
+    }
+
+    // This is reachable. Check if we can emit something optimized for it.
+    // TODO: can we handle more general things here too?
+    if (!contents.canMakeExpression()) {
+      return;
+    }
+
+    if (contents.isNull() && curr->type.isNullable()) {
+      // Null values are all identical, so just fix up the type here if we need
+      // to (the null's type might not fit in this expression, if it passed
+      // through casts).
+      if (!Type::isSubType(contents.getType(), curr->type)) {
+        contents = PossibleContents::literal(
+          Literal::makeNull(curr->type.getHeapType()));
+      }
+
+      // Note that if curr's type is *not* nullable, then the code will trap at
+      // runtime (the null must arrive through a cast that will trap). We handle
+      // that below, so we don't need to think about it here.
+
+      // TODO: would emitting a more specific null be useful when valid?
+    }
+
+    auto* c = contents.makeExpression(wasm);
+
+    // We can only place the constant value here if it has the right type. For
+    // example, a block may return (ref any), that is, not allow a null, but in
+    // practice only a null may flow there if it goes through casts that will
+    // trap at runtime.
+    // TODO: GUFA should eventually do this, but it will require it properly
+    //       filtering content not just on ref.cast as it does now, but also
+    //       ref.as etc. Once it does those we could assert on the type being
+    //       valid here.
+    if (Type::isSubType(c->type, curr->type)) {
+      replaceCurrent(
+        getDroppedUnconditionalChildrenAndAppend(curr, wasm, options, c));
+      optimized = true;
+    } else {
+      // The type is not compatible: we cannot place |c| in this location, even
+      // though we have proven it is the only value possible here.
+      if (Properties::isConstantExpression(c)) {
+        // The type is not compatible and this is a simple constant expression
+        // like a ref.func. That means this code must be unreachable. (See below
+        // for the case of a non-constant.)
+        replaceCurrent(getDroppedUnconditionalChildrenAndAppend(
+          curr, wasm, options, builder.makeUnreachable()));
+        optimized = true;
+      } else {
+        // This is not a constant expression, but we are certain it is the right
+        // value. Atm the only such case we handle is a global.get of an
+        // immutable global. We don't know what the value will be, nor its
+        // specific type, but we do know that a global.get will get that value
+        // properly. However, in this case it does not have the right type for
+        // this location. That can happen since the global.get does not have
+        // exactly the proper type for the contents: the global.get might be
+        // nullable, for example, even though the contents are not actually a
+        // null. For example, consider what happens here:
+        //
+        //  (global $foo (ref null any) (struct.new $Foo))
+        //  ..
+        //    (ref.as_non_null
+        //      (global.get $foo))
+        //
+        // We create a $Foo in the global $foo, so its value is not a null. But
+        // the global's type is nullable, so the global.get's type will be as
+        // well. When we get to the ref.as_non_null, we then want to replace it
+        // with a global.get - in fact that's what its child already is, showing
+        // it is the right content for it - but that global.get would not have a
+        // non-nullable type like a ref.as_non_null must have, so we cannot
+        // simply replace it.
+        //
+        // For now, do nothing here, but in some cases we could probably
+        // optimize (e.g. by adding a ref.as_non_null in the example) TODO
+        assert(c->is<GlobalGet>());
+      }
+    }
+  }
+
+  // TODO: If an instruction would trap on null, like struct.get, we could
+  //       remove it here if it has no possible contents and if we are in
+  //       traps-never-happen mode (that is, we'd have proven it can only trap,
+  //       but we assume no traps happen, so it must be dead code). That info
+  //       is present in OptimizeInstructions where it removes redundant
+  //       ref.as_non_null (it removes them because it knows that the parent
+  //       will trap on null anyhow), so maybe there is a way to share that
+  //       information about parents.
+
+  void visitFunction(Function* func) {
+    if (!optimized) {
+      return;
+    }
+
+    // Optimization may introduce more unreachables, which we need to
+    // propagate.
+    ReFinalize().walkFunctionInModule(func, getModule());
+
+    // We may add blocks around pops, which we must fix up.
+    EHUtils::handleBlockNestedPops(func, *getModule());
+
+    // If we are in "optimizing" mode, we'll also run some more passes on this
+    // function that we just optimized. If not, leave now.
+    if (!optimizing) {
+      return;
+    }
+
+    PassRunner runner(getModule(), getPassOptions());
+    runner.setIsNested(true);
+    // New unreachables we added have created dead code we can remove. If we do
+    // not do this, then running GUFA repeatedly can actually increase code size
+    // (by adding multiple unneeded unreachables).
+    runner.add("dce");
+    // New drops we added allow us to remove more unused code and values. As
+    // with unreachables, without a vacuum we may increase code size as in
+    // nested expressions we may apply the same value multiple times:
+    //
+    //  (block $out
+    //   (block $in
+    //    (i32.const 10)))
+    //
+    // In each of the blocks we'll infer the value must be 10, so we'll end up
+    // with this repeating code:
+    //
+    //  (block ;; a new block just to drop the old outer block
+    //   (drop
+    //    (block $out
+    //     (drop
+    //      (block $in
+    //       (i32.const 10)
+    //      )
+    //     )
+    //     (i32.const 10)
+    //    )
+    //   )
+    //   (i32.const 10)
+    //  )
+    runner.add("vacuum");
+    runner.runOnFunction(func);
+  }
+};
+
+struct GUFAPass : public Pass {
+  bool optimizing;
+
+  GUFAPass(bool optimizing) : optimizing(optimizing) {}
+
+  void run(PassRunner* runner, Module* module) override {
+    ContentOracle oracle(*module);
+    GUFAOptimizer(oracle, optimizing).run(runner, module);
+  }
+};
+
+} // anonymous namespace
+
+Pass* createGUFAPass() { return new GUFAPass(false); }
+Pass* createGUFAOptimizingPass() { return new GUFAPass(true); }
+
+} // namespace wasm