8 files changed, 1268 insertions, 124 deletions
diff --git a/src/ir/manipulation.h b/src/ir/manipulation.h
index 33c7d1bd7..64cd15dc3 100644
--- a/src/ir/manipulation.h
+++ b/src/ir/manipulation.h
@@ -64,6 +64,10 @@ inline OutputType* convert(InputType* input, MixedArena& allocator) {
   return output;
 }
 
+// Copy using a flexible custom copy function. This function is called on each
+// expression before copying it. If it returns a non-null value then that is
+// used (effectively overriding the normal copy), and if it is null then we do a
+// normal copy.
 using CustomCopier = std::function<Expression*(Expression*)>;
 Expression*
 flexibleCopy(Expression* original, Module& wasm, CustomCopier custom);
diff --git a/src/ir/module-utils.cpp b/src/ir/module-utils.cpp
index 5791ed77c..8ad127086 100644
--- a/src/ir/module-utils.cpp
+++ b/src/ir/module-utils.cpp
@@ -46,6 +46,15 @@ Function* copyFunction(Function* func,
                        Module& out,
                        Name newName,
                        std::optional<std::vector<Index>> fileIndexMap) {
+  auto ret = copyFunctionWithoutAdd(func, out, newName, fileIndexMap);
+  return out.addFunction(std::move(ret));
+}
+
+std::unique_ptr<Function>
+copyFunctionWithoutAdd(Function* func,
+                       Module& out,
+                       Name newName,
+                       std::optional<std::vector<Index>> fileIndexMap) {
   auto ret = std::make_unique<Function>();
   ret->name = newName.is() ? newName : func->name;
   ret->hasExplicitName = func->hasExplicitName;
@@ -71,7 +80,7 @@ Function* copyFunction(Function* func,
   ret->base = func->base;
   ret->noFullInline = func->noFullInline;
   ret->noPartialInline = func->noPartialInline;
-  return out.addFunction(std::move(ret));
+  return ret;
 }
 
 Global* copyGlobal(Global* global, Module& out) {
diff --git a/src/ir/module-utils.h b/src/ir/module-utils.h
index 0a101e5da..d9fd69428 100644
--- a/src/ir/module-utils.h
+++ b/src/ir/module-utils.h
@@ -33,6 +33,13 @@ copyFunction(Function* func,
              Name newName = Name(),
              std::optional<std::vector<Index>> fileIndexMap = std::nullopt);
 
+// As above, but does not add the copy to the module.
+std::unique_ptr<Function> copyFunctionWithoutAdd(
+  Function* func,
+  Module& out,
+  Name newName = Name(),
+  std::optional<std::vector<Index>> fileIndexMap = std::nullopt);
+
 Global* copyGlobal(Global* global, Module& out);
 
 Tag* copyTag(Tag* tag, Module& out);
diff --git a/src/passes/Monomorphize.cpp b/src/passes/Monomorphize.cpp
index 012f24750..5237f7864 100644
--- a/src/passes/Monomorphize.cpp
+++ b/src/passes/Monomorphize.cpp
@@ -15,24 +15,58 @@
  */
 
 //
-// When we see a call foo(arg1, arg2) and at least one of the arguments has a
-// more refined type than is declared in the function being called, create a
-// copy of the function with the refined type. That copy can then potentially be
-// optimized in useful ways later.
+// Monomorphization of code based on callsite context: When we see a call, see
+// if the information at the callsite can help us optimize. For example, if a
+// parameter is constant, then using that constant in the called function may
+// unlock a lot of improvements. We may benefit from monomorphizing in the
+// following cases:
 //
-// Inlining also monomorphizes in effect. What this pass does is handle the
-// cases where inlining cannot be done.
+//  * If a call provides a more refined type than the function declares for a
+//    parameter.
+//  * If a call provides a constant as a parameter.
+//  * If a call provides a GC allocation as a parameter. TODO
+//  * If a call is dropped. TODO also other stuff on the outside?
 //
-// To see when monomorphizing makes sense, this optimizes the target function
-// both with and without the more refined types. If the refined types help then
-// the version with might remove a cast, for example. Note that while doing so
-// we keep the optimization results of the version without - there is no reason
-// to forget them since we've gone to the trouble anyhow. So this pass may have
-// the side effect of performing minor optimizations on functions. There is also
-// a variant of the pass that always monomorphizes, even when it does not seem
-// helpful, which is useful for testing, and possibly in cases where we need
-// more than just local optimizations to see the benefit - for example, perhaps
-// GUFA ends up more powerful later on.
+// We realize the benefit by creating a monomorphized (specialized/refined)
+// version of the function, and call that instead. For example, if we have
+//
+//  function foo(x) { return x + 22; }
+//  foo(7);
+//
+// then monomorphization leads to this:
+//
+//  function foo(x)  { return x + 22; } // original unmodified function
+//  foo_b();                            // now calls foo_b
+//  function foo_b() { return 7 + 22; } // monomorphized, constant 7 applied
+//
+// This is related to inlining both conceptually and practically. Conceptually,
+// one of inlining's big advantages is that we then optimize the called code
+// together with the code around the call, and monomorphization does something
+// similar. And, this pass does so by "reverse-inlining" content from the
+// caller to the monomorphized function: the constant 7 in the example above has
+// been "pulled in" from the caller into the callee. Larger amounts of code can
+// be moved in that manner, both values sent to the function, and the code that
+// receives it (see the mention of dropped calls, before).
+//
+// As this monormophization uses callsite context (the parameters, where the
+// result flows to), we call it "Contextual Monomorphization." The full name is
+// "Empirical Contextural Monomorphization" because we decide where to optimize
+// based on a "try it and see" (empirical) approach, that measures the benefit.
+// That is, we generate the monomorphized function as explained, then optimize
+// that function, which contains the original code + code from the callsite
+// context that we pulled in. If the optimizer manages to improve that combined
+// code in a useful way then we apply the optimization, and if not then we undo.
+//
+// The empirical approach significantly reduces the need for heuristics. For
+// example, rather than have a heuristic for "see if a constant parameter flows
+// into a conditional branch," we simply run the optimizer and let it optimize
+// that case. All other cases handled by the optimizer work as well, without
+// needing to specify them as heuristics, so this gets smarter as the optimizer
+// does.
+//
+// Aside from the main version of this pass there is also a variant useful for
+// testing that always monomorphizes non-trivial callsites, without checking if
+// the optimizer can help or not (that makes writing testcases simpler).
 //
 // TODO: When we optimize we could run multiple cycles: A calls B calls C might
 //       end up with the refined+optimized B now having refined types in its
@@ -47,25 +81,21 @@
 //       end on leaves. That would make it more likely for a single iteration to
 //       do more work, as if A->B->C then we'd do A->B and optimize B and only
 //       then look at B->C.
-// TODO: Also run the result-refining part of SignatureRefining, as if we
-//       refine the result then callers of the function may benefit, even if
-//       there is no benefit in the function itself.
 // TODO: If this is too slow, we could "group" things, for example we could
 //       compute the LUB of a bunch of calls to a target and then investigate
 //       that one case and use it in all those callers.
 // TODO: Not just direct calls? But updating vtables is complex.
-// TODO: Not just types? We could monomorphize using Literal values. E.g. for
-//       function references, if we monomorphized we'd end up specializing qsort
-//       for the particular functions it is given.
 //
 
 #include "ir/cost.h"
 #include "ir/find_all.h"
+#include "ir/manipulation.h"
 #include "ir/module-utils.h"
 #include "ir/names.h"
 #include "ir/type-updating.h"
 #include "ir/utils.h"
 #include "pass.h"
+#include "support/hash.h"
 #include "wasm-type.h"
 #include "wasm.h"
 
@@ -73,18 +103,200 @@ namespace wasm {
 
 namespace {
 
+// Relevant information about a callsite for purposes of monomorphization.
+struct CallContext {
+  // The operands of the call, processed to leave the parts that make sense to
+  // keep in the context. That is, the operands of the CallContext are the exact
+  // code that will appear at the start of the monomorphized function. For
+  // example:
+  //
+  //  (call $foo
+  //    (i32.const 10)
+  //    (..something complicated..)
+  //  )
+  //  (func $foo (param $int i32) (param $complex f64)
+  //    ..
+  //
+  // The context operands are
+  //
+  //  [
+  //    (i32.const 10)       ;; Unchanged: this can be pulled into the called
+  //                         ;; function, and removed from the caller side.
+  //    (local.get $0)       ;; The complicated child cannot; keep it as a value
+  //                         ;; sent from the caller, which we will local.get.
+  //  ]
+  //
+  // Both the const and the local.get are simply used in the monomorphized
+  // function, like this:
+  //
+  //  (func $foo-monomorphized (param $0 ..)
+  //    (..local defs..)
+  //    ;; Apply the first operand, which was pulled into here.
+  //    (local.set $int
+  //      (i32.const 10)
+  //    )
+  //    ;; Read the second, which remains a parameter to the function.
+  //    (local.set $complex
+  //      (local.get $0)
+  //    )
+  //    ;; The original body.
+  //    ..
+  //
+  // The $int param is no longer a parameter, and it is set in a local at the
+  // top: we have "reverse-inlined" code from the calling function into the
+  // caller, pulling the constant 10 into here. The second parameter cannot be
+  // pulled in, so we must still send it, but we still have a local.set there to
+  // copy it into a local (this does not matter in this case, but does if the
+  // sent value is more refined; always using a local.set is simpler and more
+  // regular).
+  std::vector<Expression*> operands;
+
+  // Whether the call is dropped. TODO
+  bool dropped;
+
+  bool operator==(const CallContext& other) const {
+    if (dropped != other.dropped) {
+      return false;
+    }
+
+    // We consider logically equivalent expressions as equal (rather than raw
+    // pointers), so that contexts with functionally identical shape are
+    // treated the same.
+    if (operands.size() != other.operands.size()) {
+      return false;
+    }
+    for (Index i = 0; i < operands.size(); i++) {
+      if (!ExpressionAnalyzer::equal(operands[i], other.operands[i])) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  bool operator!=(const CallContext& other) const { return !(*this == other); }
+
+  // Build the context from a given call. This builds up the context operands as
+  // as explained in the comments above, and updates the call to send any
+  // remaining values by updating |newOperands| (for example, if all the values
+  // sent are constants, then |newOperands| will end up empty, as we have
+  // nothing left to send).
+  void buildFromCall(Call* call,
+                     std::vector<Expression*>& newOperands,
+                     Module& wasm) {
+    Builder builder(wasm);
+
+    for (auto* operand : call->operands) {
+      // Process the operand. This is a copy operation, as we are trying to move
+      // (copy) code from the callsite into the called function. When we find we
+      // can copy then we do so, and when we cannot that value remains as a
+      // value sent from the call.
+      operands.push_back(ExpressionManipulator::flexibleCopy(
+        operand, wasm, [&](Expression* child) -> Expression* {
+          if (canBeMovedIntoContext(child)) {
+            // This can be moved, great: let the copy happen.
+            return nullptr;
+          }
+
+          // This cannot be moved, so we stop here: this is a value that is sent
+          // into the monomorphized function. It is a new operand in the call,
+          // and in the context operands it is a local.get, that reads that
+          // value.
+          auto paramIndex = newOperands.size();
+          newOperands.push_back(child);
+          // TODO: If one operand is a tee and another a get, we could actually
+          //       reuse the local, effectively showing the monomorphized
+          //       function that the values are the same. (But then the checks
+          //       later down to is<LocalGet> would need to check index too.)
+          return builder.makeLocalGet(paramIndex, child->type);
+        }));
+    }
+
+    // TODO: handle drop
+    dropped = false;
+  }
+
+  // Checks whether an expression can be moved into the context.
+  bool canBeMovedIntoContext(Expression* curr) {
+    // Constant numbers, funcs, strings, etc. can all be copied, so it is ok to
+    // add them to the context.
+    // TODO: Allow global.get as well, and anything else that is purely
+    //       copyable.
+    return Properties::isSingleConstantExpression(curr);
+  }
+
+  // Check if a context is trivial relative to a call, that is, the context
+  // contains no information that can allow optimization at all. Such trivial
+  // contexts can be dismissed early.
+  bool isTrivial(Call* call, Module& wasm) {
+    // Dropped contexts are not trivial.
+    if (dropped) {
+      return false;
+    }
+
+    // The context must match the call for us to compare them.
+    assert(operands.size() == call->operands.size());
+
+    // If an operand is not simply passed through, then we are not trivial.
+    auto callParams = wasm.getFunction(call->target)->getParams();
+    for (Index i = 0; i < operands.size(); i++) {
+      // A local.get of the same type implies we just pass through the value.
+      // Anything else is not trivial.
+      if (!operands[i]->is<LocalGet>() || operands[i]->type != callParams[i]) {
+        return false;
+      }
+    }
+
+    // We found nothing interesting, so this is trivial.
+    return true;
+  }
+};
+
+} // anonymous namespace
+
+} // namespace wasm
+
+namespace std {
+
+template<> struct hash<wasm::CallContext> {
+  size_t operator()(const wasm::CallContext& info) const {
+    size_t digest = hash<bool>{}(info.dropped);
+
+    wasm::rehash(digest, info.operands.size());
+    for (auto* operand : info.operands) {
+      wasm::hash_combine(digest, wasm::ExpressionAnalyzer::hash(operand));
+    }
+
+    return digest;
+  }
+};
+
+// Useful for debugging.
+[[maybe_unused]] void dump(std::ostream& o, wasm::CallContext& context) {
+  o << "CallContext{\n";
+  for (auto* operand : context.operands) {
+    o << "  " << *operand << '\n';
+  }
+  if (context.dropped) {
+    o << "  dropped\n";
+  }
+  o << "}\n";
+}
+
+} // namespace std
+
+namespace wasm {
+
+namespace {
+
 struct Monomorphize : public Pass {
-  // If set, we run some opts to see if monomorphization helps, and skip it if
-  // not.
+  // If set, we run some opts to see if monomorphization helps, and skip cases
+  // where we do not help out.
   bool onlyWhenHelpful;
 
   Monomorphize(bool onlyWhenHelpful) : onlyWhenHelpful(onlyWhenHelpful) {}
 
   void run(Module* module) override {
-    if (!module->features.hasGC()) {
-      return;
-    }
-
     // TODO: parallelize, see comments below
 
     // Note the list of all functions. We'll be adding more, and do not want to
@@ -94,7 +306,7 @@ struct Monomorphize : public Pass {
       *module, [&](Function* func) { funcNames.push_back(func->name); });
 
     // Find the calls in each function and optimize where we can, changing them
-    // to call more refined targets.
+    // to call the monomorphized targets.
     for (auto name : funcNames) {
       auto* func = module->getFunction(name);
       for (auto* call : FindAll<Call>(func->body).list) {
@@ -110,59 +322,67 @@ struct Monomorphize : public Pass {
           continue;
         }
 
-        call->target = getRefinedTarget(call, module);
+        processCall(call, *module);
       }
     }
   }
 
-  // Given a call, make a copy of the function it is calling that has more
-  // refined arguments that fit the arguments being passed perfectly.
-  Name getRefinedTarget(Call* call, Module* module) {
+  // Try to optimize a call.
+  void processCall(Call* call, Module& wasm) {
     auto target = call->target;
-    auto* func = module->getFunction(target);
+    auto* func = wasm.getFunction(target);
     if (func->imported()) {
       // Nothing to do since this calls outside of the module.
-      return target;
-    }
-    auto params = func->getParams();
-    bool hasRefinedParam = false;
-    for (Index i = 0; i < call->operands.size(); i++) {
-      if (call->operands[i]->type != params[i]) {
-        hasRefinedParam = true;
-        break;
-      }
-    }
-    if (!hasRefinedParam) {
-      // Nothing to do since all params are fully refined already.
-      return target;
+      return;
     }
 
-    std::vector<Type> refinedTypes;
-    for (auto* operand : call->operands) {
-      refinedTypes.push_back(operand->type);
-    }
-    auto refinedParams = Type(refinedTypes);
-    auto iter = funcParamMap.find({target, refinedParams});
-    if (iter != funcParamMap.end()) {
-      return iter->second;
+    // TODO: ignore calls with unreachable operands for simplicty
+
+    // Compute the call context, and the new operands that the call would send
+    // if we use that context.
+    CallContext context;
+    std::vector<Expression*> newOperands;
+    context.buildFromCall(call, newOperands, wasm);
+
+    // See if we've already evaluated this function + call context. If so, then
+    // we've memoized the result.
+    auto iter = funcContextMap.find({target, context});
+    if (iter != funcContextMap.end()) {
+      auto newTarget = iter->second;
+      if (newTarget != target) {
+        // When we computed this before we found a benefit to optimizing, and
+        // created a new monomorphized function to call. Use it by simply
+        // applying the new operands we computed, and adjusting the call target.
+        call->operands.set(newOperands);
+        call->target = newTarget;
+      }
+      return;
     }
 
-    // This is the first time we see this situation. Let's see if it is worth
-    // monomorphizing.
+    // This is the first time we see this situation. First, check if the context
+    // is trivial and has no opportunities for optimization.
+    if (context.isTrivial(call, wasm)) {
+      // Memoize the failure, and stop.
+      funcContextMap[{target, context}] = target;
+      return;
+    }
 
-    // Create a new function with refined parameters as a copy of the original.
-    auto refinedTarget = Names::getValidFunctionName(*module, target);
-    auto* refinedFunc = ModuleUtils::copyFunction(func, *module, refinedTarget);
-    TypeUpdating::updateParamTypes(refinedFunc, refinedTypes, *module);
-    refinedFunc->type = HeapType(Signature(refinedParams, func->getResults()));
+    // Create the monomorphized function that includes the call context.
+    std::unique_ptr<Function> monoFunc =
+      makeMonoFunctionWithContext(func, context, wasm);
 
-    // Assume we'll choose to use the refined target, but if we are being
-    // careful then we might change our mind.
-    auto chosenTarget = refinedTarget;
+    // Decide whether it is worth using the monomorphized function.
+    auto worthwhile = true;
     if (onlyWhenHelpful) {
       // Optimize both functions using minimal opts, hopefully enough to see if
-      // there is a benefit to the refined types (such as the new types allowing
-      // a cast to be removed).
+      // there is a benefit to the context. We optimize both to avoid confusion
+      // from the function benefiting from simply running another cycle of
+      // optimization.
+      //
+      // Note that we do *not* discard the optimizations to the original
+      // function if we decide not to optimize. We've already done them, and the
+      // function is improved, so we may as well keep them.
+      //
       // TODO: Atm this can be done many times per function as it is once per
       //       function and per set of types sent to it. Perhaps have some
       //       total limit to avoid slow runtimes.
@@ -181,23 +401,155 @@ struct Monomorphize : public Pass {
       //       keep optimizing from the current contents as we go. It's not
       //       obvious which approach is best here.
       doMinimalOpts(func);
-      doMinimalOpts(refinedFunc);
+      doMinimalOpts(monoFunc.get());
 
       auto costBefore = CostAnalyzer(func->body).cost;
-      auto costAfter = CostAnalyzer(refinedFunc->body).cost;
+      auto costAfter = CostAnalyzer(monoFunc->body).cost;
+      // TODO: We should probably only accept improvements above some minimum,
+      //       to avoid optimizing cases where we duplicate a huge function but
+      //       only optimize a tiny part of it compared to the original.
       if (costAfter >= costBefore) {
-        // We failed to improve. Remove the new function and return the old
-        // target.
-        module->removeFunction(refinedTarget);
-        chosenTarget = target;
+        worthwhile = false;
       }
     }
 
-    // Mark the chosen target in the map, so we don't do this work again: every
-    // pair of target and refinedParams is only considered once.
-    funcParamMap[{target, refinedParams}] = chosenTarget;
+    // Memoize what we decided to call here.
+    funcContextMap[{target, context}] = worthwhile ? monoFunc->name : target;
+
+    if (worthwhile) {
+      // We are using the monomorphized function, so update the call and add it
+      // to the module.
+      call->operands.set(newOperands);
+      call->target = monoFunc->name;
+
+      wasm.addFunction(std::move(monoFunc));
+    }
+  }
+
+  // Create a monomorphized function from the original + the call context. It
+  // may have different parameters, results, and may include parts of the call
+  // context.
+  std::unique_ptr<Function> makeMonoFunctionWithContext(
+    Function* func, const CallContext& context, Module& wasm) {
+
+    // The context has an operand for each one in the old function, each of
+    // which may contain reverse-inlined content. A mismatch here means we did
+    // not build the context right, or are using it with the wrong function.
+    assert(context.operands.size() == func->getNumParams());
+
+    // Pick a new name.
+    auto newName = Names::getValidFunctionName(wasm, func->name);
+
+    // Copy the function as the base for the new one.
+    auto newFunc = ModuleUtils::copyFunctionWithoutAdd(func, wasm, newName);
+
+    // Generate the new signature, and apply it to the new function.
+    std::vector<Type> newParams;
+    for (auto* operand : context.operands) {
+      // A local.get is a value that arrives in a parameter. Anything else is
+      // something that we are reverse-inlining into the function, so we don't
+      // need a param for it.
+      if (operand->is<LocalGet>()) {
+        newParams.push_back(operand->type);
+      }
+    }
+    // TODO: support changes to results.
+    auto newResults = func->getResults();
+    newFunc->type = Signature(Type(newParams), newResults);
+
+    // We must update local indexes: the new function has a  potentially
+    // different number of parameters, and parameters are at the very bottom of
+    // the local index space. We are also replacing old params with vars. To
+    // track this, map each old index to the new one.
+    std::unordered_map<Index, Index> mappedLocals;
+    auto newParamsMinusOld =
+      newFunc->getParams().size() - func->getParams().size();
+    for (Index i = 0; i < func->getNumLocals(); i++) {
+      if (func->isParam(i)) {
+        // Old params become new vars inside the function. Below we'll copy the
+        // proper values into these vars.
+        // TODO: We could avoid a var + copy when it is trivial (atm we rely on
+        //       optimizations to remove it).
+        auto local = Builder::addVar(newFunc.get(), func->getLocalType(i));
+        mappedLocals[i] = local;
+      } else {
+        // This is a var. The only thing to adjust here is that the parameters
+        // are changing.
+        mappedLocals[i] = i + newParamsMinusOld;
+      }
+    }
+
+    // Copy over local names to help debugging.
+    if (!func->localNames.empty()) {
+      newFunc->localNames.clear();
+      newFunc->localIndices.clear();
+      for (Index i = 0; i < func->getNumLocals(); i++) {
+        auto oldName = func->getLocalNameOrDefault(i);
+        if (oldName.isNull()) {
+          continue;
+        }
+
+        auto newIndex = mappedLocals[i];
+        auto newName = Names::getValidLocalName(*newFunc.get(), oldName);
+        newFunc->localNames[newIndex] = newName;
+        newFunc->localIndices[newName] = newIndex;
+      }
+    };
+
+    Builder builder(wasm);
+
+    // Surrounding the main body is the reverse-inlined content from the call
+    // context, like this:
+    //
+    //  (func $monomorphized
+    //    (..reverse-inlined parameter..)
+    //    (..old body..)
+    //  )
+    //
+    // For example, if a function that simply returns its input is called with a
+    // constant parameter, it will end up like this:
+    //
+    //  (func $monomorphized
+    //    (local $param i32)
+    //    (local.set $param (i32.const 42))  ;; reverse-inlined parameter
+    //    (local.get $param)                 ;; copied old body
+    //  )
+    //
+    // We need to add such an local.set in the prelude of the function for each
+    // operand in the context.
+    std::vector<Expression*> pre;
+    for (Index i = 0; i < context.operands.size(); i++) {
+      auto* operand = context.operands[i];
+
+      // Write the context operand (the reverse-inlined content) to the local
+      // we've allocated for this.
+      auto local = mappedLocals.at(i);
+      auto* value = ExpressionManipulator::copy(operand, wasm);
+      pre.push_back(builder.makeLocalSet(local, value));
+    }
+
+    // Map locals.
+    struct LocalUpdater : public PostWalker<LocalUpdater> {
+      const std::unordered_map<Index, Index>& mappedLocals;
+      LocalUpdater(const std::unordered_map<Index, Index>& mappedLocals)
+        : mappedLocals(mappedLocals) {}
+      void visitLocalGet(LocalGet* curr) { updateIndex(curr->index); }
+      void visitLocalSet(LocalSet* curr) { updateIndex(curr->index); }
+      void updateIndex(Index& index) {
+        auto iter = mappedLocals.find(index);
+        assert(iter != mappedLocals.end());
+        index = iter->second;
+      }
+    } localUpdater(mappedLocals);
+    localUpdater.walk(newFunc->body);
+
+    if (!pre.empty()) {
+      // Add the block after the prelude.
+      pre.push_back(newFunc->body);
+      newFunc->body = builder.makeBlock(pre);
+    }
 
-    return chosenTarget;
+    return newFunc;
   }
 
   // Run minimal function-level optimizations on a function. This optimizes at
@@ -219,19 +571,20 @@ struct Monomorphize : public Pass {
     // the entire point is that parameters now have more refined types, which
     // can lead to locals reading them being refinable as well.
     runner.add("local-subtyping");
+    // TODO: we need local propagation and escape analysis etc. -O3?
     runner.addDefaultFunctionOptimizationPasses();
     runner.setIsNested(true);
     runner.runOnFunction(func);
   }
 
-  // Maps [func name, param types] to the name of a new function whose params
-  // have those types.
+  // Maps [func name, call info] to the name of a new function which is a
+  // monomorphization of that function, specialized to that call info.
   //
-  // Note that this can contain funcParamMap{A, types} = A, that is, that maps
+  // Note that this can contain funcContextMap{A, ...} = A, that is, that maps
   // a function name to itself. That indicates we found no benefit from
-  // refining with those particular types, and saves us from computing it again
+  // monomorphizing with that context, and saves us from computing it again
   // later on.
-  std::unordered_map<std::pair<Name, Type>, Name> funcParamMap;
+  std::unordered_map<std::pair<Name, CallContext>, Name> funcContextMap;
 };
 
 } // anonymous namespace
diff --git a/test/lit/passes/monomorphize-consts.wast b/test/lit/passes/monomorphize-consts.wast
new file mode 100644
index 000000000..1dbdf1592
--- /dev/null
+++ b/test/lit/passes/monomorphize-consts.wast
@@ -0,0 +1,635 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
+
+;; As in monomorphize-types.wast, test in both "always" mode, which always
+;; monomorphizes, and in "careful" mode which does it only when it appears to
+;; actually help.
+
+;; RUN: foreach %s %t wasm-opt --monomorphize-always -all -S -o - | filecheck %s --check-prefix ALWAYS
+;; RUN: foreach %s %t wasm-opt --monomorphize        -all -S -o - | filecheck %s --check-prefix CAREFUL
+
+(module
+  ;; Test that constants are monomorphized.
+
+  ;; ALWAYS:      (type $0 (func (param i32)))
+
+  ;; ALWAYS:      (type $1 (func))
+
+  ;; ALWAYS:      (type $2 (func (param i32 i32 funcref stringref)))
+
+  ;; ALWAYS:      (type $3 (func (param i32) (result i32)))
+
+  ;; ALWAYS:      (type $4 (func (result i32)))
+
+  ;; ALWAYS:      (import "a" "b" (func $import (type $0) (param i32)))
+  ;; CAREFUL:      (type $0 (func))
+
+  ;; CAREFUL:      (type $1 (func (param i32 i32 funcref stringref)))
+
+  ;; CAREFUL:      (type $2 (func (param i32)))
+
+  ;; CAREFUL:      (type $3 (func (param i32) (result i32)))
+
+  ;; CAREFUL:      (import "a" "b" (func $import (type $2) (param i32)))
+  (import "a" "b" (func $import (param i32)))
+
+  ;; ALWAYS:      (elem declare func $calls)
+
+  ;; ALWAYS:      (func $calls (type $1)
+  ;; ALWAYS-NEXT:  (call $target_9
+  ;; ALWAYS-NEXT:   (i32.eqz
+  ;; ALWAYS-NEXT:    (i32.const 2)
+  ;; ALWAYS-NEXT:   )
+  ;; ALWAYS-NEXT:  )
+  ;; ALWAYS-NEXT:  (call $target_9
+  ;; ALWAYS-NEXT:   (i32.eqz
+  ;; ALWAYS-NEXT:    (i32.const 3)
+  ;; ALWAYS-NEXT:   )
+  ;; ALWAYS-NEXT:  )
+  ;; ALWAYS-NEXT:  (call $target_10
+  ;; ALWAYS-NEXT:   (i32.eqz
+  ;; ALWAYS-NEXT:    (i32.const 2)
+  ;; ALWAYS-NEXT:   )
+  ;; ALWAYS-NEXT:  )
+  ;; ALWAYS-NEXT: )
+  ;; CAREFUL:      (elem declare func $calls)
+
+  ;; CAREFUL:      (func $calls (type $0)
+  ;; CAREFUL-NEXT:  (call $target
+  ;; CAREFUL-NEXT:   (i32.const 1)
+  ;; CAREFUL-NEXT:   (i32.eqz
+  ;; CAREFUL-NEXT:    (i32.const 2)
+  ;; CAREFUL-NEXT:   )
+  ;; CAREFUL-NEXT:   (ref.func $calls)
+  ;; CAREFUL-NEXT:   (string.const "foo")
+  ;; CAREFUL-NEXT:  )
+  ;; CAREFUL-NEXT:  (call $target
+  ;; CAREFUL-NEXT:   (i32.const 1)
+  ;; CAREFUL-NEXT:   (i32.eqz
+  ;; CAREFUL-NEXT:    (i32.const 3)
+  ;; CAREFUL-NEXT:   )
+  ;; CAREFUL-NEXT:   (ref.func $calls)
+  ;; CAREFUL-NEXT:   (string.const "foo")
+  ;; CAREFUL-NEXT:  )
+  ;; CAREFUL-NEXT:  (call $target
+  ;; CAREFUL-NEXT:   (i32.const 3)
+  ;; CAREFUL-NEXT:   (i32.eqz
+  ;; CAREFUL-NEXT:    (i32.const 2)
+  ;; CAREFUL-NEXT:   )
+  ;; CAREFUL-NEXT:   (ref.func $calls)
+  ;; CAREFUL-NEXT:   (string.const "foo")
+  ;; CAREFUL-NEXT:  )
+  ;; CAREFUL-NEXT: )
+  (func $calls
+    ;; All but the eqz parameter are constants that can be handled. In ALWAYS
+    ;; mode we optimize and remove all but that one. In CAREFUL mode we end up
+    ;; not doing anything, as the target function's body optimizes out anyhow
+    ;; (so there is no benefit from monomorphization, after opts).
+    (call $target
+      (i32.const 1)
+      (i32.eqz
+        (i32.const 2)
+      )
+      (ref.func $calls)
+      (string.const "foo")
+    )
+
+    ;; This has the same effective call context, as the constants are identical,
+    ;; and the non-constant is different, which we keep as a variable anyhow.
+    ;; This will call the same refined function as the previous call.
+    (call $target
+      (i32.const 1)
+      (i32.eqz
+        (i32.const 3)  ;; this changed
+      )
+      (ref.func $calls)
+      (string.const "foo")
+    )
+
+    ;; This has a different call context: one constant is different, so we'll
+    ;; call a different refined function.
+    (call $target
+      (i32.const 3)  ;; this changed
+      (i32.eqz
+        (i32.const 2)
+      )
+      (ref.func $calls)
+      (string.const "foo")
+    )
+  )
+
+  ;; ALWAYS:      (func $more-calls (type $1)
+  ;; ALWAYS-NEXT:  (call $target_9
+  ;; ALWAYS-NEXT:   (i32.eqz
+  ;; ALWAYS-NEXT:    (i32.const 999)
+  ;; ALWAYS-NEXT:   )
+  ;; ALWAYS-NEXT:  )
+  ;; ALWAYS-NEXT:  (call $other-target_11
+  ;; ALWAYS-NEXT:   (i32.eqz
+  ;; ALWAYS-NEXT:    (i32.const 999)
+  ;; ALWAYS-NEXT:   )
+  ;; ALWAYS-NEXT:  )
+  ;; ALWAYS-NEXT:  (call $work_12
+  ;; ALWAYS-NEXT:   (i32.eqz
+  ;; ALWAYS-NEXT:    (i32.const 999)
+  ;; ALWAYS-NEXT:   )
+  ;; ALWAYS-NEXT:  )
+  ;; ALWAYS-NEXT: )
+  ;; CAREFUL:      (func $more-calls (type $0)
+  ;; CAREFUL-NEXT:  (call $target
+  ;; CAREFUL-NEXT:   (i32.const 1)
+  ;; CAREFUL-NEXT:   (i32.eqz
+  ;; CAREFUL-NEXT:    (i32.const 999)
+  ;; CAREFUL-NEXT:   )
+  ;; CAREFUL-NEXT:   (ref.func $calls)
+  ;; CAREFUL-NEXT:   (string.const "foo")
+  ;; CAREFUL-NEXT:  )
+  ;; CAREFUL-NEXT:  (call $other-target
+  ;; CAREFUL-NEXT:   (i32.const 1)
+  ;; CAREFUL-NEXT:   (i32.eqz
+  ;; CAREFUL-NEXT:    (i32.const 999)
+  ;; CAREFUL-NEXT:   )
+  ;; CAREFUL-NEXT:   (ref.func $calls)
+  ;; CAREFUL-NEXT:   (string.const "foo")
+  ;; CAREFUL-NEXT:  )
+  ;; CAREFUL-NEXT:  (call $work_9
+  ;; CAREFUL-NEXT:   (i32.eqz
+  ;; CAREFUL-NEXT:    (i32.const 999)
+  ;; CAREFUL-NEXT:   )
+  ;; CAREFUL-NEXT:  )
+  ;; CAREFUL-NEXT: )
+  (func $more-calls
+    ;; Identical to the first call in the previous function (except for the non-
+    ;; constant second param, which is ok to be different). We should call the
+    ;; same refined function before, even though we are in a different
+    ;; function here.
+    (call $target
+      (i32.const 1)
+      (i32.eqz
+        (i32.const 999)
+      )
+      (ref.func $calls)
+      (string.const "foo")
+    )
+
+    ;; Call a different function but with the exact same params. This tests that
+    ;; we handle identical contexts but with different functions. This will call
+    ;; a different refined function than before
+    (call $other-target
+      (i32.const 1)
+      (i32.eqz
+        (i32.const 999)
+      )
+      (ref.func $calls)
+      (string.const "foo")
+    )
+
+    ;; Call yet another different function with the same context, this time the
+    ;; function is worth optimizing even in CAREFUL mode, as the constants
+    ;; unlock actual work.
+    (call $work
+      (i32.const 3)
+      (i32.eqz
+        (i32.const 999)
+      )
+      (ref.func $calls)
+      (string.const "foo")
+    )
+  )
+
+  ;; ALWAYS:      (func $fail (type $1)
+  ;; ALWAYS-NEXT:  (call $target
+  ;; ALWAYS-NEXT:   (i32.eqz
+  ;; ALWAYS-NEXT:    (i32.const 1)
+  ;; ALWAYS-NEXT:   )
+  ;; ALWAYS-NEXT:   (i32.eqz
+  ;; ALWAYS-NEXT:    (i32.const 999)
+  ;; ALWAYS-NEXT:   )
+  ;; ALWAYS-NEXT:   (block (result funcref)
+  ;; ALWAYS-NEXT:    (ref.func $calls)
+  ;; ALWAYS-NEXT:   )
+  ;; ALWAYS-NEXT:   (block (result stringref)
+  ;; ALWAYS-NEXT:    (string.const "foo")
+  ;; ALWAYS-NEXT:   )
+  ;; ALWAYS-NEXT:  )
+  ;; ALWAYS-NEXT: )
+  ;; CAREFUL:      (func $fail (type $0)
+  ;; CAREFUL-NEXT:  (call $target
+  ;; CAREFUL-NEXT:   (i32.eqz
+  ;; CAREFUL-NEXT:    (i32.const 1)
+  ;; CAREFUL-NEXT:   )
+  ;; CAREFUL-NEXT:   (i32.eqz
+  ;; CAREFUL-NEXT:    (i32.const 999)
+  ;; CAREFUL-NEXT:   )
+  ;; CAREFUL-NEXT:   (block (result funcref)
+  ;; CAREFUL-NEXT:    (ref.func $calls)
+  ;; CAREFUL-NEXT:   )
+  ;; CAREFUL-NEXT:   (block (result stringref)
+  ;; CAREFUL-NEXT:    (string.const "foo")
+  ;; CAREFUL-NEXT:   )
+  ;; CAREFUL-NEXT:  )
+  ;; CAREFUL-NEXT: )
+  (func $fail
+    ;; No operand is a constant here, so we do nothing.
+    (call $target
+      (i32.eqz
+        (i32.const 1)
+      )
+      (i32.eqz
+        (i32.const 999)
+      )
+      (block (result funcref)
+        (ref.func $calls)
+      )
+      (block (result stringref)
+        (string.const "foo")
+      )
+    )
+  )
+
+  ;; ALWAYS:      (func $mutual-recursion-a (type $3) (param $x i32) (result i32)
+  ;; ALWAYS-NEXT:  (if (result i32)
+  ;; ALWAYS-NEXT:   (local.get $x)
+  ;; ALWAYS-NEXT:   (then
+  ;; ALWAYS-NEXT:    (i32.add
+  ;; ALWAYS-NEXT:     (call $mutual-recursion-b
+  ;; ALWAYS-NEXT:      (local.get $x)
+  ;; ALWAYS-NEXT:     )
+  ;; ALWAYS-NEXT:     (call $mutual-recursion-b_13)
+  ;; ALWAYS-NEXT:    )
+  ;; ALWAYS-NEXT:   )
+  ;; ALWAYS-NEXT:   (else
+  ;; ALWAYS-NEXT:    (i32.const 42)
+  ;; ALWAYS-NEXT:   )
+  ;; ALWAYS-NEXT:  )
+  ;; ALWAYS-NEXT: )
+  ;; CAREFUL:      (func $mutual-recursion-a (type $3) (param $0 i32) (result i32)
+  ;; CAREFUL-NEXT:  (if (result i32)
+  ;; CAREFUL-NEXT:   (local.get $0)
+  ;; CAREFUL-NEXT:   (then
+  ;; CAREFUL-NEXT:    (i32.add
+  ;; CAREFUL-NEXT:     (call $mutual-recursion-b
+  ;; CAREFUL-NEXT:      (local.get $0)
+  ;; CAREFUL-NEXT:     )
+  ;; CAREFUL-NEXT:     (call $mutual-recursion-b
+  ;; CAREFUL-NEXT:      (i32.const 0)
+  ;; CAREFUL-NEXT:     )
+  ;; CAREFUL-NEXT:    )
+  ;; CAREFUL-NEXT:   )
+  ;; CAREFUL-NEXT:   (else
+  ;; CAREFUL-NEXT:    (i32.const 42)
+  ;; CAREFUL-NEXT:   )
+  ;; CAREFUL-NEXT:  )
+  ;; CAREFUL-NEXT: )
+  (func $mutual-recursion-a (param $x i32) (result i32)
+    ;; We ignore direct recursion (see test in other monomorphize-types) but we
+    ;; do handle mutual recursion normally. This also tests another function
+    ;; that can be optimized, with a different signature than before.
+    (if (result i32)
+      (local.get $x)
+      (then
+        (i32.add
+          ;; This call cannot be monomorphized.
+          (call $mutual-recursion-b
+            (local.get $x)
+          )
+          ;; The constant here allows us to monomorphize (in ALWAYS; to see the
+          ;; benefit in CAREFUL, we need additional cycles, which we do not do
+          ;; yet).
+          (call $mutual-recursion-b
+            (i32.const 0)
+          )
+        )
+      )
+      (else
+        (i32.const 42)
+      )
+    )
+  )
+
+  ;; ALWAYS:      (func $mutual-recursion-b (type $3) (param $x i32) (result i32)
+  ;; ALWAYS-NEXT:  (i32.add
+  ;; ALWAYS-NEXT:   (call $mutual-recursion-a_14)
+  ;; ALWAYS-NEXT:   (i32.const 1337)
+  ;; ALWAYS-NEXT:  )
+  ;; ALWAYS-NEXT: )
+  ;; CAREFUL:      (func $mutual-recursion-b (type $3) (param $0 i32) (result i32)
+  ;; CAREFUL-NEXT:  (i32.add
+  ;; CAREFUL-NEXT:   (call $mutual-recursion-a
+  ;; CAREFUL-NEXT:    (i32.const 0)
+  ;; CAREFUL-NEXT:   )
+  ;; CAREFUL-NEXT:   (i32.const 1337)
+  ;; CAREFUL-NEXT:  )
+  ;; CAREFUL-NEXT: )
+  (func $mutual-recursion-b (param $x i32) (result i32)
+    (i32.add
+      ;; This can be optimized (in ALWAYS; to see the benefit in CAREFUL, we
+      ;; need additional cycles, which we do not do yet).
+      (call $mutual-recursion-a
+        (i32.const 0)
+      )
+      (i32.const 1337)
+    )
+  )
+
+  ;; ALWAYS:      (func $target (type $2) (param $x i32) (param $y i32) (param $func funcref) (param $str stringref)
+  ;; ALWAYS-NEXT:  (drop
+  ;; ALWAYS-NEXT:   (local.get $x)
+  ;; ALWAYS-NEXT:  )
+  ;; ALWAYS-NEXT:  (drop
+  ;; ALWAYS-NEXT:   (local.get $y)
+  ;; ALWAYS-NEXT:  )
+  ;; ALWAYS-NEXT:  (drop
+  ;; ALWAYS-NEXT:   (local.get $func)
+  ;; ALWAYS-NEXT:  )
+  ;; ALWAYS-NEXT:  (drop
+  ;; ALWAYS-NEXT:   (local.get $str)
+  ;; ALWAYS-NEXT:  )
+  ;; ALWAYS-NEXT: )
+  ;; CAREFUL:      (func $target (type $1) (param $0 i32) (param $1 i32) (param $2 funcref) (param $3 stringref)
+  ;; CAREFUL-NEXT:  (nop)
+  ;; CAREFUL-NEXT: )
+  (func $target (param $x i32) (param $y i32) (param $func funcref) (param $str stringref)
+    (drop
+      (local.get $x)
+    )
+    (drop
+      (local.get $y)
+    )
+    (drop
+      (local.get $func)
+    )
+    (drop
+      (local.get $str)
+    )
+  )
+
+  ;; ALWAYS:      (func $other-target (type $2) (param $x i32) (param $y i32) (param $func funcref) (param $str stringref)
+  ;; ALWAYS-NEXT:  (drop
+  ;; ALWAYS-NEXT:   (local.get $func)
+  ;; ALWAYS-NEXT:  )
+  ;; ALWAYS-NEXT:  (drop
+  ;; ALWAYS-NEXT:   (local.get $str)
+  ;; ALWAYS-NEXT:  )
+  ;; ALWAYS-NEXT:  (drop
+  ;; ALWAYS-NEXT:   (local.get $x)
+  ;; ALWAYS-NEXT:  )
+  ;; ALWAYS-NEXT:  (drop
+  ;; ALWAYS-NEXT:   (local.get $y)
+  ;; ALWAYS-NEXT:  )
+  ;; ALWAYS-NEXT: )
+  ;; CAREFUL:      (func $other-target (type $1) (param $0 i32) (param $1 i32) (param $2 funcref) (param $3 stringref)
+  ;; CAREFUL-NEXT:  (nop)
+  ;; CAREFUL-NEXT: )
+  (func $other-target (param $x i32) (param $y i32) (param $func funcref) (param $str stringref)
+    ;; Similar to $target, but the inside is a little reordered.
+    (drop
+      (local.get $func)
+    )
+    (drop
+      (local.get $str)
+    )
+    (drop
+      (local.get $x)
+    )
+    (drop
+      (local.get $y)
+    )
+  )
+
+  ;; ALWAYS:      (func $work (type $2) (param $x i32) (param $y i32) (param $func funcref) (param $str stringref)
+  ;; ALWAYS-NEXT:  (call $import
+  ;; ALWAYS-NEXT:   (i32.add
+  ;; ALWAYS-NEXT:    (local.get $x)
+  ;; ALWAYS-NEXT:    (i32.add
+  ;; ALWAYS-NEXT:     (ref.is_null
+  ;; ALWAYS-NEXT:      (local.get $func)
+  ;; ALWAYS-NEXT:     )
+  ;; ALWAYS-NEXT:     (ref.is_null
+  ;; ALWAYS-NEXT:      (local.get $str)
+  ;; ALWAYS-NEXT:     )
+  ;; ALWAYS-NEXT:    )
+  ;; ALWAYS-NEXT:   )
+  ;; ALWAYS-NEXT:  )
+  ;; ALWAYS-NEXT:  (call $import
+  ;; ALWAYS-NEXT:   (local.get $y)
+  ;; ALWAYS-NEXT:  )
+  ;; ALWAYS-NEXT: )
+  ;; CAREFUL:      (func $work (type $1) (param $0 i32) (param $1 i32) (param $2 funcref) (param $3 stringref)
+  ;; CAREFUL-NEXT:  (call $import
+  ;; CAREFUL-NEXT:   (i32.add
+  ;; CAREFUL-NEXT:    (local.get $0)
+  ;; CAREFUL-NEXT:    (i32.add
+  ;; CAREFUL-NEXT:     (ref.is_null
+  ;; CAREFUL-NEXT:      (local.get $2)
+  ;; CAREFUL-NEXT:     )
+  ;; CAREFUL-NEXT:     (ref.is_null
+  ;; CAREFUL-NEXT:      (local.get $3)
+  ;; CAREFUL-NEXT:     )
+  ;; CAREFUL-NEXT:    )
+  ;; CAREFUL-NEXT:   )
+  ;; CAREFUL-NEXT:  )
+  ;; CAREFUL-NEXT:  (call $import
+  ;; CAREFUL-NEXT:   (local.get $1)
+  ;; CAREFUL-NEXT:  )
+  ;; CAREFUL-NEXT: )
+  (func $work (param $x i32) (param $y i32) (param $func funcref) (param $str stringref)
+    ;; Similar to $target, but the inside has actual work that can be optimized
+    ;; away if we have constants here. Specifically the refs are not null and
+    ;; $x is 3, so we sent 3 to the import here.
+    (call $import
+      (i32.add
+        (local.get $x)
+        (i32.add
+          (ref.is_null
+            (local.get $func)
+          )
+          (ref.is_null
+            (local.get $str)
+          )
+        )
+      )
+    )
+    ;; This parameter is unknown, so we can't do any optimization in this part.
+    (call $import
+      (local.get $y)
+    )
+  )
+)
+;; ALWAYS:      (func $target_9 (type $0) (param $0 i32)
+;; ALWAYS-NEXT:  (local $x i32)
+;; ALWAYS-NEXT:  (local $y i32)
+;; ALWAYS-NEXT:  (local $func funcref)
+;; ALWAYS-NEXT:  (local $str stringref)
+;; ALWAYS-NEXT:  (local.set $x
+;; ALWAYS-NEXT:   (i32.const 1)
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT:  (local.set $y
+;; ALWAYS-NEXT:   (local.get $0)
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT:  (local.set $func
+;; ALWAYS-NEXT:   (ref.func $calls)
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT:  (local.set $str
+;; ALWAYS-NEXT:   (string.const "foo")
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT:  (block
+;; ALWAYS-NEXT:   (drop
+;; ALWAYS-NEXT:    (local.get $x)
+;; ALWAYS-NEXT:   )
+;; ALWAYS-NEXT:   (drop
+;; ALWAYS-NEXT:    (local.get $y)
+;; ALWAYS-NEXT:   )
+;; ALWAYS-NEXT:   (drop
+;; ALWAYS-NEXT:    (local.get $func)
+;; ALWAYS-NEXT:   )
+;; ALWAYS-NEXT:   (drop
+;; ALWAYS-NEXT:    (local.get $str)
+;; ALWAYS-NEXT:   )
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT: )
+
+;; ALWAYS:      (func $target_10 (type $0) (param $0 i32)
+;; ALWAYS-NEXT:  (local $x i32)
+;; ALWAYS-NEXT:  (local $y i32)
+;; ALWAYS-NEXT:  (local $func funcref)
+;; ALWAYS-NEXT:  (local $str stringref)
+;; ALWAYS-NEXT:  (local.set $x
+;; ALWAYS-NEXT:   (i32.const 3)
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT:  (local.set $y
+;; ALWAYS-NEXT:   (local.get $0)
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT:  (local.set $func
+;; ALWAYS-NEXT:   (ref.func $calls)
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT:  (local.set $str
+;; ALWAYS-NEXT:   (string.const "foo")
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT:  (block
+;; ALWAYS-NEXT:   (drop
+;; ALWAYS-NEXT:    (local.get $x)
+;; ALWAYS-NEXT:   )
+;; ALWAYS-NEXT:   (drop
+;; ALWAYS-NEXT:    (local.get $y)
+;; ALWAYS-NEXT:   )
+;; ALWAYS-NEXT:   (drop
+;; ALWAYS-NEXT:    (local.get $func)
+;; ALWAYS-NEXT:   )
+;; ALWAYS-NEXT:   (drop
+;; ALWAYS-NEXT:    (local.get $str)
+;; ALWAYS-NEXT:   )
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT: )
+
+;; ALWAYS:      (func $other-target_11 (type $0) (param $0 i32)
+;; ALWAYS-NEXT:  (local $x i32)
+;; ALWAYS-NEXT:  (local $y i32)
+;; ALWAYS-NEXT:  (local $func funcref)
+;; ALWAYS-NEXT:  (local $str stringref)
+;; ALWAYS-NEXT:  (local.set $x
+;; ALWAYS-NEXT:   (i32.const 1)
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT:  (local.set $y
+;; ALWAYS-NEXT:   (local.get $0)
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT:  (local.set $func
+;; ALWAYS-NEXT:   (ref.func $calls)
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT:  (local.set $str
+;; ALWAYS-NEXT:   (string.const "foo")
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT:  (block
+;; ALWAYS-NEXT:   (drop
+;; ALWAYS-NEXT:    (local.get $func)
+;; ALWAYS-NEXT:   )
+;; ALWAYS-NEXT:   (drop
+;; ALWAYS-NEXT:    (local.get $str)
+;; ALWAYS-NEXT:   )
+;; ALWAYS-NEXT:   (drop
+;; ALWAYS-NEXT:    (local.get $x)
+;; ALWAYS-NEXT:   )
+;; ALWAYS-NEXT:   (drop
+;; ALWAYS-NEXT:    (local.get $y)
+;; ALWAYS-NEXT:   )
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT: )
+
+;; ALWAYS:      (func $work_12 (type $0) (param $0 i32)
+;; ALWAYS-NEXT:  (local $x i32)
+;; ALWAYS-NEXT:  (local $y i32)
+;; ALWAYS-NEXT:  (local $func funcref)
+;; ALWAYS-NEXT:  (local $str stringref)
+;; ALWAYS-NEXT:  (local.set $x
+;; ALWAYS-NEXT:   (i32.const 3)
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT:  (local.set $y
+;; ALWAYS-NEXT:   (local.get $0)
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT:  (local.set $func
+;; ALWAYS-NEXT:   (ref.func $calls)
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT:  (local.set $str
+;; ALWAYS-NEXT:   (string.const "foo")
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT:  (block
+;; ALWAYS-NEXT:   (call $import
+;; ALWAYS-NEXT:    (i32.add
+;; ALWAYS-NEXT:     (local.get $x)
+;; ALWAYS-NEXT:     (i32.add
+;; ALWAYS-NEXT:      (ref.is_null
+;; ALWAYS-NEXT:       (local.get $func)
+;; ALWAYS-NEXT:      )
+;; ALWAYS-NEXT:      (ref.is_null
+;; ALWAYS-NEXT:       (local.get $str)
+;; ALWAYS-NEXT:      )
+;; ALWAYS-NEXT:     )
+;; ALWAYS-NEXT:    )
+;; ALWAYS-NEXT:   )
+;; ALWAYS-NEXT:   (call $import
+;; ALWAYS-NEXT:    (local.get $y)
+;; ALWAYS-NEXT:   )
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT: )
+
+;; ALWAYS:      (func $mutual-recursion-b_13 (type $4) (result i32)
+;; ALWAYS-NEXT:  (local $x i32)
+;; ALWAYS-NEXT:  (local.set $x
+;; ALWAYS-NEXT:   (i32.const 0)
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT:  (i32.add
+;; ALWAYS-NEXT:   (call $mutual-recursion-a
+;; ALWAYS-NEXT:    (i32.const 0)
+;; ALWAYS-NEXT:   )
+;; ALWAYS-NEXT:   (i32.const 1337)
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT: )
+
+;; ALWAYS:      (func $mutual-recursion-a_14 (type $4) (result i32)
+;; ALWAYS-NEXT:  (local $x i32)
+;; ALWAYS-NEXT:  (local.set $x
+;; ALWAYS-NEXT:   (i32.const 0)
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT:  (if (result i32)
+;; ALWAYS-NEXT:   (local.get $x)
+;; ALWAYS-NEXT:   (then
+;; ALWAYS-NEXT:    (i32.add
+;; ALWAYS-NEXT:     (call $mutual-recursion-b
+;; ALWAYS-NEXT:      (local.get $x)
+;; ALWAYS-NEXT:     )
+;; ALWAYS-NEXT:     (call $mutual-recursion-b_13)
+;; ALWAYS-NEXT:    )
+;; ALWAYS-NEXT:   )
+;; ALWAYS-NEXT:   (else
+;; ALWAYS-NEXT:    (i32.const 42)
+;; ALWAYS-NEXT:   )
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT: )
+
+;; CAREFUL:      (func $work_9 (type $2) (param $0 i32)
+;; CAREFUL-NEXT:  (call $import
+;; CAREFUL-NEXT:   (i32.const 3)
+;; CAREFUL-NEXT:  )
+;; CAREFUL-NEXT:  (call $import
+;; CAREFUL-NEXT:   (local.get $0)
+;; CAREFUL-NEXT:  )
+;; CAREFUL-NEXT: )
diff --git a/test/lit/passes/monomorphize-mvp.wast b/test/lit/passes/monomorphize-mvp.wast
new file mode 100644
index 000000000..567a4c2ce
--- /dev/null
+++ b/test/lit/passes/monomorphize-mvp.wast
@@ -0,0 +1,94 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
+
+;; As in monomorphize-types.wast, test in both "always" mode, which always
+;; monomorphizes, and in "careful" mode which does it only when it appears to
+;; actually help.
+
+;; This file specifically tests that we optimize constants in MVP mode (most
+;; of the pass benefits from other features, but we should still do work in
+;; MVP).
+
+;; RUN: foreach %s %t wasm-opt --monomorphize-always -S -o - | filecheck %s --check-prefix ALWAYS
+;; RUN: foreach %s %t wasm-opt --monomorphize        -S -o - | filecheck %s --check-prefix CAREFUL
+
+(module
+  ;; ALWAYS:      (type $0 (func (result i32)))
+
+  ;; ALWAYS:      (type $1 (func (param i32 i32) (result i32)))
+
+  ;; ALWAYS:      (type $2 (func (param i32) (result i32)))
+
+  ;; ALWAYS:      (func $call (result i32)
+  ;; ALWAYS-NEXT:  (call $target_2
+  ;; ALWAYS-NEXT:   (i32.eqz
+  ;; ALWAYS-NEXT:    (i32.const 2)
+  ;; ALWAYS-NEXT:   )
+  ;; ALWAYS-NEXT:  )
+  ;; ALWAYS-NEXT: )
+  ;; CAREFUL:      (type $0 (func (result i32)))
+
+  ;; CAREFUL:      (type $1 (func (param i32 i32) (result i32)))
+
+  ;; CAREFUL:      (type $2 (func (param i32) (result i32)))
+
+  ;; CAREFUL:      (func $call (result i32)
+  ;; CAREFUL-NEXT:  (call $target_2
+  ;; CAREFUL-NEXT:   (i32.eqz
+  ;; CAREFUL-NEXT:    (i32.const 2)
+  ;; CAREFUL-NEXT:   )
+  ;; CAREFUL-NEXT:  )
+  ;; CAREFUL-NEXT: )
+  (func $call (result i32)
+    ;; The second parameter can be monomorphized.
+    (call $target
+      (i32.eqz
+        (i32.const 2)
+      )
+      (i32.const 1)
+    )
+  )
+
+  ;; ALWAYS:      (func $target (param $x i32) (param $y i32) (result i32)
+  ;; ALWAYS-NEXT:  (select
+  ;; ALWAYS-NEXT:   (local.get $x)
+  ;; ALWAYS-NEXT:   (i32.const 42)
+  ;; ALWAYS-NEXT:   (local.get $y)
+  ;; ALWAYS-NEXT:  )
+  ;; ALWAYS-NEXT: )
+  ;; CAREFUL:      (func $target (param $0 i32) (param $1 i32) (result i32)
+  ;; CAREFUL-NEXT:  (select
+  ;; CAREFUL-NEXT:   (local.get $0)
+  ;; CAREFUL-NEXT:   (i32.const 42)
+  ;; CAREFUL-NEXT:   (local.get $1)
+  ;; CAREFUL-NEXT:  )
+  ;; CAREFUL-NEXT: )
+  (func $target (param $x i32) (param $y i32) (result i32)
+    ;; The monomorphized copies of this function will be able to remove the
+    ;; select, in CAREFUL (which optimizes).
+    (select
+      (local.get $x)
+      (i32.const 42)
+      (local.get $y)
+    )
+  )
+)
+
+;; ALWAYS:      (func $target_2 (param $0 i32) (result i32)
+;; ALWAYS-NEXT:  (local $x i32)
+;; ALWAYS-NEXT:  (local $y i32)
+;; ALWAYS-NEXT:  (local.set $x
+;; ALWAYS-NEXT:   (local.get $0)
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT:  (local.set $y
+;; ALWAYS-NEXT:   (i32.const 1)
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT:  (select
+;; ALWAYS-NEXT:   (local.get $x)
+;; ALWAYS-NEXT:   (i32.const 42)
+;; ALWAYS-NEXT:   (local.get $y)
+;; ALWAYS-NEXT:  )
+;; ALWAYS-NEXT: )
+
+;; CAREFUL:      (func $target_2 (param $0 i32) (result i32)
+;; CAREFUL-NEXT:  (local.get $0)
+;; CAREFUL-NEXT: )
diff --git a/test/lit/passes/monomorphize.wast b/test/lit/passes/monomorphize-types.wast
index 43208a97d..3133d88c5 100644
--- a/test/lit/passes/monomorphize.wast
+++ b/test/lit/passes/monomorphize-types.wast
@@ -115,7 +115,11 @@
 )
 
 
-;; ALWAYS:      (func $refinable_4 (type $4) (param $ref (ref $B))
+;; ALWAYS:      (func $refinable_4 (type $4) (param $0 (ref $B))
+;; ALWAYS-NEXT:  (local $ref (ref $A))
+;; ALWAYS-NEXT:  (local.set $ref
+;; ALWAYS-NEXT:   (local.get $0)
+;; ALWAYS-NEXT:  )
 ;; ALWAYS-NEXT:  (drop
 ;; ALWAYS-NEXT:   (local.get $ref)
 ;; ALWAYS-NEXT:  )
@@ -186,17 +190,17 @@
 )
 
 
-;; ALWAYS:      (func $refinable_2 (type $4) (param $ref (ref $B))
+;; ALWAYS:      (func $refinable_2 (type $4) (param $0 (ref $B))
 ;; ALWAYS-NEXT:  (local $unref (ref $A))
-;; ALWAYS-NEXT:  (local $2 (ref $A))
-;; ALWAYS-NEXT:  (local.set $2
-;; ALWAYS-NEXT:   (local.get $ref)
+;; ALWAYS-NEXT:  (local $ref (ref $A))
+;; ALWAYS-NEXT:  (local.set $ref
+;; ALWAYS-NEXT:   (local.get $0)
 ;; ALWAYS-NEXT:  )
 ;; ALWAYS-NEXT:  (block
 ;; ALWAYS-NEXT:   (local.set $unref
-;; ALWAYS-NEXT:    (local.get $2)
+;; ALWAYS-NEXT:    (local.get $ref)
 ;; ALWAYS-NEXT:   )
-;; ALWAYS-NEXT:   (local.set $2
+;; ALWAYS-NEXT:   (local.set $ref
 ;; ALWAYS-NEXT:    (local.get $unref)
 ;; ALWAYS-NEXT:   )
 ;; ALWAYS-NEXT:  )
@@ -306,19 +310,31 @@
   )
 )
 
-;; ALWAYS:      (func $refinable1_4 (type $5) (param $ref (ref $B))
+;; ALWAYS:      (func $refinable1_4 (type $5) (param $0 (ref $B))
+;; ALWAYS-NEXT:  (local $ref (ref $A))
+;; ALWAYS-NEXT:  (local.set $ref
+;; ALWAYS-NEXT:   (local.get $0)
+;; ALWAYS-NEXT:  )
 ;; ALWAYS-NEXT:  (drop
 ;; ALWAYS-NEXT:   (local.get $ref)
 ;; ALWAYS-NEXT:  )
 ;; ALWAYS-NEXT: )
 
-;; ALWAYS:      (func $refinable1_5 (type $6) (param $ref (ref $C))
+;; ALWAYS:      (func $refinable1_5 (type $6) (param $0 (ref $C))
+;; ALWAYS-NEXT:  (local $ref (ref $A))
+;; ALWAYS-NEXT:  (local.set $ref
+;; ALWAYS-NEXT:   (local.get $0)
+;; ALWAYS-NEXT:  )
 ;; ALWAYS-NEXT:  (drop
 ;; ALWAYS-NEXT:   (local.get $ref)
 ;; ALWAYS-NEXT:  )
 ;; ALWAYS-NEXT: )
 
-;; ALWAYS:      (func $refinable2_6 (type $5) (param $ref (ref $B))
+;; ALWAYS:      (func $refinable2_6 (type $5) (param $0 (ref $B))
+;; ALWAYS-NEXT:  (local $ref (ref $A))
+;; ALWAYS-NEXT:  (local.set $ref
+;; ALWAYS-NEXT:   (local.get $0)
+;; ALWAYS-NEXT:  )
 ;; ALWAYS-NEXT:  (drop
 ;; ALWAYS-NEXT:   (local.get $ref)
 ;; ALWAYS-NEXT:  )
@@ -501,33 +517,39 @@
   )
 )
 
-;; ALWAYS:      (func $refinable_3 (type $2) (param $ref (ref $B))
+;; ALWAYS:      (func $refinable_3 (type $2) (param $0 (ref $B))
 ;; ALWAYS-NEXT:  (local $x (ref $A))
-;; ALWAYS-NEXT:  (call $import
-;; ALWAYS-NEXT:   (ref.cast (ref $B)
-;; ALWAYS-NEXT:    (local.get $ref)
-;; ALWAYS-NEXT:   )
+;; ALWAYS-NEXT:  (local $ref (ref $A))
+;; ALWAYS-NEXT:  (local.set $ref
+;; ALWAYS-NEXT:   (local.get $0)
 ;; ALWAYS-NEXT:  )
-;; ALWAYS-NEXT:  (local.set $x
-;; ALWAYS-NEXT:   (select (result (ref $B))
-;; ALWAYS-NEXT:    (local.get $ref)
-;; ALWAYS-NEXT:    (struct.new_default $B)
-;; ALWAYS-NEXT:    (global.get $global)
+;; ALWAYS-NEXT:  (block
+;; ALWAYS-NEXT:   (call $import
+;; ALWAYS-NEXT:    (ref.cast (ref $B)
+;; ALWAYS-NEXT:     (local.get $ref)
+;; ALWAYS-NEXT:    )
 ;; ALWAYS-NEXT:   )
-;; ALWAYS-NEXT:  )
-;; ALWAYS-NEXT:  (call $import
-;; ALWAYS-NEXT:   (ref.cast (ref $B)
-;; ALWAYS-NEXT:    (local.get $x)
+;; ALWAYS-NEXT:   (local.set $x
+;; ALWAYS-NEXT:    (select (result (ref $A))
+;; ALWAYS-NEXT:     (local.get $ref)
+;; ALWAYS-NEXT:     (struct.new_default $B)
+;; ALWAYS-NEXT:     (global.get $global)
+;; ALWAYS-NEXT:    )
 ;; ALWAYS-NEXT:   )
-;; ALWAYS-NEXT:  )
-;; ALWAYS-NEXT:  (call $import
-;; ALWAYS-NEXT:   (ref.cast (ref $B)
-;; ALWAYS-NEXT:    (local.get $x)
+;; ALWAYS-NEXT:   (call $import
+;; ALWAYS-NEXT:    (ref.cast (ref $B)
+;; ALWAYS-NEXT:     (local.get $x)
+;; ALWAYS-NEXT:    )
 ;; ALWAYS-NEXT:   )
-;; ALWAYS-NEXT:  )
-;; ALWAYS-NEXT:  (call $import
-;; ALWAYS-NEXT:   (ref.cast (ref $B)
-;; ALWAYS-NEXT:    (local.get $ref)
+;; ALWAYS-NEXT:   (call $import
+;; ALWAYS-NEXT:    (ref.cast (ref $B)
+;; ALWAYS-NEXT:     (local.get $x)
+;; ALWAYS-NEXT:    )
+;; ALWAYS-NEXT:   )
+;; ALWAYS-NEXT:   (call $import
+;; ALWAYS-NEXT:    (ref.cast (ref $B)
+;; ALWAYS-NEXT:     (local.get $ref)
+;; ALWAYS-NEXT:    )
 ;; ALWAYS-NEXT:   )
 ;; ALWAYS-NEXT:  )
 ;; ALWAYS-NEXT: )
@@ -586,3 +608,4 @@
     )
   )
 )
+
diff --git a/test/lit/passes/no-inline-monomorphize-inlining.wast b/test/lit/passes/no-inline-monomorphize-inlining.wast
index be3b5759d..716d0beda 100644
--- a/test/lit/passes/no-inline-monomorphize-inlining.wast
+++ b/test/lit/passes/no-inline-monomorphize-inlining.wast
@@ -3,9 +3,12 @@
 ;; Monomorphization creates a new function, which we can then inline. When we
 ;; mark the original as no-inline, we should not inline the copy, as the copy
 ;; inherits the metadata.
+;;
+;; Use --optimize-level=3 to ensure inlining works at the maximum (to avoid it
+;; not happening because of size limits etc.).
 
-;; RUN: foreach %s %t wasm-opt --no-inline=*noinline* --monomorphize-always --inlining -all -S -o - | filecheck %s --check-prefix NO_INLINE
-;; RUN: foreach %s %t wasm-opt                        --monomorphize-always --inlining -all -S -o - | filecheck %s --check-prefix YESINLINE
+;; RUN: foreach %s %t wasm-opt --no-inline=*noinline* --monomorphize-always --inlining --optimize-level=3 -all -S -o - | filecheck %s --check-prefix NO_INLINE
+;; RUN: foreach %s %t wasm-opt                        --monomorphize-always --inlining --optimize-level=3 -all -S -o - | filecheck %s --check-prefix YESINLINE
 
 (module
   ;; NO_INLINE:      (type $A (sub (struct )))
@@ -42,7 +45,9 @@
   ;; YESINLINE-NEXT:  (local $0 (ref $A))
   ;; YESINLINE-NEXT:  (local $1 (ref $A))
   ;; YESINLINE-NEXT:  (local $2 (ref $B))
-  ;; YESINLINE-NEXT:  (local $3 (ref $B))
+  ;; YESINLINE-NEXT:  (local $3 (ref $A))
+  ;; YESINLINE-NEXT:  (local $4 (ref $B))
+  ;; YESINLINE-NEXT:  (local $5 (ref $A))
   ;; YESINLINE-NEXT:  (block
   ;; YESINLINE-NEXT:   (block $__inlined_func$refinable_noinline
   ;; YESINLINE-NEXT:    (local.set $0
@@ -68,18 +73,28 @@
   ;; YESINLINE-NEXT:    (local.set $2
   ;; YESINLINE-NEXT:     (struct.new_default $B)
   ;; YESINLINE-NEXT:    )
-  ;; YESINLINE-NEXT:    (drop
-  ;; YESINLINE-NEXT:     (local.get $2)
+  ;; YESINLINE-NEXT:    (block
+  ;; YESINLINE-NEXT:     (local.set $3
+  ;; YESINLINE-NEXT:      (local.get $2)
+  ;; YESINLINE-NEXT:     )
+  ;; YESINLINE-NEXT:     (drop
+  ;; YESINLINE-NEXT:      (local.get $3)
+  ;; YESINLINE-NEXT:     )
   ;; YESINLINE-NEXT:    )
   ;; YESINLINE-NEXT:   )
   ;; YESINLINE-NEXT:  )
   ;; YESINLINE-NEXT:  (block
   ;; YESINLINE-NEXT:   (block $__inlined_func$refinable_noinline_2$3
-  ;; YESINLINE-NEXT:    (local.set $3
+  ;; YESINLINE-NEXT:    (local.set $4
   ;; YESINLINE-NEXT:     (struct.new_default $B)
   ;; YESINLINE-NEXT:    )
-  ;; YESINLINE-NEXT:    (drop
-  ;; YESINLINE-NEXT:     (local.get $3)
+  ;; YESINLINE-NEXT:    (block
+  ;; YESINLINE-NEXT:     (local.set $5
+  ;; YESINLINE-NEXT:      (local.get $4)
+  ;; YESINLINE-NEXT:     )
+  ;; YESINLINE-NEXT:     (drop
+  ;; YESINLINE-NEXT:      (local.get $5)
+  ;; YESINLINE-NEXT:     )
   ;; YESINLINE-NEXT:    )
   ;; YESINLINE-NEXT:   )
   ;; YESINLINE-NEXT:  )
@@ -118,7 +133,11 @@
     )
   )
 )
-;; NO_INLINE:      (func $refinable_noinline_2 (type $4) (param $ref (ref $B))
+;; NO_INLINE:      (func $refinable_noinline_2 (type $4) (param $0 (ref $B))
+;; NO_INLINE-NEXT:  (local $ref (ref $A))
+;; NO_INLINE-NEXT:  (local.set $ref
+;; NO_INLINE-NEXT:   (local.get $0)
+;; NO_INLINE-NEXT:  )
 ;; NO_INLINE-NEXT:  (drop
 ;; NO_INLINE-NEXT:   (local.get $ref)
 ;; NO_INLINE-NEXT:  )