Diffstat (limited to 'src')
-rw-r--r--  src/asm2wasm.h                           3
-rw-r--r--  src/ir/LocalGraph.cpp                   33
-rw-r--r--  src/ir/local-graph.h                    33
-rw-r--r--  src/pass.h                              28
-rw-r--r--  src/passes/CMakeLists.txt                1
-rw-r--r--  src/passes/CodePushing.cpp               6
-rw-r--r--  src/passes/OptimizeAddedConstants.cpp  239
-rw-r--r--  src/passes/PickLoadSigns.cpp            38
-rw-r--r--  src/passes/PostEmscripten.cpp           60
-rw-r--r--  src/passes/Precompute.cpp                7
-rw-r--r--  src/passes/SafeHeap.cpp                  9
-rw-r--r--  src/passes/pass.cpp                      9
-rw-r--r--  src/passes/passes.h                      2
-rw-r--r--  src/tools/optimization-options.h         5
-rw-r--r--  src/wasm.h                               6
-rw-r--r--  src/wasm/wasm-s-parser.cpp               4
16 files changed, 382 insertions, 101 deletions
diff --git a/src/asm2wasm.h b/src/asm2wasm.h
index c193791f8..9a68542a5 100644
--- a/src/asm2wasm.h
+++ b/src/asm2wasm.h
@@ -957,6 +957,7 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
}
optimizingBuilder = make_unique<OptimizingIncrementalModuleBuilder>(&wasm, numFunctions, passOptions, [&](PassRunner& passRunner) {
// addPrePasses
+ passRunner.options.lowMemoryUnused = true;
if (debug) {
passRunner.setDebug(true);
passRunner.setValidateGlobally(false);
@@ -1189,6 +1190,7 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
// functions). Optimize those now. Typically there are very few, just do it
// sequentially.
PassRunner passRunner(&wasm, passOptions);
+ passRunner.options.lowMemoryUnused = true;
passRunner.addDefaultFunctionOptimizationPasses();
for (auto& pair : trappingFunctions.getFunctions()) {
auto* func = pair.second;
@@ -1447,6 +1449,7 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
};
PassRunner passRunner(&wasm, passOptions);
+ passRunner.options.lowMemoryUnused = true;
passRunner.setFeatures(passOptions.features);
if (debug) {
passRunner.setDebug(true);
diff --git a/src/ir/LocalGraph.cpp b/src/ir/LocalGraph.cpp
index 6a99ed44e..1cf883c19 100644
--- a/src/ir/LocalGraph.cpp
+++ b/src/ir/LocalGraph.cpp
@@ -242,5 +242,38 @@ void LocalGraph::computeInfluences() {
}
}
+void LocalGraph::computeSSAIndexes() {
+ std::unordered_map<Index, std::set<SetLocal*>> indexSets;
+ for (auto& pair : getSetses) {
+ auto* get = pair.first;
+ auto& sets = pair.second;
+ for (auto* set : sets) {
+ indexSets[get->index].insert(set);
+ }
+ }
+ for (auto& pair : locations) {
+ auto* curr = pair.first;
+ if (auto* set = curr->dynCast<SetLocal>()) {
+ auto& sets = indexSets[set->index];
+ if (sets.size() == 1 && *sets.begin() != curr) {
+ // The gets for this index are reached by just one set, but that set
+ // is not this one - so the index has another set, and is not SSA; mark it invalid.
+ sets.clear();
+ }
+ }
+ }
+ for (auto& pair : indexSets) {
+ auto index = pair.first;
+ auto& sets = pair.second;
+ if (sets.size() == 1) {
+ SSAIndexes.insert(index);
+ }
+ }
+}
+
+bool LocalGraph::isSSA(Index x) {
+ return SSAIndexes.count(x);
+}
+
} // namespace wasm
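A minimal usage sketch of the new API (assuming a Function* func is at hand, as in Precompute.cpp below; the local index checked is illustrative):

  LocalGraph graph(func);     // map each get to the sets that reach it
  graph.computeSSAIndexes();  // find indexes with a single set (besides the zero-init)
  Index someIndex = 0;        // hypothetical local index
  if (graph.isSSA(someIndex)) {
    // the single set for this index dominates all its gets
  }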
diff --git a/src/ir/local-graph.h b/src/ir/local-graph.h
index 725be0536..fd6a496c0 100644
--- a/src/ir/local-graph.h
+++ b/src/ir/local-graph.h
@@ -45,13 +45,40 @@ struct LocalGraph {
// value (0 for a var, the received value for a param)
Locations locations; // where each get and set is (for easy replacing)
- // optional computation: compute the influence graphs between sets and gets
- // (useful for algorithms that propagate changes)
+ // Optional: compute the influence graphs between sets and gets
+ // (useful for algorithms that propagate changes).
+
+ void computeInfluences();
std::unordered_map<GetLocal*, std::unordered_set<SetLocal*>> getInfluences; // for each get, the sets whose values are influenced by that get
std::unordered_map<SetLocal*, std::unordered_set<GetLocal*>> setInfluences; // for each set, the gets whose values are influenced by that set
- void computeInfluences();
+ // Optional: Compute the local indexes that are SSA, in the sense of
+ // * a single set for all the gets for that local index
+ // * the set dominates all the gets (logically implied by the former property)
+ // * no other set (aside from the zero-init)
+ // The third property is not exactly standard SSA, but is useful since we are not in
+ // SSA form in our IR. To see why it matters, consider these:
+ //
+ // x = 0 // zero init
+ // [..]
+ // x = 10
+ // y = x + 20
+ // x = 30 // !!!
+ // f(y)
+ //
+ // The !!! line violates that property - it is another set for x, and it may interfere,
+ // say, with replacing f(y) with f(x + 20). Instead, if we know the only other possible
+ // set for x is the zero init, then things like the !!! line cannot exist, and it is
+ // valid to replace f(y) with f(x + 20).
+ // (This could be simpler, but in wasm the zero init always exists.)
+
+ void computeSSAIndexes();
+
+ bool isSSA(Index x);
+
+private:
+ std::set<Index> SSAIndexes;
};
} // namespace wasm
diff --git a/src/pass.h b/src/pass.h
index 78d1f594e..9ef159d7d 100644
--- a/src/pass.h
+++ b/src/pass.h
@@ -56,14 +56,26 @@ private:
};
struct PassOptions {
- bool debug = false; // run passes in debug mode, doing extra validation and timing checks
- bool validate = true; // whether to run the validator to check for errors
- bool validateGlobally = false; // when validating validate globally and not just locally
- int optimizeLevel = 0; // 0, 1, 2 correspond to -O0, -O1, -O2, etc.
- int shrinkLevel = 0; // 0, 1, 2 correspond to -O0, -Os, -Oz
- bool ignoreImplicitTraps = false; // optimize assuming things like div by 0, bad load/store, will not trap
- bool debugInfo = false; // whether to try to preserve debug info through, which are special calls
- FeatureSet features = FeatureSet::All; // Which wasm features to accept, and be allowed to use
+ // Run passes in debug mode, doing extra validation and timing checks.
+ bool debug = false;
+ // Whether to run the validator to check for errors.
+ bool validate = true;
+ // When validating, validate globally and not just locally.
+ bool validateGlobally = false;
+ // 0, 1, 2 correspond to -O0, -O1, -O2, etc.
+ int optimizeLevel = 0;
+ // 0, 1, 2 correspond to -O0, -Os, -Oz
+ int shrinkLevel = 0;
+ // Optimize assuming that things like div by 0 and bad loads/stores will not trap.
+ bool ignoreImplicitTraps = false;
+ // Optimize assuming that the low 1K of memory is not valid for the application
+ // to use. In that case, we can fold added constants into load/store offsets in many cases.
+ bool lowMemoryUnused = false;
+ enum { LowMemoryBound = 1024 };
+ // Whether to try to preserve debug info through the optimizations (debug info
+ // is represented as special calls).
+ bool debugInfo = false;
+ // Which wasm features to accept, and be allowed to use.
+ FeatureSet features = FeatureSet::All;
void setDefaultOptimizationOptions() {
// -Os is our default
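As the asm2wasm.h hunks above show, the new option is enabled directly on a runner; a minimal sketch using the names from this commit:

  PassRunner passRunner(&wasm, passOptions);
  passRunner.options.lowMemoryUnused = true; // OptimizeAddedConstants asserts on this
  passRunner.addDefaultFunctionOptimizationPasses();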
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index 0ad9dbf05..2848946f8 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -31,6 +31,7 @@ SET(passes_SOURCES
MinifyImportsAndExports.cpp
NameList.cpp
NoExitRuntime.cpp
+ OptimizeAddedConstants.cpp
OptimizeInstructions.cpp
PickLoadSigns.cpp
PostEmscripten.cpp
diff --git a/src/passes/CodePushing.cpp b/src/passes/CodePushing.cpp
index 931df140d..52aab08ad 100644
--- a/src/passes/CodePushing.cpp
+++ b/src/passes/CodePushing.cpp
@@ -77,8 +77,8 @@ struct LocalAnalyzer : public PostWalker<LocalAnalyzer> {
}
};
-// Implement core optimization logic in a struct, used and then discarded entirely
-// for each block
+// Implements core optimization logic. Used and then discarded entirely
+// for each block.
class Pusher {
ExpressionList& list;
LocalAnalyzer& analyzer;
@@ -92,7 +92,7 @@ public:
// continuing forward.
Index relevant = list.size() - 1; // we never need to push past a final element, as
// we couldn't be used after it.
- Index nothing = -1;
+ const Index nothing = -1;
Index i = 0;
Index firstPushable = nothing;
while (i < relevant) {
diff --git a/src/passes/OptimizeAddedConstants.cpp b/src/passes/OptimizeAddedConstants.cpp
new file mode 100644
index 000000000..119d61637
--- /dev/null
+++ b/src/passes/OptimizeAddedConstants.cpp
@@ -0,0 +1,239 @@
+/*
+ * Copyright 2019 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Optimize added constants into load/store offsets. This requires the assumption
+// that low memory is unused, so that we can replace an add (which might wrap)
+// with a load/store offset (which does not).
+//
+// The propagate option also propagates offsets across set/get local pairs.
+//
+// Optimizing constants into load/store offsets is almost always
+// beneficial for speed, as VMs can optimize these operations better.
+// If a LocalGraph is provided, this can also propagate values along get/set
+// pairs. In such a case, we may increase code size slightly or reduce
+// compressibility (e.g., replace (load (get $x)) with (load offset=Z (get $y)),
+// where Z is big enough to not fit in a single byte), but this is good for
+// speed, and may lead to code size reductions elsewhere by using fewer locals.
+//
+
+#include <wasm.h>
+#include <pass.h>
+#include <wasm-builder.h>
+#include <ir/local-graph.h>
+
+namespace wasm {
+
+template<typename T>
+class MemoryAccessOptimizer {
+public:
+ MemoryAccessOptimizer(T* curr, Module* module, LocalGraph* localGraph) : curr(curr), module(module), localGraph(localGraph) {
+ // The pointer itself may be a constant, if e.g. it was precomputed or
+ // a get that we propagated.
+ if (curr->ptr->template is<Const>()) {
+ optimizeConstantPointer();
+ return;
+ }
+ if (auto* add = curr->ptr->template dynCast<Binary>()) {
+ if (add->op == AddInt32) {
+ // Look for a constant on both sides.
+ if (tryToOptimizeConstant(add->right, add->left) ||
+ tryToOptimizeConstant(add->left, add->right)) {
+ return;
+ }
+ }
+ }
+ if (localGraph) {
+ // A final important case is a propagated add:
+ //
+ // x = y + 10
+ // ..
+ // load(x)
+ // =>
+ // x = y + 10
+ // ..
+ // load(y, offset=10)
+ //
+ // This is only valid if y does not change in the middle!
+ if (auto* get = curr->ptr->template dynCast<GetLocal>()) {
+ auto& sets = localGraph->getSetses[get];
+ if (sets.size() == 1) {
+ auto* set = *sets.begin();
+ // May be a zero-init (in which case, we can ignore it).
+ if (set) {
+ auto* value = set->value;
+ if (auto* add = value->template dynCast<Binary>()) {
+ if (add->op == AddInt32) {
+ // We can optimize on either side, but only if we both find
+ // a constant *and* the other side cannot change in the middle.
+ // TODO If it could change, we may add a new local to capture the
+ // old value.
+ if (tryToOptimizePropagatedAdd(add->right, add->left, get) ||
+ tryToOptimizePropagatedAdd(add->left, add->right, get)) {
+ return;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+private:
+ T* curr;
+ Module* module;
+ LocalGraph* localGraph;
+
+ void optimizeConstantPointer() {
+ // The constant and an offset are interchangeable:
+ // (load (const X)) <=> (load offset=X (const 0))
+ // It may not matter if we do this or not - it's the same size,
+ // and in both cases the compiler can see it's a constant location.
+ // For code clarity and compressibility, we prefer to put the
+ // entire address in the constant.
+ if (curr->offset) {
+ // Note that the offset may already be larger than low memory - the
+ // code may know that it is valid, even if we can't prove it. Only handle
+ // the obviously valid case, where an overflow can't occur.
+ auto* c = curr->ptr->template cast<Const>();
+ uint32_t base = c->value.geti32();
+ uint32_t offset = curr->offset;
+ if (uint64_t(base) + uint64_t(offset) < (uint64_t(1) << 32)) {
+ c->value = c->value.add(Literal(uint32_t(curr->offset)));
+ curr->offset = 0;
+ }
+ }
+ }
+
+ struct Result {
+ bool succeeded;
+ Address total;
+ Result() : succeeded(false) {}
+ Result(Address total) : succeeded(true), total(total) {}
+ };
+
+ // See if one side of an add is a constant we can fold into the offset. On
+ // success, the constant is added to the offset and the other side replaces
+ // the pointer.
+ bool tryToOptimizeConstant(Expression* oneSide, Expression* otherSide) {
+ if (auto* c = oneSide->template dynCast<Const>()) {
+ auto result = canOptimizeConstant(c->value);
+ if (result.succeeded) {
+ curr->offset = result.total;
+ curr->ptr = otherSide;
+ if (curr->ptr->template is<Const>()) {
+ optimizeConstantPointer();
+ }
+ return true;
+ }
+ }
+ return false;
+ }
+
+ bool tryToOptimizePropagatedAdd(Expression* oneSide, Expression* otherSide, GetLocal* ptr) {
+ if (auto* c = oneSide->template dynCast<Const>()) {
+ auto result = canOptimizeConstant(c->value);
+ if (result.succeeded) {
+ // Looks good, but we need to make sure the other side cannot change:
+ //
+ // x = y + 10
+ // y = y + 1
+ // load(x)
+ //
+ // This example should not be optimized into
+ //
+ // load(x, offset=10)
+ //
+ // If the other side is a get, we may be able to prove that we can just use that same
+ // local, if both it and the pointer are in SSA form. In that case,
+ //
+ // y = .. // single assignment that dominates all uses
+ // x = y + 10 // single assignment that dominates all uses
+ // [..]
+ // load(x) => load(y, offset=10)
+ //
+ // This is valid since dominance is transitive, so y's definition dominates the load,
+ // and it is ok to replace x with y + 10 there.
+ // TODO otherwise, create a new local
+ if (auto* get = otherSide->template dynCast<GetLocal>()) {
+ if (localGraph->isSSA(get->index) && localGraph->isSSA(ptr->index)) {
+ curr->offset = result.total;
+ curr->ptr = Builder(*module).makeGetLocal(get->index, get->type);
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+ }
+
+ // Sees if we can optimize a particular constant.
+ Result canOptimizeConstant(Literal literal) {
+ auto value = literal.geti32();
+ // Avoid uninteresting corner cases with peculiar offsets.
+ if (value >= 0 && value < PassOptions::LowMemoryBound) {
+ // The total offset must not allow reaching reasonable memory
+ // by overflowing.
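+ // For example (illustrative numbers): if the access already has offset 1000
+ // and we see an added constant of 100, the total of 1100 reaches past
+ // LowMemoryBound, so we must not fold it in.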
+ auto total = curr->offset + value;
+ if (total < PassOptions::LowMemoryBound) {
+ return Result(total);
+ }
+ }
+ return Result();
+ }
+};
+
+struct OptimizeAddedConstants : public WalkerPass<PostWalker<OptimizeAddedConstants, UnifiedExpressionVisitor<OptimizeAddedConstants>>> {
+ bool isFunctionParallel() override { return true; }
+
+ bool propagate;
+
+ OptimizeAddedConstants(bool propagate) : propagate(propagate) {}
+
+ Pass* create() override { return new OptimizeAddedConstants(propagate); }
+
+ std::unique_ptr<LocalGraph> localGraph;
+
+ void visitLoad(Load* curr) {
+ MemoryAccessOptimizer<Load>(curr, getModule(), localGraph.get());
+ }
+
+ void visitStore(Store* curr) {
+ MemoryAccessOptimizer<Store>(curr, getModule(), localGraph.get());
+ }
+
+ void doWalkFunction(Function* func) {
+ // This pass is only valid under the assumption of unused low memory.
+ assert(getPassOptions().lowMemoryUnused);
+ if (propagate) {
+ localGraph = make_unique<LocalGraph>(func);
+ localGraph->computeSSAIndexes();
+ }
+ super::doWalkFunction(func);
+ }
+};
+
+Pass *createOptimizeAddedConstantsPass() {
+ return new OptimizeAddedConstants(false);
+}
+
+Pass *createOptimizeAddedConstantsPropagatePass() {
+ return new OptimizeAddedConstants(true);
+}
+
+} // namespace wasm
+
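A minimal driver sketch for the new pass (assuming a loaded Module named module; the pass names are the ones registered in pass.cpp below):

  PassRunner runner(&module);
  runner.options.lowMemoryUnused = true; // required - the pass asserts on it
  runner.add("optimize-added-constants-propagate"); // or "optimize-added-constants"
  runner.run();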
diff --git a/src/passes/PickLoadSigns.cpp b/src/passes/PickLoadSigns.cpp
index 827346839..fce50b4bb 100644
--- a/src/passes/PickLoadSigns.cpp
+++ b/src/passes/PickLoadSigns.cpp
@@ -46,22 +46,8 @@ struct PickLoadSigns : public WalkerPass<ExpressionStackWalker<PickLoadSigns>> {
usages.resize(func->getNumLocals());
// walk
ExpressionStackWalker<PickLoadSigns>::doWalkFunction(func);
- // optimize based on the info we saw
- for (auto& pair : loads) {
- auto* load = pair.first;
- auto index = pair.second;
- auto& usage = usages[index];
- // if we can't optimize, give up
- if (usage.totalUsages == 0 || // no usages, so no idea
- usage.signedUsages + usage.unsignedUsages != usage.totalUsages || // non-sign/unsigned usages, so cannot change
- (usage.signedUsages != 0 && usage.signedBits != load->bytes * 8) || // sign usages exist but the wrong size
- (usage.unsignedUsages != 0 && usage.unsignedBits != load->bytes * 8)) { // unsigned usages exist but the wrong size
- continue;
- }
- // we can pick the optimal one. our hope is to remove 2 items per
- // signed use (two shifts), so we factor that in
- load->signed_ = usage.signedUsages * 2 >= usage.unsignedUsages;
- }
+ // optimize
+ optimize();
}
void visitGetLocal(GetLocal* curr) {
@@ -102,6 +88,26 @@ struct PickLoadSigns : public WalkerPass<ExpressionStackWalker<PickLoadSigns>> {
loads[load] = curr->index;
}
}
+
+ void optimize() {
+ // optimize based on the info we saw
+ for (auto& pair : loads) {
+ auto* load = pair.first;
+ auto index = pair.second;
+ auto& usage = usages[index];
+ // if we can't optimize, give up
+ if (usage.totalUsages == 0 || // no usages, so no idea
+ usage.signedUsages + usage.unsignedUsages != usage.totalUsages || // non-sign/unsigned usages, so cannot change
+ (usage.signedUsages != 0 && usage.signedBits != load->bytes * 8) || // sign usages exist but the wrong size
+ (usage.unsignedUsages != 0 && usage.unsignedBits != load->bytes * 8)) { // unsigned usages exist but the wrong size
+ continue;
+ }
+ // we can pick the optimal one. our hope is to remove 2 items per
+ // signed use (two shifts), so we factor that in
+ load->signed_ = usage.signedUsages * 2 >= usage.unsignedUsages;
+ }
+ }
+
};
Pass *createPickLoadSignsPass() {
diff --git a/src/passes/PostEmscripten.cpp b/src/passes/PostEmscripten.cpp
index 72c0d8808..7e2bacf25 100644
--- a/src/passes/PostEmscripten.cpp
+++ b/src/passes/PostEmscripten.cpp
@@ -32,66 +32,6 @@ struct PostEmscripten : public WalkerPass<PostWalker<PostEmscripten>> {
Pass* create() override { return new PostEmscripten; }
- // When we have a Load from a local value (typically a GetLocal) plus a constant offset,
- // we may be able to fold it in.
- // The semantics of the Add are to wrap, while wasm offset semantics purposefully do
- // not wrap. So this is not always safe to do. For example, a load may depend on
- // wrapping via
- // (2^32 - 10) + 100 => wrap and load from address 90
- // Without wrapping, we get something too large, and an error. *However*, for
- // asm2wasm output coming from Emscripten, we allocate the lowest 1024 for mapped
- // globals. Mapped globals are simple types (i32, float or double), always
- // accessed directly by a single constant. Therefore if we see (..) + K where
- // K is less than 1024, then if it wraps, it wraps into [0, 1024) which is at best
- // a mapped global, but it can't be because they are accessed directly (at worst,
- // it's 0 or an unused section of memory that was reserved for mapped globals).
- // Thus it is ok to optimize such small constants into Load offsets.
-
- #define SAFE_MAX 1024
-
- void optimizeMemoryAccess(Expression*& ptr, Address& offset) {
- while (1) {
- auto* add = ptr->dynCast<Binary>();
- if (!add) break;
- if (add->op != AddInt32) break;
- auto* left = add->left->dynCast<Const>();
- auto* right = add->right->dynCast<Const>();
- // note: in optimized code, we shouldn't see an add of two constants, so don't worry about that much
- // (precompute would optimize that)
- if (left) {
- auto value = left->value.geti32();
- if (value >= 0 && value < SAFE_MAX) {
- offset = offset + value;
- ptr = add->right;
- continue;
- }
- }
- if (right) {
- auto value = right->value.geti32();
- if (value >= 0 && value < SAFE_MAX) {
- offset = offset + value;
- ptr = add->left;
- continue;
- }
- }
- break;
- }
- // finally ptr may be a const, but it isn't worth folding that in (we still have a const); in fact,
- // it's better to do the opposite for gzip purposes as well as for readability.
- auto* last = ptr->dynCast<Const>();
- if (last) {
- last->value = Literal(int32_t(last->value.geti32() + offset));
- offset = 0;
- }
- }
-
- void visitLoad(Load* curr) {
- optimizeMemoryAccess(curr->ptr, curr->offset);
- }
- void visitStore(Store* curr) {
- optimizeMemoryAccess(curr->ptr, curr->offset);
- }
-
void visitCall(Call* curr) {
// special asm.js imports can be optimized
auto* func = getModule()->getFunction(curr->target);
diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp
index c23babda4..5e9d9c9ea 100644
--- a/src/passes/Precompute.cpp
+++ b/src/passes/Precompute.cpp
@@ -15,7 +15,11 @@
*/
//
-// Computes code at compile time where possible.
+// Computes code at compile time where possible, replacing it with the
+// computed constant.
+//
+// The "propagate" variant of this pass also propagates constants across
+// sets and gets, which implements a standard constant propagation.
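+//
+// For example (illustrative), given
+//
+//   x = 10
+//   f(x + 20)
+//
+// the propagate variant can rewrite the call to f(30).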
//
// Possible nondeterminism: WebAssembly NaN signs are nondeterministic,
// and this pass may optimize e.g. a float 0 / 0 into +nan while a VM may
@@ -267,6 +271,7 @@ private:
// compute all dependencies
LocalGraph localGraph(func);
localGraph.computeInfluences();
+ localGraph.computeSSAIndexes();
// prepare the work list. we add things here that might change to a constant
// initially, that means everything
std::unordered_set<Expression*> work;
diff --git a/src/passes/SafeHeap.cpp b/src/passes/SafeHeap.cpp
index f170041e1..3cc1021ed 100644
--- a/src/passes/SafeHeap.cpp
+++ b/src/passes/SafeHeap.cpp
@@ -100,7 +100,10 @@ struct AccessInstrumenter : public WalkerPass<PostWalker<AccessInstrumenter>> {
};
struct SafeHeap : public Pass {
+ PassOptions options;
+
void run(PassRunner* runner, Module* module) override {
+ options = runner->options;
// add imports
addImports(module);
// instrument loads and stores
@@ -314,13 +317,15 @@ struct SafeHeap : public Pass {
}
Expression* makeBoundsCheck(Type type, Builder& builder, Index local, Index bytes) {
+ auto upperOp = options.lowMemoryUnused ? LtUInt32 : EqInt32;
+ auto upperBound = options.lowMemoryUnused ? PassOptions::LowMemoryBound : 0;
return builder.makeIf(
builder.makeBinary(
OrInt32,
builder.makeBinary(
- EqInt32,
+ upperOp,
builder.makeGetLocal(local, i32),
- builder.makeConst(Literal(int32_t(0)))
+ builder.makeConst(Literal(int32_t(upperBound)))
),
builder.makeBinary(
GtUInt32,
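In effect, when lowMemoryUnused is set the emitted guard tightens from "ptr == 0" to "ptr < 1024" (LowMemoryBound): since the application may not use the low 1K, any pointer below that bound is invalid, not just null. The GtUInt32 upper-bound check that follows is unchanged.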
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 58a5f2f0a..3712bf19e 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -100,6 +100,8 @@ void PassRegistry::registerPasses() {
registerPass("minify-imports-and-exports", "minifies both import and export names, and emits a mapping to the minified ones", createMinifyImportsAndExportsPass);
registerPass("nm", "name list", createNameListPass);
registerPass("no-exit-runtime", "removes calls to atexit(), which is valid if the C runtime will never be exited", createNoExitRuntimePass);
+ registerPass("optimize-added-constants", "optimizes added constants into load/store offsets", createOptimizeAddedConstantsPass);
+ registerPass("optimize-added-constants-propagate", "optimizes added constants into load/store offsets, propagating them across locals too", createOptimizeAddedConstantsPropagatePass);
registerPass("optimize-instructions", "optimizes instruction combinations", createOptimizeInstructionsPass);
registerPass("optimize-stack-ir", "optimize Stack IR", createOptimizeStackIRPass);
registerPass("pick-load-signs", "pick load signs based on their uses", createPickLoadSignsPass);
@@ -169,6 +171,13 @@ void PassRunner::addDefaultFunctionOptimizationPasses() {
} else {
add("precompute");
}
+ if (options.lowMemoryUnused) {
+ if (options.optimizeLevel >= 3 || options.shrinkLevel >= 1) {
+ add("optimize-added-constants-propagate");
+ } else {
+ add("optimize-added-constants");
+ }
+ }
if (options.optimizeLevel >= 2 || options.shrinkLevel >= 2) {
add("code-pushing");
}
diff --git a/src/passes/passes.h b/src/passes/passes.h
index 15493f313..ab11721fb 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -57,6 +57,8 @@ Pass* createMinifyImportsAndExportsPass();
Pass* createMetricsPass();
Pass* createNameListPass();
Pass* createNoExitRuntimePass();
+Pass* createOptimizeAddedConstantsPass();
+Pass* createOptimizeAddedConstantsPropagatePass();
Pass* createOptimizeInstructionsPass();
Pass* createOptimizeStackIRPass();
Pass* createPickLoadSignsPass();
diff --git a/src/tools/optimization-options.h b/src/tools/optimization-options.h
index 53fd60e4d..4b8bfc139 100644
--- a/src/tools/optimization-options.h
+++ b/src/tools/optimization-options.h
@@ -96,6 +96,11 @@ struct OptimizationOptions : public ToolOptions {
Options::Arguments::Zero,
[this](Options*, const std::string&) {
passOptions.ignoreImplicitTraps = true;
+ })
+ .add("--low-memory-unused", "-lmu", "Optimize under the helpful assumption that the low 1K of memory is not used by the application",
+ Options::Arguments::Zero,
+ [this](Options*, const std::string&) {
+ passOptions.lowMemoryUnused = true;
});
// add passes in registry
for (const auto& p : PassRegistry::get()->getRegisteredNames()) {
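Once the flag is registered, the assumption can be enabled from the command line of the optimization tools; a usage sketch (wasm-opt includes these options):

  wasm-opt -O3 --low-memory-unused input.wasm -o output.wasm

Per pass.cpp above, at -O3 (or with -Os/-Oz) this schedules the propagating variant of the new pass.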
diff --git a/src/wasm.h b/src/wasm.h
index a1761a430..a16dab478 100644
--- a/src/wasm.h
+++ b/src/wasm.h
@@ -98,12 +98,6 @@ struct Address {
Address& operator++() { ++addr; return *this; }
};
-// An offset into memory
-typedef int32_t Offset;
-
-// Types
-
-
// Operators
enum UnaryOp {
diff --git a/src/wasm/wasm-s-parser.cpp b/src/wasm/wasm-s-parser.cpp
index d7b3824a9..7fd4679b1 100644
--- a/src/wasm/wasm-s-parser.cpp
+++ b/src/wasm/wasm-s-parser.cpp
@@ -966,10 +966,10 @@ static size_t parseMemAttributes(Element& s, Address* offset, Address* align, Ad
throw ParseException("bad memory attribute immediate", s.line, s.col);
}
if (str[0] == 'a') {
- if (value > std::numeric_limits<uint32_t>::max()) throw ParseException("bad align");
+ if (value > std::numeric_limits<uint32_t>::max()) throw ParseException("bad align", s.line, s.col);
*align = value;
} else if (str[0] == 'o') {
- if (value > std::numeric_limits<uint32_t>::max()) throw ParseException("bad offset");
+ if (value > std::numeric_limits<uint32_t>::max()) throw ParseException("bad offset", s.line, s.col);
*offset = value;
} else throw ParseException("bad memory attribute");
i++;