Diffstat (limited to 'src')
-rw-r--r--  src/asm2wasm.h                           3
-rw-r--r--  src/ir/LocalGraph.cpp                   33
-rw-r--r--  src/ir/local-graph.h                    33
-rw-r--r--  src/pass.h                              28
-rw-r--r--  src/passes/CMakeLists.txt                1
-rw-r--r--  src/passes/CodePushing.cpp               6
-rw-r--r--  src/passes/OptimizeAddedConstants.cpp  239
-rw-r--r--  src/passes/PickLoadSigns.cpp            38
-rw-r--r--  src/passes/PostEmscripten.cpp           60
-rw-r--r--  src/passes/Precompute.cpp                7
-rw-r--r--  src/passes/SafeHeap.cpp                  9
-rw-r--r--  src/passes/pass.cpp                      9
-rw-r--r--  src/passes/passes.h                      2
-rw-r--r--  src/tools/optimization-options.h         5
-rw-r--r--  src/wasm.h                               6
-rw-r--r--  src/wasm/wasm-s-parser.cpp               4
16 files changed, 382 insertions, 101 deletions
diff --git a/src/asm2wasm.h b/src/asm2wasm.h
index c193791f8..9a68542a5 100644
--- a/src/asm2wasm.h
+++ b/src/asm2wasm.h
@@ -957,6 +957,7 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
}
optimizingBuilder = make_unique<OptimizingIncrementalModuleBuilder>(&wasm, numFunctions, passOptions, [&](PassRunner& passRunner) {
// addPrePasses
+ passRunner.options.lowMemoryUnused = true;
if (debug) {
passRunner.setDebug(true);
passRunner.setValidateGlobally(false);
@@ -1189,6 +1190,7 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
// functions). Optimize those now. Typically there are very few, just do it
// sequentially.
PassRunner passRunner(&wasm, passOptions);
+ passRunner.options.lowMemoryUnused = true;
passRunner.addDefaultFunctionOptimizationPasses();
for (auto& pair : trappingFunctions.getFunctions()) {
auto* func = pair.second;
@@ -1447,6 +1449,7 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
};
PassRunner passRunner(&wasm, passOptions);
+ passRunner.options.lowMemoryUnused = true;
passRunner.setFeatures(passOptions.features);
if (debug) {
passRunner.setDebug(true);
diff --git a/src/ir/LocalGraph.cpp b/src/ir/LocalGraph.cpp
index 6a99ed44e..1cf883c19 100644
--- a/src/ir/LocalGraph.cpp
+++ b/src/ir/LocalGraph.cpp
@@ -242,5 +242,38 @@ void LocalGraph::computeInfluences() {
}
}
+void LocalGraph::computeSSAIndexes() {
+ std::unordered_map<Index, std::set<SetLocal*>> indexSets;
+ for (auto& pair : getSetses) {
+ auto* get = pair.first;
+ auto& sets = pair.second;
+ for (auto* set : sets) {
+ indexSets[get->index].insert(set);
+ }
+ }
+ for (auto& pair : locations) {
+ auto* curr = pair.first;
+ if (auto* set = curr->dynCast<SetLocal>()) {
+ auto& sets = indexSets[set->index];
+ if (sets.size() == 1 && *sets.begin() != curr) {
+ // The gets for this index are reached by just one set, but that set
+ // is not this one - so the index has another set, and is not SSA; mark it invalid.
+ sets.clear();
+ }
+ }
+ }
+ for (auto& pair : indexSets) {
+ auto index = pair.first;
+ auto& sets = pair.second;
+ if (sets.size() == 1) {
+ SSAIndexes.insert(index);
+ }
+ }
+}
+
+bool LocalGraph::isSSA(Index x) {
+ return SSAIndexes.count(x);
+}
+
} // namespace wasm
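A minimal usage sketch of the new API (assuming a Function* func is at hand, as in Precompute.cpp below; the local index checked is illustrative):

  LocalGraph graph(func);     // map each get to the sets that reach it
  graph.computeSSAIndexes();  // find indexes with a single set (besides the zero-init)
  Index someIndex = 0;        // hypothetical local index
  if (graph.isSSA(someIndex)) {
    // the single set for this index dominates all its gets
  }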
diff --git a/src/ir/local-graph.h b/src/ir/local-graph.h
index 725be0536..fd6a496c0 100644
--- a/src/ir/local-graph.h
+++ b/src/ir/local-graph.h
@@ -45,13 +45,40 @@ struct LocalGraph {
// value (0 for a var, the received value for a param)
Locations locations; // where each get and set is (for easy replacing)
- // optional computation: compute the influence graphs between sets and gets
- // (useful for algorithms that propagate changes)
+ // Optional: compute the influence graphs between sets and gets
+ // (useful for algorithms that propagate changes).
+
+ void computeInfluences();
std::unordered_map<GetLocal*, std::unordered_set<SetLocal*>> getInfluences; // for each get, the sets whose values are influenced by that get
std::unordered_map<SetLocal*, std::unordered_set<GetLocal*>> setInfluences; // for each set, the gets whose values are influenced by that set
- void computeInfluences();
+ // Optional: Compute the local indexes that are SSA, in the sense of
+ // * a single set for all the gets for that local index
+ // * the set dominates all the gets (logically implied by the former property)
+ // * no other set (aside from the zero-init)
+ // The third property is not exactly standard SSA, but is useful since we are not in
+ // SSA form in our IR. To see why it matters, consider these:
+ //
+ // x = 0 // zero init
+ // [..]
+ // x = 10
+ // y = x + 20
+ // x = 30 // !!!
+ // f(y)
+ //
+ // The !!! line violates that property - it is another set for x, and it may interfere,
+ // say, with replacing f(y) with f(x + 20). Instead, if we know the only other possible
+ // set for x is the zero init, then things like the !!! line cannot exist, and it is
+ // valid to replace f(y) with f(x + 20).
+ // (This could be simpler, but in wasm the zero init always exists.)
+
+ void computeSSAIndexes();
+
+ bool isSSA(Index x);
+
+private:
+ std::set<Index> SSAIndexes;
};
} // namespace wasm
diff --git a/src/pass.h b/src/pass.h
index 78d1f594e..9ef159d7d 100644
--- a/src/pass.h
+++ b/src/pass.h
@@ -56,14 +56,26 @@ private:
};
struct PassOptions {
- bool debug = false; // run passes in debug mode, doing extra validation and timing checks
- bool validate = true; // whether to run the validator to check for errors
- bool validateGlobally = false; // when validating validate globally and not just locally
- int optimizeLevel = 0; // 0, 1, 2 correspond to -O0, -O1, -O2, etc.
- int shrinkLevel = 0; // 0, 1, 2 correspond to -O0, -Os, -Oz
- bool ignoreImplicitTraps = false; // optimize assuming things like div by 0, bad load/store, will not trap
- bool debugInfo = false; // whether to try to preserve debug info through, which are special calls
- FeatureSet features = FeatureSet::All; // Which wasm features to accept, and be allowed to use
+ // Run passes in debug mode, doing extra validation and timing checks.
+ bool debug = false;
+ // Whether to run the validator to check for errors.
+ bool validate = true;
+ // When validating, validate globally and not just locally.
+ bool validateGlobally = false;
+ // 0, 1, 2 correspond to -O0, -O1, -O2, etc.
+ int optimizeLevel = 0;
+ // 0, 1, 2 correspond to -O0, -Os, -Oz
+ int shrinkLevel = 0;
+ // Optimize assuming that things like div by 0 and bad loads/stores will not trap.
+ bool ignoreImplicitTraps = false;
+ // Optimize assuming that the low 1K of memory is not valid for the application
+ // to use. In that case, we can fold added constants into load/store offsets in many cases.
+ bool lowMemoryUnused = false;
+ enum { LowMemoryBound = 1024 };
+ // Whether to try to preserve debug info through the optimizations (debug info
+ // is represented as special calls).
+ bool debugInfo = false;
+ // Which wasm features to accept, and be allowed to use.
+ FeatureSet features = FeatureSet::All;
void setDefaultOptimizationOptions() {
// -Os is our default
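As the asm2wasm.h hunks above show, the new option is enabled directly on a runner; a minimal sketch using the names from this commit:

  PassRunner passRunner(&wasm, passOptions);
  passRunner.options.lowMemoryUnused = true; // OptimizeAddedConstants asserts on this
  passRunner.addDefaultFunctionOptimizationPasses();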
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index 0ad9dbf05..2848946f8 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -31,6 +31,7 @@ SET(passes_SOURCES
MinifyImportsAndExports.cpp
NameList.cpp
NoExitRuntime.cpp
+ OptimizeAddedConstants.cpp
OptimizeInstructions.cpp
PickLoadSigns.cpp
PostEmscripten.cpp
diff --git a/src/passes/CodePushing.cpp b/src/passes/CodePushing.cpp
index 931df140d..52aab08ad 100644
--- a/src/passes/CodePushing.cpp
+++ b/src/passes/CodePushing.cpp
@@ -77,8 +77,8 @@ struct LocalAnalyzer : public PostWalker<LocalAnalyzer> {
}
};
-// Implement core optimization logic in a struct, used and then discarded entirely
-// for each block
+// Implements core optimization logic. Used and then discarded entirely
+// for each block.
class Pusher {
ExpressionList& list;
LocalAnalyzer& analyzer;
@@ -92,7 +92,7 @@ public:
// continuing forward.
Index relevant = list.size() - 1; // we never need to push past a final element, as
// we couldn't be used after it.
- Index nothing = -1;
+ const Index nothing = -1;
Index i = 0;
Index firstPushable = nothing;
while (i < relevant) {
diff --git a/src/passes/OptimizeAddedConstants.cpp b/src/passes/OptimizeAddedConstants.cpp
new file mode 100644
index 000000000..119d61637
--- /dev/null
+++ b/src/passes/OptimizeAddedConstants.cpp
@@ -0,0 +1,239 @@
+/*
+ * Copyright 2019 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Optimize added constants into load/store offsets. This requires the assumption
+// that low memory is unused, so that we can replace an add (which might wrap)
+// with a load/store offset (which does not).
+//
+// The propagate option also propagates offsets across set/get local pairs.
+//
+// Optimizing constants into load/store offsets is almost always
+// beneficial for speed, as VMs can optimize these operations better.
+// If a LocalGraph is provided, this can also propagate values along get/set
+// pairs. In such a case, we may increase code size slightly or reduce
+// compressibility (e.g., replace (load (get $x)) with (load offset=Z (get $y)),
+// where Z is big enough to not fit in a single byte), but this is good for
+// speed, and may lead to code size reductions elsewhere by using fewer locals.
+//
+
+#include <wasm.h>
+#include <pass.h>
+#include <wasm-builder.h>
+#include <ir/local-graph.h>
+
+namespace wasm {
+
+template<typename T>
+class MemoryAccessOptimizer {
+public:
+ MemoryAccessOptimizer(T* curr, Module* module, LocalGraph* localGraph) : curr(curr), module(module), localGraph(localGraph) {
+ // The pointer itself may be a constant, if e.g. it was precomputed or
+ // a get that we propagated.
+ if (curr->ptr->template is<Const>()) {
+ optimizeConstantPointer();
+ return;
+ }
+ if (auto* add = curr->ptr->template dynCast<Binary>()) {
+ if (add->op == AddInt32) {
+ // Look for a constant on both sides.
+ if (tryToOptimizeConstant(add->right, add->left) ||
+ tryToOptimizeConstant(add->left, add->right)) {
+ return;
+ }
+ }
+ }
+ if (localGraph) {
+ // A final important case is a propagated add:
+ //
+ // x = y + 10
+ // ..
+ // load(x)
+ // =>
+ // x = y + 10
+ // ..
+ // load(y, offset=10)
+ //
+ // This is only valid if y does not change in the middle!
+ if (auto* get = curr->ptr->template dynCast<GetLocal>()) {
+ auto& sets = localGraph->getSetses[get];
+ if (sets.size() == 1) {
+ auto* set = *sets.begin();
+ // May be a zero-init (in which case, we can ignore it).
+ if (set) {
+ auto* value = set->value;
+ if (auto* add = value->template dynCast<Binary>()) {
+ if (add->op == AddInt32) {
+ // We can optimize on either side, but only if we both find
+ // a constant *and* the other side cannot change in the middle.
+ // TODO If it could change, we may add a new local to capture the
+ // old value.
+ if (tryToOptimizePropagatedAdd(add->right, add->left, get) ||
+ tryToOptimizePropagatedAdd(add->left, add->right, get)) {
+ return;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+private:
+ T* curr;
+ Module* module;
+ LocalGraph* localGraph;
+
+ void optimizeConstantPointer() {
+ // The constant and an offset are interchangeable:
+ // (load (const X)) <=> (load offset=X (const 0))
+ // It may not matter if we do this or not - it's the same size,
+ // and in both cases the compiler can see it's a constant location.
+ // For code clarity and compressibility, we prefer to put the
+ // entire address in the constant.
+ if (curr->offset) {
+ // Note that the offset may already be larger than low memory - the
+ // code may know that it is valid, even if we can't prove it. Only handle
+ // the obviously valid case, where an overflow can't occur.
+ auto* c = curr->ptr->template cast<Const>();
+ uint32_t base = c->value.geti32();
+ uint32_t offset = curr->offset;
+ if (uint64_t(base) + uint64_t(offset) < (uint64_t(1) << 32)) {
+ c->value = c->value.add(Literal(uint32_t(curr->offset)));
+ curr->offset = 0;
+ }
+ }
+ }
+
+ struct Result {
+ bool succeeded;
+ Address total;
+ Result() : succeeded(false) {}
+ Result(Address total) : succeeded(true), total(total) {}
+ };
+
+ // See if one side of an add is a constant we can fold into the offset. On
+ // success, the constant is added to the offset and the other side replaces
+ // the pointer.
+ bool tryToOptimizeConstant(Expression* oneSide, Expression* otherSide) {
+ if (auto* c = oneSide->template dynCast<Const>()) {
+ auto result = canOptimizeConstant(c->value);
+ if (result.succeeded) {
+ curr->offset = result.total;
+ curr->ptr = otherSide;
+ if (curr->ptr->template is<Const>()) {
+ optimizeConstantPointer();
+ }
+ return true;
+ }
+ }
+ return false;
+ }
+
+ bool tryToOptimizePropagatedAdd(Expression* oneSide, Expression* otherSide, GetLocal* ptr) {
+ if (auto* c = oneSide->template dynCast<Const>()) {
+ auto result = canOptimizeConstant(c->value);
+ if (result.succeeded) {
+ // Looks good, but we need to make sure the other side cannot change:
+ //
+ // x = y + 10
+ // y = y + 1
+ // load(x)
+ //
+ // This example should not be optimized into
+ //
+ // load(x, offset=10)
+ //
+ // If the other side is a get, we may be able to prove that we can just use that same
+ // local, if both it and the pointer are in SSA form. In that case,
+ //
+ // y = .. // single assignment that dominates all uses
+ // x = y + 10 // single assignment that dominates all uses
+ // [..]
+ // load(x) => load(y, offset=10)
+ //
+ // This is valid since dominance is transitive, so y's definition dominates the load,
+ // and it is ok to replace x with y + 10 there.
+ // TODO otherwise, create a new local
+ if (auto* get = otherSide->template dynCast<GetLocal>()) {
+ if (localGraph->isSSA(get->index) && localGraph->isSSA(ptr->index)) {
+ curr->offset = result.total;
+ curr->ptr = Builder(*module).makeGetLocal(get->index, get->type);
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+ }
+
+ // Sees if we can optimize a particular constant.
+ Result canOptimizeConstant(Literal literal) {
+ auto value = literal.geti32();
+ // Avoid uninteresting corner cases with peculiar offsets.
+ if (value >= 0 && value < PassOptions::LowMemoryBound) {
+ // The total offset must not allow reaching reasonable memory
+ // by overflowing.
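+ // For example (illustrative numbers): if the access already has offset 1000
+ // and we see an added constant of 100, the total of 1100 reaches past
+ // LowMemoryBound, so we must not fold it in.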
+ auto total = curr->offset + value;
+ if (total < PassOptions::LowMemoryBound) {
+ return Result(total);
+ }
+ }
+ return Result();
+ }
+};
+
+struct OptimizeAddedConstants : public WalkerPass<PostWalker<OptimizeAddedConstants, UnifiedExpressionVisitor<OptimizeAddedConstants>>> {
+ bool isFunctionParallel() override { return true; }
+
+ bool propagate;
+
+ OptimizeAddedConstants(bool propagate) : propagate(propagate) {}
+
+ Pass* create() override { return new OptimizeAddedConstants(propagate); }
+
+ std::unique_ptr<LocalGraph> localGraph;
+
+ void visitLoad(Load* curr) {
+ MemoryAccessOptimizer<Load>(curr, getModule(), localGraph.get());
+ }
+
+ void visitStore(Store* curr) {
+ MemoryAccessOptimizer<Store>(curr, getModule(), localGraph.get());
+ }
+
+ void doWalkFunction(Function* func) {
+ // This pass is only valid under the assumption of unused low memory.
+ assert(getPassOptions().lowMemoryUnused);
+ if (propagate) {
+ localGraph = make_unique<LocalGraph>(func);
+ localGraph->computeSSAIndexes();
+ }
+ super::doWalkFunction(func);
+ }
+};
+
+Pass *createOptimizeAddedConstantsPass() {
+ return new OptimizeAddedConstants(false);
+}
+
+Pass *createOptimizeAddedConstantsPropagatePass() {
+ return new OptimizeAddedConstants(true);
+}
+
+} // namespace wasm
+
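A minimal driver sketch for the new pass (assuming a loaded Module named module; the pass names are the ones registered in pass.cpp below):

  PassRunner runner(&module);
  runner.options.lowMemoryUnused = true; // required - the pass asserts on it
  runner.add("optimize-added-constants-propagate"); // or "optimize-added-constants"
  runner.run();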
diff --git a/src/passes/PickLoadSigns.cpp b/src/passes/PickLoadSigns.cpp
index 827346839..fce50b4bb 100644
--- a/src/passes/PickLoadSigns.cpp
+++ b/src/passes/PickLoadSigns.cpp
@@ -46,22 +46,8 @@ struct PickLoadSigns : public WalkerPass<ExpressionStackWalker<PickLoadSigns>> {
usages.resize(func->getNumLocals());
// walk
ExpressionStackWalker<PickLoadSigns>::doWalkFunction(func);
- // optimize based on the info we saw
- for (auto& pair : loads) {
- auto* load = pair.first;
- auto index = pair.second;
- auto& usage = usages[index];
- // if we can't optimize, give up
- if (usage.totalUsages == 0 || // no usages, so no idea
- usage.signedUsages + usage.unsignedUsages != usage.totalUsages || // non-sign/unsigned usages, so cannot change
- (usage.signedUsages != 0 && usage.signedBits != load->bytes * 8) || // sign usages exist but the wrong size
- (usage.unsignedUsages != 0 && usage.unsignedBits != load->bytes * 8)) { // unsigned usages exist but the wrong size
- continue;
- }
- // we can pick the optimal one. our hope is to remove 2 items per
- // signed use (two shifts), so we factor that in
- load->signed_ = usage.signedUsages * 2 >= usage.unsignedUsages;
- }
+ // optimize
+ optimize();
}
void visitGetLocal(GetLocal* curr) {
@@ -102,6 +88,26 @@ struct PickLoadSigns : public WalkerPass<ExpressionStackWalker<PickLoadSigns>> {
loads[load] = curr->index;
}
}
+
+ void optimize() {
+ // optimize based on the info we saw
+ for (auto& pair : loads) {
+ auto* load = pair.first;
+ auto index = pair.second;
+ auto& usage = usages[index];
+ // if we can't optimize, give up
+ if (usage.totalUsages == 0 || // no usages, so no idea
+ usage.signedUsages + usage.unsignedUsages != usage.totalUsages || // non-sign/unsigned usages, so cannot change
+ (usage.signedUsages != 0 && usage.signedBits != load->bytes * 8) || // sign usages exist but the wrong size
+ (usage.unsignedUsages != 0 && usage.unsignedBits != load->bytes * 8)) { // unsigned usages exist but the wrong size
+ continue;
+ }
+ // we can pick the optimal one. our hope is to remove 2 items per
+ // signed use (two shifts), so we factor that in
+ load->signed_ = usage.signedUsages * 2 >= usage.unsignedUsages;
+ }
+ }
+
};
Pass *createPickLoadSignsPass() {
diff --git a/src/passes/PostEmscripten.cpp b/src/passes/PostEmscripten.cpp
index 72c0d8808..7e2bacf25 100644
--- a/src/passes/PostEmscripten.cpp
+++ b/src/passes/PostEmscripten.cpp
@@ -32,66 +32,6 @@ struct PostEmscripten : public WalkerPass<PostWalker<PostEmscripten>> {
Pass* create() override { return new PostEmscripten; }
- // When we have a Load from a local value (typically a GetLocal) plus a constant offset,
- // we may be able to fold it in.
- // The semantics of the Add are to wrap, while wasm offset semantics purposefully do
- // not wrap. So this is not always safe to do. For example, a load may depend on
- // wrapping via
- // (2^32 - 10) + 100 => wrap and load from address 90
- // Without wrapping, we get something too large, and an error. *However*, for
- // asm2wasm output coming from Emscripten, we allocate the lowest 1024 for mapped
- // globals. Mapped globals are simple types (i32, float or double), always
- // accessed directly by a single constant. Therefore if we see (..) + K where
- // K is less than 1024, then if it wraps, it wraps into [0, 1024) which is at best
- // a mapped global, but it can't be because they are accessed directly (at worst,
- // it's 0 or an unused section of memory that was reserved for mapped globals).
- // Thus it is ok to optimize such small constants into Load offsets.
-
- #define SAFE_MAX 1024
-
- void optimizeMemoryAccess(Expression*& ptr, Address& offset) {
- while (1) {
- auto* add = ptr->dynCast<Binary>();
- if (!add) break;
- if (add->op != AddInt32) break;
- auto* left = add->left->dynCast<Const>();
- auto* right = add->right->dynCast<Const>();
- // note: in optimized code, we shouldn't see an add of two constants, so don't worry about that much
- // (precompute would optimize that)
- if (left) {
- auto value = left->value.geti32();
- if (value >= 0 && value < SAFE_MAX) {
- offset = offset + value;
- ptr = add->right;
- continue;
- }
- }
- if (right) {
- auto value = right->value.geti32();
- if (value >= 0 && value < SAFE_MAX) {
- offset = offset + value;
- ptr = add->left;
- continue;
- }
- }
- break;
- }
- // finally ptr may be a const, but it isn't worth folding that in (we still have a const); in fact,
- // it's better to do the opposite for gzip purposes as well as for readability.
- auto* last = ptr->dynCast<Const>();
- if (last) {
- last->value = Literal(int32_t(last->value.geti32() + offset));
- offset = 0;
- }
- }
-
- void visitLoad(Load* curr) {
- optimizeMemoryAccess(curr->ptr, curr->offset);
- }
- void visitStore(Store* curr) {
- optimizeMemoryAccess(curr->ptr, curr->offset);
- }
-
void visitCall(Call* curr) {
// special asm.js imports can be optimized
auto* func = getModule()->getFunction(curr->target);
diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp
index c23babda4..5e9d9c9ea 100644
--- a/src/passes/Precompute.cpp
+++ b/src/passes/Precompute.cpp
@@ -15,7 +15,11 @@
*/
//
-// Computes code at compile time where possible.
+// Computes code at compile time where possible, replacing it with the
+// computed constant.
+//
+// The "propagate" variant of this pass also propagates constants across
+// sets and gets, which implements a standard constant propagation.
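+//
+// For example (illustrative), given
+//
+//   x = 10
+//   f(x + 20)
+//
+// the propagate variant can rewrite the call to f(30).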
//
// Possible nondeterminism: WebAssembly NaN signs are nondeterministic,
// and this pass may optimize e.g. a float 0 / 0 into +nan while a VM may
@@ -267,6 +271,7 @@ private:
// compute all dependencies
LocalGraph localGraph(func);
localGraph.computeInfluences();
+ localGraph.computeSSAIndexes();
// prepare the work list. we add things here that might change to a constant
// initially, that means everything
std::unordered_set<Expression*> work;
diff --git a/src/passes/SafeHeap.cpp b/src/passes/SafeHeap.cpp
index f170041e1..3cc1021ed 100644
--- a/src/passes/SafeHeap.cpp
+++ b/src/passes/SafeHeap.cpp
@@ -100,7 +100,10 @@ struct AccessInstrumenter : public WalkerPass<PostWalker<AccessInstrumenter>> {
};
struct SafeHeap : public Pass {
+ PassOptions options;
+
void run(PassRunner* runner, Module* module) override {
+ options = runner->options;
// add imports
addImports(module);
// instrument loads and stores
@@ -314,13 +317,15 @@ struct SafeHeap : public Pass {
}
Expression* makeBoundsCheck(Type type, Builder& builder, Index local, Index bytes) {
+ auto upperOp = options.lowMemoryUnused ? LtUInt32 : EqInt32;
+ auto upperBound = options.lowMemoryUnused ? PassOptions::LowMemoryBound : 0;
return builder.makeIf(
builder.makeBinary(
OrInt32,
builder.makeBinary(
- EqInt32,
+ upperOp,
builder.makeGetLocal(local, i32),
- builder.makeConst(Literal(int32_t(0)))
+ builder.makeConst(Literal(int32_t(upperBound)))
),
builder.makeBinary(
GtUInt32,
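In effect, when lowMemoryUnused is set the emitted guard tightens from "ptr == 0" to "ptr < 1024" (LowMemoryBound): since the application may not use the low 1K, any pointer below that bound is invalid, not just null. The GtUInt32 upper-bound check that follows is unchanged.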
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 58a5f2f0a..3712bf19e 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -100,6 +100,8 @@ void PassRegistry::registerPasses() {
registerPass("minify-imports-and-exports", "minifies both import and export names, and emits a mapping to the minified ones", createMinifyImportsAndExportsPass);
registerPass("nm", "name list", createNameListPass);
registerPass("no-exit-runtime", "removes calls to atexit(), which is valid if the C runtime will never be exited", createNoExitRuntimePass);
+ registerPass("optimize-added-constants", "optimizes added constants into load/store offsets", createOptimizeAddedConstantsPass);
+ registerPass("optimize-added-constants-propagate", "optimizes added constants into load/store offsets, propagating them across locals too", createOptimizeAddedConstantsPropagatePass);
registerPass("optimize-instructions", "optimizes instruction combinations", createOptimizeInstructionsPass);
registerPass("optimize-stack-ir", "optimize Stack IR", createOptimizeStackIRPass);
registerPass("pick-load-signs", "pick load signs based on their uses", createPickLoadSignsPass);
@@ -169,6 +171,13 @@ void PassRunner::addDefaultFunctionOptimizationPasses() {
} else {
add("precompute");
}
+ if (options.lowMemoryUnused) {
+ if (options.optimizeLevel >= 3 || options.shrinkLevel >= 1) {
+ add("optimize-added-constants-propagate");
+ } else {
+ add("optimize-added-constants");
+ }
+ }
if (options.optimizeLevel >= 2 || options.shrinkLevel >= 2) {
add("code-pushing");
}
diff --git a/src/passes/passes.h b/src/passes/passes.h
index 15493f313..ab11721fb 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -57,6 +57,8 @@ Pass* createMinifyImportsAndExportsPass();
Pass* createMetricsPass();
Pass* createNameListPass();
Pass* createNoExitRuntimePass();
+Pass* createOptimizeAddedConstantsPass();
+Pass* createOptimizeAddedConstantsPropagatePass();
Pass* createOptimizeInstructionsPass();
Pass* createOptimizeStackIRPass();
Pass* createPickLoadSignsPass();
diff --git a/src/tools/optimization-options.h b/src/tools/optimization-options.h
index 53fd60e4d..4b8bfc139 100644
--- a/src/tools/optimization-options.h
+++ b/src/tools/optimization-options.h
@@ -96,6 +96,11 @@ struct OptimizationOptions : public ToolOptions {
Options::Arguments::Zero,
[this](Options*, const std::string&) {
passOptions.ignoreImplicitTraps = true;
+ })
+ .add("--low-memory-unused", "-lmu", "Optimize under the helpful assumption that the low 1K of memory is not used by the application",
+ Options::Arguments::Zero,
+ [this](Options*, const std::string&) {
+ passOptions.lowMemoryUnused = true;
});
// add passes in registry
for (const auto& p : PassRegistry::get()->getRegisteredNames()) {
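Once the flag is registered, the assumption can be enabled from the command line of the optimization tools; a usage sketch (wasm-opt includes these options):

  wasm-opt -O3 --low-memory-unused input.wasm -o output.wasm

Per pass.cpp above, at -O3 (or with -Os/-Oz) this schedules the propagating variant of the new pass.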
diff --git a/src/wasm.h b/src/wasm.h
index a1761a430..a16dab478 100644
--- a/src/wasm.h
+++ b/src/wasm.h
@@ -98,12 +98,6 @@ struct Address {
Address& operator++() { ++addr; return *this; }
};
-// An offset into memory
-typedef int32_t Offset;
-
-// Types
-
-
// Operators
enum UnaryOp {
diff --git a/src/wasm/wasm-s-parser.cpp b/src/wasm/wasm-s-parser.cpp
index d7b3824a9..7fd4679b1 100644
--- a/src/wasm/wasm-s-parser.cpp
+++ b/src/wasm/wasm-s-parser.cpp
@@ -966,10 +966,10 @@ static size_t parseMemAttributes(Element& s, Address* offset, Address* align, Ad
throw ParseException("bad memory attribute immediate", s.line, s.col);
}
if (str[0] == 'a') {
- if (value > std::numeric_limits<uint32_t>::max()) throw ParseException("bad align");
+ if (value > std::numeric_limits<uint32_t>::max()) throw ParseException("bad align", s.line, s.col);
*align = value;
} else if (str[0] == 'o') {
- if (value > std::numeric_limits<uint32_t>::max()) throw ParseException("bad offset");
+ if (value > std::numeric_limits<uint32_t>::max()) throw ParseException("bad offset", s.line, s.col);
*offset = value;
} else throw ParseException("bad memory attribute");
i++;