summaryrefslogtreecommitdiff
path: root/src/passes
diff options
context:
space:
mode:
Diffstat (limited to 'src/passes')
-rw-r--r--src/passes/CMakeLists.txt1
-rw-r--r--src/passes/CodePushing.cpp6
-rw-r--r--src/passes/OptimizeAddedConstants.cpp239
-rw-r--r--src/passes/PickLoadSigns.cpp38
-rw-r--r--src/passes/PostEmscripten.cpp60
-rw-r--r--src/passes/Precompute.cpp7
-rw-r--r--src/passes/SafeHeap.cpp9
-rw-r--r--src/passes/pass.cpp9
-rw-r--r--src/passes/passes.h2
9 files changed, 289 insertions, 82 deletions
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index 0ad9dbf05..2848946f8 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -31,6 +31,7 @@ SET(passes_SOURCES
MinifyImportsAndExports.cpp
NameList.cpp
NoExitRuntime.cpp
+ OptimizeAddedConstants.cpp
OptimizeInstructions.cpp
PickLoadSigns.cpp
PostEmscripten.cpp
diff --git a/src/passes/CodePushing.cpp b/src/passes/CodePushing.cpp
index 931df140d..52aab08ad 100644
--- a/src/passes/CodePushing.cpp
+++ b/src/passes/CodePushing.cpp
@@ -77,8 +77,8 @@ struct LocalAnalyzer : public PostWalker<LocalAnalyzer> {
}
};
-// Implement core optimization logic in a struct, used and then discarded entirely
-// for each block
+// Implements core optimization logic. Used and then discarded entirely
+// for each block.
class Pusher {
ExpressionList& list;
LocalAnalyzer& analyzer;
@@ -92,7 +92,7 @@ public:
// continuing forward.
Index relevant = list.size() - 1; // we never need to push past a final element, as
// we couldn't be used after it.
- Index nothing = -1;
+ const Index nothing = -1;
Index i = 0;
Index firstPushable = nothing;
while (i < relevant) {
diff --git a/src/passes/OptimizeAddedConstants.cpp b/src/passes/OptimizeAddedConstants.cpp
new file mode 100644
index 000000000..119d61637
--- /dev/null
+++ b/src/passes/OptimizeAddedConstants.cpp
@@ -0,0 +1,239 @@
+/*
+ * Copyright 2019 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Optimize added constants into load/store offsets. This requires the assumption
+// that low memory is unused, so that we can replace an add (which might wrap)
+// with a load/store offset (which does not).
+//
+// The propagate option also propagates offsets across set/get local pairs.
+//
+// Optimizing constants into load/store offsets is almost always
+// beneficial for speed, as VMs can optimize these operations better.
+// If a LocalGraph is provided, this can also propagate values along get/set
+// pairs. In such a case, we may increase code size slightly or reduce
+// compressibility (e.g., replace (load (get $x)) with (load offset=Z (get $y)),
+// where Z is big enough to not fit in a single byte), but this is good for
+// speed, and may lead to code size reductions elsewhere by using fewer locals.
+//
+
+#include <wasm.h>
+#include <pass.h>
+#include <wasm-builder.h>
+#include <ir/local-graph.h>
+
+namespace wasm {
+
+// Tries to fold a constant that is added to the pointer of a single memory
+// access (T is Load or Store) into the instruction's offset immediate.
+// All the work happens in the constructor; the object is then discarded.
+template<typename T>
+class MemoryAccessOptimizer {
+public:
+  MemoryAccessOptimizer(T* curr, Module* module, LocalGraph* localGraph) : curr(curr), module(module), localGraph(localGraph) {
+    // The pointer itself may be a constant, if e.g. it was precomputed or
+    // a get that we propagated.
+    if (curr->ptr->template is<Const>()) {
+      optimizeConstantPointer();
+      return;
+    }
+    if (auto* add = curr->ptr->template dynCast<Binary>()) {
+      if (add->op == AddInt32) {
+        // Look for a constant on both sides.
+        if (tryToOptimizeConstant(add->right, add->left) ||
+            tryToOptimizeConstant(add->left, add->right)) {
+          return;
+        }
+      }
+    }
+    if (localGraph) {
+      // A final important case is a propagated add:
+      //
+      // x = y + 10
+      // ..
+      // load(x)
+      // =>
+      // x = y + 10
+      // ..
+      // load(y, offset=10)
+      //
+      // This is only valid if y does not change in the middle!
+      if (auto* get = curr->ptr->template dynCast<GetLocal>()) {
+        auto& sets = localGraph->getSetses[get];
+        if (sets.size() == 1) {
+          auto* set = *sets.begin();
+          // May be a zero-init (in which case, we can ignore it).
+          if (set) {
+            auto* value = set->value;
+            if (auto* add = value->template dynCast<Binary>()) {
+              if (add->op == AddInt32) {
+                // We can optimize on either side, but only if we both find
+                // a constant *and* the other side cannot change in the middle.
+                // TODO If it could change, we may add a new local to capture the
+                // old value.
+                if (tryToOptimizePropagatedAdd(add->right, add->left, get) ||
+                    tryToOptimizePropagatedAdd(add->left, add->right, get)) {
+                  return;
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+private:
+  T* curr;                // The load or store being optimized.
+  Module* module;         // Needed to build replacement expressions.
+  LocalGraph* localGraph; // May be null; when present, enables propagation
+                          // through local set/get pairs.
+
+  void optimizeConstantPointer() {
+    // The constant and an offset are interchangeable:
+    //   (load (const X)) <=> (load offset=X (const 0))
+    // It may not matter if we do this or not - it's the same size,
+    // and in both cases the compiler can see it's a constant location.
+    // For code clarity and compressibility, we prefer to put the
+    // entire address in the constant.
+    if (curr->offset) {
+      // Note that the offset may already be larger than low memory - the
+      // code may know that is valid, even if we can't. Only handle the
+      // obviously valid case where an overflow can't occur.
+      auto* c = curr->ptr->template cast<Const>();
+      uint32_t base = c->value.geti32();
+      uint32_t offset = curr->offset;
+      if (uint64_t(base) + uint64_t(offset) < (uint64_t(1) << 32)) {
+        c->value = c->value.add(Literal(uint32_t(curr->offset)));
+        curr->offset = 0;
+      }
+    }
+  }
+
+  // The outcome of checking a constant: whether it may be folded into the
+  // offset, and if so the total offset to use.
+  struct Result {
+    bool succeeded;
+    Address total;
+    Result() : succeeded(false) {}
+    Result(Address total) : succeeded(true), total(total) {}
+  };
+
+  // See if we can fold a constant on one side of an add into the access's
+  // offset, replacing the pointer with the other side. Performs the
+  // replacement and returns true on success.
+  bool tryToOptimizeConstant(Expression* oneSide, Expression* otherSide) {
+    if (auto* c = oneSide->template dynCast<Const>()) {
+      auto result = canOptimizeConstant(c->value);
+      if (result.succeeded) {
+        curr->offset = result.total;
+        curr->ptr = otherSide;
+        // The remaining pointer may itself be a constant we can fold further.
+        if (curr->ptr->template is<Const>()) {
+          optimizeConstantPointer();
+        }
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Like tryToOptimizeConstant, but for an add that reaches the pointer
+  // through a local set/get pair, which additionally requires proving that
+  // the non-constant operand cannot change between the set and this use.
+  bool tryToOptimizePropagatedAdd(Expression* oneSide, Expression* otherSide, GetLocal* ptr) {
+    if (auto* c = oneSide->template dynCast<Const>()) {
+      auto result = canOptimizeConstant(c->value);
+      if (result.succeeded) {
+        // Looks good, but we need to make sure the other side cannot change:
+        //
+        // x = y + 10
+        // y = y + 1
+        // load(x)
+        //
+        // This example should not be optimized into
+        //
+        // load(x, offset=10)
+        //
+        // If the other side is a get, we may be able to prove that we can just use that same
+        // local, if both it and the pointer are in SSA form. In that case,
+        //
+        // y = .. // single assignment that dominates all uses
+        // x = y + 10 // single assignment that dominates all uses
+        // [..]
+        // load(x) => load(y, offset=10)
+        //
+        // This is valid since dominance is transitive, so y's definition dominates the load,
+        // and it is ok to replace x with y + 10 there.
+        // TODO otherwise, create a new local
+        if (auto* get = otherSide->template dynCast<GetLocal>()) {
+          if (localGraph->isSSA(get->index) && localGraph->isSSA(ptr->index)) {
+            curr->offset = result.total;
+            curr->ptr = Builder(*module).makeGetLocal(get->index, get->type);
+            return true;
+          }
+        }
+      }
+    }
+    return false;
+  }
+
+  // Sees if we can optimize a particular constant, i.e., fold it into the
+  // offset so that the total stays within the unused low-memory region.
+  Result canOptimizeConstant(Literal literal) {
+    auto value = literal.geti32();
+    // Avoid uninteresting corner cases with peculiar offsets.
+    if (value >= 0 && value < PassOptions::LowMemoryBound) {
+      // The total offset must not allow reaching reasonable memory
+      // by overflowing.
+      auto total = curr->offset + value;
+      if (total < PassOptions::LowMemoryBound) {
+        return Result(total);
+      }
+    }
+    return Result();
+  }
+};
+
+struct OptimizeAddedConstants : public WalkerPass<PostWalker<OptimizeAddedConstants, UnifiedExpressionVisitor<OptimizeAddedConstants>>> {
+  bool isFunctionParallel() override { return true; }
+
+  // Whether to also propagate offsets across local set/get pairs, which
+  // requires computing a LocalGraph per function.
+  bool propagate;
+
+  OptimizeAddedConstants(bool propagate) : propagate(propagate) {}
+
+  Pass* create() override { return new OptimizeAddedConstants(propagate); }
+
+  // Only built when propagating; null otherwise.
+  std::unique_ptr<LocalGraph> localGraph;
+
+  // The optimizer does all its work in its constructor; the temporary
+  // object is discarded immediately.
+  void visitLoad(Load* curr) {
+    MemoryAccessOptimizer<Load>(curr, getModule(), localGraph.get());
+  }
+
+  void visitStore(Store* curr) {
+    MemoryAccessOptimizer<Store>(curr, getModule(), localGraph.get());
+  }
+
+  void doWalkFunction(Function* func) {
+    // This pass is only valid under the assumption of unused low memory.
+    assert(getPassOptions().lowMemoryUnused);
+    if (propagate) {
+      localGraph = make_unique<LocalGraph>(func);
+      localGraph->computeSSAIndexes();
+    }
+    super::doWalkFunction(func);
+  }
+};
+
+// Factory for the non-propagating variant, used by the pass registry.
+Pass *createOptimizeAddedConstantsPass() {
+  return new OptimizeAddedConstants(false);
+}
+
+// Factory for the propagating variant, used by the pass registry.
+Pass *createOptimizeAddedConstantsPropagatePass() {
+  return new OptimizeAddedConstants(true);
+}
+
+} // namespace wasm
+
diff --git a/src/passes/PickLoadSigns.cpp b/src/passes/PickLoadSigns.cpp
index 827346839..fce50b4bb 100644
--- a/src/passes/PickLoadSigns.cpp
+++ b/src/passes/PickLoadSigns.cpp
@@ -46,22 +46,8 @@ struct PickLoadSigns : public WalkerPass<ExpressionStackWalker<PickLoadSigns>> {
usages.resize(func->getNumLocals());
// walk
ExpressionStackWalker<PickLoadSigns>::doWalkFunction(func);
- // optimize based on the info we saw
- for (auto& pair : loads) {
- auto* load = pair.first;
- auto index = pair.second;
- auto& usage = usages[index];
- // if we can't optimize, give up
- if (usage.totalUsages == 0 || // no usages, so no idea
- usage.signedUsages + usage.unsignedUsages != usage.totalUsages || // non-sign/unsigned usages, so cannot change
- (usage.signedUsages != 0 && usage.signedBits != load->bytes * 8) || // sign usages exist but the wrong size
- (usage.unsignedUsages != 0 && usage.unsignedBits != load->bytes * 8)) { // unsigned usages exist but the wrong size
- continue;
- }
- // we can pick the optimal one. our hope is to remove 2 items per
- // signed use (two shifts), so we factor that in
- load->signed_ = usage.signedUsages * 2 >= usage.unsignedUsages;
- }
+ // optimize
+ optimize();
}
void visitGetLocal(GetLocal* curr) {
@@ -102,6 +88,26 @@ struct PickLoadSigns : public WalkerPass<ExpressionStackWalker<PickLoadSigns>> {
loads[load] = curr->index;
}
}
+
+  // Applies the usage info gathered during the walk: for each tracked load
+  // whose result is only ever used sign- or zero-extended at exactly the
+  // load's own width, pick the sign that removes the most extension code.
+  void optimize() {
+    // optimize based on the info we saw
+    for (auto& pair : loads) {
+      auto* load = pair.first;
+      auto index = pair.second;
+      auto& usage = usages[index];
+      // if we can't optimize, give up
+      if (usage.totalUsages == 0 || // no usages, so no idea
+          usage.signedUsages + usage.unsignedUsages != usage.totalUsages || // non-sign/unsigned usages, so cannot change
+          (usage.signedUsages != 0 && usage.signedBits != load->bytes * 8) || // sign usages exist but the wrong size
+          (usage.unsignedUsages != 0 && usage.unsignedBits != load->bytes * 8)) { // unsigned usages exist but the wrong size
+        continue;
+      }
+      // we can pick the optimal one. our hope is to remove 2 items per
+      // signed use (two shifts), so we factor that in
+      load->signed_ = usage.signedUsages * 2 >= usage.unsignedUsages;
+    }
+  }
+
};
Pass *createPickLoadSignsPass() {
diff --git a/src/passes/PostEmscripten.cpp b/src/passes/PostEmscripten.cpp
index 72c0d8808..7e2bacf25 100644
--- a/src/passes/PostEmscripten.cpp
+++ b/src/passes/PostEmscripten.cpp
@@ -32,66 +32,6 @@ struct PostEmscripten : public WalkerPass<PostWalker<PostEmscripten>> {
Pass* create() override { return new PostEmscripten; }
- // When we have a Load from a local value (typically a GetLocal) plus a constant offset,
- // we may be able to fold it in.
- // The semantics of the Add are to wrap, while wasm offset semantics purposefully do
- // not wrap. So this is not always safe to do. For example, a load may depend on
- // wrapping via
- // (2^32 - 10) + 100 => wrap and load from address 90
- // Without wrapping, we get something too large, and an error. *However*, for
- // asm2wasm output coming from Emscripten, we allocate the lowest 1024 for mapped
- // globals. Mapped globals are simple types (i32, float or double), always
- // accessed directly by a single constant. Therefore if we see (..) + K where
- // K is less then 1024, then if it wraps, it wraps into [0, 1024) which is at best
- // a mapped global, but it can't be because they are accessed directly (at worst,
- // it's 0 or an unused section of memory that was reserved for mapped globlas).
- // Thus it is ok to optimize such small constants into Load offsets.
-
- #define SAFE_MAX 1024
-
- void optimizeMemoryAccess(Expression*& ptr, Address& offset) {
- while (1) {
- auto* add = ptr->dynCast<Binary>();
- if (!add) break;
- if (add->op != AddInt32) break;
- auto* left = add->left->dynCast<Const>();
- auto* right = add->right->dynCast<Const>();
- // note: in optimized code, we shouldn't see an add of two constants, so don't worry about that much
- // (precompute would optimize that)
- if (left) {
- auto value = left->value.geti32();
- if (value >= 0 && value < SAFE_MAX) {
- offset = offset + value;
- ptr = add->right;
- continue;
- }
- }
- if (right) {
- auto value = right->value.geti32();
- if (value >= 0 && value < SAFE_MAX) {
- offset = offset + value;
- ptr = add->left;
- continue;
- }
- }
- break;
- }
- // finally ptr may be a const, but it isn't worth folding that in (we still have a const); in fact,
- // it's better to do the opposite for gzip purposes as well as for readability.
- auto* last = ptr->dynCast<Const>();
- if (last) {
- last->value = Literal(int32_t(last->value.geti32() + offset));
- offset = 0;
- }
- }
-
- void visitLoad(Load* curr) {
- optimizeMemoryAccess(curr->ptr, curr->offset);
- }
- void visitStore(Store* curr) {
- optimizeMemoryAccess(curr->ptr, curr->offset);
- }
-
void visitCall(Call* curr) {
// special asm.js imports can be optimized
auto* func = getModule()->getFunction(curr->target);
diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp
index c23babda4..5e9d9c9ea 100644
--- a/src/passes/Precompute.cpp
+++ b/src/passes/Precompute.cpp
@@ -15,7 +15,11 @@
*/
//
-// Computes code at compile time where possible.
+// Computes code at compile time where possible, replacing it with the
+// computed constant.
+//
+// The "propagate" variant of this pass also propagates constants across
+// sets and gets, which implements a standard constant propagation.
//
// Possible nondeterminism: WebAssembly NaN signs are nondeterministic,
// and this pass may optimize e.g. a float 0 / 0 into +nan while a VM may
@@ -267,6 +271,7 @@ private:
// compute all dependencies
LocalGraph localGraph(func);
localGraph.computeInfluences();
+ localGraph.computeSSAIndexes();
// prepare the work list. we add things here that might change to a constant
// initially, that means everything
std::unordered_set<Expression*> work;
diff --git a/src/passes/SafeHeap.cpp b/src/passes/SafeHeap.cpp
index f170041e1..3cc1021ed 100644
--- a/src/passes/SafeHeap.cpp
+++ b/src/passes/SafeHeap.cpp
@@ -100,7 +100,10 @@ struct AccessInstrumenter : public WalkerPass<PostWalker<AccessInstrumenter>> {
};
struct SafeHeap : public Pass {
+ PassOptions options;
+
void run(PassRunner* runner, Module* module) override {
+ options = runner->options;
// add imports
addImports(module);
// instrument loads and stores
@@ -314,13 +317,15 @@ struct SafeHeap : public Pass {
}
Expression* makeBoundsCheck(Type type, Builder& builder, Index local, Index bytes) {
+ auto upperOp = options.lowMemoryUnused ? LtUInt32 : EqInt32;
+ auto upperBound = options.lowMemoryUnused ? PassOptions::LowMemoryBound : 0;
return builder.makeIf(
builder.makeBinary(
OrInt32,
builder.makeBinary(
- EqInt32,
+ upperOp,
builder.makeGetLocal(local, i32),
- builder.makeConst(Literal(int32_t(0)))
+ builder.makeConst(Literal(int32_t(upperBound)))
),
builder.makeBinary(
GtUInt32,
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 58a5f2f0a..3712bf19e 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -100,6 +100,8 @@ void PassRegistry::registerPasses() {
registerPass("minify-imports-and-exports", "minifies both import and export names, and emits a mapping to the minified ones", createMinifyImportsAndExportsPass);
registerPass("nm", "name list", createNameListPass);
registerPass("no-exit-runtime", "removes calls to atexit(), which is valid if the C runtime will never be exited", createNoExitRuntimePass);
+ registerPass("optimize-added-constants", "optimizes added constants into load/store offsets", createOptimizeAddedConstantsPass);
+ registerPass("optimize-added-constants-propagate", "optimizes added constants into load/store offsets, propagating them across locals too", createOptimizeAddedConstantsPropagatePass);
registerPass("optimize-instructions", "optimizes instruction combinations", createOptimizeInstructionsPass);
registerPass("optimize-stack-ir", "optimize Stack IR", createOptimizeStackIRPass);
registerPass("pick-load-signs", "pick load signs based on their uses", createPickLoadSignsPass);
@@ -169,6 +171,13 @@ void PassRunner::addDefaultFunctionOptimizationPasses() {
} else {
add("precompute");
}
+ if (options.lowMemoryUnused) {
+ if (options.optimizeLevel >= 3 || options.shrinkLevel >= 1) {
+ add("optimize-added-constants-propagate");
+ } else {
+ add("optimize-added-constants");
+ }
+ }
if (options.optimizeLevel >= 2 || options.shrinkLevel >= 2) {
add("code-pushing");
}
diff --git a/src/passes/passes.h b/src/passes/passes.h
index 15493f313..ab11721fb 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -57,6 +57,8 @@ Pass* createMinifyImportsAndExportsPass();
Pass* createMetricsPass();
Pass* createNameListPass();
Pass* createNoExitRuntimePass();
+Pass* createOptimizeAddedConstantsPass();
+Pass* createOptimizeAddedConstantsPropagatePass();
Pass* createOptimizeInstructionsPass();
Pass* createOptimizeStackIRPass();
Pass* createPickLoadSignsPass();