summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/ast/hashed.h59
-rw-r--r--src/passes/CMakeLists.txt1
-rw-r--r--src/passes/LocalCSE.cpp156
-rw-r--r--src/passes/pass.cpp5
-rw-r--r--src/passes/passes.h1
-rw-r--r--test/passes/Oz.txt55
-rw-r--r--test/passes/Oz.wast57
-rw-r--r--test/passes/local-cse.txt183
-rw-r--r--test/passes/local-cse.wast163
-rw-r--r--test/passes/local-cse_ignore-implicit-traps.txt30
-rw-r--r--test/passes/local-cse_ignore-implicit-traps.wast20
11 files changed, 730 insertions, 0 deletions
diff --git a/src/ast/hashed.h b/src/ast/hashed.h
new file mode 100644
index 000000000..04ee7f401
--- /dev/null
+++ b/src/ast/hashed.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2017 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _wasm_ast_hashed_h
+
+#include "support/hash.h"
+#include "wasm.h"
+#include "ast_utils.h"
+
+namespace wasm {
+
+// An expression with a cached hash value
+struct HashedExpression {
+ Expression* expr;
+ size_t hash;
+
+ HashedExpression(Expression* expr) : expr(expr) {
+ if (expr) {
+ hash = ExpressionAnalyzer::hash(expr);
+ }
+ }
+
+ HashedExpression(const HashedExpression& other) : expr(other.expr), hash(other.hash) {}
+};
+
+struct ExpressionHasher {
+ size_t operator()(const HashedExpression value) const {
+ return value.hash;
+ }
+};
+
+struct ExpressionComparer {
+ bool operator()(const HashedExpression a, const HashedExpression b) const {
+ if (a.hash != b.hash) return false;
+ return ExpressionAnalyzer::equal(a.expr, b.expr);
+ }
+};
+
+template<typename T>
+class HashedExpressionMap : public std::unordered_map<HashedExpression, T, ExpressionHasher, ExpressionComparer> {
+};
+
+} // namespace wasm
+
+#endif // _wasm_ast_hashed_h
+
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index 74f120e85..876a71d74 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -7,6 +7,7 @@ SET(passes_SOURCES
ExtractFunction.cpp
Inlining.cpp
LegalizeJSInterface.cpp
+ LocalCSE.cpp
MemoryPacking.cpp
MergeBlocks.cpp
Metrics.cpp
diff --git a/src/passes/LocalCSE.cpp b/src/passes/LocalCSE.cpp
new file mode 100644
index 000000000..6d48a1107
--- /dev/null
+++ b/src/passes/LocalCSE.cpp
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2017 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Local CSE
+//
+// In each linear area of execution,
+// * track each relevant (big enough) expression
+// * if already seen, write to a local if not already, and reuse
+// * invalidate the list as we see effects
+//
+// TODO: global, inter-block gvn etc.
+//
+
+#include <wasm.h>
+#include <wasm-builder.h>
+#include <wasm-traversal.h>
+#include <pass.h>
+#include <ast_utils.h>
+#include <ast/hashed.h>
+
+namespace wasm {
+
+const Index UNUSED = -1;
+
+struct LocalCSE : public WalkerPass<LinearExecutionWalker<LocalCSE>> {
+ bool isFunctionParallel() override { return true; }
+
+ Pass* create() override { return new LocalCSE(); }
+
+ // information for an expression we can reuse
+ struct UsableInfo {
+ Expression** item;
+ Index index; // if not UNUSED, then the local we are assigned to, use that to reuse us
+ EffectAnalyzer effects;
+
+ UsableInfo(Expression** item, PassOptions& passOptions) : item(item), index(UNUSED), effects(passOptions, *item) {}
+ };
+
+ // a list of usables in a linear execution trace
+ typedef HashedExpressionMap<UsableInfo> Usables;
+
+ // locals in current linear execution trace, which we try to sink
+ Usables usables;
+
+ static void doNoteNonLinear(LocalCSE* self, Expression** currp) {
+ self->usables.clear();
+ }
+
+ void checkInvalidations(EffectAnalyzer& effects) {
+ // TODO: this is O(bad)
+ std::vector<HashedExpression> invalidated;
+ for (auto& sinkable : usables) {
+ if (effects.invalidates(sinkable.second.effects)) {
+ invalidated.push_back(sinkable.first);
+ }
+ }
+ for (auto index : invalidated) {
+ usables.erase(index);
+ }
+ }
+
+ std::vector<Expression*> expressionStack;
+
+ static void visitPre(LocalCSE* self, Expression** currp) {
+ // pre operations
+ Expression* curr = *currp;
+
+ EffectAnalyzer effects(self->getPassOptions());
+ if (effects.checkPre(curr)) {
+ self->checkInvalidations(effects);
+ }
+
+ self->expressionStack.push_back(curr);
+ }
+
+ static void visitPost(LocalCSE* self, Expression** currp) {
+ auto* curr = *currp;
+
+ // main operations
+ if (self->isRelevant(curr)) {
+ self->handle(currp, curr);
+ }
+
+ // post operations
+
+ EffectAnalyzer effects(self->getPassOptions());
+ if (effects.checkPost(curr)) {
+ self->checkInvalidations(effects);
+ }
+
+ self->expressionStack.pop_back();
+ }
+
+ // override scan to add a pre and a post check task to all nodes
+ static void scan(LocalCSE* self, Expression** currp) {
+ self->pushTask(visitPost, currp);
+
+ WalkerPass<LinearExecutionWalker<LocalCSE>>::scan(self, currp);
+
+ self->pushTask(visitPre, currp);
+ }
+
+ bool isRelevant(Expression* curr) {
+ if (curr->is<GetLocal>()) {
+ return false; // trivial, this is what we optimize to!
+ }
+ if (!isConcreteWasmType(curr->type)) {
+ return false; // don't bother with unreachable etc.
+ }
+ if (EffectAnalyzer(getPassOptions(), curr).hasSideEffects()) {
+ return false; // we can't combine things with side effects
+ }
+ // check what we care about TODO: use optimize/shrink levels?
+ return Measurer::measure(curr) > 1;
+ }
+
+ void handle(Expression** currp, Expression* curr) {
+ HashedExpression hashed(curr);
+ auto iter = usables.find(hashed);
+ if (iter != usables.end()) {
+ // already exists in the table, this is good to reuse
+ auto& info = iter->second;
+ if (info.index == UNUSED) {
+ // we need to assign to a local. create a new one
+ auto index = info.index = Builder::addVar(getFunction(), curr->type);
+ (*info.item) = Builder(*getModule()).makeTeeLocal(index, *info.item);
+ }
+ replaceCurrent(
+ Builder(*getModule()).makeGetLocal(info.index, curr->type)
+ );
+ } else {
+ // not in table, add this, maybe we can help others later
+ usables.emplace(std::make_pair(hashed, UsableInfo(currp, getPassOptions())));
+ }
+ }
+};
+
+Pass *createLocalCSEPass() {
+ return new LocalCSE();
+}
+
+} // namespace wasm
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 081164d90..a82b2fa58 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -71,6 +71,7 @@ void PassRegistry::registerPasses() {
registerPass("extract-function", "leaves just one function (useful for debugging)", createExtractFunctionPass);
registerPass("inlining", "inlines functions (currently only ones with a single use)", createInliningPass);
registerPass("legalize-js-interface", "legalizes i64 types on the import/export boundary", createLegalizeJSInterfacePass);
+ registerPass("local-cse", "common subexpression elimination inside basic blocks", createLocalCSEPass);
registerPass("memory-packing", "packs memory into separate segments, skipping zeros", createMemoryPackingPass);
registerPass("merge-blocks", "merges blocks to their parents", createMergeBlocksPass);
registerPass("metrics", "reports metrics", createMetricsPass);
@@ -132,6 +133,10 @@ void PassRunner::addDefaultFunctionOptimizationPasses() {
add("merge-blocks");
add("optimize-instructions");
add("precompute");
+ if (options.shrinkLevel >= 2) {
+ add("local-cse"); // TODO: run this early, before first coalesce-locals. right now doing so uncovers some deficiencies we need to fix first
+ add("coalesce-locals"); // just for localCSE
+ }
add("vacuum"); // should not be needed, last few passes do not create garbage, but just to be safe
}
diff --git a/src/passes/passes.h b/src/passes/passes.h
index 83bf556d8..c3660b048 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -31,6 +31,7 @@ Pass *createExtractFunctionPass();
Pass *createFullPrinterPass();
Pass *createInliningPass();
Pass *createLegalizeJSInterfacePass();
+Pass *createLocalCSEPass();
Pass *createLowerIfElsePass();
Pass *createMemoryPackingPass();
Pass *createMergeBlocksPass();
diff --git a/test/passes/Oz.txt b/test/passes/Oz.txt
new file mode 100644
index 000000000..7dd706ac2
--- /dev/null
+++ b/test/passes/Oz.txt
@@ -0,0 +1,55 @@
+(module
+ (type $0 (func (param i32 i32) (result i32)))
+ (type $1 (func (param i32) (result i32)))
+ (memory $0 100 100)
+ (export "localcse" (func $basics))
+ (export "localcse-2" (func $8))
+ (func $basics (type $0) (param $0 i32) (param $1 i32) (result i32)
+ (local $2 i32)
+ (i32.add
+ (tee_local $2
+ (i32.add
+ (get_local $0)
+ (get_local $1)
+ )
+ )
+ (get_local $2)
+ )
+ )
+ (func $8 (type $1) (param $0 i32) (result i32)
+ (local $1 i32)
+ (i32.store
+ (tee_local $0
+ (tee_local $1
+ (i32.add
+ (get_local $1)
+ (i32.const 4)
+ )
+ )
+ )
+ (i32.and
+ (i32.load
+ (get_local $0)
+ )
+ (i32.xor
+ (tee_local $0
+ (i32.const 74)
+ )
+ (i32.const -1)
+ )
+ )
+ )
+ (i32.store
+ (get_local $1)
+ (i32.or
+ (i32.load
+ (get_local $1)
+ )
+ (i32.and
+ (get_local $0)
+ (i32.const 8)
+ )
+ )
+ )
+ )
+)
diff --git a/test/passes/Oz.wast b/test/passes/Oz.wast
new file mode 100644
index 000000000..fc465669e
--- /dev/null
+++ b/test/passes/Oz.wast
@@ -0,0 +1,57 @@
+(module
+ (memory 100 100)
+ (func $basics (export "localcse") (param $x i32) ($param $y i32) (result i32) ;; -O3 does localcse
+ (local $x2 i32)
+ (local $y2 i32)
+ (set_local $x2
+ (i32.add (get_local $x) (get_local $y))
+ )
+ (set_local $y2
+ (i32.add (get_local $x) (get_local $y))
+ )
+ (i32.add (get_local $x2) (get_local $y2))
+ )
+ (func $8 (export "localcse-2") (param $var$0 i32) (result i32)
+ (local $var$1 i32)
+ (local $var$2 i32)
+ (local $var$3 i32)
+ (block $label$0 i32
+ (i32.store
+ (tee_local $var$2
+ (i32.add
+ (get_local $var$1)
+ (i32.const 4)
+ )
+ )
+ (i32.and
+ (i32.load
+ (get_local $var$2)
+ )
+ (i32.xor
+ (tee_local $var$2
+ (i32.const 74)
+ )
+ (i32.const -1)
+ )
+ )
+ )
+ (i32.store
+ (tee_local $var$1
+ (i32.add
+ (get_local $var$1)
+ (i32.const 4)
+ )
+ )
+ (i32.or
+ (i32.load
+ (get_local $var$1)
+ )
+ (i32.and
+ (get_local $var$2)
+ (i32.const 8)
+ )
+ )
+ )
+ )
+ )
+)
diff --git a/test/passes/local-cse.txt b/test/passes/local-cse.txt
new file mode 100644
index 000000000..1b740236e
--- /dev/null
+++ b/test/passes/local-cse.txt
@@ -0,0 +1,183 @@
+(module
+ (type $0 (func))
+ (type $1 (func (param i32) (result i32)))
+ (memory $0 100 100)
+ (func $basics (type $0)
+ (local $x i32)
+ (local $y i32)
+ (local $2 i32)
+ (local $3 i32)
+ (drop
+ (tee_local $2
+ (i32.add
+ (i32.const 1)
+ (i32.const 2)
+ )
+ )
+ )
+ (drop
+ (get_local $2)
+ )
+ (if
+ (i32.const 0)
+ (nop)
+ )
+ (drop
+ (i32.add
+ (i32.const 1)
+ (i32.const 2)
+ )
+ )
+ (drop
+ (tee_local $3
+ (i32.add
+ (get_local $x)
+ (get_local $y)
+ )
+ )
+ )
+ (drop
+ (get_local $3)
+ )
+ (drop
+ (get_local $3)
+ )
+ (call $basics)
+ (drop
+ (get_local $3)
+ )
+ (set_local $x
+ (i32.const 100)
+ )
+ (drop
+ (i32.add
+ (get_local $x)
+ (get_local $y)
+ )
+ )
+ )
+ (func $recursive1 (type $0)
+ (local $x i32)
+ (local $y i32)
+ (local $2 i32)
+ (drop
+ (i32.add
+ (i32.const 1)
+ (tee_local $2
+ (i32.add
+ (i32.const 2)
+ (i32.const 3)
+ )
+ )
+ )
+ )
+ (drop
+ (i32.add
+ (i32.const 1)
+ (get_local $2)
+ )
+ )
+ (drop
+ (get_local $2)
+ )
+ )
+ (func $recursive2 (type $0)
+ (local $x i32)
+ (local $y i32)
+ (local $2 i32)
+ (drop
+ (i32.add
+ (i32.const 1)
+ (tee_local $2
+ (i32.add
+ (i32.const 2)
+ (i32.const 3)
+ )
+ )
+ )
+ )
+ (drop
+ (get_local $2)
+ )
+ (drop
+ (i32.add
+ (i32.const 1)
+ (get_local $2)
+ )
+ )
+ )
+ (func $self (type $0)
+ (local $x i32)
+ (local $y i32)
+ (local $2 i32)
+ (drop
+ (i32.add
+ (tee_local $2
+ (i32.add
+ (i32.const 2)
+ (i32.const 3)
+ )
+ )
+ (get_local $2)
+ )
+ )
+ (drop
+ (get_local $2)
+ )
+ )
+ (func $loads (type $0)
+ (drop
+ (i32.load
+ (i32.const 10)
+ )
+ )
+ (drop
+ (i32.load
+ (i32.const 10)
+ )
+ )
+ )
+ (func $8 (type $1) (param $var$0 i32) (result i32)
+ (local $var$1 i32)
+ (local $var$2 i32)
+ (local $var$3 i32)
+ (local $4 i32)
+ (block $label$0 i32
+ (i32.store
+ (tee_local $var$2
+ (tee_local $4
+ (i32.add
+ (get_local $var$1)
+ (i32.const 4)
+ )
+ )
+ )
+ (i32.and
+ (i32.load
+ (get_local $var$2)
+ )
+ (i32.xor
+ (tee_local $var$2
+ (i32.const 74)
+ )
+ (i32.const -1)
+ )
+ )
+ )
+ (i32.store
+ (tee_local $var$1
+ (get_local $4)
+ )
+ (i32.or
+ (i32.load
+ (get_local $var$1)
+ )
+ (i32.and
+ (get_local $var$2)
+ (i32.const 8)
+ )
+ )
+ )
+ )
+ )
+)
diff --git a/test/passes/local-cse.wast b/test/passes/local-cse.wast
new file mode 100644
index 000000000..b1155ce9d
--- /dev/null
+++ b/test/passes/local-cse.wast
@@ -0,0 +1,163 @@
+(module
+ (memory 100 100)
+ (func $basics
+ (local $x i32)
+ (local $y i32)
+ (drop
+ (i32.add (i32.const 1) (i32.const 2))
+ )
+ (drop
+ (i32.add (i32.const 1) (i32.const 2))
+ )
+ (if (i32.const 0) (nop))
+ (drop ;; we can't do this yet, non-linear
+ (i32.add (i32.const 1) (i32.const 2))
+ )
+ (drop
+ (i32.add (get_local $x) (get_local $y))
+ )
+ (drop
+ (i32.add (get_local $x) (get_local $y))
+ )
+ (drop
+ (i32.add (get_local $x) (get_local $y))
+ )
+ (call $basics) ;; side effects, but no matter for our locals
+ (drop
+ (i32.add (get_local $x) (get_local $y))
+ )
+ (set_local $x (i32.const 100))
+ (drop ;; x was changed!
+ (i32.add (get_local $x) (get_local $y))
+ )
+ )
+ (func $recursive1
+ (local $x i32)
+ (local $y i32)
+ (drop
+ (i32.add
+ (i32.const 1)
+ (i32.add
+ (i32.const 2)
+ (i32.const 3)
+ )
+ )
+ )
+ (drop
+ (i32.add
+ (i32.const 1)
+ (i32.add
+ (i32.const 2)
+ (i32.const 3)
+ )
+ )
+ )
+ (drop
+ (i32.add
+ (i32.const 2)
+ (i32.const 3)
+ )
+ )
+ )
+ (func $recursive2
+ (local $x i32)
+ (local $y i32)
+ (drop
+ (i32.add
+ (i32.const 1)
+ (i32.add
+ (i32.const 2)
+ (i32.const 3)
+ )
+ )
+ )
+ (drop
+ (i32.add
+ (i32.const 2)
+ (i32.const 3)
+ )
+ )
+ (drop
+ (i32.add
+ (i32.const 1)
+ (i32.add
+ (i32.const 2)
+ (i32.const 3)
+ )
+ )
+ )
+ )
+ (func $self
+ (local $x i32)
+ (local $y i32)
+ (drop
+ (i32.add
+ (i32.add
+ (i32.const 2)
+ (i32.const 3)
+ )
+ (i32.add
+ (i32.const 2)
+ (i32.const 3)
+ )
+ )
+ )
+ (drop
+ (i32.add
+ (i32.const 2)
+ (i32.const 3)
+ )
+ )
+ )
+ (func $loads
+ (drop
+ (i32.load (i32.const 10))
+ )
+ (drop
+ (i32.load (i32.const 10)) ;; implicit traps, sad
+ )
+ )
+ (func $8 (param $var$0 i32) (result i32)
+ (local $var$1 i32)
+ (local $var$2 i32)
+ (local $var$3 i32)
+ (block $label$0 i32
+ (i32.store
+ (tee_local $var$2
+ (i32.add
+ (get_local $var$1)
+ (i32.const 4)
+ )
+ )
+ (i32.and
+ (i32.load
+ (get_local $var$2)
+ )
+ (i32.xor
+ (tee_local $var$2
+ (i32.const 74)
+ )
+ (i32.const -1)
+ )
+ )
+ )
+ (i32.store
+ (tee_local $var$1
+ (i32.add
+ (get_local $var$1)
+ (i32.const 4)
+ )
+ )
+ (i32.or
+ (i32.load
+ (get_local $var$1)
+ )
+ (i32.and
+ (get_local $var$2)
+ (i32.const 8)
+ )
+ )
+ )
+ )
+ )
+)
diff --git a/test/passes/local-cse_ignore-implicit-traps.txt b/test/passes/local-cse_ignore-implicit-traps.txt
new file mode 100644
index 000000000..7b8ebba69
--- /dev/null
+++ b/test/passes/local-cse_ignore-implicit-traps.txt
@@ -0,0 +1,30 @@
+(module
+ (type $0 (func))
+ (memory $0 100 100)
+ (func $loads (type $0)
+ (local $0 i32)
+ (drop
+ (tee_local $0
+ (i32.load
+ (i32.const 10)
+ )
+ )
+ )
+ (drop
+ (get_local $0)
+ )
+ (drop
+ (i32.load offset=5
+ (i32.const 10)
+ )
+ )
+ (drop
+ (i32.load
+ (i32.const 11)
+ )
+ )
+ (drop
+ (get_local $0)
+ )
+ )
+)
diff --git a/test/passes/local-cse_ignore-implicit-traps.wast b/test/passes/local-cse_ignore-implicit-traps.wast
new file mode 100644
index 000000000..0f22084c6
--- /dev/null
+++ b/test/passes/local-cse_ignore-implicit-traps.wast
@@ -0,0 +1,20 @@
+(module
+ (memory 100 100)
+ (func $loads
+ (drop
+ (i32.load (i32.const 10))
+ )
+ (drop
+ (i32.load (i32.const 10))
+ )
+ (drop
+ (i32.load offset=5 (i32.const 10))
+ )
+ (drop
+ (i32.load (i32.const 11))
+ )
+ (drop
+ (i32.load (i32.const 10))
+ )
+ )
+)