diff options
-rw-r--r-- | src/ast/hashed.h | 59 | ||||
-rw-r--r-- | src/passes/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/passes/LocalCSE.cpp | 156 | ||||
-rw-r--r-- | src/passes/pass.cpp | 5 | ||||
-rw-r--r-- | src/passes/passes.h | 1 | ||||
-rw-r--r-- | test/passes/Oz.txt | 55 | ||||
-rw-r--r-- | test/passes/Oz.wast | 57 | ||||
-rw-r--r-- | test/passes/local-cse.txt | 183 | ||||
-rw-r--r-- | test/passes/local-cse.wast | 163 | ||||
-rw-r--r-- | test/passes/local-cse_ignore-implicit-traps.txt | 30 | ||||
-rw-r--r-- | test/passes/local-cse_ignore-implicit-traps.wast | 20 |
11 files changed, 730 insertions, 0 deletions
diff --git a/src/ast/hashed.h b/src/ast/hashed.h new file mode 100644 index 000000000..04ee7f401 --- /dev/null +++ b/src/ast/hashed.h @@ -0,0 +1,59 @@ +/* + * Copyright 2017 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _wasm_ast_hashed_h + +#include "support/hash.h" +#include "wasm.h" +#include "ast_utils.h" + +namespace wasm { + +// An expression with a cached hash value +struct HashedExpression { + Expression* expr; + size_t hash; + + HashedExpression(Expression* expr) : expr(expr) { + if (expr) { + hash = ExpressionAnalyzer::hash(expr); + } + } + + HashedExpression(const HashedExpression& other) : expr(other.expr), hash(other.hash) {} +}; + +struct ExpressionHasher { + size_t operator()(const HashedExpression value) const { + return value.hash; + } +}; + +struct ExpressionComparer { + bool operator()(const HashedExpression a, const HashedExpression b) const { + if (a.hash != b.hash) return false; + return ExpressionAnalyzer::equal(a.expr, b.expr); + } +}; + +template<typename T> +class HashedExpressionMap : public std::unordered_map<HashedExpression, T, ExpressionHasher, ExpressionComparer> { +}; + +} // namespace wasm + +#endif // _wasm_ast_hashed_h + diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt index 74f120e85..876a71d74 100644 --- a/src/passes/CMakeLists.txt +++ b/src/passes/CMakeLists.txt @@ -7,6 +7,7 @@ SET(passes_SOURCES ExtractFunction.cpp Inlining.cpp LegalizeJSInterface.cpp + LocalCSE.cpp MemoryPacking.cpp MergeBlocks.cpp Metrics.cpp diff --git a/src/passes/LocalCSE.cpp b/src/passes/LocalCSE.cpp new file mode 100644 index 000000000..6d48a1107 --- /dev/null +++ b/src/passes/LocalCSE.cpp @@ -0,0 +1,156 @@ +/* + * Copyright 2017 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Local CSE +// +// In each linear area of execution, +// * track each relevant (big enough) expression +// * if already seen, write to a local if not already, and reuse +// * invalidate the list as we see effects +// +// TODO: global, inter-block gvn etc. +// + +#include <wasm.h> +#include <wasm-builder.h> +#include <wasm-traversal.h> +#include <pass.h> +#include <ast_utils.h> +#include <ast/hashed.h> + +namespace wasm { + +const Index UNUSED = -1; + +struct LocalCSE : public WalkerPass<LinearExecutionWalker<LocalCSE>> { + bool isFunctionParallel() override { return true; } + + Pass* create() override { return new LocalCSE(); } + + // information for an expression we can reuse + struct UsableInfo { + Expression** item; + Index index; // if not UNUSED, then the local we are assigned to, use that to reuse us + EffectAnalyzer effects; + + UsableInfo(Expression** item, PassOptions& passOptions) : item(item), index(UNUSED), effects(passOptions, *item) {} + }; + + // a list of usables in a linear execution trace + typedef HashedExpressionMap<UsableInfo> Usables; + + // locals in current linear execution trace, which we try to sink + Usables usables; + + static void doNoteNonLinear(LocalCSE* self, Expression** currp) { + self->usables.clear(); + } + + void checkInvalidations(EffectAnalyzer& effects) { + // TODO: this is O(bad) + std::vector<HashedExpression> invalidated; + for (auto& sinkable : usables) { + if (effects.invalidates(sinkable.second.effects)) { + invalidated.push_back(sinkable.first); + } + } + for (auto index : invalidated) { + usables.erase(index); + } + } + + std::vector<Expression*> expressionStack; + + static void visitPre(LocalCSE* self, Expression** currp) { + // pre operations + Expression* curr = *currp; + + EffectAnalyzer effects(self->getPassOptions()); + if (effects.checkPre(curr)) { + self->checkInvalidations(effects); + } + + self->expressionStack.push_back(curr); + } + + static void visitPost(LocalCSE* self, Expression** currp) { + auto* curr = *currp; + + // main operations + if (self->isRelevant(curr)) { + self->handle(currp, curr); + } + + // post operations + + EffectAnalyzer effects(self->getPassOptions()); + if (effects.checkPost(curr)) { + self->checkInvalidations(effects); + } + + self->expressionStack.pop_back(); + } + + // override scan to add a pre and a post check task to all nodes + static void scan(LocalCSE* self, Expression** currp) { + self->pushTask(visitPost, currp); + + WalkerPass<LinearExecutionWalker<LocalCSE>>::scan(self, currp); + + self->pushTask(visitPre, currp); + } + + bool isRelevant(Expression* curr) { + if (curr->is<GetLocal>()) { + return false; // trivial, this is what we optimize to! + } + if (!isConcreteWasmType(curr->type)) { + return false; // don't bother with unreachable etc. + } + if (EffectAnalyzer(getPassOptions(), curr).hasSideEffects()) { + return false; // we can't combine things with side effects + } + // check what we care about TODO: use optimize/shrink levels? + return Measurer::measure(curr) > 1; + } + + void handle(Expression** currp, Expression* curr) { + HashedExpression hashed(curr); + auto iter = usables.find(hashed); + if (iter != usables.end()) { + // already exists in the table, this is good to reuse + auto& info = iter->second; + if (info.index == UNUSED) { + // we need to assign to a local. create a new one + auto index = info.index = Builder::addVar(getFunction(), curr->type); + (*info.item) = Builder(*getModule()).makeTeeLocal(index, *info.item); + } + replaceCurrent( + Builder(*getModule()).makeGetLocal(info.index, curr->type) + ); + } else { + // not in table, add this, maybe we can help others later + usables.emplace(std::make_pair(hashed, UsableInfo(currp, getPassOptions()))); + } + } +}; + +Pass *createLocalCSEPass() { + return new LocalCSE(); +} + +} // namespace wasm diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 081164d90..a82b2fa58 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -71,6 +71,7 @@ void PassRegistry::registerPasses() { registerPass("extract-function", "leaves just one function (useful for debugging)", createExtractFunctionPass); registerPass("inlining", "inlines functions (currently only ones with a single use)", createInliningPass); registerPass("legalize-js-interface", "legalizes i64 types on the import/export boundary", createLegalizeJSInterfacePass); + registerPass("local-cse", "common subexpression elimination inside basic blocks", createLocalCSEPass); registerPass("memory-packing", "packs memory into separate segments, skipping zeros", createMemoryPackingPass); registerPass("merge-blocks", "merges blocks to their parents", createMergeBlocksPass); registerPass("metrics", "reports metrics", createMetricsPass); @@ -132,6 +133,10 @@ void PassRunner::addDefaultFunctionOptimizationPasses() { add("merge-blocks"); add("optimize-instructions"); add("precompute"); + if (options.shrinkLevel >= 2) { + add("local-cse"); // TODO: run this early, before first coalesce-locals. right now doing so uncovers some deficiencies we need to fix first + add("coalesce-locals"); // just for localCSE + } add("vacuum"); // should not be needed, last few passes do not create garbage, but just to be safe } diff --git a/src/passes/passes.h b/src/passes/passes.h index 83bf556d8..c3660b048 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -31,6 +31,7 @@ Pass *createExtractFunctionPass(); Pass *createFullPrinterPass(); Pass *createInliningPass(); Pass *createLegalizeJSInterfacePass(); +Pass *createLocalCSEPass(); Pass *createLowerIfElsePass(); Pass *createMemoryPackingPass(); Pass *createMergeBlocksPass(); diff --git a/test/passes/Oz.txt b/test/passes/Oz.txt new file mode 100644 index 000000000..7dd706ac2 --- /dev/null +++ b/test/passes/Oz.txt @@ -0,0 +1,55 @@ +(module + (type $0 (func (param i32 i32) (result i32))) + (type $1 (func (param i32) (result i32))) + (memory $0 100 100) + (export "localcse" (func $basics)) + (export "localcse-2" (func $8)) + (func $basics (type $0) (param $0 i32) (param $1 i32) (result i32) + (local $2 i32) + (i32.add + (tee_local $2 + (i32.add + (get_local $0) + (get_local $1) + ) + ) + (get_local $2) + ) + ) + (func $8 (type $1) (param $0 i32) (result i32) + (local $1 i32) + (i32.store + (tee_local $0 + (tee_local $1 + (i32.add + (get_local $1) + (i32.const 4) + ) + ) + ) + (i32.and + (i32.load + (get_local $0) + ) + (i32.xor + (tee_local $0 + (i32.const 74) + ) + (i32.const -1) + ) + ) + ) + (i32.store + (get_local $1) + (i32.or + (i32.load + (get_local $1) + ) + (i32.and + (get_local $0) + (i32.const 8) + ) + ) + ) + ) +) diff --git a/test/passes/Oz.wast b/test/passes/Oz.wast new file mode 100644 index 000000000..fc465669e --- /dev/null +++ b/test/passes/Oz.wast @@ -0,0 +1,57 @@ +(module + (memory 100 100) + (func $basics (export "localcse") (param $x i32) ($param $y i32) (result i32) ;; -O3 does localcse + (local $x2 i32) + (local $y2 i32) + (set_local $x2 + (i32.add (get_local $x) (get_local $y)) + ) + (set_local $y2 + (i32.add (get_local $x) (get_local $y)) + ) + (i32.add (get_local $x2) (get_local $y2)) + ) + (func $8 (export "localcse-2") (param $var$0 i32) (result i32) + (local $var$1 i32) + (local $var$2 i32) + (local $var$3 i32) + (block $label$0 i32 + (i32.store + (tee_local $var$2 + (i32.add + (get_local $var$1) + (i32.const 4) + ) + ) + (i32.and + (i32.load + (get_local $var$2) + ) + (i32.xor + (tee_local $var$2 + (i32.const 74) + ) + (i32.const -1) + ) + ) + ) + (i32.store + (tee_local $var$1 + (i32.add + (get_local $var$1) + (i32.const 4) + ) + ) + (i32.or + (i32.load + (get_local $var$1) + ) + (i32.and + (get_local $var$2) + (i32.const 8) + ) + ) + ) + ) + ) +) diff --git a/test/passes/local-cse.txt b/test/passes/local-cse.txt new file mode 100644 index 000000000..1b740236e --- /dev/null +++ b/test/passes/local-cse.txt @@ -0,0 +1,183 @@ +(module + (type $0 (func)) + (type $1 (func (param i32) (result i32))) + (memory $0 100 100) + (func $basics (type $0) + (local $x i32) + (local $y i32) + (local $2 i32) + (local $3 i32) + (drop + (tee_local $2 + (i32.add + (i32.const 1) + (i32.const 2) + ) + ) + ) + (drop + (get_local $2) + ) + (if + (i32.const 0) + (nop) + ) + (drop + (i32.add + (i32.const 1) + (i32.const 2) + ) + ) + (drop + (tee_local $3 + (i32.add + (get_local $x) + (get_local $y) + ) + ) + ) + (drop + (get_local $3) + ) + (drop + (get_local $3) + ) + (call $basics) + (drop + (get_local $3) + ) + (set_local $x + (i32.const 100) + ) + (drop + (i32.add + (get_local $x) + (get_local $y) + ) + ) + ) + (func $recursive1 (type $0) + (local $x i32) + (local $y i32) + (local $2 i32) + (drop + (i32.add + (i32.const 1) + (tee_local $2 + (i32.add + (i32.const 2) + (i32.const 3) + ) + ) + ) + ) + (drop + (i32.add + (i32.const 1) + (get_local $2) + ) + ) + (drop + (get_local $2) + ) + ) + (func $recursive2 (type $0) + (local $x i32) + (local $y i32) + (local $2 i32) + (drop + (i32.add + (i32.const 1) + (tee_local $2 + (i32.add + (i32.const 2) + (i32.const 3) + ) + ) + ) + ) + (drop + (get_local $2) + ) + (drop + (i32.add + (i32.const 1) + (get_local $2) + ) + ) + ) + (func $self (type $0) + (local $x i32) + (local $y i32) + (local $2 i32) + (drop + (i32.add + (tee_local $2 + (i32.add + (i32.const 2) + (i32.const 3) + ) + ) + (get_local $2) + ) + ) + (drop + (get_local $2) + ) + ) + (func $loads (type $0) + (drop + (i32.load + (i32.const 10) + ) + ) + (drop + (i32.load + (i32.const 10) + ) + ) + ) + (func $8 (type $1) (param $var$0 i32) (result i32) + (local $var$1 i32) + (local $var$2 i32) + (local $var$3 i32) + (local $4 i32) + (block $label$0 i32 + (i32.store + (tee_local $var$2 + (tee_local $4 + (i32.add + (get_local $var$1) + (i32.const 4) + ) + ) + ) + (i32.and + (i32.load + (get_local $var$2) + ) + (i32.xor + (tee_local $var$2 + (i32.const 74) + ) + (i32.const -1) + ) + ) + ) + (i32.store + (tee_local $var$1 + (get_local $4) + ) + (i32.or + (i32.load + (get_local $var$1) + ) + (i32.and + (get_local $var$2) + (i32.const 8) + ) + ) + ) + ) + ) +) diff --git a/test/passes/local-cse.wast b/test/passes/local-cse.wast new file mode 100644 index 000000000..b1155ce9d --- /dev/null +++ b/test/passes/local-cse.wast @@ -0,0 +1,163 @@ +(module + (memory 100 100) + (func $basics + (local $x i32) + (local $y i32) + (drop + (i32.add (i32.const 1) (i32.const 2)) + ) + (drop + (i32.add (i32.const 1) (i32.const 2)) + ) + (if (i32.const 0) (nop)) + (drop ;; we can't do this yet, non-linear + (i32.add (i32.const 1) (i32.const 2)) + ) + (drop + (i32.add (get_local $x) (get_local $y)) + ) + (drop + (i32.add (get_local $x) (get_local $y)) + ) + (drop + (i32.add (get_local $x) (get_local $y)) + ) + (call $basics) ;; side effects, but no matter for our locals + (drop + (i32.add (get_local $x) (get_local $y)) + ) + (set_local $x (i32.const 100)) + (drop ;; x was changed! + (i32.add (get_local $x) (get_local $y)) + ) + ) + (func $recursive1 + (local $x i32) + (local $y i32) + (drop + (i32.add + (i32.const 1) + (i32.add + (i32.const 2) + (i32.const 3) + ) + ) + ) + (drop + (i32.add + (i32.const 1) + (i32.add + (i32.const 2) + (i32.const 3) + ) + ) + ) + (drop + (i32.add + (i32.const 2) + (i32.const 3) + ) + ) + ) + (func $recursive2 + (local $x i32) + (local $y i32) + (drop + (i32.add + (i32.const 1) + (i32.add + (i32.const 2) + (i32.const 3) + ) + ) + ) + (drop + (i32.add + (i32.const 2) + (i32.const 3) + ) + ) + (drop + (i32.add + (i32.const 1) + (i32.add + (i32.const 2) + (i32.const 3) + ) + ) + ) + ) + (func $self + (local $x i32) + (local $y i32) + (drop + (i32.add + (i32.add + (i32.const 2) + (i32.const 3) + ) + (i32.add + (i32.const 2) + (i32.const 3) + ) + ) + ) + (drop + (i32.add + (i32.const 2) + (i32.const 3) + ) + ) + ) + (func $loads + (drop + (i32.load (i32.const 10)) + ) + (drop + (i32.load (i32.const 10)) ;; implicit traps, sad + ) + ) + (func $8 (param $var$0 i32) (result i32) + (local $var$1 i32) + (local $var$2 i32) + (local $var$3 i32) + (block $label$0 i32 + (i32.store + (tee_local $var$2 + (i32.add + (get_local $var$1) + (i32.const 4) + ) + ) + (i32.and + (i32.load + (get_local $var$2) + ) + (i32.xor + (tee_local $var$2 + (i32.const 74) + ) + (i32.const -1) + ) + ) + ) + (i32.store + (tee_local $var$1 + (i32.add + (get_local $var$1) + (i32.const 4) + ) + ) + (i32.or + (i32.load + (get_local $var$1) + ) + (i32.and + (get_local $var$2) + (i32.const 8) + ) + ) + ) + ) + ) +) diff --git a/test/passes/local-cse_ignore-implicit-traps.txt b/test/passes/local-cse_ignore-implicit-traps.txt new file mode 100644 index 000000000..7b8ebba69 --- /dev/null +++ b/test/passes/local-cse_ignore-implicit-traps.txt @@ -0,0 +1,30 @@ +(module + (type $0 (func)) + (memory $0 100 100) + (func $loads (type $0) + (local $0 i32) + (drop + (tee_local $0 + (i32.load + (i32.const 10) + ) + ) + ) + (drop + (get_local $0) + ) + (drop + (i32.load offset=5 + (i32.const 10) + ) + ) + (drop + (i32.load + (i32.const 11) + ) + ) + (drop + (get_local $0) + ) + ) +) diff --git a/test/passes/local-cse_ignore-implicit-traps.wast b/test/passes/local-cse_ignore-implicit-traps.wast new file mode 100644 index 000000000..0f22084c6 --- /dev/null +++ b/test/passes/local-cse_ignore-implicit-traps.wast @@ -0,0 +1,20 @@ +(module + (memory 100 100) + (func $loads + (drop + (i32.load (i32.const 10)) + ) + (drop + (i32.load (i32.const 10)) + ) + (drop + (i32.load offset=5 (i32.const 10)) + ) + (drop + (i32.load (i32.const 11)) + ) + (drop + (i32.load (i32.const 10)) + ) + ) +) |