diff options
author | Alon Zakai <azakai@google.com> | 2022-10-07 10:31:05 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-10-07 10:31:05 -0700 |
commit | ada164df40f9b590f546dfecdddecc3894e345ae (patch) | |
tree | 8362af7aa04a83c700bdb1ca51b1311ca8a03ebf | |
parent | d5aa6e7c070a7a14a6eb23edbe6343f6ee638e92 (diff) | |
download | binaryen-ada164df40f9b590f546dfecdddecc3894e345ae.tar.gz binaryen-ada164df40f9b590f546dfecdddecc3894e345ae.tar.bz2 binaryen-ada164df40f9b590f546dfecdddecc3894e345ae.zip |
GUFA: Use SSA-style information (#5121)
Previously we treated each local index as a location, and every local.set to
that index could be read by every local.get. With this we connect only
relevant sets to gets.
Practically speaking, this removes LocalLocation which is what was just
described, and instead there is ParamLocation for incoming parameter
values. And local.get/set use normal ExpressionLocations to connect a
set to a get.
I was worried this would be slow, since computing LocalGraph takes time,
but it actually more than makes up for itself on J2Wasm and we are faster
actually rocket I guess since we do less updating after local.sets.
This makes a noticeable change on the J2Wasm binary, and perhaps will
help with benchmarks.
-rw-r--r-- | src/ir/possible-contents.cpp | 82 | ||||
-rw-r--r-- | src/ir/possible-contents.h | 33 | ||||
-rw-r--r-- | test/lit/passes/gufa-refs.wast | 59 | ||||
-rw-r--r-- | test/lit/passes/gufa-ssa.wast | 104 | ||||
-rw-r--r-- | test/lit/passes/gufa-vs-cfp.wast | 22 | ||||
-rw-r--r-- | test/lit/passes/gufa.wast | 13 |
6 files changed, 206 insertions, 107 deletions
diff --git a/src/ir/possible-contents.cpp b/src/ir/possible-contents.cpp index 9ad43f090..6d75d65ca 100644 --- a/src/ir/possible-contents.cpp +++ b/src/ir/possible-contents.cpp @@ -19,6 +19,7 @@ #include "ir/branch-utils.h" #include "ir/eh-utils.h" +#include "ir/local-graph.h" #include "ir/module-utils.h" #include "ir/possible-contents.h" #include "wasm.h" @@ -217,7 +218,8 @@ template<typename T> void disallowDuplicates(const T& targets) { // A link indicates a flow of content from one location to another. For // example, if we do a local.get and return that value from a function, then -// we have a link from a LocalLocation to a ResultLocation. +// we have a link from the ExpressionLocation of that local.get to a +// ResultLocation. template<typename T> struct Link { T from; T to; @@ -438,7 +440,7 @@ struct InfoCollector auto* func = getModule()->getFunction(curr->func); for (Index i = 0; i < func->getParams().size(); i++) { info.links.push_back( - {SignatureParamLocation{func->type, i}, LocalLocation{func, i, 0}}); + {SignatureParamLocation{func->type, i}, ParamLocation{func, i}}); } for (Index i = 0; i < func->getResults().size(); i++) { info.links.push_back( @@ -494,28 +496,19 @@ struct InfoCollector receiveChildValue(curr->value, curr); } - // Locals read and write to their index. - // TODO: we could use a LocalGraph for SSA-like precision - void visitLocalGet(LocalGet* curr) { - if (isRelevant(curr->type)) { - for (Index i = 0; i < curr->type.size(); i++) { - info.links.push_back({LocalLocation{getFunction(), curr->index, i}, - ExpressionLocation{curr, i}}); - } - } - } void visitLocalSet(LocalSet* curr) { if (!isRelevant(curr->value->type)) { return; } - for (Index i = 0; i < curr->value->type.size(); i++) { - info.links.push_back({ExpressionLocation{curr->value, i}, - LocalLocation{getFunction(), curr->index, i}}); - } - // Tees also flow out the value (receiveChildValue will see if this is a tee + // Tees flow out the value (receiveChildValue will see if this is a tee // based on the type, automatically). receiveChildValue(curr->value, curr); + + // We handle connecting local.gets to local.sets below, in visitFunction. + } + void visitLocalGet(LocalGet* curr) { + // We handle connecting local.gets to local.sets below, in visitFunction. } // Globals read and write from their location. @@ -582,7 +575,7 @@ struct InfoCollector curr, [&](Index i) { assert(i <= target->getParams().size()); - return LocalLocation{target, i, 0}; + return ParamLocation{target, i}; }, [&](Index i) { assert(i <= target->getResults().size()); @@ -931,26 +924,41 @@ struct InfoCollector void visitReturn(Return* curr) { addResult(curr->value); } - void visitFunction(Function* curr) { - // Vars have an initial value. - for (Index i = 0; i < curr->getNumLocals(); i++) { - if (curr->isVar(i)) { - Index j = 0; - for (auto t : curr->getLocalType(i)) { - if (t.isDefaultable()) { - info.links.push_back( - {getNullLocation(t), LocalLocation{curr, i, j}}); - } - j++; - } - } - } - + void visitFunction(Function* func) { // Functions with a result can flow a value out from their body. - addResult(curr->body); + addResult(func->body); // See visitPop(). assert(handledPops == totalPops); + + // Handle local.get/sets: each set must write to the proper gets. + LocalGraph localGraph(func); + + for (auto& [get, setsForGet] : localGraph.getSetses) { + auto index = get->index; + auto type = func->getLocalType(index); + if (!isRelevant(type)) { + continue; + } + + // Each get reads from its relevant sets. + for (auto* set : setsForGet) { + for (Index i = 0; i < type.size(); i++) { + Location source; + if (set) { + // This is a normal local.set. + source = ExpressionLocation{set->value, i}; + } else if (getFunction()->isParam(index)) { + // This is a parameter. + source = ParamLocation{getFunction(), index}; + } else { + // This is the default value from the function entry, a null. + source = getNullLocation(type[i]); + } + info.links.push_back({source, ExpressionLocation{get, i}}); + } + } + } } // Helpers @@ -1284,7 +1292,7 @@ Flower::Flower(Module& wasm) : wasm(wasm) { auto calledFromOutside = [&](Name funcName) { auto* func = wasm.getFunction(funcName); for (Index i = 0; i < func->getParams().size(); i++) { - roots[LocalLocation{func, i, 0}] = PossibleContents::many(); + roots[ParamLocation{func, i}] = PossibleContents::many(); } }; @@ -1793,8 +1801,8 @@ void Flower::dump(Location location) { std::cout << " : " << loc->index << '\n'; } else if (auto* loc = std::get_if<TagLocation>(&location)) { std::cout << " tagloc " << loc->tag << '\n'; - } else if (auto* loc = std::get_if<LocalLocation>(&location)) { - std::cout << " localloc " << loc->func->name << " : " << loc->index + } else if (auto* loc = std::get_if<ParamLocation>(&location)) { + std::cout << " paramloc " << loc->func->name << " : " << loc->index << " tupleIndex " << loc->tupleIndex << '\n'; } else if (auto* loc = std::get_if<ResultLocation>(&location)) { std::cout << " resultloc " << loc->func->name << " : " << loc->index diff --git a/src/ir/possible-contents.h b/src/ir/possible-contents.h index 863a0dbab..a6a27062a 100644 --- a/src/ir/possible-contents.h +++ b/src/ir/possible-contents.h @@ -286,26 +286,21 @@ struct ExpressionLocation { } }; -// The location of one of the results of a function. -struct ResultLocation { +// The location of one of the parameters of a function. +struct ParamLocation { Function* func; Index index; - bool operator==(const ResultLocation& other) const { + bool operator==(const ParamLocation& other) const { return func == other.func && index == other.index; } }; -// The location of one of the locals in a function (either a param or a var). -// TODO: would separating params from vars help? (SSA might be enough) -struct LocalLocation { +// The location of one of the results of a function. +struct ResultLocation { Function* func; - // The index of the local. Index index; - // As in ExpressionLocation, the index inside the tuple, or 0 if not a tuple. - Index tupleIndex; - bool operator==(const LocalLocation& other) const { - return func == other.func && index == other.index && - tupleIndex == other.tupleIndex; + bool operator==(const ResultLocation& other) const { + return func == other.func && index == other.index; } }; @@ -407,8 +402,8 @@ struct ConeReadLocation { // A location is a variant over all the possible flavors of locations that we // have. using Location = std::variant<ExpressionLocation, + ParamLocation, ResultLocation, - LocalLocation, BreakTargetLocation, GlobalLocation, SignatureParamLocation, @@ -441,17 +436,17 @@ template<> struct hash<wasm::ExpressionLocation> { } }; -template<> struct hash<wasm::ResultLocation> { - size_t operator()(const wasm::ResultLocation& loc) const { +template<> struct hash<wasm::ParamLocation> { + size_t operator()(const wasm::ParamLocation& loc) const { return std::hash<std::pair<size_t, wasm::Index>>{}( {size_t(loc.func), loc.index}); } }; -template<> struct hash<wasm::LocalLocation> { - size_t operator()(const wasm::LocalLocation& loc) const { - return std::hash<std::pair<size_t, std::pair<wasm::Index, wasm::Index>>>{}( - {size_t(loc.func), {loc.index, loc.tupleIndex}}); +template<> struct hash<wasm::ResultLocation> { + size_t operator()(const wasm::ResultLocation& loc) const { + return std::hash<std::pair<size_t, wasm::Index>>{}( + {size_t(loc.func), loc.index}); } }; diff --git a/test/lit/passes/gufa-refs.wast b/test/lit/passes/gufa-refs.wast index f75b0dd96..e02fdc70a 100644 --- a/test/lit/passes/gufa-refs.wast +++ b/test/lit/passes/gufa-refs.wast @@ -322,7 +322,7 @@ ;; CHECK-NEXT: (struct.new_default $struct) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: (unreachable) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (ref.null none) @@ -346,11 +346,8 @@ (local.set $z (struct.new $struct) ) - ;; Get the 3 locals, to check that we optimize. We can replace x and y with - ;; a null constant. (x will not actually contain null since the call will - ;; trap, but the only value we see x can contain is the default value, and - ;; we don't use SSA yet, so all values written to x anywhere are considered - ;; possible at all local.gets) + ;; Get the 3 locals, to check that we optimize. We can replace x with an + ;; unreachable and y with a null constant. (drop (local.get $x) ) @@ -1390,9 +1387,11 @@ ;; CHECK: (type $child (struct_subtype (field (mut i32)) (field i32) $parent)) (type $child (struct_subtype (field (mut i32)) (field i32) $parent)) - ;; CHECK: (type $none_=>_none (func_subtype func)) + ;; CHECK: (type $i32_=>_none (func_subtype (param i32) func)) - ;; CHECK: (func $func (type $none_=>_none) + ;; CHECK: (export "func" (func $func)) + + ;; CHECK: (func $func (type $i32_=>_none) (param $x i32) ;; CHECK-NEXT: (local $child (ref null $child)) ;; CHECK-NEXT: (local $parent (ref null $parent)) ;; CHECK-NEXT: (local.set $parent @@ -1400,19 +1399,22 @@ ;; CHECK-NEXT: (i32.const 10) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (local.set $parent + ;; CHECK-NEXT: (local.tee $child + ;; CHECK-NEXT: (struct.new $child + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (struct.get $parent 0 ;; CHECK-NEXT: (local.get $parent) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (local.set $parent - ;; CHECK-NEXT: (local.tee $child - ;; CHECK-NEXT: (struct.new $child - ;; CHECK-NEXT: (i32.const 20) - ;; CHECK-NEXT: (i32.const 30) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (block (result i32) ;; CHECK-NEXT: (drop @@ -1424,7 +1426,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $func + (func $func (export "func") (param $x i32) (local $child (ref null $child)) (local $parent (ref null $parent)) (local.set $parent @@ -1432,7 +1434,19 @@ (i32.const 10) ) ) - ;; This get cannot be optimized because later down the local is written a + ;; Another, optional, set to $parent. + (if + (local.get $x) + (local.set $parent + (local.tee $child + (struct.new $child + (i32.const 20) + (i32.const 30) + ) + ) + ) + ) + ;; This get cannot be optimized because before us the local might be set a ;; child as well. So the local $parent can refer to either type, and they ;; disagree on the aliased value. (drop @@ -1440,15 +1454,6 @@ (local.get $parent) ) ) - ;; This extra local.set to $parent is added here. - (local.set $parent - (local.tee $child - (struct.new $child - (i32.const 20) - (i32.const 30) - ) - ) - ) ;; But this one can be optimized as $child can only contain a child. (drop (struct.get $child 0 diff --git a/test/lit/passes/gufa-ssa.wast b/test/lit/passes/gufa-ssa.wast new file mode 100644 index 000000000..665342818 --- /dev/null +++ b/test/lit/passes/gufa-ssa.wast @@ -0,0 +1,104 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. +;; RUN: foreach %s %t wasm-opt -all --gufa -S -o - | filecheck %s + +(module + ;; CHECK: (type $i32_=>_none (func (param i32))) + + ;; CHECK: (export "test" (func $test)) + + ;; CHECK: (func $test (param $x i32) + ;; CHECK-NEXT: (local $y i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $y + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $y + ;; CHECK-NEXT: (i32.const 40) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 40) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: (local.set $y + ;; CHECK-NEXT: (i32.const 50) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (export "test") (param $x i32) + (local $y i32) + ;; A parameter - nothing to optimize. + (drop + (local.get $x) + ) + ;; This has the default value, and can be optimized to 0. + (drop + (local.get $y) + ) + (local.set $x + (i32.const 10) + ) + (local.set $y + (i32.const 20) + ) + ;; These can both be optimized to constants, 10 and 20. + (drop + (local.get $x) + ) + (drop + (local.get $y) + ) + (local.set $x + (i32.const 30) + ) + (local.set $y + (i32.const 40) + ) + ;; Now these are 30 and 40. + (drop + (local.get $x) + ) + (drop + (local.get $y) + ) + (if + (local.get $x) + (local.set $y + (i32.const 50) + ) + ) + ;; x is the same but y is no longer optimizable, since it might contain 50. + (drop + (local.get $x) + ) + (drop + (local.get $y) + ) + ) +) diff --git a/test/lit/passes/gufa-vs-cfp.wast b/test/lit/passes/gufa-vs-cfp.wast index 623d25679..98db72833 100644 --- a/test/lit/passes/gufa-vs-cfp.wast +++ b/test/lit/passes/gufa-vs-cfp.wast @@ -602,11 +602,10 @@ (module ;; CHECK: (type $struct (struct_subtype (field (mut i32)) data)) (type $struct (struct (mut i32))) - ;; CHECK: (type $none_=>_none (func_subtype func)) - - ;; CHECK: (type $substruct (struct_subtype (field (mut i32)) $struct)) (type $substruct (struct_subtype (mut i32) $struct)) + ;; CHECK: (type $none_=>_none (func_subtype func)) + ;; CHECK: (func $test (type $none_=>_none) ;; CHECK-NEXT: (local $ref (ref null $struct)) ;; CHECK-NEXT: (local.set $ref @@ -620,16 +619,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (block - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (block (result nullref) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast_static $substruct - ;; CHECK-NEXT: (local.get $ref) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (ref.null none) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) ;; CHECK-NEXT: (unreachable) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) @@ -649,9 +639,9 @@ ;; This must trap, so we can add an unreachable. (struct.get $substruct 0 ;; Only a null can pass through here, as the cast would not allow a ref - ;; to $struct. A null looks possible to the pass due to the default - ;; value of the local $ref - an SSA analysis would remove that. For now, - ;; we'll optimize the ref.cast to have a null after it. + ;; to $struct. But no null is possible since the local gets written a + ;; non-null value before we get here, so we can optimize this to an + ;; unreachable. (ref.cast_static $substruct (local.get $ref) ) diff --git a/test/lit/passes/gufa.wast b/test/lit/passes/gufa.wast index a1ac729c3..d9b11f779 100644 --- a/test/lit/passes/gufa.wast +++ b/test/lit/passes/gufa.wast @@ -246,7 +246,7 @@ ;; CHECK: (func $param-yes (param $param i32) (result i32) ;; CHECK-NEXT: (if - ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (unreachable) ;; CHECK-NEXT: (local.set $param ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: ) @@ -254,19 +254,16 @@ ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: ) (func $param-yes (param $param i32) (result i32) + ;; As above, but now the function is not exported. That means it has no + ;; callers, so the first local.get can contain nothing, and will become an + ;; unreachable. The other local.get later down can only contain the + ;; local.set in the if, so we'll optimize it to 1. (if (local.get $param) (local.set $param (i32.const 1) ) ) - ;; As above, but now the function is not exported. That means it has no - ;; callers, so the only possible contents for $param are the local.set here, - ;; as this code is unreachable. We will infer a constant of 1 for all values - ;; of $param here. (With an SSA analysis, we could see that the first - ;; local.get must be unreachable, and optimize even better; as things are, - ;; we see the local.set and it is the only thing that sends values to the - ;; local.) (local.get $param) ) |