summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorThomas Lively <7121787+tlively@users.noreply.github.com>2020-09-07 18:29:33 -0700
committerGitHub <noreply@github.com>2020-09-07 18:29:33 -0700
commita6816a287bd3f802cb6db51032e77145f036c8b7 (patch)
tree2b13779744aa126688055095bb1c747dc786f265 /src
parent775363a98002a14c64bdc4f8d591c6f37b1e1604 (diff)
downloadbinaryen-a6816a287bd3f802cb6db51032e77145f036c8b7.tar.gz
binaryen-a6816a287bd3f802cb6db51032e77145f036c8b7.tar.bz2
binaryen-a6816a287bd3f802cb6db51032e77145f036c8b7.zip
Stack utils (#3083)
Implement and test utilities for manipulating and analyzing a new stacky form of Binaryen IR that is able to express arbitrary stack machine code. This new Poppy IR will eventually replace Stack IR, and new optimization passes will be built with these utilities. See #3059.
Diffstat (limited to 'src')
-rw-r--r--src/ir/CMakeLists.txt1
-rw-r--r--src/ir/stack-utils.cpp292
-rw-r--r--src/ir/stack-utils.h196
-rw-r--r--src/wasm-type.h30
4 files changed, 516 insertions, 3 deletions
diff --git a/src/ir/CMakeLists.txt b/src/ir/CMakeLists.txt
index 70a57a4ab..5555ef98b 100644
--- a/src/ir/CMakeLists.txt
+++ b/src/ir/CMakeLists.txt
@@ -4,6 +4,7 @@ set(ir_SOURCES
ExpressionManipulator.cpp
LocalGraph.cpp
ReFinalize.cpp
+ stack-utils.cpp
${ir_HEADERS}
)
add_library(ir OBJECT ${ir_SOURCES})
diff --git a/src/ir/stack-utils.cpp b/src/ir/stack-utils.cpp
new file mode 100644
index 000000000..ef871040b
--- /dev/null
+++ b/src/ir/stack-utils.cpp
@@ -0,0 +1,292 @@
+/*
+ * Copyright 2020 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "stack-utils.h"
+
+namespace wasm {
+
+namespace StackUtils {
+
+void removeNops(Block* block) {
+ size_t newIndex = 0;
+ for (size_t i = 0, size = block->list.size(); i < size; ++i) {
+ if (!block->list[i]->is<Nop>()) {
+ block->list[newIndex++] = block->list[i];
+ }
+ }
+ block->list.resize(newIndex);
+}
+
+} // namespace StackUtils
+
+StackSignature::StackSignature(Expression* expr) {
+ params = Type::none;
+ if (Properties::isControlFlowStructure(expr)) {
+ if (expr->is<If>()) {
+ params = Type::i32;
+ }
+ } else {
+ std::vector<Type> inputs;
+ for (auto* child : ChildIterator(expr)) {
+ assert(child->type.isConcrete());
+ // Children might be tuple pops, so expand their types
+ inputs.insert(inputs.end(), child->type.begin(), child->type.end());
+ }
+ params = Type(inputs);
+ }
+ if (expr->type == Type::unreachable) {
+ unreachable = true;
+ results = Type::none;
+ } else {
+ unreachable = false;
+ results = expr->type;
+ }
+}
+
+bool StackSignature::composes(const StackSignature& next) const {
+ auto checked = std::min(results.size(), next.params.size());
+ return std::equal(results.end() - checked,
+ results.end(),
+ next.params.end() - checked,
+ [](const Type& produced, const Type& consumed) {
+ return Type::isSubType(produced, consumed);
+ });
+}
+
+bool StackSignature::satisfies(Signature sig) const {
+ if (sig.params.size() < params.size() ||
+ sig.results.size() < results.size()) {
+ // Not enough values provided or too many produced
+ return false;
+ }
+ bool paramSuffixMatches =
+ std::equal(params.begin(),
+ params.end(),
+ sig.params.end() - params.size(),
+ [](const Type& consumed, const Type& provided) {
+ return Type::isSubType(provided, consumed);
+ });
+ if (!paramSuffixMatches) {
+ return false;
+ }
+ bool resultSuffixMatches =
+ std::equal(results.begin(),
+ results.end(),
+ sig.results.end() - results.size(),
+ [](const Type& produced, const Type& expected) {
+ return Type::isSubType(produced, expected);
+ });
+ if (!resultSuffixMatches) {
+ return false;
+ }
+ if (unreachable) {
+ // The unreachable can consume any additional provided params and produce
+ // any additional expected results, so we are done.
+ return true;
+ }
+ // Any additional provided params will pass through untouched, so they must be
+ // equivalent to the additional produced results.
+ return std::equal(sig.params.begin(),
+ sig.params.end() - params.size(),
+ sig.results.begin(),
+ sig.results.end() - results.size(),
+ [](const Type& produced, const Type& expected) {
+ return Type::isSubType(produced, expected);
+ });
+}
+
+StackSignature& StackSignature::operator+=(const StackSignature& next) {
+ assert(composes(next));
+ std::vector<Type> stack(results.begin(), results.end());
+ size_t required = next.params.size();
+ // Consume stack values according to next's parameters
+ if (stack.size() >= required) {
+ stack.resize(stack.size() - required);
+ } else {
+ if (!unreachable) {
+ // Prepend the unsatisfied params of `next` to the current params
+ size_t unsatisfied = required - stack.size();
+ std::vector<Type> newParams(next.params.begin(),
+ next.params.begin() + unsatisfied);
+ newParams.insert(newParams.end(), params.begin(), params.end());
+ params = Type(newParams);
+ }
+ stack.clear();
+ }
+ // Add stack values according to next's results
+ if (next.unreachable) {
+ results = next.results;
+ unreachable = true;
+ } else {
+ stack.insert(stack.end(), next.results.begin(), next.results.end());
+ results = Type(stack);
+ }
+ return *this;
+}
+
+StackSignature StackSignature::operator+(const StackSignature& next) const {
+ StackSignature sig = *this;
+ sig += next;
+ return sig;
+}
+
+StackFlow::StackFlow(Block* block) {
+ // Encapsulates the logic for treating the block and its children
+ // uniformly. The end of the block is treated as if it consumed values
+ // corresponding to the its result type and produced no values, which is why
+ // the block's result type is used as the params of its processed stack
+ // signature.
+ auto processBlock = [&block](auto process) {
+ // TODO: Once we support block parameters, set up the stack by calling
+ // `process` before iterating through the block.
+ for (auto* expr : block->list) {
+ process(expr, StackSignature(expr));
+ }
+ bool unreachable = block->type == Type::unreachable;
+ Type params = unreachable ? Type::none : block->type;
+ process(block, StackSignature(params, Type::none, unreachable));
+ };
+
+ // We need to make an initial pass through the block to figure out how many
+ // values each unreachable instruction produces.
+ std::unordered_map<Expression*, size_t> producedByUnreachable;
+ {
+ size_t stackSize = 0;
+ size_t produced = 0;
+ Expression* lastUnreachable = nullptr;
+ processBlock([&](Expression* expr, const StackSignature sig) {
+ // Consume params
+ if (sig.params.size() > stackSize) {
+ // We need more values than are available, so they must come from the
+ // last unreachable.
+ assert(lastUnreachable);
+ produced += sig.params.size() - stackSize;
+ stackSize = 0;
+ } else {
+ stackSize -= sig.params.size();
+ }
+
+ // Handle unreachable or produce results
+ if (sig.unreachable) {
+ if (lastUnreachable) {
+ producedByUnreachable[lastUnreachable] = produced;
+ produced = 0;
+ }
+ assert(produced == 0);
+ lastUnreachable = expr;
+ stackSize = 0;
+ } else {
+ stackSize += sig.results.size();
+ }
+ });
+
+ // Finish record for final unreachable
+ if (lastUnreachable) {
+ producedByUnreachable[lastUnreachable] = produced;
+ }
+ }
+
+ // Take another pass through the block, recording srcs and dests.
+ std::vector<Location> values;
+ Expression* lastUnreachable = nullptr;
+ processBlock([&](Expression* expr, const StackSignature sig) {
+ assert((sig.params.size() <= values.size() || lastUnreachable) &&
+ "Block inputs not yet supported");
+
+ // Unreachable instructions consume all available values
+ size_t consumed = sig.unreachable
+ ? std::max(values.size(), sig.params.size())
+ : sig.params.size();
+
+ // We previously calculated how many values unreachable instructions produce
+ size_t produced =
+ sig.unreachable ? producedByUnreachable[expr] : sig.results.size();
+
+ srcs[expr] = std::vector<Location>(consumed);
+ dests[expr] = std::vector<Location>(produced);
+
+ // Figure out what kind of unreachable values we have
+ assert(sig.params.size() <= consumed);
+ size_t unreachableBeyondStack = 0;
+ size_t unreachableFromStack = 0;
+ if (consumed > values.size()) {
+ assert(consumed == sig.params.size());
+ unreachableBeyondStack = consumed - values.size();
+ } else if (consumed > sig.params.size()) {
+ assert(consumed == values.size());
+ unreachableFromStack = consumed - sig.params.size();
+ }
+
+ // Consume values
+ for (Index i = 0; i < consumed; ++i) {
+ if (i < unreachableBeyondStack) {
+ // This value comes from the polymorphic stack of the last unreachable
+ // because the stack did not have enough values to satisfy this
+ // instruction.
+ assert(lastUnreachable);
+ assert(producedByUnreachable[lastUnreachable] >=
+ unreachableBeyondStack);
+ Index destIndex =
+ producedByUnreachable[lastUnreachable] - unreachableBeyondStack + i;
+ Type type = sig.params[i];
+ srcs[expr][i] = {lastUnreachable, destIndex, type, true};
+ dests[lastUnreachable][destIndex] = {expr, i, type, false};
+ } else {
+ // A normal value from the value stack
+ bool unreachable = i < unreachableFromStack;
+ auto& src = values[values.size() + i - consumed];
+ srcs[expr][i] = src;
+ dests[src.expr][src.index] = {expr, i, src.type, unreachable};
+ }
+ }
+
+ // Update available values
+ if (unreachableBeyondStack) {
+ producedByUnreachable[lastUnreachable] -= unreachableBeyondStack;
+ values.resize(0);
+ } else {
+ values.resize(values.size() - consumed);
+ }
+
+ // Produce values
+ for (Index i = 0; i < sig.results.size(); ++i) {
+ values.push_back({expr, i, sig.results[i], false});
+ }
+
+ // Update the last unreachable instruction
+ if (sig.unreachable) {
+ assert(producedByUnreachable[lastUnreachable] == 0);
+ lastUnreachable = expr;
+ }
+ });
+}
+
+StackSignature StackFlow::getSignature(Expression* expr) {
+ auto exprSrcs = srcs.find(expr);
+ auto exprDests = dests.find(expr);
+ assert(exprSrcs != srcs.end() && exprDests != dests.end());
+ std::vector<Type> params, results;
+ for (auto& src : exprSrcs->second) {
+ params.push_back(src.type);
+ }
+ for (auto& dest : exprDests->second) {
+ results.push_back(dest.type);
+ }
+ bool unreachable = expr->type == Type::unreachable;
+ return StackSignature(Type(params), Type(results), unreachable);
+}
+
+} // namespace wasm
diff --git a/src/ir/stack-utils.h b/src/ir/stack-utils.h
new file mode 100644
index 000000000..89ec7d47e
--- /dev/null
+++ b/src/ir/stack-utils.h
@@ -0,0 +1,196 @@
+/*
+ * Copyright 2020 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// stack-utils.h: Utilities for manipulating and analyzing stack machine code in
+// the form of Poppy IR.
+//
+// Poppy IR represents stack machine code using normal Binaryen IR types by
+// imposing the following constraints:
+//
+// 1. Function bodies and children of control flow (except If conditions) must
+// be blocks.
+//
+// 2. Blocks may have any Expressions except for Pops as children. The sequence
+// of instructions in a block follows the same validation rules as in
+// WebAssembly. That means that any expression may have a concrete type, not
+// just the final expression in the block.
+//
+// 3. All other children must be Pops, which are used to determine the input
+// stack type of each instruction. Pops may not have `unreachable` type.
+//
+// 4. Only control flow structures and instructions that have polymorphic
+// unreachable behavior in WebAssembly may have unreachable type.
+//
+// For example, the following Binaryen IR Function:
+//
+// (func $foo (result i32)
+// (i32.add
+// (i32.const 42)
+// (i32.const 5)
+// )
+// )
+//
+// would look like this in Poppy IR:
+//
+// (func $foo (result i32)
+// (block
+// (i32.const 42)
+// (i32.const 5)
+// (i32.add
+// (i32.pop)
+// (i32.pop)
+// )
+// )
+// )
+//
+// Notice that the sequence of instructions in the block is now identical to the
+// sequence of instructions in raw WebAssembly.
+//
+
+#ifndef wasm_ir_stack_h
+#define wasm_ir_stack_h
+
+#include "ir/properties.h"
+#include "wasm-type.h"
+#include "wasm.h"
+
+namespace wasm {
+
+namespace StackUtils {
+
+// Iterate through `block` and remove nops.
+void removeNops(Block* block);
+
+} // namespace StackUtils
+
+// Stack signatures are like regular function signatures, but they are used to
+// represent the stack parameters and results of arbitrary sequences of stacky
+// instructions. They have to record whether they cover an unreachable
+// instruction because their composition takes into account the polymorphic
+// results of unreachable instructions. For example, the following instruction
+// sequences both have params {i32, i32} and results {f32}, but only sequence B
+// is unreachable:
+//
+// A:
+// i32.add
+// drop
+// f32.const 42
+//
+// B:
+// i32.add
+// unreachable
+// f32.const 42
+//
+// Notice that this distinction is important because sequence B can be the body
+// of the blocks below but sequence A cannot. In other words, the stack
+// signature of sequence B satisfies the signatures of these blocks, but the
+// stack signature of sequence A does not.
+//
+// (block (param f64 i32 i32) (result f32) ... )
+// (block (param i32 i32) (result f64 f32) ... )
+// (block (param f64 i32 i32) (result f64 f32) ... )
+//
+struct StackSignature {
+ Type params;
+ Type results;
+ bool unreachable;
+
+ StackSignature()
+ : params(Type::none), results(Type::none), unreachable(false) {}
+ StackSignature(Type params, Type results, bool unreachable = false)
+ : params(params), results(results), unreachable(unreachable) {}
+
+ StackSignature(const StackSignature&) = default;
+ StackSignature& operator=(const StackSignature&) = default;
+
+ // Get the stack signature of `expr`
+ explicit StackSignature(Expression* expr);
+
+ // Get the stack signature of the range of instructions [first, last). The
+ // sequence of instructions is assumed to be valid, i.e. their signatures
+ // compose.
+ template<class InputIt>
+ explicit StackSignature(InputIt first, InputIt last)
+ : params(Type::none), results(Type::none), unreachable(false) {
+ // TODO: It would be more efficient to build the signature directly and
+ // construct the params in reverse to avoid quadratic behavior.
+ while (first != last) {
+ *this += StackSignature(*first++);
+ }
+ }
+
+ // Return `true` iff `next` composes after this stack signature.
+ bool composes(const StackSignature& next) const;
+
+ // Whether a block whose contents have this stack signature could be typed
+ // with `sig`.
+ bool satisfies(Signature sig) const;
+
+ // Compose stack signatures. Assumes they actually compose.
+ StackSignature& operator+=(const StackSignature& next);
+ StackSignature operator+(const StackSignature& next) const;
+
+ bool operator==(const StackSignature& other) const {
+ return params == other.params && results == other.results &&
+ unreachable == other.unreachable;
+ }
+};
+
+// Calculates stack machine data flow, associating the sources and destinations
+// of all values in a block.
+struct StackFlow {
+ // The destination (source) location at which a value of type `type` is
+ // consumed (produced), corresponding to the `index`th value consumed by
+ // (produced by) instruction `expr`. For destination locations, `unreachable`
+ // is true iff the corresponding value is consumed by the polymorphic behavior
+ // of an unreachable instruction rather than being used directly. For source
+ // locations, `unreachable` is true iff the corresponding value is produced by
+ // an unreachable instruction. For produced values that are not consumed
+ // within the block (TODO: also for consumed values that are not produced
+ // within the block), `expr` will be the enclosing block.
+ struct Location {
+ Expression* expr;
+ Index index;
+ Type type;
+ bool unreachable;
+
+ bool operator==(const Location& other) const {
+ return expr == other.expr && index == other.index && type == other.type &&
+ unreachable == other.unreachable;
+ }
+ };
+
+ using LocationMap = std::unordered_map<Expression*, std::vector<Location>>;
+
+ // Maps each instruction to the set of source locations producing its inputs.
+ LocationMap srcs;
+
+ // Maps each instruction to the set of output locations consuming its results.
+ LocationMap dests;
+
+ // Gets the effective stack signature of `expr`, which must be a child of the
+ // block. If `expr` is unreachable, the returned signature will reflect the
+ // values consumed and produced by its polymorphic unreachable behavior.
+ StackSignature getSignature(Expression* expr);
+
+ // Calculates `srcs` and `dests`.
+ StackFlow(Block* curr);
+};
+
+} // namespace wasm
+
+#endif // wasm_ir_stack_h
diff --git a/src/wasm-type.h b/src/wasm-type.h
index 8a9e58ec9..135c2e751 100644
--- a/src/wasm-type.h
+++ b/src/wasm-type.h
@@ -186,15 +186,39 @@ public:
return index == other.index && parent == other.parent;
}
bool operator!=(const Iterator& other) const { return !(*this == other); }
- void operator++() { index++; }
+ Iterator& operator++() {
+ ++index;
+ return *this;
+ }
+ Iterator& operator--() {
+ --index;
+ return *this;
+ }
+ Iterator operator++(int) {
+ auto it = *this;
+ index++;
+ return it;
+ }
+ Iterator operator--(int) {
+ auto it = *this;
+ index--;
+ return it;
+ }
Iterator& operator+=(difference_type off) {
index += off;
return *this;
}
- const Iterator operator+(difference_type off) const {
+ Iterator operator+(difference_type off) const {
return Iterator(*this) += off;
}
- difference_type operator-(const Iterator& other) {
+ Iterator& operator-=(difference_type off) {
+ index -= off;
+ return *this;
+ }
+ Iterator operator-(difference_type off) const {
+ return Iterator(*this) -= off;
+ }
+ difference_type operator-(const Iterator& other) const {
assert(parent == other.parent);
return index - other.index;
}