diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/asm2wasm.h | 16 | ||||
-rw-r--r-- | src/ast/ExpressionManipulator.cpp | 1 | ||||
-rw-r--r-- | src/ast/block-utils.h | 66 | ||||
-rw-r--r-- | src/ast/manipulation.h | 69 | ||||
-rw-r--r-- | src/ast/type-updating.h | 282 | ||||
-rw-r--r-- | src/ast_utils.h | 168 | ||||
-rw-r--r-- | src/cfg/Relooper.cpp | 22 | ||||
-rw-r--r-- | src/pass.h | 10 | ||||
-rw-r--r-- | src/passes/CoalesceLocals.cpp | 11 | ||||
-rw-r--r-- | src/passes/DeadCodeElimination.cpp | 42 | ||||
-rw-r--r-- | src/passes/MergeBlocks.cpp | 21 | ||||
-rw-r--r-- | src/passes/OptimizeInstructions.cpp | 1 | ||||
-rw-r--r-- | src/passes/Precompute.cpp | 6 | ||||
-rw-r--r-- | src/passes/Print.cpp | 13 | ||||
-rw-r--r-- | src/passes/RelooperJumpThreading.cpp | 7 | ||||
-rw-r--r-- | src/passes/RemoveUnusedBrs.cpp | 23 | ||||
-rw-r--r-- | src/passes/RemoveUnusedNames.cpp | 1 | ||||
-rw-r--r-- | src/passes/SimplifyLocals.cpp | 1 | ||||
-rw-r--r-- | src/passes/Vacuum.cpp | 53 | ||||
-rw-r--r-- | src/passes/pass.cpp | 13 | ||||
-rw-r--r-- | src/s2wasm.h | 5 | ||||
-rw-r--r-- | src/tools/asm2wasm.cpp | 7 | ||||
-rw-r--r-- | src/wasm-builder.h | 22 | ||||
-rw-r--r-- | src/wasm-module-building.h | 3 | ||||
-rw-r--r-- | src/wasm-traversal.h | 29 | ||||
-rw-r--r-- | src/wasm-validator.h | 57 | ||||
-rw-r--r-- | src/wasm/wasm-s-parser.cpp | 2 |
27 files changed, 809 insertions, 142 deletions
diff --git a/src/asm2wasm.h b/src/asm2wasm.h index 6327920fe..16e7c95e1 100644 --- a/src/asm2wasm.h +++ b/src/asm2wasm.h @@ -35,7 +35,6 @@ #include "wasm-builder.h" #include "wasm-emscripten.h" #include "wasm-printing.h" -#include "wasm-validator.h" #include "wasm-module-building.h" namespace wasm { @@ -1339,6 +1338,12 @@ void Asm2WasmBuilder::processAsm(Ref ast) { add->left = parent->builder.makeConst(Literal((int32_t)parent->functionTableStarts[tableName])); } } + + void visitFunction(Function* curr) { + // changing call types requires we percolate types, and drop stuff. + // we do this in this pass so that we don't look broken between passes + AutoDrop().walkFunctionInModule(curr, getModule()); + } }; // apply debug info, reducing intrinsic calls into annotations on the ast nodes @@ -1400,9 +1405,9 @@ void Asm2WasmBuilder::processAsm(Ref ast) { passRunner.setDebug(true); passRunner.setValidateGlobally(false); } + // finalizeCalls also does autoDrop, which is crucial for the non-optimizing case, + // so that the output of the first pass is valid passRunner.add<FinalizeCalls>(this); - passRunner.add<ReFinalize>(); // FinalizeCalls changes call types, need to percolate - passRunner.add<AutoDrop>(); // FinalizeCalls may cause us to require additional drops if (legalizeJavaScriptFFI) { passRunner.add("legalize-js-interface"); } @@ -1551,8 +1556,6 @@ void Asm2WasmBuilder::processAsm(Ref ast) { body->finalize(); func->body = body; } - - assert(WasmValidator().validate(wasm)); } Function* Asm2WasmBuilder::processFunction(Ref ast) { @@ -2489,7 +2492,6 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { } top->list.push_back(br); - top->finalize(); for (unsigned i = 0; i < cases->size(); i++) { Ref curr = cases[i]; @@ -2564,7 +2566,6 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { top->name = name; next->list.push_back(top); next->list.push_back(case_); - next->finalize(); top = next; nameMapper.popLabelName(name); } @@ -2580,7 +2581,6 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { first->ifFalse = builder.makeBreak(br->default_); brHolder->list.push_back(chain); - brHolder->finalize(); } breakStack.pop_back(); diff --git a/src/ast/ExpressionManipulator.cpp b/src/ast/ExpressionManipulator.cpp index fb861f0d8..160ff55e1 100644 --- a/src/ast/ExpressionManipulator.cpp +++ b/src/ast/ExpressionManipulator.cpp @@ -147,6 +147,7 @@ void ExpressionManipulator::spliceIntoBlock(Block* block, Index index, Expressio } list[index] = add; } + block->finalize(block->type); } } // namespace wasm diff --git a/src/ast/block-utils.h b/src/ast/block-utils.h new file mode 100644 index 000000000..7b1b9dcff --- /dev/null +++ b/src/ast/block-utils.h @@ -0,0 +1,66 @@ +/* + * Copyright 2017 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef wasm_ast_block_h +#define wasm_ast_block_h + +#include "literal.h" +#include "wasm.h" +#include "ast_utils.h" + +namespace wasm { + +namespace BlockUtils { + // if a block has just one element, it can often be replaced + // with that content + template<typename T> + inline Expression* simplifyToContents(Block* block, T* parent, bool allowTypeChange = false) { + auto& list = block->list; + if (list.size() == 1 && !BreakSeeker::has(list[0], block->name)) { + // just one element. try to replace the block + auto* singleton = list[0]; + auto sideEffects = EffectAnalyzer(parent->getPassOptions(), singleton).hasSideEffects(); + if (!sideEffects && !isConcreteWasmType(singleton->type)) { + // no side effects, and singleton is not returning a value, so we can throw away + // the block and its contents, basically + return Builder(*parent->getModule()).replaceWithIdenticalType(block); + } else if (block->type == singleton->type || allowTypeChange) { + return singleton; + } else { + // (side effects +) type change, must be block with declared value but inside is unreachable + // (if both concrete, must match, and since no name on block, we can't be + // branched to, so if singleton is unreachable, so is the block) + assert(isConcreteWasmType(block->type) && singleton->type == unreachable); + // we could replace with unreachable, but would need to update all + // the parent's types + } + } else if (list.size() == 0) { + ExpressionManipulator::nop(block); + } + return block; + } + + // similar, but when we allow the type to change while doing so + template<typename T> + inline Expression* simplifyToContentsWithPossibleTypeChange(Block* block, T* parent) { + return simplifyToContents(block, parent, true); + } +}; + +} // namespace wasm + +#endif // wasm_ast_block_h + diff --git a/src/ast/manipulation.h b/src/ast/manipulation.h new file mode 100644 index 000000000..9e01e4c74 --- /dev/null +++ b/src/ast/manipulation.h @@ -0,0 +1,69 @@ +/* + * Copyright 2017 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef wasm_ast_manipulation_h +#define wasm_ast_manipulation_h + +#include "wasm.h" + +namespace wasm { + +struct ExpressionManipulator { + // Re-use a node's memory. This helps avoid allocation when optimizing. + template<typename InputType, typename OutputType> + static OutputType* convert(InputType *input) { + static_assert(sizeof(OutputType) <= sizeof(InputType), + "Can only convert to a smaller size Expression node"); + input->~InputType(); // arena-allocaed, so no destructor, but avoid UB. + OutputType* output = (OutputType*)(input); + new (output) OutputType; + return output; + } + + // Convenience method for nop, which is a common conversion + template<typename InputType> + static Nop* nop(InputType* target) { + return convert<InputType, Nop>(target); + } + + // Convert a node that allocates + template<typename InputType, typename OutputType> + static OutputType* convert(InputType *input, MixedArena& allocator) { + assert(sizeof(OutputType) <= sizeof(InputType)); + input->~InputType(); // arena-allocaed, so no destructor, but avoid UB. + OutputType* output = (OutputType*)(input); + new (output) OutputType(allocator); + return output; + } + + using CustomCopier = std::function<Expression*(Expression*)>; + static Expression* flexibleCopy(Expression* original, Module& wasm, CustomCopier custom); + + static Expression* copy(Expression* original, Module& wasm) { + auto copy = [](Expression* curr) { + return nullptr; + }; + return flexibleCopy(original, wasm, copy); + } + + // Splice an item into the middle of a block's list + static void spliceIntoBlock(Block* block, Index index, Expression* add); +}; + +} // wasm + +#endif // wams_ast_manipulation_h + diff --git a/src/ast/type-updating.h b/src/ast/type-updating.h new file mode 100644 index 000000000..b8988761a --- /dev/null +++ b/src/ast/type-updating.h @@ -0,0 +1,282 @@ +/* + * Copyright 2017 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef wasm_ast_type_updating_h +#define wasm_ast_type_updating_h + +#include "wasm-traversal.h" + +namespace wasm { + +// a class that tracks type dependencies between nodes, letting you +// update types efficiently when removing and altering code. +// altering code can alter types in the following way: +// * removing a break can make a block unreachable, if nothing else +// reaches it +// * altering the type of a child to unreachable can make the parent +// unreachable +struct TypeUpdater : public ExpressionStackWalker<TypeUpdater, UnifiedExpressionVisitor<TypeUpdater>> { + // Part 1: Scanning + + // track names to their blocks, so that when we remove a break to + // a block, we know how to find it if we need to update it + struct BlockInfo { + Block* block = nullptr; + int numBreaks = 0; + }; + std::map<Name, BlockInfo> blockInfos; + + // track the parent of each node, as child type changes may lead to + // unreachability + std::map<Expression*, Expression*> parents; + + void visitExpression(Expression* curr) { + if (expressionStack.size() > 1) { + parents[curr] = expressionStack[expressionStack.size() - 2]; + } else { + parents[curr] = nullptr; // this is the top level + } + // discover block/break relationships + if (auto* block = curr->dynCast<Block>()) { + if (block->name.is()) { + blockInfos[block->name].block = block; + } + } else if (auto* br = curr->dynCast<Break>()) { + // ensure info exists, discoverBreaks can then fill it + blockInfos[br->name]; + } else if (auto* sw = curr->dynCast<Switch>()) { + // ensure info exists, discoverBreaks can then fill it + for (auto target : sw->targets) { + blockInfos[target]; + } + blockInfos[sw->default_]; + } + // add a break to the info, for break and switch + discoverBreaks(curr, +1); + } + + // Part 2: Updating + + // Node replacements, additions, removals and type changes should be noted. An + // exception is nodes you know will never be looked at again. + + // note the replacement of one node with another. this should be called + // after performing the replacement. + // this does *not* look into the node. you should do so yourself if necessary + void noteReplacement(Expression* from, Expression* to) { + auto parent = parents[from]; + noteRemoval(from); + // if we are replacing with a child, i.e. a node that was already present + // in the ast, then we just have a type and parent to update + if (parents.find(to) != parents.end()) { + parents[to] = parent; + if (from->type != to->type) { + propagateTypesUp(to); + } + } else { + noteAddition(to, parent, from); + } + } + + // note the removal of a node + void noteRemoval(Expression* curr) { + noteRemovalOrAddition(curr, nullptr); + parents.erase(curr); + } + + // note the removal of a node and all its children + void noteRecursiveRemoval(Expression* curr) { + struct Recurser : public PostWalker<Recurser, UnifiedExpressionVisitor<Recurser>> { + TypeUpdater& parent; + + Recurser(TypeUpdater& parent, Expression* root) : parent(parent) { + walk(root); + } + + void visitExpression(Expression* curr) { + parent.noteRemoval(curr); + } + }; + + Recurser(*this, curr); + } + + void noteAddition(Expression* curr, Expression* parent, Expression* previous = nullptr) { + assert(parents.find(curr) == parents.end()); // must not already exist + noteRemovalOrAddition(curr, parent); + // if we didn't replace with the exact same type, propagate types up + if (!(previous && previous->type == curr->type)) { + propagateTypesUp(curr); + } + } + + // if parent is nullptr, this is a removal + void noteRemovalOrAddition(Expression* curr, Expression* parent) { + parents[curr] = parent; + discoverBreaks(curr, parent ? +1 : -1); + } + + // adds (or removes) breaks depending on break/switch contents + void discoverBreaks(Expression* curr, int change) { + if (auto* br = curr->dynCast<Break>()) { + if (!(br->value && br->value->type == unreachable) && + !(br->condition && br->condition->type == unreachable)) { + noteBreakChange(br->name, change, br->value); + } + } else if (auto* sw = curr->dynCast<Switch>()) { + if (!(sw->value && sw->value->type == unreachable) && + sw->condition->type != unreachable) { + applySwitchChanges(sw, change); + } + } + } + + void applySwitchChanges(Switch* sw, int change) { + std::set<Name> seen; + for (auto target : sw->targets) { + if (seen.insert(target).second) { + noteBreakChange(target, change, sw->value); + } + } + if (seen.insert(sw->default_).second) { + noteBreakChange(sw->default_, change, sw->value); + } + } + + // note the addition of a node + void noteBreakChange(Name name, int change, Expression* value) { + auto iter = blockInfos.find(name); + if (iter == blockInfos.end()) { + return; // we can ignore breaks to loops + } + auto& info = iter->second; + info.numBreaks += change; + assert(info.numBreaks >= 0); + auto* block = info.block; + if (block) { // if to a loop, can ignore + if (info.numBreaks == 0) { + // dropped to 0! the block may now be unreachable. that + // requires that it doesn't have a fallthrough + makeBlockUnreachableIfNoFallThrough(block); + } else if (change == 1 && info.numBreaks == 1) { + // bumped to 1! the block may now be reachable + if (block->type != unreachable) { + return; // was already reachable, had a fallthrough + } + changeTypeTo(block, value ? value->type : none); + } + } + } + + // alters the type of a node to a new type. + // this propagates the type change through all the parents. + void changeTypeTo(Expression* curr, WasmType newType) { + if (curr->type == newType) return; // nothing to do + curr->type = newType; + propagateTypesUp(curr); + } + + // given a node that has a new type, or is a new node, update + // all the parents accordingly. the existence of the node and + // any changes to it already occurred, this just updates the + // parents following that. i.e., nothing is done to the + // node we start on, it's done. + // the one thing we need to do here is propagate unreachability, + // no other change is possible + void propagateTypesUp(Expression* curr) { + if (curr->type != unreachable) return; + while (1) { + auto* child = curr; + curr = parents[child]; + if (!curr) return; + // if a child of a break/switch is now unreachable, the + // break may no longer be taken. note that if we get here, + // this is an actually new unreachable child of the + // node, so if there is just 1 such child, it is us, and + // we are newly unreachable + if (auto* br = curr->dynCast<Break>()) { + int unreachableChildren = 0; + if (br->value && br->value->type == unreachable) unreachableChildren++; + if (br->condition && br->condition->type == unreachable) unreachableChildren++; + if (unreachableChildren == 1) { + // the break is no longer taken + noteBreakChange(br->name, -1, br->value); + } + } else if (auto* sw = curr->dynCast<Switch>()) { + int unreachableChildren = 0; + if (sw->value && sw->value->type == unreachable) unreachableChildren++; + if (sw->condition->type == unreachable) unreachableChildren++; + if (unreachableChildren == 1) { + applySwitchChanges(sw, -1); + } + } + // get ready to apply unreachability to this node + if (curr->type == unreachable) { + return; // already unreachable, stop here + } + // most nodes become unreachable if a child is unreachable, + // but exceptions exists + if (auto* block = curr->dynCast<Block>()) { + // if the block has breaks, it can keep its type + if (!block->name.is() || blockInfos[block->name].numBreaks == 0) { + curr->type = unreachable; + } else { + return; // did not turn + } + } else if (auto* iff = curr->dynCast<If>()) { + // may not be unreachable if just one side is + iff->finalize(); + if (curr->type != unreachable) { + return; // did not turn + } + } else { + curr->type = unreachable; + } + } + } + + // efficiently update the type of a block, given the data we know. this + // can remove a concrete type and turn the block unreachable when it is + // unreachable, and it does this efficiently, without scanning the full + // contents + void maybeUpdateTypeToUnreachable(Block* curr) { + if (!isConcreteWasmType(curr->type)) { + return; // nothing concrete to change to unreachable + } + if (curr->name.is() && blockInfos[curr->name].numBreaks > 0) { + return;// has a break, not unreachable + } + // look for a fallthrough + makeBlockUnreachableIfNoFallThrough(curr); + } + + void makeBlockUnreachableIfNoFallThrough(Block* curr) { + if (curr->type == unreachable) { + return; // no change possible + } + for (auto* child : curr->list) { + if (child->type == unreachable) { + // no fallthrough, this block is now unreachable + changeTypeTo(curr, unreachable); + return; + } + } + } +}; + +} // namespace wasm + +#endif // wasm_ast_type_updating_h diff --git a/src/ast_utils.h b/src/ast_utils.h index 5a7c40630..aa4120569 100644 --- a/src/ast_utils.h +++ b/src/ast_utils.h @@ -42,10 +42,18 @@ struct BreakSeeker : public PostWalker<BreakSeeker> { } void visitBreak(Break *curr) { + // ignore an unreachable break + if (curr->condition && curr->condition->type == unreachable) return; + if (curr->value && curr->value->type == unreachable) return; + // check the break if (curr->name == target) noteFound(curr->value); } void visitSwitch(Switch *curr) { + // ignore an unreachable switch + if (curr->condition->type == unreachable) return; + if (curr->value && curr->value->type == unreachable) return; + // check the switch for (auto name : curr->targets) { if (name == target) noteFound(curr->value); } @@ -273,50 +281,6 @@ struct Measurer : public PostWalker<Measurer, UnifiedExpressionVisitor<Measurer> } }; -// Manipulate expressions - -struct ExpressionManipulator { - // Re-use a node's memory. This helps avoid allocation when optimizing. - template<typename InputType, typename OutputType> - static OutputType* convert(InputType *input) { - static_assert(sizeof(OutputType) <= sizeof(InputType), - "Can only convert to a smaller size Expression node"); - input->~InputType(); // arena-allocaed, so no destructor, but avoid UB. - OutputType* output = (OutputType*)(input); - new (output) OutputType; - return output; - } - - // Convenience method for nop, which is a common conversion - template<typename InputType> - static void nop(InputType* target) { - convert<InputType, Nop>(target); - } - - // Convert a node that allocates - template<typename InputType, typename OutputType> - static OutputType* convert(InputType *input, MixedArena& allocator) { - assert(sizeof(OutputType) <= sizeof(InputType)); - input->~InputType(); // arena-allocaed, so no destructor, but avoid UB. - OutputType* output = (OutputType*)(input); - new (output) OutputType(allocator); - return output; - } - - using CustomCopier = std::function<Expression*(Expression*)>; - static Expression* flexibleCopy(Expression* original, Module& wasm, CustomCopier custom); - - static Expression* copy(Expression* original, Module& wasm) { - auto copy = [](Expression* curr) { - return nullptr; - }; - return flexibleCopy(original, wasm, copy); - } - - // Splice an item into the middle of a block's list - static void spliceIntoBlock(Block* block, Index index, Expression* add); -}; - struct ExpressionAnalyzer { // Given a stack of expressions, checks if the topmost is used as a result. // For example, if the parent is a block and the node is before the last position, @@ -357,11 +321,102 @@ struct ExpressionAnalyzer { static uint32_t hash(Expression* curr); }; -// Finalizes a node - +// Re-Finalizes all node types +// This removes "unnecessary' block/if/loop types, i.e., that are added +// specifically, as in +// (block i32 (unreachable)) +// vs +// (block (unreachable)) +// This converts to the latter form. struct ReFinalize : public WalkerPass<PostWalker<ReFinalize>> { + bool isFunctionParallel() override { return true; } + + Pass* create() override { return new ReFinalize; } + ReFinalize() { name = "refinalize"; } + // block finalization is O(bad) if we do each block by itself, so do it in bulk, + // tracking break value types so we just do a linear pass + + std::map<Name, WasmType> breakValues; + + void visitBlock(Block *curr) { + // do this quickly, without any validation + if (curr->name.is()) { + auto iter = breakValues.find(curr->name); + if (iter != breakValues.end()) { + // there is a break to here + curr->type = iter->second; + return; + } + } + // nothing branches here + if (curr->list.size() > 0) { + // if we have an unreachable child, we are unreachable + // (we don't need to recurse into children, they can't + // break to us) + for (auto* child : curr->list) { + if (child->type == unreachable) { + curr->type = unreachable; + return; + } + } + // children are reachable, so last element determines type + curr->type = curr->list.back()->type; + } else { + curr->type = none; + } + } + void visitIf(If *curr) { curr->finalize(); } + void visitLoop(Loop *curr) { curr->finalize(); } + void visitBreak(Break *curr) { + curr->finalize(); + if (curr->value && curr->value->type == unreachable) { + return; // not an actual break + } + if (curr->condition && curr->condition->type == unreachable) { + return; // not an actual break + } + breakValues[curr->name] = getValueType(curr->value); + } + void visitSwitch(Switch *curr) { + curr->finalize(); + if (curr->condition->type == unreachable || (curr->value && curr->value->type == unreachable)) { + return; // not an actual break + } + auto valueType = getValueType(curr->value); + for (auto target : curr->targets) { + breakValues[target] = valueType; + } + breakValues[curr->default_] = valueType; + } + void visitCall(Call *curr) { curr->finalize(); } + void visitCallImport(CallImport *curr) { curr->finalize(); } + void visitCallIndirect(CallIndirect *curr) { curr->finalize(); } + void visitGetLocal(GetLocal *curr) { curr->finalize(); } + void visitSetLocal(SetLocal *curr) { curr->finalize(); } + void visitGetGlobal(GetGlobal *curr) { curr->finalize(); } + void visitSetGlobal(SetGlobal *curr) { curr->finalize(); } + void visitLoad(Load *curr) { curr->finalize(); } + void visitStore(Store *curr) { curr->finalize(); } + void visitConst(Const *curr) { curr->finalize(); } + void visitUnary(Unary *curr) { curr->finalize(); } + void visitBinary(Binary *curr) { curr->finalize(); } + void visitSelect(Select *curr) { curr->finalize(); } + void visitDrop(Drop *curr) { curr->finalize(); } + void visitReturn(Return *curr) { curr->finalize(); } + void visitHost(Host *curr) { curr->finalize(); } + void visitNop(Nop *curr) { curr->finalize(); } + void visitUnreachable(Unreachable *curr) { curr->finalize(); } + + WasmType getValueType(Expression* value) { + return value && value->type != unreachable ? value->type : none; + } +}; + +// Re-finalize a single node. This is slow, if you want to refinalize +// an entire ast, use ReFinalize +struct ReFinalizeNode : public Visitor<ReFinalizeNode> { void visitBlock(Block *curr) { curr->finalize(); } void visitIf(If *curr) { curr->finalize(); } void visitLoop(Loop *curr) { curr->finalize(); } @@ -385,10 +440,21 @@ struct ReFinalize : public WalkerPass<PostWalker<ReFinalize>> { void visitHost(Host *curr) { curr->finalize(); } void visitNop(Nop *curr) { curr->finalize(); } void visitUnreachable(Unreachable *curr) { curr->finalize(); } + + // given a stack of nested expressions, update them all from child to parent + static void updateStack(std::vector<Expression*>& expressionStack) { + for (int i = int(expressionStack.size()) - 1; i >= 0; i--) { + auto* curr = expressionStack[i]; + ReFinalizeNode().visit(curr); + } + } }; // Adds drop() operations where necessary. This lets you not worry about adding drop when // generating code. +// This also refinalizes before and after, as dropping can change types, and depends +// on types being cleaned up - no unnecessary block/if/loop types (see refinalize) +// TODO: optimize that, interleave them struct AutoDrop : public WalkerPass<ExpressionStackWalker<AutoDrop>> { bool isFunctionParallel() override { return true; } @@ -410,10 +476,7 @@ struct AutoDrop : public WalkerPass<ExpressionStackWalker<AutoDrop>> { } void reFinalize() { - for (int i = int(expressionStack.size()) - 1; i >= 0; i--) { - auto* curr = expressionStack[i]; - ReFinalize().visit(curr); - } + ReFinalizeNode::updateStack(expressionStack); } void visitBlock(Block* curr) { @@ -442,10 +505,13 @@ struct AutoDrop : public WalkerPass<ExpressionStackWalker<AutoDrop>> { } } - void visitFunction(Function* curr) { + void doWalkFunction(Function* curr) { + ReFinalize().walkFunctionInModule(curr, getModule()); + walk(curr->body); if (curr->result == none && isConcreteWasmType(curr->body->type)) { curr->body = Builder(*getModule()).makeDrop(curr->body); } + ReFinalize().walkFunctionInModule(curr, getModule()); } }; diff --git a/src/cfg/Relooper.cpp b/src/cfg/Relooper.cpp index a900c6608..9fcc9c8be 100644 --- a/src/cfg/Relooper.cpp +++ b/src/cfg/Relooper.cpp @@ -84,6 +84,7 @@ static wasm::Expression* HandleFollowupMultiples(wasm::Expression* Ret, Shape* P } else { for (auto* Entry : Loop->Entries) { Curr->name = Builder.getBlockBreakName(Entry->Id); + Curr->finalize(); auto* Outer = Builder.makeBlock(Curr); Outer->finalize(); // TODO: not really necessary Curr = Outer; @@ -210,6 +211,10 @@ wasm::Expression* Block::Render(RelooperBuilder& Builder, bool InLoop) { // We'll emit a chain of if-elses wasm::If* CurrIf = nullptr; + // we build an if, then add a child, then add a child to that, etc., so we must + // finalize them in reverse order + std::vector<wasm::If*> finalizeStack; + wasm::Expression* RemainingConditions = nullptr; for (BlockBranchMap::iterator iter = ProcessedBranchesOut.begin();; iter++) { @@ -244,6 +249,7 @@ wasm::Expression* Block::Render(RelooperBuilder& Builder, bool InLoop) { wasm::Expression* Now; if (RemainingConditions) { Now = Builder.makeIf(RemainingConditions, CurrContent); + finalizeStack.push_back(Now->cast<wasm::If>()); } else { Now = CurrContent; } @@ -256,6 +262,7 @@ wasm::Expression* Block::Render(RelooperBuilder& Builder, bool InLoop) { } } else { auto* Now = Builder.makeIf(Details->Condition, CurrContent); + finalizeStack.push_back(Now); if (!CurrIf) { assert(!Root); Root = CurrIf = Now; @@ -275,6 +282,14 @@ wasm::Expression* Block::Render(RelooperBuilder& Builder, bool InLoop) { } if (IsDefault) break; } + + // finalize the if-chains + while (finalizeStack.size() > 0) { + wasm::If* curr = finalizeStack.back(); + finalizeStack.pop_back(); + curr->finalize(); + } + } else { // Emit a switch auto Base = std::string("switch$") + std::to_string(Id); @@ -365,11 +380,13 @@ wasm::Expression* MultipleShape::Render(RelooperBuilder& Builder, bool InLoop) { // TODO: consider switch // emit an if-else chain wasm::If *FirstIf = nullptr, *CurrIf = nullptr; + std::vector<wasm::If*> finalizeStack; for (IdShapeMap::iterator iter = InnerMap.begin(); iter != InnerMap.end(); iter++) { auto* Now = Builder.makeIf( Builder.makeCheckLabel(iter->first), iter->second->Render(Builder, InLoop) ); + finalizeStack.push_back(Now); if (!CurrIf) { FirstIf = CurrIf = Now; } else { @@ -378,6 +395,11 @@ wasm::Expression* MultipleShape::Render(RelooperBuilder& Builder, bool InLoop) { CurrIf = Now; } } + while (finalizeStack.size() > 0) { + wasm::If* curr = finalizeStack.back(); + finalizeStack.pop_back(); + curr->finalize(); + } wasm::Expression* Ret = Builder.makeBlock(FirstIf); Ret = HandleFollowupMultiples(Ret, this, Builder, InLoop); if (Next) { diff --git a/src/pass.h b/src/pass.h index 513979954..198d5dcb5 100644 --- a/src/pass.h +++ b/src/pass.h @@ -131,6 +131,16 @@ struct PassRunner { isNested = nested; } + // BINARYEN_PASS_DEBUG is a convenient commandline way to log out the toplevel passes, their times, + // and validate between each pass. + // (we don't recurse pass debug into sub-passes, as it doesn't help anyhow and + // also is bad for e.g. printing which is a pass) + // this method returns whether we are in passDebug mode, and which value: + // 1: run pass by pass, validating in between + // 2: also save the last pass, so it breakage happens we can print the last one + // 3: also dump out byn-* files for each pass + static int getPassDebug(); + protected: bool isNested = false; diff --git a/src/passes/CoalesceLocals.cpp b/src/passes/CoalesceLocals.cpp index ed075c57f..2828c9955 100644 --- a/src/passes/CoalesceLocals.cpp +++ b/src/passes/CoalesceLocals.cpp @@ -159,7 +159,7 @@ struct CoalesceLocals : public WalkerPass<CFGWalker<CoalesceLocals, Visitor<Coal auto* curr = (*currp)->cast<GetLocal>(); // if in unreachable code, ignore if (!self->currBasicBlock) { - ExpressionManipulator::convert<GetLocal, Unreachable>(curr); + *currp = Builder(*self->getModule()).replaceWithIdenticalType(curr); return; } self->currBasicBlock->contents.actions.emplace_back(Action::Get, curr->index, currp); @@ -169,11 +169,7 @@ struct CoalesceLocals : public WalkerPass<CFGWalker<CoalesceLocals, Visitor<Coal auto* curr = (*currp)->cast<SetLocal>(); // if in unreachable code, ignore if (!self->currBasicBlock) { - if (curr->isTee()) { - ExpressionManipulator::convert<SetLocal, Unreachable>(curr); - } else { - ExpressionManipulator::nop(curr); - } + *currp = Builder(*self->getModule()).replaceWithIdenticalType(curr); return; } self->currBasicBlock->contents.actions.emplace_back(Action::Set, curr->index, currp); @@ -625,8 +621,9 @@ static void removeIfCopy(Expression** origin, SetLocal* set, If* iff, Expression // replace the origin with the if, and sink the set into the other non-copying arm *origin = iff; set->value = other; + set->finalize(); other = set; - if (!set->isTee()) { + if (!isConcreteWasmType(set->type)) { // we don't need the copy at all copy = nullptr; if (!iff->ifTrue) { diff --git a/src/passes/DeadCodeElimination.cpp b/src/passes/DeadCodeElimination.cpp index 0555ff1f0..42f86ba45 100644 --- a/src/passes/DeadCodeElimination.cpp +++ b/src/passes/DeadCodeElimination.cpp @@ -32,6 +32,8 @@ #include <pass.h> #include <ast_utils.h> #include <wasm-builder.h> +#include <ast/block-utils.h> +#include <ast/type-updating.h> namespace wasm { @@ -40,11 +42,23 @@ struct DeadCodeElimination : public WalkerPass<PostWalker<DeadCodeElimination>> Pass* create() override { return new DeadCodeElimination; } + // as we remove code, we must keep the types of other nodes valid + TypeUpdater typeUpdater; + + Expression* replaceCurrent(Expression* expression) { + auto* old = getCurrent(); + WalkerPass<PostWalker<DeadCodeElimination>>::replaceCurrent(expression); + // also update the type updater + typeUpdater.noteReplacement(old, expression); + return expression; + } + // whether the current code is actually reachable bool reachable; void doWalkFunction(Function* func) { reachable = true; + typeUpdater.walk(func->body); walk(func->body); } @@ -160,10 +174,8 @@ struct DeadCodeElimination : public WalkerPass<PostWalker<DeadCodeElimination>> // see https://github.com/WebAssembly/spec/issues/355 if (!(isConcreteWasmType(block->type) && block->list[i]->type == none)) { block->list.resize(i + 1); - // note that we do *not* finalize here. it is incorrect to re-finalize a block - // after removing elements, as it may no longer have branches to it that would - // determine its type, so re-finalizing would just wipe out an existing type - // that it had. + // note that we still walk the children, so typeUpdater will already + // note they are being removed, and we don't need to do that here } } } @@ -175,8 +187,11 @@ struct DeadCodeElimination : public WalkerPass<PostWalker<DeadCodeElimination>> reachable = reachable || reachableBreaks.count(curr->name); reachableBreaks.erase(curr->name); } - if (curr->list.size() == 1 && isDead(curr->list[0]) && !BreakSeeker::has(curr->list[0], curr->name)) { - replaceCurrent(curr->list[0]); + if (curr->list.size() == 1 && isDead(curr->list[0])) { + replaceCurrent(BlockUtils::simplifyToContentsWithPossibleTypeChange(curr, this)); + } else { + // the block may have had a type, but can now be unreachable, which allows more reduction outside + typeUpdater.maybeUpdateTypeToUnreachable(curr); } } @@ -213,13 +228,18 @@ struct DeadCodeElimination : public WalkerPass<PostWalker<DeadCodeElimination>> if (isDead(curr->condition)) { replaceCurrent(curr->condition); } + // the if may have had a type, but can now be unreachable, which allows more reduction outside + curr->finalize(); } static void scan(DeadCodeElimination* self, Expression** currp) { if (!self->reachable) { // convert to an unreachable. do this without UB, even though we have no destructors on AST nodes - #define DELEGATE(CLASS_TO_VISIT) \ - { ExpressionManipulator::convert<CLASS_TO_VISIT, Unreachable>(static_cast<CLASS_TO_VISIT*>(*currp)); break; } + #define DELEGATE(CLASS_TO_VISIT) { \ + self->typeUpdater.noteRecursiveRemoval(*currp); \ + ExpressionManipulator::convert<CLASS_TO_VISIT, Unreachable>(static_cast<CLASS_TO_VISIT*>(*currp)); \ + break; \ + } switch ((*currp)->_id) { case Expression::Id::BlockId: DELEGATE(Block); case Expression::Id::IfId: DELEGATE(If); @@ -398,6 +418,12 @@ struct DeadCodeElimination : public WalkerPass<PostWalker<DeadCodeElimination>> } } + void visitDrop(Drop* curr) { + if (isDead(curr->value)) { + replaceCurrent(curr->value); + } + } + void visitHost(Host* curr) { handleCall(curr); } diff --git a/src/passes/MergeBlocks.cpp b/src/passes/MergeBlocks.cpp index 494bf5a9e..37ada0174 100644 --- a/src/passes/MergeBlocks.cpp +++ b/src/passes/MergeBlocks.cpp @@ -169,6 +169,7 @@ static void optimizeBlock(Block* curr, Module* module) { // we can do it! // reuse the drop drop->value = child->list.back(); + drop->finalize(); child->list.back() = drop; child->finalize(); curr->list[i] = child; @@ -223,10 +224,17 @@ struct MergeBlocks : public WalkerPass<PostWalker<MergeBlocks>> { if (auto* block = child->dynCast<Block>()) { if (!block->name.is() && block->list.size() >= 2) { child = block->list.back(); + // we modified child )which is *&), which modifies curr, which might change its type + // (e.g. (drop (block i32 .. (unreachable))) + // the child was a block of i32, and is being replaced with an unreachable, so the + // parent will likely need to be unreachable too + auto oldType = curr->type; + ReFinalize().walk(curr); if (outer == nullptr) { // reuse the block, move it out block->list.back() = curr; - block->finalize(); // last block element was our input, and is now our output, which may differ TODO optimize + // we want the block outside to have the same type as curr had + block->finalize(oldType); replaceCurrent(block); return block; } else { @@ -264,7 +272,16 @@ struct MergeBlocks : public WalkerPass<PostWalker<MergeBlocks>> { } void visitSelect(Select* curr) { - optimize(curr, curr->condition, optimize(curr, curr->ifFalse, optimize(curr, curr->ifTrue), &curr->ifTrue), &curr->ifTrue, &curr->ifFalse); + Block* outer = nullptr; + outer = optimize(curr, curr->ifTrue, outer); + if (EffectAnalyzer(getPassOptions(), curr->ifTrue).hasSideEffects()) return; + outer = optimize(curr, curr->ifFalse, outer); + if (EffectAnalyzer(getPassOptions(), curr->ifFalse).hasSideEffects()) return; + /* . */ optimize(curr, curr->condition, outer); + } + + void visitDrop(Drop* curr) { + optimize(curr, curr->value); } void visitBreak(Break* curr) { diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 50a7c5589..0e7c34564 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -27,6 +27,7 @@ #include <ast_utils.h> #include <ast/cost.h> #include <ast/properties.h> +#include <ast/manipulation.h> namespace wasm { diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index 37686de1a..e06103c11 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -23,6 +23,7 @@ #include <wasm-builder.h> #include <wasm-interpreter.h> #include <ast_utils.h> +#include "ast/manipulation.h" namespace wasm { @@ -142,6 +143,11 @@ struct Precompute : public WalkerPass<PostWalker<Precompute, UnifiedExpressionVi ExpressionManipulator::nop(curr); } } + + void visitFunction(Function* curr) { + // removing breaks can alter types + ReFinalize().walkFunctionInModule(curr, getModule()); + } }; Pass *createPrecomputePass() { diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 9a2edcb6b..a90cba7a7 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -25,6 +25,13 @@ namespace wasm { +static int forceFull() { + if (getenv("BINARYEN_PRINT_FULL")) { + return std::stoi(getenv("BINARYEN_PRINT_FULL")); + } + return 0; +} + struct PrintSExpression : public Visitor<PrintSExpression> { std::ostream& o; unsigned indent = 0; @@ -41,9 +48,7 @@ struct PrintSExpression : public Visitor<PrintSExpression> { PrintSExpression(std::ostream& o) : o(o) { setMinify(false); - if (getenv("BINARYEN_PRINT_FULL")) { - full = std::stoi(getenv("BINARYEN_PRINT_FULL")); - } + if (!full) full = forceFull(); } void visit(Expression* curr) { @@ -802,7 +807,7 @@ std::ostream& WasmPrinter::printExpression(Expression* expression, std::ostream& } PrintSExpression print(o); print.setMinify(minify); - if (full) { + if (full || forceFull()) { print.setFull(true); o << "[" << printWasmType(expression->type) << "] "; } diff --git a/src/passes/RelooperJumpThreading.cpp b/src/passes/RelooperJumpThreading.cpp index d3fd1844f..3656b5523 100644 --- a/src/passes/RelooperJumpThreading.cpp +++ b/src/passes/RelooperJumpThreading.cpp @@ -22,9 +22,11 @@ #include "wasm.h" #include "pass.h" #include "ast_utils.h" +#include "ast/manipulation.h" namespace wasm { + static Name LABEL("label"); static Name getInnerName(int i) { @@ -147,6 +149,11 @@ struct RelooperJumpThreading : public WalkerPass<ExpressionStackWalker<RelooperJ } } + void visitFunction(Function* curr) { + // we may alter types + ReFinalize().walkFunctionInModule(curr, getModule()); + } + private: bool hasIrreducibleControlFlow(If* iff, Expression* origin) { diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 785b1c92e..4273b9f63 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -305,19 +305,7 @@ struct RemoveUnusedBrs : public WalkerPass<PostWalker<RemoveUnusedBrs>> { if (worked) { // Our work may alter block and if types, they may now return values that we made flow through them - struct TypeUpdater : public WalkerPass<PostWalker<TypeUpdater>> { - void visitBlock(Block* curr) { - curr->finalize(); - } - void visitLoop(Loop* curr) { - curr->finalize(); - } - void visitIf(If* curr) { - curr->finalize(); - } - }; - TypeUpdater typeUpdater; - typeUpdater.walkFunction(func); + ReFinalize().walkFunctionInModule(func, getModule()); } // thread trivial jumps @@ -370,18 +358,22 @@ struct RemoveUnusedBrs : public WalkerPass<PostWalker<RemoveUnusedBrs>> { } } - void finish() { + void finish(Function* func) { for (auto& iter : newNames) { auto* br = iter.first; auto name = iter.second; br->name = name; } + if (newNames.size() > 0) { + // by changing where brs go, we may change block types etc. + ReFinalize().walkFunctionInModule(func, getModule()); + } } }; JumpThreader jumpThreader; jumpThreader.setModule(getModule()); jumpThreader.walkFunction(func); - jumpThreader.finish(); + jumpThreader.finish(func); // perform some final optimizations struct FinalOptimizer : public PostWalker<FinalOptimizer> { @@ -461,6 +453,7 @@ struct RemoveUnusedBrs : public WalkerPass<PostWalker<RemoveUnusedBrs>> { )); curr->name = Name(); ExpressionManipulator::nop(br); + curr->finalize(curr->type); return; } } diff --git a/src/passes/RemoveUnusedNames.cpp b/src/passes/RemoveUnusedNames.cpp index f2c1f8525..39bf068df 100644 --- a/src/passes/RemoveUnusedNames.cpp +++ b/src/passes/RemoveUnusedNames.cpp @@ -72,6 +72,7 @@ struct RemoveUnusedNames : public WalkerPass<PostWalker<RemoveUnusedNames>> { WASM_UNREACHABLE(); } } + child->finalize(child->type); replaceCurrent(child); } } diff --git a/src/passes/SimplifyLocals.cpp b/src/passes/SimplifyLocals.cpp index 6609cfe71..a9e9de34b 100644 --- a/src/passes/SimplifyLocals.cpp +++ b/src/passes/SimplifyLocals.cpp @@ -47,6 +47,7 @@ #include <pass.h> #include <ast_utils.h> #include <ast/count.h> +#include <ast/manipulation.h> namespace wasm { diff --git a/src/passes/Vacuum.cpp b/src/passes/Vacuum.cpp index 13b9c62fd..1f820ec9d 100644 --- a/src/passes/Vacuum.cpp +++ b/src/passes/Vacuum.cpp @@ -22,14 +22,17 @@ #include <pass.h> #include <ast_utils.h> #include <wasm-builder.h> +#include <ast/block-utils.h> namespace wasm { -struct Vacuum : public WalkerPass<ExpressionStackWalker<Vacuum>> { +struct Vacuum : public WalkerPass<PostWalker<Vacuum>> { bool isFunctionParallel() override { return true; } Pass* create() override { return new Vacuum; } + bool needRefinalize = false; + // returns nullptr if curr is dead, curr if it must stay as is, or another node if it can be replaced Expression* optimize(Expression* curr, bool resultUsed) { while (1) { @@ -164,46 +167,45 @@ struct Vacuum : public WalkerPass<ExpressionStackWalker<Vacuum>> { list.push_back(last); } needResize = false; + needRefinalize = true; break; } } } if (needResize) { list.resize(size - skip); + // resizing means we drop elements, which may include breaks, which may + // render blocks unreachable now + needRefinalize = true; } - if (!curr->name.is()) { - if (list.size() == 1) { - // just one element. replace the block, either with it or with a nop if it's not needed - if (isConcreteWasmType(curr->type) || EffectAnalyzer(getPassOptions(), list[0]).hasSideEffects()) { - replaceCurrent(list[0]); - } else { - if (curr->type == unreachable) { - ExpressionManipulator::convert<Block, Unreachable>(curr); - } else { - ExpressionManipulator::nop(curr); - } - } - } else if (list.size() == 0) { - ExpressionManipulator::nop(curr); - } - } + // the block may now be a trivial one that we can get rid of and just leave its contents + replaceCurrent(BlockUtils::simplifyToContents(curr, this)); } void visitIf(If* curr) { // if the condition is a constant, just apply it // we can just return the ifTrue or ifFalse. if (auto* value = curr->condition->dynCast<Const>()) { + Expression* child; if (value->value.getInteger()) { - replaceCurrent(curr->ifTrue); - return; + child = curr->ifTrue; } else { if (curr->ifFalse) { - replaceCurrent(curr->ifFalse); + child = curr->ifFalse; } else { ExpressionManipulator::nop(curr); + return; } - return; } + replaceCurrent(child); + if (curr->type != child->type) { + // e.g., if (1) unreachable is none => unreachable + // or if i32 (1) unreachable else 10 is i32 => unreachable + // in which cases we must update our parents. + // we must do this now, so that our parents see valid data + ReFinalize().walk(getFunction()->body); + } + return; } if (curr->ifFalse) { if (curr->ifFalse->is<Nop>()) { @@ -253,8 +255,10 @@ struct Vacuum : public WalkerPass<ExpressionStackWalker<Vacuum>> { // if we are dropping a block's return value, we might be able to remove it entirely if (auto* block = curr->value->dynCast<Block>()) { auto* last = block->list.back(); - if (isConcreteWasmType(last->type)) { - assert(block->type == last->type); + // note that the last element may be concrete but not the block, if the + // block has an unreachable element in the middle, making the block unreachable + // despite later elements and in particular the last + if (isConcreteWasmType(last->type) && block->type == last->type) { last = optimize(last, false); if (!last) { // we may be able to remove this, if there are no brs @@ -303,6 +307,9 @@ struct Vacuum : public WalkerPass<ExpressionStackWalker<Vacuum>> { } void visitFunction(Function* curr) { + if (needRefinalize) { + ReFinalize().walk(curr->body); + } auto* optimized = optimize(curr->body, curr->result != none); if (optimized) { curr->body = optimized; diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 7b4a896eb..2fb0d5c3b 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -166,11 +166,7 @@ static void dumpWast(Name name, Module* wasm) { } void PassRunner::run() { - // BINARYEN_PASS_DEBUG is a convenient commandline way to log out the toplevel passes, their times, - // and validate between each pass. - // (we don't recurse pass debug into sub-passes, as it doesn't help anyhow and - // also is bad for e.g. printing which is a pass) - static const int passDebug = getenv("BINARYEN_PASS_DEBUG") ? atoi(getenv("BINARYEN_PASS_DEBUG")) : 0; + static const int passDebug = getPassDebug(); if (!isNested && (options.debug || passDebug)) { // for debug logging purposes, run each pass in full before running the other auto totalTime = std::chrono::duration<double>(0); @@ -212,7 +208,7 @@ void PassRunner::run() { if (passDebug >= 2) { std::cerr << "Last pass (" << pass->name << ") broke validation. Here is the module before: \n" << moduleBefore.str() << "\n"; } else { - std::cerr << "Last pass (" << pass->name << ") broke validation. Run with BINARYEN_PASS_DEBUG=2 in the env to see the earlier state (FIXME: this is broken, need to prevent recursion of the print pass\n"; + std::cerr << "Last pass (" << pass->name << ") broke validation. Run with BINARYEN_PASS_DEBUG=2 in the env to see the earlier state, or 3 to dump byn-* files for each pass\n"; } abort(); } @@ -300,4 +296,9 @@ void PassRunner::runPassOnFunction(Pass* pass, Function* func) { instance->runFunction(this, wasm, func); } +int PassRunner::getPassDebug() { + static const int passDebug = getenv("BINARYEN_PASS_DEBUG") ? atoi(getenv("BINARYEN_PASS_DEBUG")) : 0; + return passDebug; +} + } // namespace wasm diff --git a/src/s2wasm.h b/src/s2wasm.h index d0579a402..6ba4750ec 100644 --- a/src/s2wasm.h +++ b/src/s2wasm.h @@ -1185,6 +1185,7 @@ class S2WasmBuilder { if (isConcreteWasmType(block->type) && block->list.size() == 0) { // empty blocks that return a value are not valid, fix that up block->list.push_back(allocator->alloc<Unreachable>()); + block->finalize(); } bstack.pop_back(); } else if (peek(".LBB")) { @@ -1207,7 +1208,7 @@ class S2WasmBuilder { } else if (match("end_loop")) { auto* loop = bstack.back()->cast<Loop>(); bstack.pop_back(); - loop->body->finalize(); + loop->body->cast<Block>()->finalize(); loop->finalize(loop->type); } else if (match("br_table")) { auto curr = allocator->alloc<Switch>(); @@ -1293,7 +1294,7 @@ class S2WasmBuilder { bstack.pop_back(); // remove the base block for the function body assert(bstack.empty()); assert(estack.empty()); - func->body->dynCast<Block>()->finalize(); + func->body->cast<Block>()->finalize(); wasm->addFunction(func); } diff --git a/src/tools/asm2wasm.cpp b/src/tools/asm2wasm.cpp index dd2b6a791..24cff6f09 100644 --- a/src/tools/asm2wasm.cpp +++ b/src/tools/asm2wasm.cpp @@ -24,6 +24,7 @@ #include "wasm-builder.h" #include "wasm-printing.h" #include "wasm-io.h" +#include "wasm-validator.h" #include "asm2wasm.h" @@ -189,7 +190,11 @@ int main(int argc, const char *argv[]) { } } - if (options.debug) std::cerr << "printing..." << std::endl; + if (!WasmValidator().validate(wasm)) { + Fatal() << "error in validating output"; + } + + if (options.debug) std::cerr << "emitting..." << std::endl; ModuleWriter writer; writer.setDebug(options.debug); writer.setDebugInfo(passOptions.debugInfo); diff --git a/src/wasm-builder.h b/src/wasm-builder.h index 04e0aa4de..42d3dfe47 100644 --- a/src/wasm-builder.h +++ b/src/wasm-builder.h @@ -17,7 +17,8 @@ #ifndef wasm_wasm_builder_h #define wasm_wasm_builder_h -#include <wasm.h> +#include "wasm.h" +#include "ast/manipulation.h" namespace wasm { @@ -379,6 +380,25 @@ public: std::swap(iff->ifTrue, iff->ifFalse); iff->condition = makeUnary(EqZInt32, iff->condition); } + + // returns a replacement with the precise same type, and with + // minimal contents. as a replacement, this may reuse the + // input node + template<typename T> + Expression* replaceWithIdenticalType(T* curr) { + Literal value; + // TODO: reuse node conditionally when possible for literals + switch (curr->type) { + case i32: value = Literal(int32_t(0)); break; + case i64: value = Literal(int64_t(0)); break; + case f32: value = Literal(float(0)); break; + case f64: value = Literal(double(0)); break; + case none: return ExpressionManipulator::nop(curr); + case unreachable: return ExpressionManipulator::convert<T, Unreachable>(curr); + } + return makeConst(value); + } + }; } // namespace wasm diff --git a/src/wasm-module-building.h b/src/wasm-module-building.h index 023a2d020..b501a1ab6 100644 --- a/src/wasm-module-building.h +++ b/src/wasm-module-building.h @@ -94,6 +94,7 @@ public: : wasm(wasm), numFunctions(numFunctions), passOptions(passOptions), addPrePasses(addPrePasses), endMarker(nullptr), list(nullptr), nextFunction(0), numWorkers(0), liveWorkers(0), activeWorkers(0), availableFuncs(0), finishedFuncs(0), finishing(false), debug(debug), validateGlobally(validateGlobally) { + if (!useWorkers()) { // if we shouldn't use threads, don't return; @@ -136,7 +137,7 @@ public: } bool useWorkers() { - return numFunctions > 0 && !debug && ThreadPool::getNumCores() > 1; + return numFunctions > 0 && !debug && ThreadPool::getNumCores() > 1 && !PassRunner::getPassDebug(); } // Add a function to the module, and to be optimized diff --git a/src/wasm-traversal.h b/src/wasm-traversal.h index 522f5092c..f49407cad 100644 --- a/src/wasm-traversal.h +++ b/src/wasm-traversal.h @@ -157,7 +157,11 @@ struct Walker : public VisitorType { // just one visit*() method is called by the traversal; if you replace a node, // and you want to process the output, you must do that explicitly). Expression* replaceCurrent(Expression* expression) { - return replace = expression; + return *replacep = expression; + } + + Expression* getCurrent() { + return *replacep; } // Get the current module @@ -184,6 +188,15 @@ struct Walker : public VisitorType { setFunction(nullptr); } + void walkFunctionInModule(Function* func, Module* module) { + setModule(module); + setFunction(func); + static_cast<SubType*>(this)->doWalkFunction(func); + static_cast<SubType*>(this)->visitFunction(func); + setFunction(nullptr); + setModule(nullptr); + } + // override this to provide custom functionality void doWalkFunction(Function* func) { walk(func->body); @@ -264,12 +277,9 @@ struct Walker : public VisitorType { pushTask(SubType::scan, &root); while (stack.size() > 0) { auto task = popTask(); + replacep = task.currp; assert(*task.currp); task.func(static_cast<SubType*>(this), task.currp); - if (replace) { - *task.currp = replace; - replace = nullptr; - } } } @@ -311,7 +321,7 @@ struct Walker : public VisitorType { } private: - Expression* replace = nullptr; // a node to replace + Expression** replacep = nullptr; // the address of the current node, used to replace it std::vector<Task> stack; // stack of tasks Function* currFunction = nullptr; // current function being processed Module* currModule = nullptr; // current module being processed @@ -571,6 +581,13 @@ struct ExpressionStackWalker : public PostWalker<SubType, VisitorType> { self->pushTask(SubType::doPreVisit, currp); } + + Expression* replaceCurrent(Expression* expression) { + PostWalker<SubType, VisitorType>::replaceCurrent(expression); + // also update the stack + expressionStack.back() = expression; + return expression; + } }; // Traversal in the order of execution. This is quick and simple, but diff --git a/src/wasm-validator.h b/src/wasm-validator.h index b657b67a6..66375790c 100644 --- a/src/wasm-validator.h +++ b/src/wasm-validator.h @@ -42,6 +42,7 @@ #include "support/colors.h" #include "wasm.h" #include "wasm-printing.h" +#include "ast_utils.h" namespace wasm { @@ -68,7 +69,7 @@ struct WasmValidator : public PostWalker<WasmValidator> { void noteLabelName(Name name) { if (!name.is()) return; - shouldBeTrue(labelNames.find(name) == labelNames.end(), name, "names in Binaren IR must be unique - IR generators must ensure that"); + shouldBeTrue(labelNames.find(name) == labelNames.end(), name, "names in Binaryen IR must be unique - IR generators must ensure that"); labelNames.insert(name); } @@ -76,7 +77,13 @@ public: bool validate(Module& module, bool validateWeb_ = false, bool validateGlobally_ = true) { validateWeb = validateWeb_; validateGlobally = validateGlobally_; + // wasm logic validation walkModule(&module); + // validate additional internal IR details when in pass-debug mode + if (PassRunner::getPassDebug()) { + validateBinaryenIR(module); + } + // print if an error occurred if (!valid) { WasmPrinter::printModule(&module, std::cerr); } @@ -222,16 +229,23 @@ public: } } void visitBreak(Break *curr) { - noteBreak(curr->name, curr->value, curr); + // note breaks (that are actually taken) + if ((!curr->value || curr->value->type != unreachable) && + (!curr->condition || curr->condition->type != unreachable)) { + noteBreak(curr->name, curr->value, curr); + } if (curr->condition) { shouldBeTrue(curr->condition->type == unreachable || curr->condition->type == i32, curr, "break condition must be i32"); } } void visitSwitch(Switch *curr) { - for (auto& target : curr->targets) { - noteBreak(target, curr->value, curr); + // note breaks (that are actually taken) + if (curr->condition->type != unreachable && (!curr->value || curr->value->type != unreachable)) { + for (auto& target : curr->targets) { + noteBreak(target, curr->value, curr); + } + noteBreak(curr->default_, curr->value, curr); } - noteBreak(curr->default_, curr->value, curr); shouldBeTrue(curr->condition->type == unreachable || curr->condition->type == i32, curr, "br_table condition must be i32"); } void visitCall(Call *curr) { @@ -612,8 +626,6 @@ public: PostWalker<WasmValidator>::doWalkFunction(func); } -private: - // helpers std::ostream& fail() { @@ -718,6 +730,37 @@ private: default: {} } } + + void validateBinaryenIR(Module& wasm) { + struct BinaryenIRValidator : public PostWalker<BinaryenIRValidator, UnifiedExpressionVisitor<BinaryenIRValidator>> { + WasmValidator& parent; + + BinaryenIRValidator(WasmValidator& parent) : parent(parent) {} + + void visitExpression(Expression* curr) { + // check if a node type is 'stale', i.e., we forgot to finalize() the node. + auto oldType = curr->type; + ReFinalizeNode().visit(curr); + auto newType = curr->type; + if (newType != oldType) { + // We accept concrete => undefined, + // e.g. + // + // (drop (block i32 (unreachable))) + // + // The block has an added type, not derived from the ast itself, so it is + // ok for it to be either i32 or unreachable. + if (!(isConcreteWasmType(oldType) && newType == unreachable)) { + parent.fail() << "stale type found in " << getFunction()->name << " on " << curr << "\n(marked as " << printWasmType(oldType) << ", should be " << printWasmType(newType) << ")\n"; + parent.valid = false; + } + curr->type = oldType; + } + } + }; + BinaryenIRValidator binaryenIRValidator(*this); + binaryenIRValidator.walkModule(&wasm); + } }; } // namespace wasm diff --git a/src/wasm/wasm-s-parser.cpp b/src/wasm/wasm-s-parser.cpp index f9e3912f7..9b053d5b8 100644 --- a/src/wasm/wasm-s-parser.cpp +++ b/src/wasm/wasm-s-parser.cpp @@ -185,9 +185,11 @@ Element* SExpressionParser::parseString() { input++; std::string str; while (1) { + if (input[0] == 0) throw ParseException("unterminated string", line, start - lineStart); if (input[0] == '"') break; if (input[0] == '\\') { str += input[0]; + if (input[1] == 0) throw ParseException("unterminated string escape", line, start - lineStart); str += input[1]; input += 2; continue; |