diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/cfg/liveness-traversal.h | 16 | ||||
-rw-r--r-- | src/ir/equivalent_sets.h | 94 | ||||
-rw-r--r-- | src/passes/CoalesceLocals.cpp | 8 | ||||
-rw-r--r-- | src/passes/ConstHoisting.cpp | 7 | ||||
-rw-r--r-- | src/passes/SimplifyLocals.cpp | 277 | ||||
-rw-r--r-- | src/passes/pass.cpp | 6 | ||||
-rw-r--r-- | src/wasm/wasm-validator.cpp | 1 |
7 files changed, 329 insertions, 80 deletions
diff --git a/src/cfg/liveness-traversal.h b/src/cfg/liveness-traversal.h index 8ba80e015..edbe52fd6 100644 --- a/src/cfg/liveness-traversal.h +++ b/src/cfg/liveness-traversal.h @@ -26,6 +26,7 @@ #include "wasm-builder.h" #include "wasm-traversal.h" #include "cfg-traversal.h" +#include "ir/utils.h" namespace wasm { @@ -60,6 +61,21 @@ struct LivenessAction { bool isGet() { return what == Get; } bool isSet() { return what == Set; } bool isOther() { return what == Other; } + + // Helper to remove a set that is a copy we know is not needed. This + // updates both the IR and the action. + void removeCopy() { + auto* set = (*origin)->cast<SetLocal>(); + if (set->isTee()) { + *origin = set->value->cast<GetLocal>(); + } else { + ExpressionManipulator::nop(set); + } + // Mark as an other: even if we turned the origin into a get, + // we already have another Action for that get, that properly + // represents it. + what = Other; + } }; // information about liveness in a basic block diff --git a/src/ir/equivalent_sets.h b/src/ir/equivalent_sets.h new file mode 100644 index 000000000..d1a957bbd --- /dev/null +++ b/src/ir/equivalent_sets.h @@ -0,0 +1,94 @@ +/* + * Copyright 2018 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef wasm_ir_equivalent_sets_h +#define wasm_ir_equivalent_sets_h + +#include <wasm.h> + +namespace wasm { + +// +// A map of each index to all those it is equivalent to, and some helpers. +// +struct EquivalentSets { + // A set of indexes. + typedef std::unordered_set<Index> Set; + + std::unordered_map<Index, std::shared_ptr<Set>> indexSets; + + // Clears the state completely, removing all equivalences. + void clear() { + indexSets.clear(); + } + + // Resets an index, removing any equivalences between it and others. + void reset(Index index) { + auto iter = indexSets.find(index); + if (iter != indexSets.end()) { + auto& set = iter->second; + assert(!set->empty()); // can't be empty - we are equal to ourselves! + if (set->size() > 1) { + // We are not the last item, fix things up + set->erase(index); + } + indexSets.erase(iter); + } + } + + // Adds a new equivalence between two indexes. + // `justReset` is an index that was just reset, and has no + // equivalences. `other` may have existing equivalences. + void add(Index justReset, Index other) { + auto iter = indexSets.find(other); + if (iter != indexSets.end()) { + auto& set = iter->second; + set->insert(justReset); + indexSets[justReset] = set; + } else { + auto set = std::make_shared<Set>(); + set->insert(justReset); + set->insert(other); + indexSets[justReset] = set; + indexSets[other] = set; + } + } + + // Checks whether two indexes contain the same data. + bool check(Index a, Index b) { + if (a == b) return true; + if (auto* set = getEquivalents(a)) { + if (set->find(b) != set->end()) { + return true; + } + } + return false; + } + + // Returns the equivalent set, or nullptr + Set* getEquivalents(Index index) { + auto iter = indexSets.find(index); + if (iter != indexSets.end()) { + return iter->second.get(); + } + return nullptr; + } +}; + +} // namespace wasm + +#endif // wasm_ir_equivalent_sets_h + diff --git a/src/passes/CoalesceLocals.cpp b/src/passes/CoalesceLocals.cpp index ef0699436..63912db19 100644 --- a/src/passes/CoalesceLocals.cpp +++ b/src/passes/CoalesceLocals.cpp @@ -342,17 +342,13 @@ void CoalesceLocals::applyIndices(std::vector<Index>& indices, Expression* root) if (action.isGet()) { auto* get = (*action.origin)->cast<GetLocal>(); get->index = indices[get->index]; - } else { + } else if (action.isSet()) { auto* set = (*action.origin)->cast<SetLocal>(); set->index = indices[set->index]; // in addition, we can optimize out redundant copies and ineffective sets GetLocal* get; if ((get = set->value->dynCast<GetLocal>()) && get->index == set->index) { - if (set->isTee()) { - *action.origin = get; - } else { - ExpressionManipulator::nop(set); - } + action.removeCopy(); continue; } // remove ineffective actions diff --git a/src/passes/ConstHoisting.cpp b/src/passes/ConstHoisting.cpp index e808f7af4..e598a2995 100644 --- a/src/passes/ConstHoisting.cpp +++ b/src/passes/ConstHoisting.cpp @@ -22,7 +22,12 @@ // WARNING: this often shrinks code size, but can *increase* gzip // size. apparently having the constants in their proper // places lets them be compressed better, across -// functions, etc. +// functions, etc. TODO investigate +// TODO: hoisting a zero does not even require an initial set! +// TODO: hoisting a float or double zero is especially beneficial as there +// is no LEB compression for them, and no need for the set, so +// each f32.const is 5 bytes and f64.const is 9 bytes, while it is +// <= 1 byte to declare the local and 2-3 to use it! // #include <map> diff --git a/src/passes/SimplifyLocals.cpp b/src/passes/SimplifyLocals.cpp index af5a9a4c4..27d868da1 100644 --- a/src/passes/SimplifyLocals.cpp +++ b/src/passes/SimplifyLocals.cpp @@ -52,30 +52,12 @@ #include <pass.h> #include <ir/count.h> #include <ir/effects.h> +#include "ir/equivalent_sets.h" #include <ir/find_all.h> #include <ir/manipulation.h> namespace wasm { -// Helper classes - -struct SetLocalRemover : public PostWalker<SetLocalRemover> { - std::vector<Index>* numGetLocals; - - void visitSetLocal(SetLocal *curr) { - if ((*numGetLocals)[curr->index] == 0) { - auto* value = curr->value; - if (curr->isTee()) { - replaceCurrent(value); - } else { - Drop* drop = ExpressionManipulator::convert<SetLocal, Drop>(curr); - drop->value = value; - drop->finalize(); - } - } - } -}; - // Main class template<bool allowTee = true, bool allowStructure = true, bool allowNesting = true> @@ -130,6 +112,7 @@ struct SimplifyLocals : public WalkerPass<LinearExecutionWalker<SimplifyLocals<a GetLocalCounter getCounter; static void doNoteNonLinear(SimplifyLocals<allowTee, allowStructure, allowNesting>* self, Expression** currp) { + // Main processing. auto* curr = *currp; if (curr->is<Break>()) { auto* br = curr->cast<Break>(); @@ -583,69 +566,219 @@ struct SimplifyLocals : public WalkerPass<LinearExecutionWalker<SimplifyLocals<a // output patterns. further cycles do fully general sinking. firstCycle = true; do { - anotherCycle = false; - // main operation - WalkerPass<LinearExecutionWalker<SimplifyLocals<allowTee, allowStructure, allowNesting>>>::doWalkFunction(func); - // enlarge blocks that were marked, for the next round - if (blocksToEnlarge.size() > 0) { - for (auto* block : blocksToEnlarge) { - block->list.push_back(this->getModule()->allocator.template alloc<Nop>()); + anotherCycle = runMainOptimizations(func); + // After the special first cycle, definitely do another. + if (firstCycle) { + firstCycle = false; + anotherCycle = true; + } + // If we are all done, run the final optimizations, which may suggest we + // can do more work. + if (!anotherCycle) { + // Don't run multiple cycles of just the final optimizations - in + // particular, get canonicalization is not guaranteed to converge. + // Instead, if final opts help then see if they enable main + // opts; continue only if they do. In other words, do not end up + // doing final opts again and again when no main opts are being + // enabled. + if (runFinalOptimizations(func) && runMainOptimizations(func)) { + anotherCycle = true; } - blocksToEnlarge.clear(); + } + } while (anotherCycle); + } + + bool runMainOptimizations(Function* func) { + anotherCycle = false; + WalkerPass<LinearExecutionWalker<SimplifyLocals<allowTee, allowStructure, allowNesting>>>::doWalkFunction(func); + // enlarge blocks that were marked, for the next round + if (blocksToEnlarge.size() > 0) { + for (auto* block : blocksToEnlarge) { + block->list.push_back(this->getModule()->allocator.template alloc<Nop>()); + } + blocksToEnlarge.clear(); + anotherCycle = true; + } + // enlarge ifs that were marked, for the next round + if (ifsToEnlarge.size() > 0) { + for (auto* iff : ifsToEnlarge) { + auto ifTrue = Builder(*this->getModule()).blockify(iff->ifTrue); + iff->ifTrue = ifTrue; + if (ifTrue->list.size() == 0 || !ifTrue->list.back()->template is<Nop>()) { + ifTrue->list.push_back(this->getModule()->allocator.template alloc<Nop>()); + } + auto ifFalse = Builder(*this->getModule()).blockify(iff->ifFalse); + iff->ifFalse = ifFalse; + if (ifFalse->list.size() == 0 || !ifFalse->list.back()->template is<Nop>()) { + ifFalse->list.push_back(this->getModule()->allocator.template alloc<Nop>()); + } + } + ifsToEnlarge.clear(); + anotherCycle = true; + } + // handle loops. note that a lot happens in this pass, and we can't just modify + // set_locals when we see a loop - it might be tracked - and we can't also just + // assume our loop didn't move either (might be in a block now). So we do this + // when all other work is done - as loop return values are rare, that is fine. + if (!anotherCycle) { + for (auto* currp : loops) { + auto* curr = (*currp)->template cast<Loop>(); + assert(canUseLoopReturnValue(curr)); + auto* set = curr->body->template cast<SetLocal>(); + curr->body = set->value; + set->value = curr; + curr->finalize(curr->body->type); + *currp = set; anotherCycle = true; } - // enlarge ifs that were marked, for the next round - if (ifsToEnlarge.size() > 0) { - for (auto* iff : ifsToEnlarge) { - auto ifTrue = Builder(*this->getModule()).blockify(iff->ifTrue); - iff->ifTrue = ifTrue; - if (ifTrue->list.size() == 0 || !ifTrue->list.back()->template is<Nop>()) { - ifTrue->list.push_back(this->getModule()->allocator.template alloc<Nop>()); + } + loops.clear(); + // clean up + sinkables.clear(); + blockBreaks.clear(); + unoptimizableBlocks.clear(); + return anotherCycle; + } + + bool runFinalOptimizations(Function* func) { + // Finally, after optimizing a function we can do some additional + // optimization. + getCounter.analyze(func); + // Remove equivalent copies - assignment of + // a local to another local that already contains that value. Note that + // we do that at the very end, and only after structure, as removing + // the copy here: + // (if + // (get_local $var$0) + // (set_local $var$0 + // (get_local $var$0) + // ) + // (set_local $var$0 + // (i32.const 208) + // ) + // ) + // will inhibit us creating an if return value. + struct EquivalentOptimizer : public LinearExecutionWalker<EquivalentOptimizer> { + std::vector<Index>* numGetLocals; + bool removeEquivalentSets; + Module* module; + + bool anotherCycle = false; + + // We track locals containing the same value. + EquivalentSets equivalences; + + static void doNoteNonLinear(EquivalentOptimizer* self, Expression** currp) { + // TODO do this across non-linear paths too, in coalesce-locals perhaps? (would inhibit structure + // opts here, though. + self->equivalences.clear(); + } + + void visitSetLocal(SetLocal *curr) { + // Remove trivial copies, even through a tee + auto* value = curr->value; + while (auto* subSet = value->dynCast<SetLocal>()) { + value = subSet->value; + } + if (auto* get = value->dynCast<GetLocal>()) { + if (equivalences.check(curr->index, get->index)) { + // This is an unnecessary copy! + if (removeEquivalentSets) { + if (curr->isTee()) { + this->replaceCurrent(value); + } else if (curr->value->is<GetLocal>()) { + ExpressionManipulator::nop(curr); + } else { + this->replaceCurrent(Builder(*module).makeDrop(value)); + } + anotherCycle = true; + } + } else { + // There is a new equivalence now. + equivalences.reset(curr->index); + equivalences.add(curr->index, get->index); } - auto ifFalse = Builder(*this->getModule()).blockify(iff->ifFalse); - iff->ifFalse = ifFalse; - if (ifFalse->list.size() == 0 || !ifFalse->list.back()->template is<Nop>()) { - ifFalse->list.push_back(this->getModule()->allocator.template alloc<Nop>()); + } else { + // A new value is assigned here. + equivalences.reset(curr->index); + } + } + + void visitGetLocal(GetLocal *curr) { + // Canonicalize gets: if some are equivalent, then we can pick more + // then one, and other passes may benefit from having more uniformity. + if (auto *set = equivalences.getEquivalents(curr->index)) { + // Pick the index with the most uses - maximizing the chance to + // lower one's uses to zero. + // Helper method that returns the # of gets *ignoring the current get*, + // as we want to see what is best overall, treating this one as + // to be decided upon. + auto getNumGetsIgnoringCurr = [&](Index index) { + auto ret = (*numGetLocals)[index]; + if (index == curr->index) { + assert(ret >= 1); + ret--; + } + return ret; + }; + Index best = -1; + for (auto index : *set) { + if (best == Index(-1) || + getNumGetsIgnoringCurr(index) > getNumGetsIgnoringCurr(best)) { + best = index; + } + } + assert(best != Index(-1)); + // Due to ordering, the best index may be different from us but have + // the same # of locals - make sure we actually improve. + if (best != curr->index && + getNumGetsIgnoringCurr(best) > getNumGetsIgnoringCurr(curr->index)) { + // Update the get counts. + (*numGetLocals)[best]++; + assert((*numGetLocals)[curr->index] >= 1); + (*numGetLocals)[curr->index]--; + // Make the change. + curr->index = best; + anotherCycle = true; } } - ifsToEnlarge.clear(); - anotherCycle = true; } - // handle loops. note that a lot happens in this pass, and we can't just modify - // set_locals when we see a loop - it might be tracked - and we can't also just - // assume our loop didn't move either (might be in a block now). So we do this - // when all other work is done - as loop return values are rare, that is fine. - if (!anotherCycle) { - for (auto* currp : loops) { - auto* curr = (*currp)->template cast<Loop>(); - assert(canUseLoopReturnValue(curr)); - auto* set = curr->body->template cast<SetLocal>(); - curr->body = set->value; - set->value = curr; - curr->finalize(curr->body->type); - *currp = set; + }; + + EquivalentOptimizer eqOpter; + eqOpter.module = this->getModule(); + eqOpter.numGetLocals = &getCounter.num; + eqOpter.removeEquivalentSets = allowStructure; + eqOpter.walkFunction(func); + + // We may have already had a local with no uses, or we may have just + // gotten there thanks to the EquivalentOptimizer. If there are such + // locals, remove all their sets. + struct UneededSetRemover : public PostWalker<UneededSetRemover> { + std::vector<Index>* numGetLocals; + + bool anotherCycle = false; + + void visitSetLocal(SetLocal *curr) { + if ((*numGetLocals)[curr->index] == 0) { + auto* value = curr->value; + if (curr->isTee()) { + this->replaceCurrent(value); + } else { + Drop* drop = ExpressionManipulator::convert<SetLocal, Drop>(curr); + drop->value = value; + drop->finalize(); + } anotherCycle = true; } } - loops.clear(); - // clean up - sinkables.clear(); - blockBreaks.clear(); - unoptimizableBlocks.clear(); - if (firstCycle) { - firstCycle = false; - anotherCycle = true; - } - } while (anotherCycle); - // Finally, after optimizing a function, we can see if we have set_locals - // for a local with no remaining gets, in which case, we can - // remove the set. - // First, recount get_locals - getCounter.analyze(func); - // Second, remove unneeded sets - SetLocalRemover remover; - remover.numGetLocals = &getCounter.num; - remover.walkFunction(func); + }; + + UneededSetRemover setRemover; + setRemover.numGetLocals = &getCounter.num; + setRemover.walkFunction(func); + + return eqOpter.anotherCycle || setRemover.anotherCycle; } bool canUseLoopReturnValue(Loop* curr) { diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 2d93d99ae..5c43aaa4e 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -198,7 +198,11 @@ void PassRunner::addDefaultGlobalOptimizationPostPasses() { static void dumpWast(Name name, Module* wasm) { // write out the wast static int counter = 0; - auto fullName = std::string("byn-") + std::to_string(counter++) + "-" + name.str + ".wasm"; + std::string numstr = std::to_string(counter++); + while (numstr.size() < 3) { + numstr = '0' + numstr; + } + auto fullName = std::string("byn-") + numstr + "-" + name.str + ".wasm"; Colors::disable(); ModuleWriter writer; writer.setBinary(false); // TODO: add an option for binary diff --git a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp index 07f0a634b..7246a15a6 100644 --- a/src/wasm/wasm-validator.cpp +++ b/src/wasm/wasm-validator.cpp @@ -474,6 +474,7 @@ void FunctionValidator::visitCallIndirect(CallIndirect* curr) { void FunctionValidator::visitGetLocal(GetLocal* curr) { shouldBeTrue(curr->index < getFunction()->getNumLocals(), curr, "get_local index must be small enough"); shouldBeTrue(isConcreteType(curr->type), curr, "get_local must have a valid type - check what you provided when you constructed the node"); + shouldBeTrue(curr->type == getFunction()->getLocalType(curr->index), curr, "get_local must have proper type"); } void FunctionValidator::visitSetLocal(SetLocal* curr) { |