/* * Copyright 2017 WebAssembly Community Group participants * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // // Eliminate redundant local.sets: if a local already has a particular // value, we don't need to set it again. A common case here is loops // that start at zero, since the default value is initialized to // zero anyhow. // // A risk here is that we extend live ranges, e.g. we may use the default // value at the very end of a function, keeping that local alive throughout. // For that reason it is probably better to run this near the end of // optimization, and especially after coalesce-locals. A final vaccum // should be done after it, as this pass can leave around drop()s of // values no longer necessary. // // So far this tracks constant values, and for everything else it considers // them unique (so each local.set of a non-constant is a unique value, each // merge is a unique value, etc.; there is no sophisticated value numbering // here). // #include #include #include #include #include #include #include #include #include #include namespace wasm { // Map each local index to its current value number (which is computed in // ValueNumbering). using LocalValues = std::vector; namespace { // information in a basic block struct Info { LocalValues start, end; // the local values at the start and end of the block std::vector items; }; struct RedundantSetElimination : public WalkerPass, Info>> { bool isFunctionParallel() override { return true; } std::unique_ptr create() override { return std::make_unique(); } // Branches outside of the function can be ignored, as we only look at locals // which vanish when we leave. bool ignoreBranchesOutsideOfFunc = true; Index numLocals; // In rare cases we make a change to a type that requires a refinalize. bool refinalize = false; // cfg traversal work static void doVisitLocalGet(RedundantSetElimination* self, Expression** currp) { if (self->currBasicBlock) { self->currBasicBlock->contents.items.push_back(currp); } } static void doVisitLocalSet(RedundantSetElimination* self, Expression** currp) { if (self->currBasicBlock) { self->currBasicBlock->contents.items.push_back(currp); } } // main entry point void doWalkFunction(Function* func) { numLocals = func->getNumLocals(); if (numLocals == 0) { return; // nothing to do } // Create a unique value for use to mark unseen locations. unseenValue = valueNumbering.getUniqueValue(); // create the CFG by walking the IR CFGWalker, Info>:: doWalkFunction(func); // flow values across blocks flowValues(func); // remove redundant sets optimize(func); if (refinalize) { ReFinalize().walkFunctionInModule(func, this->getModule()); } } // Use a value numbering for the values of expressions. ValueNumbering valueNumbering; // In additon to valueNumbering, each block has values for each merge. std::unordered_map> blockMergeValues; // A value that indicates we haven't seen this location yet. Index unseenValue; Index getUniqueValue() { auto value = valueNumbering.getUniqueValue(); #ifdef RSE_DEBUG std::cout << "new unique value " << value << '\n'; #endif return value; } Index getValue(Literals lit) { auto value = valueNumbering.getValue(lit); #ifdef RSE_DEBUG std::cout << "lit value " << value << '\n'; #endif return value; } Index getBlockMergeValue(BasicBlock* block, Index index) { auto& mergeValues = blockMergeValues[block]; auto iter = mergeValues.find(index); if (iter != mergeValues.end()) { return iter->second; } #ifdef RSE_DEBUG std::cout << "new block-merge value for " << block << " : " << index << '\n'; #endif return mergeValues[index] = getUniqueValue(); } bool isBlockMergeValue(BasicBlock* block, Index index, Index value) { auto iter = blockMergeValues.find(block); if (iter == blockMergeValues.end()) { return false; } auto& mergeValues = iter->second; auto iter2 = mergeValues.find(index); if (iter2 == mergeValues.end()) { return false; } return value == iter2->second; } Index getValue(Expression* expr, LocalValues& currValues) { if (auto* get = expr->dynCast()) { // a copy of whatever that was return currValues[get->index]; } auto value = valueNumbering.getValue(expr); #ifdef RSE_DEBUG std::cout << "expr value " << value << '\n'; #endif return value; } // flowing void flowValues(Function* func) { for (auto& block : basicBlocks) { LocalValues& start = block->contents.start; start.resize(numLocals); if (block.get() == entry) { // params are complex values we can't optimize; vars are zeros for (Index i = 0; i < numLocals; i++) { auto type = func->getLocalType(i); if (func->isParam(i)) { #ifdef RSE_DEBUG std::cout << "new param value for " << i << '\n'; #endif start[i] = getUniqueValue(); } else if (!LiteralUtils::canMakeZero(type)) { #ifdef RSE_DEBUG std::cout << "new unique value for non-zeroable " << i << '\n'; #endif start[i] = getUniqueValue(); } else { start[i] = getValue(Literal::makeZeros(type)); } } } else { // other blocks have all unseen values to begin with for (Index i = 0; i < numLocals; i++) { start[i] = unseenValue; } } // the ends all begin unseen LocalValues& end = block->contents.end; end.resize(numLocals); for (Index i = 0; i < numLocals; i++) { end[i] = unseenValue; } } // keep working while stuff is flowing. we use a unique deferred queue // which ensures both FIFO and that we don't do needless work - if // A and B reach C, and both queue C, we only want to do C at the latest // time, when we have information from all those reaching it. UniqueDeferredQueue work; work.push(entry); while (!work.empty()) { auto* curr = work.pop(); #ifdef RSE_DEBUG std::cout << "flow block " << curr << '\n'; #endif // process a block: first, update its start based on those reaching it if (!curr->in.empty()) { if (curr->in.size() == 1) { // just copy the pred, nothing to merge curr->contents.start = (*curr->in.begin())->contents.end; } else { // perform a merge auto in = curr->in; for (Index i = 0; i < numLocals; i++) { auto old = curr->contents.start[i]; // If we already had a merge value here, keep it. // TODO This may have some false positives, as we may e.g. have // a single pred that first gives us x, then later y after // flow led to a merge, and we may see x and y at the same // time due to flow from a successor, and then it looks like // we need a merge but we don't. avoiding that would require // more memory and is probably not worth it, but might be // worth investigating // NB While suboptimal, this simplification provides a simple proof // of convergence. We prove that, in each fixed block+local, // the value number at the end is nondecreasing across // iterations, by induction on the iteration: // * The first iteration is on the entry block. It increases // the value number at the end from 0 (unseen) to something // else (a value number for 0 for locals, a unique value // for params; all >0). // * Induction step: assuming the property holds for all past // iterations, consider the current iteration. Of our // predecessors, those that we iterated on have the property; // those that we haven't will have 0 (unseen). // * If we assign to that local in this block, that will be // the value in the output, forever, and it is greater // than the initial value of 0. // * If we see different values coming in, we create a merge // value number. Its number is higher than everything // else since we give it the next available number, so we // do not decrease in this iteration, and we will output // the same value in the future too (here is where we use // the simplification property). // * Otherwise, we will flow the incoming value through, // and it did not decrease (by induction), so neither do // we. // Finally, given value numbers are nondecreasing, we must // converge since we only keep working as long as we see new // values at the end of a block. // // Not that we don't trust this proof, but the convergence // property (value numbers at block ends do not decrease) is // verified later down. if (isBlockMergeValue(curr, i, old)) { continue; } auto iter = in.begin(); auto value = (*iter)->contents.end[i]; iter++; while (iter != in.end()) { auto otherValue = (*iter)->contents.end[i]; if (value == unseenValue) { value = otherValue; } else if (otherValue == unseenValue) { // nothing to do, other has no information } else if (value != otherValue) { // 2 different values, this is a merged value value = getBlockMergeValue(curr, i); break; // no more work once we see a merge } iter++; } curr->contents.start[i] = value; } } } #ifdef RSE_DEBUG dump("start", curr->contents.start); #endif // flow values through it, then add those we can reach if they need an // update. auto currValues = curr->contents.start; // we'll modify this as we go auto& items = curr->contents.items; for (auto** item : items) { if (auto* set = (*item)->dynCast()) { auto* value = Properties::getFallthrough( set->value, getPassOptions(), *getModule()); currValues[set->index] = getValue(value, currValues); } } if (currValues == curr->contents.end) { // nothing changed, so no more work to do // note that the first iteration this is always not the case, // since end contains unseen (and then the comparison ends on // the first element) continue; } // update the end state and update children #ifndef NDEBUG // verify the convergence property mentioned in the NB comment // above: the value numbers at the end must be nondecreasing for (Index i = 0; i < numLocals; i++) { assert(currValues[i] >= curr->contents.end[i]); } #endif curr->contents.end.swap(currValues); #ifdef RSE_DEBUG dump("end ", curr->contents.end); #endif for (auto* next : curr->out) { work.push(next); } } } // optimizing void optimize(Function* func) { // Find which locals are refinable, that is, that when we see a global.get // of them we may consider switching to another local index that has the // same value but in a refined type. Computing which locals are relevant for // that optimization is efficient because it avoids a bunch of work below // for hashing numbers etc. std::vector isRefinable(numLocals, false); for (Index i = 0; i < numLocals; i++) { // TODO: we could also note which locals have "maximal" types, where no // other local is a refinement of them if (func->getLocalType(i).isRef()) { isRefinable[i] = true; } } // in each block, run the values through the sets, // and remove redundant sets when we see them for (auto& block : basicBlocks) { auto currValues = block->contents.start; // we'll modify this as we go auto& items = block->contents.items; // Set up the equivalences at the beginning of the block. We'll update // them as we go, so we can use them at any point in the middle. This data // structure maps a value number to the local indexes that have that // value. // // Note that the set here must be ordered to avoid nondeterminism when // picking between multiple equally-good indexes (we'll pick the first in // the iteration, which will have the lowest index). std::unordered_map> valueToLocals; assert(currValues.size() == numLocals); for (Index i = 0; i < numLocals; i++) { if (isRefinable[i]) { valueToLocals[currValues[i]].insert(i); } } for (auto** item : items) { if (auto* set = (*item)->dynCast()) { auto oldValue = currValues[set->index]; auto* value = Properties::getFallthrough( set->value, getPassOptions(), *getModule()); auto newValue = getValue(value, currValues); auto index = set->index; if (newValue == oldValue) { remove(item); } else { // update for later steps currValues[index] = newValue; if (isRefinable[index]) { valueToLocals[oldValue].erase(index); valueToLocals[newValue].insert(index); } } continue; } // For gets, see if there is another index with that value, of a more // refined type. auto* get = (*item)->dynCast(); if (!isRefinable[get->index]) { continue; } for (auto i : valueToLocals[getValue(get, currValues)]) { auto currType = func->getLocalType(get->index); auto possibleType = func->getLocalType(i); if (possibleType != currType && Type::isSubType(possibleType, currType)) { // We found an improvement! get->index = i; get->type = possibleType; refinalize = true; } } } } } void remove(Expression** item) { auto* set = (*item)->cast(); auto* value = set->value; if (!set->isTee()) { auto* drop = ExpressionManipulator::convert(set); drop->value = value; drop->finalize(); } else { // If we are replacing the set with something of a more specific type, // then we need to refinalize, for example: // // (struct.get $X 0 // (local.tee $x // (..something of type $Y, a subtype of $X..) // ) // ) // // After the replacement the struct.get will read from $Y, whose field may // have a more refined type. if (value->type != set->type) { refinalize = true; } *item = value; } } // debugging void dump(BasicBlock* block) { std::cout << "====\n"; if (block) { std::cout << "block: " << block << '\n'; for (auto* out : block->out) { std::cout << " goes to " << out << '\n'; } } for (Index i = 0; i < block->contents.start.size(); i++) { std::cout << " start[" << i << "] = " << block->contents.start[i] << '\n'; } for (auto** item : block->contents.items) { std::cout << " " << *item << '\n'; } std::cout << "====\n"; } void dump(const char* desc, LocalValues& values) { std::cout << desc << ": "; for (auto x : values) { std::cout << x << ' '; } std::cout << '\n'; } }; } // namespace Pass* createRedundantSetEliminationPass() { return new RedundantSetElimination(); } } // namespace wasm