diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/cfg/cfg-traversal.h | 38 | ||||
-rw-r--r-- | src/passes/CoalesceLocals.cpp | 428 | ||||
-rw-r--r-- | src/support/timing.h | 55 |
3 files changed, 314 insertions, 207 deletions
diff --git a/src/cfg/cfg-traversal.h b/src/cfg/cfg-traversal.h index b862c9403..74afc7525 100644 --- a/src/cfg/cfg-traversal.h +++ b/src/cfg/cfg-traversal.h @@ -289,15 +289,20 @@ struct CFGWalker : public PostWalker<SubType, VisitorType> { std::map<BasicBlock*, size_t> debugIds; - void dumpCFG(std::string message, Function* func) { - std::cout << "<==\nCFG [" << message << "]:\n"; + void generateDebugIds() { + if (debugIds.size() > 0) return; for (auto& block : basicBlocks) { debugIds[block.get()] = debugIds.size(); } + } + + void dumpCFG(std::string message) { + std::cout << "<==\nCFG [" << message << "]:\n"; + generateDebugIds(); for (auto& block : basicBlocks) { assert(debugIds.count(block.get()) > 0); std::cout << " block " << debugIds[block.get()] << ":\n"; - block->contents.dump(func); + block->contents.dump(static_cast<SubType*>(this)->getFunction()); for (auto& in : block->in) { assert(debugIds.count(in) > 0); assert(std::find(in->out.begin(), in->out.end(), block.get()) != in->out.end()); // must be a parallel link back @@ -307,9 +312,36 @@ struct CFGWalker : public PostWalker<SubType, VisitorType> { std::cout << " out: " << debugIds[out] << "\n"; assert(std::find(out->in.begin(), out->in.end(), block.get()) != out->in.end()); // must be a parallel link back } + checkDuplicates(block->in); + checkDuplicates(block->out); } std::cout << "==>\n"; } + +private: + // links in out and in must be unique + void checkDuplicates(std::vector<BasicBlock*>& list) { + std::unordered_set<BasicBlock*> seen; + for (auto* curr : list) { + assert(seen.count(curr) == 0); + seen.insert(curr); + } + } + + void removeLink(std::vector<BasicBlock*>& list, BasicBlock* toRemove) { + if (list.size() == 1) { + list.clear(); + return; + } + for (size_t i = 0; i < list.size(); i++) { + if (list[i] == toRemove) { + list[i] = list.back(); + list.pop_back(); + return; + } + } + WASM_UNREACHABLE(); + } }; } // namespace wasm diff --git a/src/passes/CoalesceLocals.cpp b/src/passes/CoalesceLocals.cpp index 5deb81c45..05756556b 100644 --- a/src/passes/CoalesceLocals.cpp +++ b/src/passes/CoalesceLocals.cpp @@ -29,6 +29,9 @@ #include "pass.h" #include "ast_utils.h" #include "cfg/cfg-traversal.h" +#ifdef CFG_PROFILE +#include "support/timing.h" +#endif namespace wasm { @@ -172,244 +175,261 @@ struct CoalesceLocals : public WalkerPass<CFGWalker<CoalesceLocals, Visitor<Coal // main entry point - void walk(Expression*& root) { - numLocals = getFunction()->getNumLocals(); - // collect initial liveness info - WalkerPass<CFGWalker<CoalesceLocals, Visitor<CoalesceLocals>, Liveness>>::walk(root); - // ignore links to dead blocks, so they don't confuse us and we can see their stores are all ineffective - liveBlocks = findLiveBlocks(); - unlinkDeadBlocks(liveBlocks); -#ifdef CFG_DEBUG - dumpCFG("the cfg", getFunction()); -#endif - // flow liveness across blocks - flowLiveness(); - // pick new indices - auto indices = pickIndices(); - // apply indices - applyIndices(indices, root); - } + void walk(Expression*& root); + + void flowLiveness(); + + // merge starts of a list of blocks, adding new interferences as necessary. return + // whether anything changed vs an old state (which indicates further processing is necessary). + bool mergeStartsAndCheckChange(std::vector<BasicBlock*>& blocks, LocalSet& old, LocalSet& ret); - // inteference state + void scanLivenessThroughActions(std::vector<Action>& actions, LocalSet& live); - std::vector<bool> interferences; + std::vector<Index> pickIndices(); // returns a vector of oldIndex => newIndex + + void applyIndices(std::vector<Index>& indices, Expression* root); + + // interference state + + std::vector<bool> interferences; // canonicalized - accesses should check (low, high) std::unordered_set<BasicBlock*> liveBlocks; void interfere(Index i, Index j) { if (i == j) return; -#ifdef CFG_DEBUG - if (!interferences[i + j * numLocals]) { - std::cout << getFunction()->name << ": interfere " << getFunction()->getLocalName(std::min(i, j)) << " : " << getFunction()->getLocalName(std::max(i, j)) << "\n"; - } -#endif - interferences[i + j * numLocals] = 1; - interferences[j + i * numLocals] = 1; + interferences[std::min(i, j) * numLocals + std::max(i, j)] = 1; } - void flowLiveness() { - interferences.resize(numLocals * numLocals); - std::fill(interferences.begin(), interferences.end(), 0); - // keep working while stuff is flowing - std::vector<BasicBlock*> queue; // TODO set to avoid inserting same block more than once at a time period. TODO optimize, an order might be more efficient - for (auto& curr : basicBlocks) { - if (liveBlocks.count(curr.get()) == 0) continue; // ignore dead blocks - queue.push_back(curr.get()); - // do the first scan through the block, starting with nothing live at the end, and updating the liveness at the start - scanLivenessThroughActions(curr->contents.actions, curr->contents.start); - } - // at every point in time, we assume we already noted interferences between things already known alive at the end, and scanned back throught the block using that - while (queue.size() > 0) { - auto* curr = queue.back(); // TODO: order? - queue.pop_back(); - LocalSet live; - if (!mergeStartsAndCheckChange(curr->out, curr->contents.end, live)) continue; + void interfereLowHigh(Index low, Index high) { // optimized version where you know that low < high + assert(low < high); + interferences[low * numLocals + high] = 1; + } + + bool interferes(Index i, Index j) { + return interferences[std::min(i, j) * numLocals + std::max(i, j)]; + } +}; + +void CoalesceLocals::walk(Expression*& root) { + numLocals = getFunction()->getNumLocals(); + // collect initial liveness info + WalkerPass<CFGWalker<CoalesceLocals, Visitor<CoalesceLocals>, Liveness>>::walk(root); + // ignore links to dead blocks, so they don't confuse us and we can see their stores are all ineffective + liveBlocks = findLiveBlocks(); + unlinkDeadBlocks(liveBlocks); #ifdef CFG_DEBUG - std::cout << "change noticed at end of " << debugIds[curr] << " from " << curr->contents.end.size() << " to " << live.size() << " (out of " << numLocals << ")\n"; + dumpCFG("the cfg"); +#endif + // flow liveness across blocks +#ifdef CFG_PROFILE + static Timer timer("flow"); + timer.start(); +#endif + flowLiveness(); +#ifdef CFG_PROFILE + timer.stop(); + timer.dump(); #endif - assert(curr->contents.end.size() < live.size()); - curr->contents.end = live; - scanLivenessThroughActions(curr->contents.actions, live); - // liveness is now calculated at the start. if something - // changed, all predecessor blocks need recomputation - if (curr->contents.start == live) continue; + // pick new indices + auto indices = pickIndices(); + // apply indices + applyIndices(indices, root); +} + +void CoalesceLocals::flowLiveness() { + interferences.resize(numLocals * numLocals); + std::fill(interferences.begin(), interferences.end(), 0); + // keep working while stuff is flowing + std::unordered_set<BasicBlock*> queue; + for (auto& curr : basicBlocks) { + if (liveBlocks.count(curr.get()) == 0) continue; // ignore dead blocks + queue.insert(curr.get()); + // do the first scan through the block, starting with nothing live at the end, and updating the liveness at the start + scanLivenessThroughActions(curr->contents.actions, curr->contents.start); + } + // at every point in time, we assume we already noted interferences between things already known alive at the end, and scanned back throught the block using that + while (queue.size() > 0) { + auto iter = queue.begin(); + auto* curr = *iter; + queue.erase(iter); + LocalSet live; + if (!mergeStartsAndCheckChange(curr->out, curr->contents.end, live)) continue; #ifdef CFG_DEBUG - std::cout << "change noticed at start of " << debugIds[curr] << " from " << curr->contents.start.size() << " to " << live.size() << ", more work to do\n"; + std::cout << "change noticed at end of " << debugIds[curr] << " from " << curr->contents.end.size() << " to " << live.size() << " (out of " << numLocals << ")\n"; #endif - assert(curr->contents.start.size() < live.size()); - curr->contents.start = live; - for (auto* in : curr->in) { - queue.push_back(in); -#ifdef CFG_DEBUG_ORDER - // alternative code for changing the flow order; results must be the same, as we detect a property of the graph. - if (queue.empty()) { - queue.push_back(in); - } else { - //abort(); - auto* front = queue[0]; - queue[0] = in; - queue.push_back(front); - } + assert(curr->contents.end.size() < live.size()); + curr->contents.end = live; + scanLivenessThroughActions(curr->contents.actions, live); + // liveness is now calculated at the start. if something + // changed, all predecessor blocks need recomputation + if (curr->contents.start == live) continue; +#ifdef CFG_DEBUG + std::cout << "change noticed at start of " << debugIds[curr] << " from " << curr->contents.start.size() << " to " << live.size() << ", more work to do\n"; #endif - } + assert(curr->contents.start.size() < live.size()); + curr->contents.start = live; + for (auto* in : curr->in) { + queue.insert(in); } - // live locals at the entry block include params, obviously, but also - // vars, in which case the 0-init value is actually used. + } + // live locals at the entry block include params, obviously, but also + // vars, in which case the 0-init value is actually used. #ifdef CFG_DEBUG - std::hash<std::vector<bool>> hasher; - std::cout << getFunction()->name << ": interference hash: " << hasher(*(std::vector<bool>*)&interferences) << "\n"; - for (size_t i = 0; i < numLocals; i++) { - std::cout << "int for " << getFunction()->getLocalName(i) << " [" << i << "]: "; - for (size_t j = 0; j < numLocals; j++) { - assert(interferences[i * numLocals + j] == interferences[j * numLocals + i]); - if (interferences[i * numLocals + j]) std::cout << getFunction()->getLocalName(j) << " "; - } - std::cout << "\n"; + std::hash<std::vector<bool>> hasher; + std::cout << getFunction()->name << ": interference hash: " << hasher(*(std::vector<bool>*)&interferences) << "\n"; + for (size_t i = 0; i < numLocals; i++) { + std::cout << "int for " << getFunction()->getLocalName(i) << " [" << i << "]: "; + for (size_t j = 0; j < numLocals; j++) { + if (interferes(i, j)) std::cout << getFunction()->getLocalName(j) << " "; } + std::cout << "\n"; + } #endif +} + +// merge starts of a list of blocks, adding new interferences as necessary. return +// whether anything changed vs an old state (which indicates further processing is necessary). +bool CoalesceLocals::mergeStartsAndCheckChange(std::vector<BasicBlock*>& blocks, LocalSet& old, LocalSet& ret) { + // merge all the live locals, and add interferences that show up from the merging. + // we can assume that locals live together already are already known to be in conflict. + if (blocks.size() == 0) return false; + ret = blocks[0]->contents.start; + if (blocks.size() == 1) { + // new interferences are impossible, they would have already been in conflict in the single predecessor. + return old != ret; } - - // merge starts of a list of blocks, adding new interferences as necessary. return - // whether anything changed vs an old state (which indicates further processing is necessary). - bool mergeStartsAndCheckChange(std::vector<BasicBlock*>& blocks, LocalSet& old, LocalSet& ret) { - // merge all the live locals, and add interferences that show up from the merging. - // we can assume that locals live together already are already known to be in conflict. - if (blocks.size() == 0) return false; - ret = blocks[0]->contents.start; - if (blocks.size() == 1) { - // new interferences are impossible, they would have already been in conflict in the single predecessor. - return old != ret; - } - // more than one, so we must merge - for (size_t i = 1; i < blocks.size(); i++) { - ret = ret.merge(blocks[i]->contents.start); + // more than one, so we must merge + for (size_t i = 1; i < blocks.size(); i++) { + ret = ret.merge(blocks[i]->contents.start); + } + // If there is no change in the merged result, then no new conflicts are possible. + // Assume the opposite, that we would be missing a conflict between x and y. Then + // since the merged result has not changed, x and y were present before as well. + // If they were present in the same origin block, then they were already in + // conflict there, and it is not a new conflict. If not, then they each arrive + // from different origins, with x arriving from X = { b_0..b_i } and y arriving from + // Y = { b_i+1..b_j }, where all those blocks are unique (since x and y never arrive from + // the same block, by assumption). Livenesses are monotonic, i.e., flowing only adds + // locals, never removes, so compared to the past state, we only added to X and Y. + // Mark the past arrivals X' and Y', and note that those two cannot be empty, since + // by assumption the merged result has not changed - x and y were already present + // before. But that means that x and y were already in conflict in the past, so + // this is not a new conflict. + if (ret == old) return false; + // add conflicts + size_t size = ret.size(); + for (size_t i = 0; i < size; i++) { + for (size_t j = i + 1; j < size; j++) { + interfereLowHigh(ret[i], ret[j]); } - // If there is no change in the merged result, then no new conflicts are possible. - // Assume the opposite, that we would be missing a conflict between x and y. Then - // since the merged result has not changed, x and y were present before as well. - // If they were present in the same origin block, then they were already in - // conflict there, and it is not a new conflict. If not, then they each arrive - // from different origins, with x arriving from X = { b_0..b_i } and y arriving from - // Y = { b_i+1..b_j }, where all those blocks are unique (since x and y never arrive from - // the same block, by assumption). Livenesses are monotonic, i.e., flowing only adds - // locals, never removes, so compared to the past state, we only added to X and Y. - // Mark the past arrivals X' and Y', and note that those two cannot be empty, since - // by assumption the merged result has not changed - x and y were already present - // before. But that means that x and y were already in conflict in the past, so - // this is not a new conflict. - if (ret == old) return false; - // add conflicts - for (auto i : ret) { - for (auto j : ret) { - interfere(i, j); + } + return true; +} + +void CoalesceLocals::scanLivenessThroughActions(std::vector<Action>& actions, LocalSet& live) { + // move towards the front + for (int i = int(actions.size()) - 1; i >= 0; i--) { + auto& action = actions[i]; + if (action.isGet()) { + // new live local, interferes with all the rest + for (auto i : live) { + interfere(i, action.index); + } + live.insert(action.index); + assert(live.size() <= numLocals); + } else { + if (live.erase(action.index)) { + action.effective = true; } } - return true; } - - void scanLivenessThroughActions(std::vector<Action>& actions, LocalSet& live) { - // move towards the front - for (int i = int(actions.size()) - 1; i >= 0; i--) { - auto& action = actions[i]; - if (action.isGet()) { - // new live local, interferes with all the rest - for (auto i : live) { - interfere(i, action.index); - } - live.insert(action.index); - assert(live.size() <= numLocals); - } else { - if (live.erase(action.index)) { - action.effective = true; - } - } +} + +// Indices decision making + +std::vector<Index> CoalesceLocals::pickIndices() { // returns a vector of oldIndex => newIndex + // simple greedy coloring + // TODO: multiple orderings + // TODO: take into account eliminated copies + // TODO: take into account distribution (99-1 is better than 50-50 with two registers, for gzip) + std::vector<Index> indices; // old => new + std::vector<WasmType> types; + std::vector<bool> newInterferences; // new index * numLocals => list of all interferences of locals merged to it + indices.resize(numLocals); + types.resize(numLocals); + newInterferences.resize(numLocals * numLocals); + std::fill(newInterferences.begin(), newInterferences.end(), 0); + Index nextFree = 0; + // we can't reorder parameters, they are fixed in order, and cannot coalesce + auto numParams = getFunction()->getNumParams(); + Index i = 0; + for (; i < numParams; i++) { + indices[i] = i; + types[i] = getFunction()->getLocalType(i); + for (size_t j = 0; j < numLocals; j++) { + newInterferences[numLocals * i + j] = interferes(i, j); } + nextFree++; } - - // Indices decision making - - std::vector<Index> pickIndices() { // returns a vector of oldIndex => newIndex - // simple greedy coloring - // TODO: multiple orderings - // TODO: take into account eliminated copies - // TODO: take into account distribution (99-1 is better than 50-50 with two registers, for gzip) - std::vector<Index> indices; // old => new - std::vector<WasmType> types; - std::vector<bool> newInterferences; // new index * numLocals => list of all interferences of locals merged to it - indices.resize(numLocals); - types.resize(numLocals); - newInterferences.resize(numLocals * numLocals); - std::fill(newInterferences.begin(), newInterferences.end(), 0); - Index nextFree = 0; - // we can't reorder parameters, they are fixed in order, and cannot coalesce - auto numParams = getFunction()->getNumParams(); - Index i = 0; - for (; i < numParams; i++) { - indices[i] = i; - types[i] = getFunction()->getLocalType(i); - std::copy(interferences.begin() + numLocals * i, interferences.begin() + numLocals * (i + 1), newInterferences.begin() + numLocals * i); + for (; i < numLocals; i++) { + Index found = -1; + for (Index j = 0; j < nextFree; j++) { + if (!newInterferences[j * numLocals + i] && getFunction()->getLocalType(i) == types[j]) { + indices[i] = found = j; + break; + } + } + if (found == Index(-1)) { + indices[i] = found = nextFree; + types[found] = getFunction()->getLocalType(i); nextFree++; } - for (; i < numLocals; i++) { - Index found = -1; - for (Index j = 0; j < nextFree; j++) { - if (!newInterferences[j * numLocals + i] && getFunction()->getLocalType(i) == types[j]) { - indices[i] = found = j; - break; - } - } - if (found == Index(-1)) { - indices[i] = found = nextFree; - types[found] = getFunction()->getLocalType(i); - nextFree++; - } - // merge new interferences for the new index - for (size_t j = 0; j < numLocals; j++) { - newInterferences[found * numLocals + j] = newInterferences[found * numLocals + j] | interferences[i * numLocals + j]; - } + // merge new interferences for the new index + for (size_t j = 0; j < numLocals; j++) { + newInterferences[found * numLocals + j] = newInterferences[found * numLocals + j] | interferes(i, j); } - return indices; } + return indices; +} - void applyIndices(std::vector<Index>& indices, Expression* root) { - assert(indices.size() == numLocals); - for (auto& curr : basicBlocks) { - auto& actions = curr->contents.actions; - for (auto& action : actions) { - if (action.isGet()) { - auto* get = (*action.origin)->cast<GetLocal>(); - get->index = indices[get->index]; - } else { - auto* set = (*action.origin)->cast<SetLocal>(); - set->index = indices[set->index]; - // in addition, we can optimize out redundant copies and ineffective sets - GetLocal* get; - if ((get = set->value->dynCast<GetLocal>()) && get->index == set->index) { - *action.origin = get; // further optimizations may get rid of the get, if this is in a place where the output does not matter - } else if (!action.effective) { - *action.origin = set->value; // value may have no side effects, further optimizations can eliminate it - } +void CoalesceLocals::applyIndices(std::vector<Index>& indices, Expression* root) { + assert(indices.size() == numLocals); + for (auto& curr : basicBlocks) { + auto& actions = curr->contents.actions; + for (auto& action : actions) { + if (action.isGet()) { + auto* get = (*action.origin)->cast<GetLocal>(); + get->index = indices[get->index]; + } else { + auto* set = (*action.origin)->cast<SetLocal>(); + set->index = indices[set->index]; + // in addition, we can optimize out redundant copies and ineffective sets + GetLocal* get; + if ((get = set->value->dynCast<GetLocal>()) && get->index == set->index) { + *action.origin = get; // further optimizations may get rid of the get, if this is in a place where the output does not matter + } else if (!action.effective) { + *action.origin = set->value; // value may have no side effects, further optimizations can eliminate it } } } - // update type list - auto numParams = getFunction()->getNumParams(); - Index newNumLocals = 0; - for (auto index : indices) { - newNumLocals = std::max(newNumLocals, index + 1); - } - auto oldVars = getFunction()->vars; - getFunction()->vars.resize(newNumLocals - numParams); - for (size_t index = numParams; index < numLocals; index++) { - Index newIndex = indices[index]; - if (newIndex >= numParams) { - getFunction()->vars[newIndex - numParams] = oldVars[index - numParams]; - } + } + // update type list + auto numParams = getFunction()->getNumParams(); + Index newNumLocals = 0; + for (auto index : indices) { + newNumLocals = std::max(newNumLocals, index + 1); + } + auto oldVars = getFunction()->vars; + getFunction()->vars.resize(newNumLocals - numParams); + for (size_t index = numParams; index < numLocals; index++) { + Index newIndex = indices[index]; + if (newIndex >= numParams) { + getFunction()->vars[newIndex - numParams] = oldVars[index - numParams]; } - // names are gone - getFunction()->localNames.clear(); - getFunction()->localIndices.clear(); } -}; + // names are gone + getFunction()->localNames.clear(); + getFunction()->localIndices.clear(); +} static RegisterPass<CoalesceLocals> registerPass("coalesce-locals", "reduce # of locals by coalescing"); diff --git a/src/support/timing.h b/src/support/timing.h new file mode 100644 index 000000000..e8b48ec5f --- /dev/null +++ b/src/support/timing.h @@ -0,0 +1,55 @@ +/* + * Copyright 2016 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Timing helper +// + +#ifndef wasm_support_timing_h +#define wasm_support_timing_h + +#include <chrono> + +namespace wasm { + +class Timer { + std::string name; + std::chrono::high_resolution_clock::time_point startTime; + double total = 0; + +public: + Timer(std::string name) : name(name) {} + + void start() { + startTime = std::chrono::high_resolution_clock::now(); + } + + void stop() { + total += std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - startTime).count(); + } + + double getTotal() { + return total; + } + + void dump() { + std::cerr << "<Timer " << name << ": " << getTotal() << ">\n"; + } +}; + +} // namespace wasm + +#endif // wasm_support_timing_h |