diff options
author | Thomas Lively <tlively@google.com> | 2024-05-13 17:10:23 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-13 17:10:23 -0700 |
commit | 020d08e01ff506099c8293e69cd03f5f75f562d9 (patch) | |
tree | c61320e25d5945e9cd56a69d1fb013f867898951 /src | |
parent | 924533fbcd0181f4460a13adc5762ee52f97de58 (diff) | |
download | binaryen-020d08e01ff506099c8293e69cd03f5f75f562d9.tar.gz binaryen-020d08e01ff506099c8293e69cd03f5f75f562d9.tar.bz2 binaryen-020d08e01ff506099c8293e69cd03f5f75f562d9.zip |
Simplify scratch local calculation (#6583)
Change `countScratchLocals` to return the count and type of necessary scratch
locals. It used to encode them as keys in the global map from scratch local
types to local indices, which could not handle having more than one scratch
local of a given type and was generally harder to reason about due to its use of
global state. Take the opportunity to avoid emitting unnecessary scratch locals
for `TupleExtract` expressions that will be optimized to not use them.
Also simplify and better document the calculation of the mapping from IR indices
to binary indices for all locals, scratch and non-scratch.
Diffstat (limited to 'src')
-rw-r--r-- | src/wasm-stack.h | 9 | ||||
-rw-r--r-- | src/wasm/wasm-stack.cpp | 118 |
2 files changed, 71 insertions, 56 deletions
diff --git a/src/wasm-stack.h b/src/wasm-stack.h index ae453f9de..fc38d07a4 100644 --- a/src/wasm-stack.h +++ b/src/wasm-stack.h @@ -146,13 +146,14 @@ private: // type => number of locals of that type in the compact form std::unordered_map<Type, size_t> numLocalsByType; - void noteLocalType(Type type); + void noteLocalType(Type type, Index count = 1); // Keeps track of the binary index of the scratch locals used to lower - // tuple.extract. + // tuple.extract. If there are multiple scratch locals of the same type, they + // are contiguous and this map holds the index of the first. InsertOrderedMap<Type, Index> scratchLocals; - void countScratchLocals(); - void setScratchLocals(); + // Return the type and number of required scratch locals. + InsertOrderedMap<Type, Index> countScratchLocals(); // local.get, local.tee, and glboal.get expressions that will be followed by // tuple.extracts. We can optimize these by getting only the local for the diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp index 3cdd1f718..53a25d52b 100644 --- a/src/wasm/wasm-stack.cpp +++ b/src/wasm/wasm-stack.cpp @@ -2569,6 +2569,8 @@ void BinaryInstWriter::mapLocalsAndEmitHeader() { mappedLocals[std::make_pair(i, 0)] = i; } + auto scratches = countScratchLocals(); + // Normally we map all locals of the same type into a range of adjacent // addresses, which is more compact. However, if we need to keep DWARF valid, // do not do any reordering at all - instead, do a trivial mapping that @@ -2584,25 +2586,26 @@ void BinaryInstWriter::mapLocalsAndEmitHeader() { for (Index i = func->getVarIndexBase(); i < func->getNumLocals(); i++) { size_t size = func->getLocalType(i).size(); for (Index j = 0; j < size; j++) { - mappedLocals[std::make_pair(i, j)] = mappedIndex + j; + mappedLocals[std::make_pair(i, j)] = mappedIndex++; } - mappedIndex += size; } - countScratchLocals(); size_t numBinaryLocals = - mappedIndex - func->getVarIndexBase() + scratchLocals.size(); + mappedIndex - func->getVarIndexBase() + scratches.size(); + o << U32LEB(numBinaryLocals); + for (Index i = func->getVarIndexBase(); i < func->getNumLocals(); i++) { for (const auto& type : func->getLocalType(i)) { o << U32LEB(1); parent.writeType(type); } } - for (auto& [type, _] : scratchLocals) { - o << U32LEB(1); + for (auto& [type, count] : scratches) { + o << U32LEB(count); parent.writeType(type); - scratchLocals[type] = mappedIndex++; + scratchLocals[type] = mappedIndex; + mappedIndex += count; } return; } @@ -2612,7 +2615,10 @@ void BinaryInstWriter::mapLocalsAndEmitHeader() { noteLocalType(t); } } - countScratchLocals(); + + for (auto& [type, count] : scratches) { + noteLocalType(type, count); + } if (parent.getModule()->features.hasReferenceTypes()) { // Sort local types in a way that keeps all MVP types together and all @@ -2636,23 +2642,28 @@ void BinaryInstWriter::mapLocalsAndEmitHeader() { }); } - std::unordered_map<Type, size_t> currLocalsByType; + // Map IR (local index, tuple index) pairs to binary local indices. Since + // locals are grouped by type, start by calculating the base indices for each + // type. + std::unordered_map<Type, Index> nextFreeIndex; + Index baseIndex = func->getVarIndexBase(); + for (auto& type : localTypes) { + nextFreeIndex[type] = baseIndex; + baseIndex += numLocalsByType[type]; + } + + // Map the IR index pairs to indices. for (Index i = func->getVarIndexBase(); i < func->getNumLocals(); i++) { Index j = 0; for (const auto& type : func->getLocalType(i)) { - auto fullIndex = std::make_pair(i, j++); - Index index = func->getVarIndexBase(); - for (auto& localType : localTypes) { - if (type == localType) { - mappedLocals[fullIndex] = index + currLocalsByType[localType]; - currLocalsByType[type]++; - break; - } - index += numLocalsByType.at(localType); - } + mappedLocals[{i, j++}] = nextFreeIndex[type]++; } } - setScratchLocals(); + + // Map scratch locals to the remaining indices. + for (auto& [type, _] : scratches) { + scratchLocals[type] = nextFreeIndex[type]; + } o << U32LEB(numLocalsByType.size()); for (auto& localType : localTypes) { @@ -2661,44 +2672,47 @@ void BinaryInstWriter::mapLocalsAndEmitHeader() { } } -void BinaryInstWriter::noteLocalType(Type type) { - if (!numLocalsByType.count(type)) { +void BinaryInstWriter::noteLocalType(Type type, Index count) { + auto& num = numLocalsByType[type]; + if (num == 0) { localTypes.push_back(type); } - numLocalsByType[type]++; + num += count; } -void BinaryInstWriter::countScratchLocals() { - // Add a scratch register in `numLocalsByType` for each type of - // tuple.extract with nonzero index present. - FindAll<TupleExtract> extracts(func->body); - for (auto* extract : extracts.list) { - if (extract->type != Type::unreachable && extract->index != 0) { - scratchLocals[extract->type] = 0; - } - } - for (auto& [type, _] : scratchLocals) { - noteLocalType(type); - } - // While we have all the tuple.extracts, also find extracts of local.gets, - // local.tees, and global.gets that we can optimize. - for (auto* extract : extracts.list) { - auto* tuple = extract->tuple; - if (tuple->is<LocalGet>() || tuple->is<LocalSet>() || - tuple->is<GlobalGet>()) { - extractedGets.insert({tuple, extract->index}); - } - } -} +InsertOrderedMap<Type, Index> BinaryInstWriter::countScratchLocals() { + struct ScratchLocalFinder : PostWalker<ScratchLocalFinder> { + BinaryInstWriter& parent; + InsertOrderedMap<Type, Index> scratches; -void BinaryInstWriter::setScratchLocals() { - Index index = func->getVarIndexBase(); - for (auto& localType : localTypes) { - index += numLocalsByType[localType]; - if (scratchLocals.find(localType) != scratchLocals.end()) { - scratchLocals[localType] = index - 1; + ScratchLocalFinder(BinaryInstWriter& parent) : parent(parent) {} + + void visitTupleExtract(TupleExtract* curr) { + if (curr->type == Type::unreachable) { + // We will not emit this instruction anyway. + return; + } + // Extracts from locals or globals are optimizable and do not require + // scratch locals. Record them. + auto* tuple = curr->tuple; + if (tuple->is<LocalGet>() || tuple->is<LocalSet>() || + tuple->is<GlobalGet>()) { + parent.extractedGets.insert({tuple, curr->index}); + return; + } + // Include a scratch register for each type of tuple.extract with nonzero + // index present. + if (curr->index != 0) { + auto& count = scratches[curr->type]; + count = std::max(count, 1u); + } } - } + }; + + ScratchLocalFinder finder(*this); + finder.walk(func->body); + + return std::move(finder.scratches); } void BinaryInstWriter::emitMemoryAccess(size_t alignment, |