diff options
author | Thomas Lively <tlively@google.com> | 2023-09-18 21:54:14 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-18 21:54:14 -0700 |
commit | a51bd6df919a5b79574f0996a760cc20cb05697e (patch) | |
tree | 32f67c9568aaee16b67ce0a4e6df1df2fb487036 /src/wasm/wasm-stack.cpp | |
parent | 6bab227860a6873204141e5ec9b14ed08d77fc62 (diff) | |
download | binaryen-a51bd6df919a5b79574f0996a760cc20cb05697e.tar.gz binaryen-a51bd6df919a5b79574f0996a760cc20cb05697e.tar.bz2 binaryen-a51bd6df919a5b79574f0996a760cc20cb05697e.zip |
Reland "Optimize tuple.extract of gets in BinaryInstWriter" (#5955)
In general, the binary lowering of tuple.extract expects that all the tuple
values are on top of the stack, so it inserts drops and possibly uses a scratch
local to ensure only the extracted value is left. However, when the extracted
tuple expression is a local.get, local.tee, or global.get, it's much more
efficient to change the lowering of the get or tee to ensure that only the
extracted value is on the stack to begin with. Implement that optimization in
the binary writer.
Diffstat (limited to 'src/wasm/wasm-stack.cpp')
-rw-r--r-- | src/wasm/wasm-stack.cpp | 43 |
1 file changed, 42 insertions, 1 deletion
diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp index 2042189fd..1f1141821 100644 --- a/src/wasm/wasm-stack.cpp +++ b/src/wasm/wasm-stack.cpp @@ -87,6 +87,13 @@ void BinaryInstWriter::visitCallIndirect(CallIndirect* curr) { } void BinaryInstWriter::visitLocalGet(LocalGet* curr) { + if (auto it = extractedGets.find(curr); it != extractedGets.end()) { + // We have a tuple of locals to get, but we will only end up using one of + // them, so we can just emit that one. + o << int8_t(BinaryConsts::LocalGet) + << U32LEB(mappedLocals[std::make_pair(curr->index, it->second)]); + return; + } size_t numValues = func->getLocalType(curr->index).size(); for (Index i = 0; i < numValues; ++i) { o << int8_t(BinaryConsts::LocalGet) @@ -96,14 +103,28 @@ void BinaryInstWriter::visitLocalGet(LocalGet* curr) { void BinaryInstWriter::visitLocalSet(LocalSet* curr) { size_t numValues = func->getLocalType(curr->index).size(); + // If this is a tuple, set all the elements with nonzero index. for (Index i = numValues - 1; i >= 1; --i) { o << int8_t(BinaryConsts::LocalSet) << U32LEB(mappedLocals[std::make_pair(curr->index, i)]); } if (!curr->isTee()) { + // This is not a tee, so just finish setting the values. o << int8_t(BinaryConsts::LocalSet) << U32LEB(mappedLocals[std::make_pair(curr->index, 0)]); + } else if (auto it = extractedGets.find(curr); it != extractedGets.end()) { + // We only need to get the single extracted value. + if (it->second == 0) { + o << int8_t(BinaryConsts::LocalTee) + << U32LEB(mappedLocals[std::make_pair(curr->index, 0)]); + } else { + o << int8_t(BinaryConsts::LocalSet) + << U32LEB(mappedLocals[std::make_pair(curr->index, 0)]); + o << int8_t(BinaryConsts::LocalGet) + << U32LEB(mappedLocals[std::make_pair(curr->index, it->second)]); + } } else { + // We need to get all the values. 
o << int8_t(BinaryConsts::LocalTee) << U32LEB(mappedLocals[std::make_pair(curr->index, 0)]); for (Index i = 1; i < numValues; ++i) { @@ -114,8 +135,14 @@ void BinaryInstWriter::visitLocalSet(LocalSet* curr) { } void BinaryInstWriter::visitGlobalGet(GlobalGet* curr) { - // Emit a global.get for each element if this is a tuple global Index index = parent.getGlobalIndex(curr->name); + if (auto it = extractedGets.find(curr); it != extractedGets.end()) { + // We have a tuple of globals to get, but we will only end up using one of + // them, so we can just emit that one. + o << int8_t(BinaryConsts::GlobalGet) << U32LEB(index + it->second); + return; + } + // Emit a global.get for each element if this is a tuple global size_t numValues = curr->type.size(); for (Index i = 0; i < numValues; ++i) { o << int8_t(BinaryConsts::GlobalGet) << U32LEB(index + i); @@ -1975,6 +2002,10 @@ void BinaryInstWriter::visitTupleMake(TupleMake* curr) { } void BinaryInstWriter::visitTupleExtract(TupleExtract* curr) { + if (extractedGets.count(curr->tuple)) { + // We already have just the extracted value on the stack. + return; + } size_t numVals = curr->tuple->type.size(); // Drop all values after the one we want for (size_t i = curr->index + 1; i < numVals; ++i) { @@ -2511,6 +2542,7 @@ void BinaryInstWriter::mapLocalsAndEmitHeader() { } } setScratchLocals(); + o << U32LEB(numLocalsByType.size()); for (auto& localType : localTypes) { o << U32LEB(numLocalsByType.at(localType)); @@ -2537,6 +2569,15 @@ void BinaryInstWriter::countScratchLocals() { for (auto& [type, _] : scratchLocals) { noteLocalType(type); } + // While we have all the tuple.extracts, also find extracts of local.gets, + // local.tees, and global.gets that we can optimize. + for (auto* extract : extracts.list) { + auto* tuple = extract->tuple; + if (tuple->is<LocalGet>() || tuple->is<LocalSet>() || + tuple->is<GlobalGet>()) { + extractedGets.insert({tuple, extract->index}); + } + } } void BinaryInstWriter::setScratchLocals() { |