summary | refs | log | tree | commit | diff
path: root/src/wasm/wasm-stack.cpp
diff options
context:
space:
mode:
author: Thomas Lively <tlively@google.com> 2023-09-18 21:54:14 -0700
committer: GitHub <noreply@github.com> 2023-09-18 21:54:14 -0700
commit: a51bd6df919a5b79574f0996a760cc20cb05697e (patch)
tree: 32f67c9568aaee16b67ce0a4e6df1df2fb487036 /src/wasm/wasm-stack.cpp
parent: 6bab227860a6873204141e5ec9b14ed08d77fc62 (diff)
download: binaryen-a51bd6df919a5b79574f0996a760cc20cb05697e.tar.gz
binaryen-a51bd6df919a5b79574f0996a760cc20cb05697e.tar.bz2
binaryen-a51bd6df919a5b79574f0996a760cc20cb05697e.zip
Reland "Optimize tuple.extract of gets in BinaryInstWriter" (#5955)
In general, the binary lowering of tuple.extract expects that all the tuple values are on top of the stack, so it inserts drops and possibly uses a scratch local to ensure only the extracted value is left. However, when the extracted tuple expression is a local.get, local.tee, or global.get, it's much more efficient to change the lowering of the get or tee to ensure that only the extracted value is on the stack to begin with. Implement that optimization in the binary writer.
Diffstat (limited to 'src/wasm/wasm-stack.cpp')
-rw-r--r-- src/wasm/wasm-stack.cpp 43
1 file changed, 42 insertions, 1 deletion
diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp
index 2042189fd..1f1141821 100644
--- a/src/wasm/wasm-stack.cpp
+++ b/src/wasm/wasm-stack.cpp
@@ -87,6 +87,13 @@ void BinaryInstWriter::visitCallIndirect(CallIndirect* curr) {
}
void BinaryInstWriter::visitLocalGet(LocalGet* curr) {
+ if (auto it = extractedGets.find(curr); it != extractedGets.end()) {
+ // We have a tuple of locals to get, but we will only end up using one of
+ // them, so we can just emit that one.
+ o << int8_t(BinaryConsts::LocalGet)
+ << U32LEB(mappedLocals[std::make_pair(curr->index, it->second)]);
+ return;
+ }
size_t numValues = func->getLocalType(curr->index).size();
for (Index i = 0; i < numValues; ++i) {
o << int8_t(BinaryConsts::LocalGet)
@@ -96,14 +103,28 @@ void BinaryInstWriter::visitLocalGet(LocalGet* curr) {
void BinaryInstWriter::visitLocalSet(LocalSet* curr) {
size_t numValues = func->getLocalType(curr->index).size();
+ // If this is a tuple, set all the elements with nonzero index.
for (Index i = numValues - 1; i >= 1; --i) {
o << int8_t(BinaryConsts::LocalSet)
<< U32LEB(mappedLocals[std::make_pair(curr->index, i)]);
}
if (!curr->isTee()) {
+ // This is not a tee, so just finish setting the values.
o << int8_t(BinaryConsts::LocalSet)
<< U32LEB(mappedLocals[std::make_pair(curr->index, 0)]);
+ } else if (auto it = extractedGets.find(curr); it != extractedGets.end()) {
+ // We only need to get the single extracted value.
+ if (it->second == 0) {
+ o << int8_t(BinaryConsts::LocalTee)
+ << U32LEB(mappedLocals[std::make_pair(curr->index, 0)]);
+ } else {
+ o << int8_t(BinaryConsts::LocalSet)
+ << U32LEB(mappedLocals[std::make_pair(curr->index, 0)]);
+ o << int8_t(BinaryConsts::LocalGet)
+ << U32LEB(mappedLocals[std::make_pair(curr->index, it->second)]);
+ }
} else {
+ // We need to get all the values.
o << int8_t(BinaryConsts::LocalTee)
<< U32LEB(mappedLocals[std::make_pair(curr->index, 0)]);
for (Index i = 1; i < numValues; ++i) {
@@ -114,8 +135,14 @@ void BinaryInstWriter::visitLocalSet(LocalSet* curr) {
}
void BinaryInstWriter::visitGlobalGet(GlobalGet* curr) {
- // Emit a global.get for each element if this is a tuple global
Index index = parent.getGlobalIndex(curr->name);
+ if (auto it = extractedGets.find(curr); it != extractedGets.end()) {
+ // We have a tuple of globals to get, but we will only end up using one of
+ // them, so we can just emit that one.
+ o << int8_t(BinaryConsts::GlobalGet) << U32LEB(index + it->second);
+ return;
+ }
+ // Emit a global.get for each element if this is a tuple global
size_t numValues = curr->type.size();
for (Index i = 0; i < numValues; ++i) {
o << int8_t(BinaryConsts::GlobalGet) << U32LEB(index + i);
@@ -1975,6 +2002,10 @@ void BinaryInstWriter::visitTupleMake(TupleMake* curr) {
}
void BinaryInstWriter::visitTupleExtract(TupleExtract* curr) {
+ if (extractedGets.count(curr->tuple)) {
+ // We already have just the extracted value on the stack.
+ return;
+ }
size_t numVals = curr->tuple->type.size();
// Drop all values after the one we want
for (size_t i = curr->index + 1; i < numVals; ++i) {
@@ -2511,6 +2542,7 @@ void BinaryInstWriter::mapLocalsAndEmitHeader() {
}
}
setScratchLocals();
+
o << U32LEB(numLocalsByType.size());
for (auto& localType : localTypes) {
o << U32LEB(numLocalsByType.at(localType));
@@ -2537,6 +2569,15 @@ void BinaryInstWriter::countScratchLocals() {
for (auto& [type, _] : scratchLocals) {
noteLocalType(type);
}
+ // While we have all the tuple.extracts, also find extracts of local.gets,
+ // local.tees, and global.gets that we can optimize.
+ for (auto* extract : extracts.list) {
+ auto* tuple = extract->tuple;
+ if (tuple->is<LocalGet>() || tuple->is<LocalSet>() ||
+ tuple->is<GlobalGet>()) {
+ extractedGets.insert({tuple, extract->index});
+ }
+ }
}
void BinaryInstWriter::setScratchLocals() {