-rw-r--r-- | src/wasm-stack.h                      |   9
-rw-r--r-- | src/wasm/wasm-stack.cpp               | 118
-rw-r--r-- | test/lit/binary/dwarf-multivalue.test |  11
-rw-r--r-- | test/lit/multivalue.wast              |  37
4 files changed, 92 insertions, 83 deletions
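Context for the patch below: in the wasm binary format a function body begins with a locals vector of (count, type) entries, and the function's non-parameter locals are the concatenation of those runs. The writer can emit one entry per IR local to preserve indices (the DWARF-preserving path) or reorder locals so that each type becomes a single run (the compact path). This patch reworks how the scratch locals needed to lower tuple.extract are counted and folded into that layout: the old countScratchLocals/setScratchLocals pair is replaced by a single countScratchLocals that returns the number of scratch locals required per type. A small illustrative sketch of the resulting index assignment follows the diff.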
diff --git a/src/wasm-stack.h b/src/wasm-stack.h
index ae453f9de..fc38d07a4 100644
--- a/src/wasm-stack.h
+++ b/src/wasm-stack.h
@@ -146,13 +146,14 @@ private:
 
   // type => number of locals of that type in the compact form
   std::unordered_map<Type, size_t> numLocalsByType;
-  void noteLocalType(Type type);
+  void noteLocalType(Type type, Index count = 1);
 
   // Keeps track of the binary index of the scratch locals used to lower
-  // tuple.extract.
+  // tuple.extract. If there are multiple scratch locals of the same type, they
+  // are contiguous and this map holds the index of the first.
   InsertOrderedMap<Type, Index> scratchLocals;
-  void countScratchLocals();
-  void setScratchLocals();
+  // Return the type and number of required scratch locals.
+  InsertOrderedMap<Type, Index> countScratchLocals();
 
   // local.get, local.tee, and glboal.get expressions that will be followed by
   // tuple.extracts. We can optimize these by getting only the local for the
diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp
index 3cdd1f718..53a25d52b 100644
--- a/src/wasm/wasm-stack.cpp
+++ b/src/wasm/wasm-stack.cpp
@@ -2569,6 +2569,8 @@ void BinaryInstWriter::mapLocalsAndEmitHeader() {
     mappedLocals[std::make_pair(i, 0)] = i;
   }
 
+  auto scratches = countScratchLocals();
+
   // Normally we map all locals of the same type into a range of adjacent
   // addresses, which is more compact. However, if we need to keep DWARF valid,
   // do not do any reordering at all - instead, do a trivial mapping that
@@ -2584,25 +2586,26 @@ void BinaryInstWriter::mapLocalsAndEmitHeader() {
     for (Index i = func->getVarIndexBase(); i < func->getNumLocals(); i++) {
       size_t size = func->getLocalType(i).size();
       for (Index j = 0; j < size; j++) {
-        mappedLocals[std::make_pair(i, j)] = mappedIndex + j;
+        mappedLocals[std::make_pair(i, j)] = mappedIndex++;
       }
-      mappedIndex += size;
     }
 
-    countScratchLocals();
     size_t numBinaryLocals =
-      mappedIndex - func->getVarIndexBase() + scratchLocals.size();
+      mappedIndex - func->getVarIndexBase() + scratches.size();
+
     o << U32LEB(numBinaryLocals);
+
     for (Index i = func->getVarIndexBase(); i < func->getNumLocals(); i++) {
       for (const auto& type : func->getLocalType(i)) {
         o << U32LEB(1);
         parent.writeType(type);
       }
     }
-    for (auto& [type, _] : scratchLocals) {
-      o << U32LEB(1);
+    for (auto& [type, count] : scratches) {
+      o << U32LEB(count);
       parent.writeType(type);
-      scratchLocals[type] = mappedIndex++;
+      scratchLocals[type] = mappedIndex;
+      mappedIndex += count;
     }
     return;
   }
@@ -2612,7 +2615,10 @@ void BinaryInstWriter::mapLocalsAndEmitHeader() {
       noteLocalType(t);
     }
   }
-  countScratchLocals();
+
+  for (auto& [type, count] : scratches) {
+    noteLocalType(type, count);
+  }
 
   if (parent.getModule()->features.hasReferenceTypes()) {
     // Sort local types in a way that keeps all MVP types together and all
@@ -2636,23 +2642,28 @@ void BinaryInstWriter::mapLocalsAndEmitHeader() {
     });
   }
 
-  std::unordered_map<Type, size_t> currLocalsByType;
+  // Map IR (local index, tuple index) pairs to binary local indices. Since
+  // locals are grouped by type, start by calculating the base indices for each
+  // type.
+  std::unordered_map<Type, Index> nextFreeIndex;
+  Index baseIndex = func->getVarIndexBase();
+  for (auto& type : localTypes) {
+    nextFreeIndex[type] = baseIndex;
+    baseIndex += numLocalsByType[type];
+  }
+
+  // Map the IR index pairs to indices.
   for (Index i = func->getVarIndexBase(); i < func->getNumLocals(); i++) {
     Index j = 0;
     for (const auto& type : func->getLocalType(i)) {
-      auto fullIndex = std::make_pair(i, j++);
-      Index index = func->getVarIndexBase();
-      for (auto& localType : localTypes) {
-        if (type == localType) {
-          mappedLocals[fullIndex] = index + currLocalsByType[localType];
-          currLocalsByType[type]++;
-          break;
-        }
-        index += numLocalsByType.at(localType);
-      }
+      mappedLocals[{i, j++}] = nextFreeIndex[type]++;
    }
  }
-  setScratchLocals();
+
+  // Map scratch locals to the remaining indices.
+  for (auto& [type, _] : scratches) {
+    scratchLocals[type] = nextFreeIndex[type];
+  }
 
   o << U32LEB(numLocalsByType.size());
   for (auto& localType : localTypes) {
@@ -2661,44 +2672,47 @@ void BinaryInstWriter::mapLocalsAndEmitHeader() {
   }
 }
 
-void BinaryInstWriter::noteLocalType(Type type) {
-  if (!numLocalsByType.count(type)) {
+void BinaryInstWriter::noteLocalType(Type type, Index count) {
+  auto& num = numLocalsByType[type];
+  if (num == 0) {
     localTypes.push_back(type);
   }
-  numLocalsByType[type]++;
+  num += count;
 }
 
-void BinaryInstWriter::countScratchLocals() {
-  // Add a scratch register in `numLocalsByType` for each type of
-  // tuple.extract with nonzero index present.
-  FindAll<TupleExtract> extracts(func->body);
-  for (auto* extract : extracts.list) {
-    if (extract->type != Type::unreachable && extract->index != 0) {
-      scratchLocals[extract->type] = 0;
-    }
-  }
-  for (auto& [type, _] : scratchLocals) {
-    noteLocalType(type);
-  }
-  // While we have all the tuple.extracts, also find extracts of local.gets,
-  // local.tees, and global.gets that we can optimize.
-  for (auto* extract : extracts.list) {
-    auto* tuple = extract->tuple;
-    if (tuple->is<LocalGet>() || tuple->is<LocalSet>() ||
-        tuple->is<GlobalGet>()) {
-      extractedGets.insert({tuple, extract->index});
-    }
-  }
-}
+InsertOrderedMap<Type, Index> BinaryInstWriter::countScratchLocals() {
+  struct ScratchLocalFinder : PostWalker<ScratchLocalFinder> {
+    BinaryInstWriter& parent;
+    InsertOrderedMap<Type, Index> scratches;
 
-void BinaryInstWriter::setScratchLocals() {
-  Index index = func->getVarIndexBase();
-  for (auto& localType : localTypes) {
-    index += numLocalsByType[localType];
-    if (scratchLocals.find(localType) != scratchLocals.end()) {
-      scratchLocals[localType] = index - 1;
+    ScratchLocalFinder(BinaryInstWriter& parent) : parent(parent) {}
+
+    void visitTupleExtract(TupleExtract* curr) {
+      if (curr->type == Type::unreachable) {
+        // We will not emit this instruction anyway.
+        return;
+      }
+      // Extracts from locals or globals are optimizable and do not require
+      // scratch locals. Record them.
+      auto* tuple = curr->tuple;
+      if (tuple->is<LocalGet>() || tuple->is<LocalSet>() ||
+          tuple->is<GlobalGet>()) {
+        parent.extractedGets.insert({tuple, curr->index});
+        return;
+      }
+      // Include a scratch register for each type of tuple.extract with nonzero
+      // index present.
+      if (curr->index != 0) {
+        auto& count = scratches[curr->type];
+        count = std::max(count, 1u);
+      }
     }
-  }
+  };
+
+  ScratchLocalFinder finder(*this);
+  finder.walk(func->body);
+
+  return std::move(finder.scratches);
 }
 
 void BinaryInstWriter::emitMemoryAccess(size_t alignment,
diff --git a/test/lit/binary/dwarf-multivalue.test b/test/lit/binary/dwarf-multivalue.test
index 435aebe25..c803dea14 100644
--- a/test/lit/binary/dwarf-multivalue.test
+++ b/test/lit/binary/dwarf-multivalue.test
@@ -75,12 +75,10 @@
 ;;   (local $11 i32) ;; Previous (local $11 (tuple i32 f32))'s first element
 ;;   (local $12 f32) ;; Previous (local $11 (tuple i32 f32))'s second element
 ;;   (local $13 i32) ;; Previous (local $12 i32)
-;;   (local $14 f32) ;; scratch local for f32
 
 ;; We parse this binary again into Binaryen IR, roundtripping the original
-;; binary. Here until (local $14) is the same with the previous list, and $15 is
-;; is added for tuple parsing and $16 is added for stacky IR resolving during
-;; binary reading process.
+;; binary. Locals $14 and $15 are added for stacky IR resolving during binary
+;; reading process.
 ;; RUN: wasm-opt -all -g --roundtrip %s.wasm -S -o - | filecheck %s --check-prefix=ROUNDTRIP
 ;; ROUNDTRIP:      (func $test
 ;; ROUNDTRIP-NEXT:  (local $0 i32)
@@ -97,9 +95,8 @@
 ;; ROUNDTRIP-NEXT:  (local $11 i32)
 ;; ROUNDTRIP-NEXT:  (local $12 f32)
 ;; ROUNDTRIP-NEXT:  (local $13 i32)
-;; ROUNDTRIP-NEXT:  (local $14 f32)
-;; ROUNDTRIP-NEXT:  (local $15 (tuple i32 f32))
-;; ROUNDTRIP-NEXT:  (local $16 i32)
+;; ROUNDTRIP-NEXT:  (local $14 (tuple i32 f32))
+;; ROUNDTRIP-NEXT:  (local $15 i32)
 
 ;; We can see that we don't reorder the locals during the process and the
 ;; original list of locals, local $0~$10, is untouched, to NOT invalidate DWARF
diff --git a/test/lit/multivalue.wast b/test/lit/multivalue.wast
index af69c1366..0cfafb2a9 100644
--- a/test/lit/multivalue.wast
+++ b/test/lit/multivalue.wast
@@ -149,42 +149,40 @@
   ;; CHECK:      (func $reverse (type $4) (result f32 i64 i32)
   ;; CHECK-NEXT:  (local $x i32)
   ;; CHECK-NEXT:  (local $1 i64)
-  ;; CHECK-NEXT:  (local $2 i64)
-  ;; CHECK-NEXT:  (local $3 f32)
-  ;; CHECK-NEXT:  (local $4 f32)
-  ;; CHECK-NEXT:  (local $5 (tuple i32 i64 f32))
-  ;; CHECK-NEXT:  (local $6 i64)
-  ;; CHECK-NEXT:  (local $7 i32)
-  ;; CHECK-NEXT:  (local.set $5
+  ;; CHECK-NEXT:  (local $2 f32)
+  ;; CHECK-NEXT:  (local $3 (tuple i32 i64 f32))
+  ;; CHECK-NEXT:  (local $4 i64)
+  ;; CHECK-NEXT:  (local $5 i32)
+  ;; CHECK-NEXT:  (local.set $3
   ;; CHECK-NEXT:   (call $triple)
   ;; CHECK-NEXT:  )
   ;; CHECK-NEXT:  (local.set $x
   ;; CHECK-NEXT:   (block (result i32)
-  ;; CHECK-NEXT:    (local.set $7
+  ;; CHECK-NEXT:    (local.set $5
   ;; CHECK-NEXT:     (tuple.extract 3 0
-  ;; CHECK-NEXT:      (local.get $5)
+  ;; CHECK-NEXT:      (local.get $3)
   ;; CHECK-NEXT:     )
   ;; CHECK-NEXT:    )
   ;; CHECK-NEXT:    (local.set $1
   ;; CHECK-NEXT:     (block (result i64)
-  ;; CHECK-NEXT:      (local.set $6
+  ;; CHECK-NEXT:      (local.set $4
   ;; CHECK-NEXT:       (tuple.extract 3 1
-  ;; CHECK-NEXT:        (local.get $5)
+  ;; CHECK-NEXT:        (local.get $3)
   ;; CHECK-NEXT:       )
   ;; CHECK-NEXT:      )
-  ;; CHECK-NEXT:      (local.set $3
+  ;; CHECK-NEXT:      (local.set $2
   ;; CHECK-NEXT:       (tuple.extract 3 2
-  ;; CHECK-NEXT:        (local.get $5)
+  ;; CHECK-NEXT:        (local.get $3)
   ;; CHECK-NEXT:       )
   ;; CHECK-NEXT:      )
-  ;; CHECK-NEXT:      (local.get $6)
+  ;; CHECK-NEXT:      (local.get $4)
   ;; CHECK-NEXT:     )
   ;; CHECK-NEXT:    )
-  ;; CHECK-NEXT:    (local.get $7)
+  ;; CHECK-NEXT:    (local.get $5)
   ;; CHECK-NEXT:   )
   ;; CHECK-NEXT:  )
   ;; CHECK-NEXT:  (tuple.make 3
-  ;; CHECK-NEXT:   (local.get $3)
+  ;; CHECK-NEXT:   (local.get $2)
   ;; CHECK-NEXT:   (local.get $1)
   ;; CHECK-NEXT:   (local.get $x)
   ;; CHECK-NEXT:  )
@@ -228,17 +226,16 @@
 
   ;; Test multivalue globals
   ;; CHECK:      (func $global (type $0) (result i32 i64)
-  ;; CHECK-NEXT:  (local $0 i64)
-  ;; CHECK-NEXT:  (local $1 i32)
+  ;; CHECK-NEXT:  (local $0 i32)
   ;; CHECK-NEXT:  (global.set $g1
   ;; CHECK-NEXT:   (block (result i32)
-  ;; CHECK-NEXT:    (local.set $1
+  ;; CHECK-NEXT:    (local.set $0
   ;; CHECK-NEXT:     (i32.const 42)
   ;; CHECK-NEXT:    )
   ;; CHECK-NEXT:    (global.set $g2
   ;; CHECK-NEXT:     (i64.const 7)
   ;; CHECK-NEXT:    )
-  ;; CHECK-NEXT:    (local.get $1)
+  ;; CHECK-NEXT:    (local.get $0)
   ;; CHECK-NEXT:   )
   ;; CHECK-NEXT:  )
   ;; CHECK-NEXT:  (drop
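For readers following the new index assignment in mapLocalsAndEmitHeader, here is a minimal standalone sketch, not Binaryen code, of the scheme used by the compact (non-DWARF) path after this change: locals are grouped by type, each type gets a contiguous block of binary indices, and the scratch locals a type requires are placed at the end of that type's block. The stand-in string types, the example locals, and the tiny driver are invented purely for illustration.

// A minimal standalone sketch (not Binaryen code) of the index assignment
// used by the compact path above. "Type" is stood in for by std::string and
// all inputs are invented for illustration.
#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  // Hypothetical non-parameter locals ("vars") in IR order.
  std::vector<std::string> varTypes = {"i32", "i64", "i32", "f32"};
  // Hypothetical result of countScratchLocals(): scratch locals needed per type.
  std::map<std::string, unsigned> scratches = {{"i64", 1}};
  // Pretend the function has two parameters, so vars start at index 2.
  unsigned varIndexBase = 2;

  // Count locals of each type, remembering first-seen type order
  // (the analogue of noteLocalType).
  std::vector<std::string> localTypes;
  std::map<std::string, unsigned> numLocalsByType;
  auto note = [&](const std::string& type, unsigned count) {
    if (numLocalsByType[type] == 0) {
      localTypes.push_back(type);
    }
    numLocalsByType[type] += count;
  };
  for (auto& t : varTypes) {
    note(t, 1);
  }
  for (auto& [t, count] : scratches) {
    note(t, count);
  }

  // Each type gets a contiguous block of binary indices; compute its base.
  std::map<std::string, unsigned> nextFreeIndex;
  unsigned base = varIndexBase;
  for (auto& t : localTypes) {
    nextFreeIndex[t] = base;
    base += numLocalsByType[t];
  }

  // IR locals take indices from their type's block in order; scratch locals
  // take whatever remains at the end of the block.
  for (unsigned i = 0; i < varTypes.size(); i++) {
    printf("IR local %u (%s) -> binary local %u\n",
           varIndexBase + i,
           varTypes[i].c_str(),
           nextFreeIndex[varTypes[i]]++);
  }
  for (auto& [t, count] : scratches) {
    printf("%u scratch %s local(s) start at binary local %u\n",
           count,
           t.c_str(),
           nextFreeIndex[t]);
  }
}

With these inputs the two i32 vars map to binary locals 2 and 3, the i64 var to 4, the i64 scratch local to 5, and the f32 var to 6, matching the "contiguous block per type, scratch locals last" layout described by the new comment in wasm-stack.h.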