diff options
-rw-r--r-- | src/passes/CMakeLists.txt | 3 | ||||
-rw-r--r-- | src/passes/ReorderGlobals.cpp | 164 | ||||
-rw-r--r-- | src/passes/pass.cpp | 9 | ||||
-rw-r--r-- | src/passes/passes.h | 2 | ||||
-rw-r--r-- | test/lit/help/wasm-opt.test | 3 | ||||
-rw-r--r-- | test/lit/help/wasm2js.test | 3 | ||||
-rw-r--r-- | test/lit/passes/reorder-globals.wast | 294 |
7 files changed, 477 insertions, 1 deletions
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt index cedb7d764..cb180179c 100644 --- a/src/passes/CMakeLists.txt +++ b/src/passes/CMakeLists.txt @@ -91,8 +91,9 @@ set(passes_SOURCES RemoveUnusedBrs.cpp RemoveUnusedNames.cpp RemoveUnusedModuleElements.cpp - ReorderLocals.cpp ReorderFunctions.cpp + ReorderGlobals.cpp + ReorderLocals.cpp ReReloop.cpp TrapMode.cpp TypeRefining.cpp diff --git a/src/passes/ReorderGlobals.cpp b/src/passes/ReorderGlobals.cpp new file mode 100644 index 000000000..d1c26b2cc --- /dev/null +++ b/src/passes/ReorderGlobals.cpp @@ -0,0 +1,164 @@ +/* + * Copyright 2022 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Sorts globals by their static use count. This helps reduce the size of wasm +// binaries because fewer bytes are needed to encode references to frequently +// used globals. +// + +#include "memory" + +#include "ir/find_all.h" +#include "pass.h" +#include "support/topological_sort.h" +#include "wasm.h" + +namespace wasm { + +using NameCountMap = std::unordered_map<Name, std::atomic<Index>>; + +struct UseCountScanner : public WalkerPass<PostWalker<UseCountScanner>> { + bool isFunctionParallel() override { return true; } + + bool modifiesBinaryenIR() override { return false; } + + UseCountScanner(NameCountMap& counts) : counts(counts) {} + + std::unique_ptr<Pass> create() override { + return std::make_unique<UseCountScanner>(counts); + } + + void visitGlobalGet(GlobalGet* curr) { + // We can't add a new element to the map in parallel. + assert(counts.count(curr->name) > 0); + counts[curr->name]++; + } + void visitGlobalSet(GlobalSet* curr) { + assert(counts.count(curr->name) > 0); + counts[curr->name]++; + } + +private: + NameCountMap& counts; +}; + +struct ReorderGlobals : public Pass { + // Whether to always reorder globals, even if there are very few and the + // benefit is minor. That is useful for testing. + bool always; + + ReorderGlobals(bool always) : always(always) {} + + void run(Module* module) override { + if (module->globals.size() < 128 && !always) { + // The module has so few globals that they all fit in a single-byte U32LEB + // value, so no reordering we can do can actually decrease code size. Note + // that this is the common case with wasm MVP modules where the only + // globals are typically the stack pointer and perhaps a handful of others + // (however, features like wasm GC there may be a great many globals). + return; + } + + NameCountMap counts; + // Fill in info, as we'll operate on it in parallel. + for (auto& global : module->globals) { + counts[global->name]; + } + + // Count uses. + UseCountScanner scanner(counts); + scanner.run(getPassRunner(), module); + scanner.runOnModuleCode(getPassRunner(), module); + + // Do a toplogical sort to ensure we keep dependencies before the things + // that need them. For example, if $b's definition depends on $a then $b + // must appear later: + // + // (global $a i32 (i32.const 10)) + // (global $b i32 (global.get $a)) ;; $b depends on $a + // + struct DependencySort : TopologicalSort<Name, DependencySort> { + Module& wasm; + const NameCountMap& counts; + + std::unordered_map<Name, std::vector<Name>> deps; + + DependencySort(Module& wasm, const NameCountMap& counts) + : wasm(wasm), counts(counts) { + // Sort a list of global names by their counts. + auto sort = [&](std::vector<Name>& globals) { + std::stable_sort( + globals.begin(), globals.end(), [&](const Name& a, const Name& b) { + return counts.at(a) < counts.at(b); + }); + }; + + // Sort the globals. + std::vector<Name> sortedNames; + for (auto& global : wasm.globals) { + sortedNames.push_back(global->name); + } + sort(sortedNames); + + // Everything is a root (we need to emit all globals). + for (auto global : sortedNames) { + push(global); + } + + // The dependencies are the globals referred to. + for (auto& global : wasm.globals) { + if (global->imported()) { + continue; + } + std::vector<Name> vec; + for (auto* get : FindAll<GlobalGet>(global->init).list) { + vec.push_back(get->name); + } + sort(vec); + deps[global->name] = std::move(vec); + } + } + + void pushPredecessors(Name global) { + for (auto pred : deps[global]) { + push(pred); + } + } + }; + + std::unordered_map<Name, Index> sortedIndexes; + for (auto global : DependencySort(*module, counts)) { + auto index = sortedIndexes.size(); + sortedIndexes[global] = index; + } + + std::sort( + module->globals.begin(), + module->globals.end(), + [&](const std::unique_ptr<Global>& a, const std::unique_ptr<Global>& b) { + return sortedIndexes[a->name] < sortedIndexes[b->name]; + }); + + module->updateMaps(); + } +}; + +Pass* createReorderGlobalsPass() { return new ReorderGlobals(false); } + +Pass* createReorderGlobalsAlwaysPass() { return new ReorderGlobals(true); } + +} // namespace wasm diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index c6d94ddf7..7190201a1 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -359,6 +359,12 @@ void PassRegistry::registerPasses() { registerPass("reorder-functions", "sorts functions by access frequency", createReorderFunctionsPass); + registerPass("reorder-globals", + "sorts globals by access frequency", + createReorderGlobalsPass); + registerTestPass("reorder-globals-always", + "sorts globals by access frequency (even if there are few)", + createReorderGlobalsAlwaysPass); registerPass("reorder-locals", "sorts locals by access frequency", createReorderLocalsPass); @@ -621,6 +627,9 @@ void PassRunner::addDefaultGlobalOptimizationPostPasses() { addIfNoDWARFIssues("simplify-globals"); } addIfNoDWARFIssues("remove-unused-module-elements"); + if (options.optimizeLevel >= 2 || options.shrinkLevel >= 1) { + addIfNoDWARFIssues("reorder-globals"); + } // may allow more inlining/dae/etc., need --converge for that addIfNoDWARFIssues("directize"); // perform Stack IR optimizations here, at the very end of the diff --git a/src/passes/passes.h b/src/passes/passes.h index 20dfe0249..12e8b77c7 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -113,6 +113,8 @@ Pass* createRemoveUnusedModuleElementsPass(); Pass* createRemoveUnusedNonFunctionModuleElementsPass(); Pass* createRemoveUnusedNamesPass(); Pass* createReorderFunctionsPass(); +Pass* createReorderGlobalsPass(); +Pass* createReorderGlobalsAlwaysPass(); Pass* createReorderLocalsPass(); Pass* createReReloopPass(); Pass* createRedundantSetEliminationPass(); diff --git a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test index 7ee35ab1a..281411cca 100644 --- a/test/lit/help/wasm-opt.test +++ b/test/lit/help/wasm-opt.test @@ -355,6 +355,9 @@ ;; CHECK-NEXT: --reorder-functions sorts functions by access ;; CHECK-NEXT: frequency ;; CHECK-NEXT: +;; CHECK-NEXT: --reorder-globals sorts globals by access +;; CHECK-NEXT: frequency +;; CHECK-NEXT: ;; CHECK-NEXT: --reorder-locals sorts locals by access frequency ;; CHECK-NEXT: ;; CHECK-NEXT: --rereloop re-optimize control flow using diff --git a/test/lit/help/wasm2js.test b/test/lit/help/wasm2js.test index 0d61348b8..f190c8fed 100644 --- a/test/lit/help/wasm2js.test +++ b/test/lit/help/wasm2js.test @@ -314,6 +314,9 @@ ;; CHECK-NEXT: --reorder-functions sorts functions by access ;; CHECK-NEXT: frequency ;; CHECK-NEXT: +;; CHECK-NEXT: --reorder-globals sorts globals by access +;; CHECK-NEXT: frequency +;; CHECK-NEXT: ;; CHECK-NEXT: --reorder-locals sorts locals by access frequency ;; CHECK-NEXT: ;; CHECK-NEXT: --rereloop re-optimize control flow using diff --git a/test/lit/passes/reorder-globals.wast b/test/lit/passes/reorder-globals.wast new file mode 100644 index 000000000..bdfd03d8c --- /dev/null +++ b/test/lit/passes/reorder-globals.wast @@ -0,0 +1,294 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. +;; RUN: foreach %s %t wasm-opt --reorder-globals-always -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt --reorder-globals-always --roundtrip -S -o - | filecheck %s + +;; Also check roundtripping here, so verify we don't end up emitting invalid +;; binaries. + +;; Global $b has more uses, so it should be sorted first. +(module + + ;; CHECK: (global $b i32 (i32.const 20)) + + ;; CHECK: (global $a i32 (i32.const 10)) + (global $a i32 (i32.const 10)) + (global $b i32 (i32.const 20)) + + ;; CHECK: (func $uses + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $b) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $uses + (drop + (global.get $b) + ) + ) +) + +;; As above, but now with global.sets. Again $b should be sorted first. +(module + + ;; CHECK: (global $b (mut i32) (i32.const 20)) + + ;; CHECK: (global $a (mut i32) (i32.const 10)) + (global $a (mut i32) (i32.const 10)) + (global $b (mut i32) (i32.const 20)) + + ;; CHECK: (func $uses + ;; CHECK-NEXT: (global.set $b + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (global.set $b + ;; CHECK-NEXT: (i32.const 40) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $a) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $uses + (global.set $b + (i32.const 30) + ) + (global.set $b + (i32.const 40) + ) + (drop + (global.get $a) + ) + ) +) + +;; As above, but flipped so now $a has more, and should remain first. +(module + ;; CHECK: (global $a (mut i32) (i32.const 10)) + (global $a (mut i32) (i32.const 10)) + ;; CHECK: (global $b (mut i32) (i32.const 20)) + (global $b (mut i32) (i32.const 20)) + + ;; CHECK: (func $uses + ;; CHECK-NEXT: (global.set $a + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (global.set $a + ;; CHECK-NEXT: (i32.const 40) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $b) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $uses + (global.set $a + (i32.const 30) + ) + (global.set $a + (i32.const 40) + ) + (drop + (global.get $b) + ) + ) +) + +;; $b has more uses, but it depends on $a and cannot be sorted before it. +(module + ;; CHECK: (global $a i32 (i32.const 10)) + (global $a i32 (i32.const 10)) + ;; CHECK: (global $b i32 (global.get $a)) + (global $b i32 (global.get $a)) + + ;; CHECK: (func $uses + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $b) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $uses + (drop + (global.get $b) + ) + ) +) + +;; $c has more uses, but it depends on $b and $a and cannot be sorted before +;; them. Likewise $b cannot be before $a. +(module + ;; CHECK: (global $a i32 (i32.const 10)) + (global $a i32 (i32.const 10)) + ;; CHECK: (global $b i32 (global.get $a)) + (global $b i32 (global.get $a)) + ;; CHECK: (global $c i32 (global.get $b)) + (global $c i32 (global.get $b)) + + ;; CHECK: (func $uses + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $b) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $c) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $c) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $uses + (drop + (global.get $b) + ) + (drop + (global.get $c) + ) + (drop + (global.get $c) + ) + ) +) + +;; As above, but without dependencies, so now $c is first and then $b. +(module + + + ;; CHECK: (global $c i32 (i32.const 30)) + + ;; CHECK: (global $b i32 (i32.const 20)) + + ;; CHECK: (global $a i32 (i32.const 10)) + (global $a i32 (i32.const 10)) + (global $b i32 (i32.const 20)) + (global $c i32 (i32.const 30)) + + ;; CHECK: (func $uses + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $b) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $c) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $c) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $uses + (drop + (global.get $b) + ) + (drop + (global.get $c) + ) + (drop + (global.get $c) + ) + ) +) + +;; As above, but a mixed case: $b depends on $a but $c has no dependencies. $c +;; can be first. +(module + + ;; CHECK: (global $c i32 (i32.const 30)) + + ;; CHECK: (global $a i32 (i32.const 10)) + (global $a i32 (i32.const 10)) + ;; CHECK: (global $b i32 (global.get $a)) + (global $b i32 (global.get $a)) + (global $c i32 (i32.const 30)) + + ;; CHECK: (func $uses + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $b) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $c) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $c) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $uses + (drop + (global.get $b) + ) + (drop + (global.get $c) + ) + (drop + (global.get $c) + ) + ) +) + +;; Another mixed case, now with $c depending on $b. $b can be before $a. +(module + + + ;; CHECK: (global $b i32 (i32.const 20)) + + ;; CHECK: (global $c i32 (global.get $b)) + + ;; CHECK: (global $a i32 (i32.const 10)) + (global $a i32 (i32.const 10)) + (global $b i32 (i32.const 20)) + (global $c i32 (global.get $b)) + + ;; CHECK: (func $uses + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $b) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $c) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $c) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $uses + (drop + (global.get $b) + ) + (drop + (global.get $c) + ) + (drop + (global.get $c) + ) + ) +) + +;; $b has more uses, but $a is an import and must remain first. +(module + ;; CHECK: (import "a" "b" (global $a i32)) + (import "a" "b" (global $a i32)) + ;; CHECK: (global $b i32 (i32.const 10)) + (global $b i32 (i32.const 10)) + + ;; CHECK: (func $uses + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $b) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $uses + (drop + (global.get $b) + ) + ) +) + +;; As above, but with a and b's names flipped, to check that the names do not +;; matter, and we keep imports first. +(module + ;; CHECK: (import "a" "b" (global $b i32)) + (import "a" "b" (global $b i32)) + + ;; CHECK: (global $a i32 (i32.const 10)) + (global $a i32 (i32.const 10)) + + ;; CHECK: (func $uses + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $a) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $uses + (drop + (global.get $a) + ) + ) +) |