summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlon Zakai <azakai@google.com>2024-01-08 16:03:33 -0800
committerGitHub <noreply@github.com>2024-01-09 00:03:33 +0000
commit185019919b1fa2efe8d02056827a31f4f50dd01e (patch)
tree19caab62ddbc1481d676b8e49e51bd5ddf8ebb78
parentcc0fab918aed3643abdc3566ade3f70f06d1d954 (diff)
downloadbinaryen-185019919b1fa2efe8d02056827a31f4f50dd01e.tar.gz
binaryen-185019919b1fa2efe8d02056827a31f4f50dd01e.tar.bz2
binaryen-185019919b1fa2efe8d02056827a31f4f50dd01e.zip
Fix global effect computation with -O flags (#6211)
We tested --generate-global-effects --vacuum and such, but not --generate-global-effects -O3 or the other -O flags. Unfortunately, our targeted testing missed a bug because of that. Specifically, we have special logic for -O flags to make sure the passes they expand into run with the proper opt and shrink levels, but that logic happened to also interfere with global effect computation. It would also interfere with allowing GUFA info or other things to be stored on the side, which we've proposed. This PR fixes that + future issues. The fix is to just allow a pass runner to execute more than once. We thought to avoid that and assert against it to keep the model "hermetic" (you create a pass runner, you run the passes, and you throw it out), which feels nice in a way, but it led to the bug here, and I'm not sure it would prevent any other ones really. It is also more code. It is simpler to allow a runner to execute more than once, and add a method to clear it. With that, the logic for -O3 execution is both simpler and does not interfere with anything but the opt and shrink level flags: we create a single runner, give it the proper options, and then keep using that runner + those options as we go, normally.
-rw-r--r--src/pass.h11
-rw-r--r--src/passes/pass.cpp5
-rw-r--r--src/tools/optimization-options.h44
-rw-r--r--test/lit/passes/global-effects-O.wast198
4 files changed, 229 insertions, 29 deletions
diff --git a/src/pass.h b/src/pass.h
index 83f53c23a..2c2fa0619 100644
--- a/src/pass.h
+++ b/src/pass.h
@@ -235,8 +235,9 @@ struct PassOptions {
// other passes later can benefit from it. It is up to the sequence of passes
// to update or discard this when necessary - in particular, when new effects
// are added to a function this must be changed or we may optimize
- // incorrectly (however, it is extremely rare for a pass to *add* effects;
- // passes normally only remove effects).
+ // incorrectly. However, it is extremely rare for a pass to *add* effects;
+ // passes normally only remove effects. Passes that do add effects must set
+ // addsEffects() so the pass runner is aware of them.
std::shared_ptr<FuncEffectsMap> funcEffectsMap;
// -Os is our default
@@ -318,6 +319,9 @@ struct PassRunner {
// Add a pass given an instance.
void add(std::unique_ptr<Pass> pass) { doAdd(std::move(pass)); }
+ // Clears away all passes that have been added.
+ void clear();
+
// Adds the pass if there are no DWARF-related issues. There is an issue if
// there is DWARF and if the pass does not support DWARF (as defined by the
// pass returning true from invalidatesDWARF); otherwise, if there is no
@@ -387,9 +391,6 @@ private:
// yet) have removed DWARF.
bool addedPassesRemovedDWARF = false;
- // Whether this pass runner has run. A pass runner should only be run once.
- bool ran = false;
-
void runPass(Pass* pass);
void runPassOnFunction(Pass* pass, Function* func);
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index d7391a390..62b3683fa 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -739,9 +739,6 @@ static void dumpWasm(Name name, Module* wasm) {
}
void PassRunner::run() {
- assert(!ran);
- ran = true;
-
static const int passDebug = getPassDebug();
// Emit logging information when asked for. At passDebug level 1+ we log
// the main passes, while in 2 we also log nested ones. Note that for
@@ -885,6 +882,8 @@ void PassRunner::doAdd(std::unique_ptr<Pass> pass) {
passes.emplace_back(std::move(pass));
}
+void PassRunner::clear() { passes.clear(); }
+
// Checks that the state is valid before and after a
// pass runs on a function. We run these extra checks when
// pass-debug mode is enabled.
diff --git a/src/tools/optimization-options.h b/src/tools/optimization-options.h
index 045542ac2..0a47d9f70 100644
--- a/src/tools/optimization-options.h
+++ b/src/tools/optimization-options.h
@@ -328,44 +328,46 @@ struct OptimizationOptions : public ToolOptions {
bool runningPasses() { return passes.size() > 0; }
void runPasses(Module& wasm) {
- std::unique_ptr<PassRunner> passRunner;
+ PassRunner passRunner(&wasm, passOptions);
+ if (debug) {
+ passRunner.setDebug(true);
+ }
// Flush anything in the current pass runner, and then reset it to a fresh
// state so it is ready for new things.
- auto flushAndReset = [&]() {
- if (passRunner) {
- passRunner->run();
- }
- passRunner = std::make_unique<PassRunner>(&wasm, passOptions);
- if (debug) {
- passRunner->setDebug(true);
- }
+ auto flush = [&]() {
+ passRunner.run();
+ passRunner.clear();
};
- flushAndReset();
-
for (auto& pass : passes) {
if (pass.name == DEFAULT_OPT_PASSES) {
// This is something like -O3 or -Oz. We must run this now, in order to
// set the proper opt and shrink levels. To do that, first reset the
// runner so that anything already queued is run (since we can only run
// after those things).
- flushAndReset();
+ flush();
// -O3/-Oz etc. always set their own optimize/shrinkLevels.
assert(pass.optimizeLevel);
assert(pass.shrinkLevel);
- passRunner->options.optimizeLevel = *pass.optimizeLevel;
- passRunner->options.shrinkLevel = *pass.shrinkLevel;
- // Run our optimizations now, and reset the runner so that the default
- // pass options are used later (and not the temporary optimize/
- // shrinkLevels we just set).
- passRunner->addDefaultOptimizationPasses();
- flushAndReset();
+ // Temporarily override the default levels.
+ assert(passRunner.options.optimizeLevel == passOptions.optimizeLevel);
+ assert(passRunner.options.shrinkLevel == passOptions.shrinkLevel);
+ passRunner.options.optimizeLevel = *pass.optimizeLevel;
+ passRunner.options.shrinkLevel = *pass.shrinkLevel;
+
+ // Run our optimizations now with the custom levels.
+ passRunner.addDefaultOptimizationPasses();
+ flush();
+
+ // Restore the default optimize/shrinkLevels.
+ passRunner.options.optimizeLevel = passOptions.optimizeLevel;
+ passRunner.options.shrinkLevel = passOptions.shrinkLevel;
} else {
// This is a normal pass. Add it to the queue for execution.
- passRunner->add(pass.name);
+ passRunner.add(pass.name);
// Normal passes do not set their own optimize/shrinkLevels.
assert(!pass.optimizeLevel);
@@ -373,7 +375,7 @@ struct OptimizationOptions : public ToolOptions {
}
}
- flushAndReset();
+ flush();
}
};
diff --git a/test/lit/passes/global-effects-O.wast b/test/lit/passes/global-effects-O.wast
new file mode 100644
index 000000000..c24159eac
--- /dev/null
+++ b/test/lit/passes/global-effects-O.wast
@@ -0,0 +1,198 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
+
+;; RUN: foreach %s %t wasm-opt -all -S -o - --generate-global-effects | filecheck %s --check-prefix CHECK_0
+;; RUN: foreach %s %t wasm-opt -all -S -o - --generate-global-effects -O1 | filecheck %s --check-prefix CHECK_1
+;; RUN: foreach %s %t wasm-opt -all -S -o - --generate-global-effects -O3 | filecheck %s --check-prefix CHECK_3
+;; RUN: foreach %s %t wasm-opt -all -S -o - --generate-global-effects -Os | filecheck %s --check-prefix CHECK_s
+;; RUN: foreach %s %t wasm-opt -all -S -o - --generate-global-effects -O | filecheck %s --check-prefix CHECK_O
+
+;; Test that global effects benefit -O1 and related modes.
+
+(module
+ ;; CHECK_0: (type $0 (func))
+
+ ;; CHECK_0: (type $1 (func (result i32)))
+
+ ;; CHECK_0: (export "main" (func $main))
+ ;; CHECK_1: (type $0 (func))
+
+ ;; CHECK_1: (type $1 (func (result i32)))
+
+ ;; CHECK_1: (export "main" (func $main))
+ ;; CHECK_3: (type $0 (func))
+
+ ;; CHECK_3: (type $1 (func (result i32)))
+
+ ;; CHECK_3: (export "main" (func $main))
+ ;; CHECK_s: (type $0 (func))
+
+ ;; CHECK_s: (type $1 (func (result i32)))
+
+ ;; CHECK_s: (export "main" (func $main))
+ ;; CHECK_O: (type $0 (func))
+
+ ;; CHECK_O: (type $1 (func (result i32)))
+
+ ;; CHECK_O: (export "main" (func $main))
+ (export "main" (func $main))
+
+ ;; CHECK_0: (export "pointless-work" (func $pointless-work))
+ ;; CHECK_1: (export "pointless-work" (func $pointless-work))
+ ;; CHECK_3: (export "pointless-work" (func $pointless-work))
+ ;; CHECK_s: (export "pointless-work" (func $pointless-work))
+ ;; CHECK_O: (export "pointless-work" (func $pointless-work))
+ (export "pointless-work" (func $pointless-work))
+
+ ;; CHECK_0: (func $main (type $0)
+ ;; CHECK_0-NEXT: (if
+ ;; CHECK_0-NEXT: (call $pointless-work)
+ ;; CHECK_0-NEXT: (then
+ ;; CHECK_0-NEXT: (drop
+ ;; CHECK_0-NEXT: (call $pointless-work)
+ ;; CHECK_0-NEXT: )
+ ;; CHECK_0-NEXT: )
+ ;; CHECK_0-NEXT: )
+ ;; CHECK_0-NEXT: )
+ ;; CHECK_1: (func $main (type $0)
+ ;; CHECK_1-NEXT: (nop)
+ ;; CHECK_1-NEXT: )
+ ;; CHECK_3: (func $main (type $0) (; has Stack IR ;)
+ ;; CHECK_3-NEXT: (nop)
+ ;; CHECK_3-NEXT: )
+ ;; CHECK_s: (func $main (type $0) (; has Stack IR ;)
+ ;; CHECK_s-NEXT: (nop)
+ ;; CHECK_s-NEXT: )
+ ;; CHECK_O: (func $main (type $0) (; has Stack IR ;)
+ ;; CHECK_O-NEXT: (nop)
+ ;; CHECK_O-NEXT: )
+ (func $main
+ ;; This calls a function that does pointless work. After generating global
+ ;; effects we can see that it is pointless and remove this entire if (except
+ ;; for -O0).
+ (if
+ (call $pointless-work)
+ (then
+ (drop
+ (call $pointless-work)
+ )
+ )
+ )
+ )
+
+ ;; CHECK_0: (func $pointless-work (type $1) (result i32)
+ ;; CHECK_0-NEXT: (local $x i32)
+ ;; CHECK_0-NEXT: (loop $loop
+ ;; CHECK_0-NEXT: (local.set $x
+ ;; CHECK_0-NEXT: (i32.add
+ ;; CHECK_0-NEXT: (local.get $x)
+ ;; CHECK_0-NEXT: (i32.const 1)
+ ;; CHECK_0-NEXT: )
+ ;; CHECK_0-NEXT: )
+ ;; CHECK_0-NEXT: (if
+ ;; CHECK_0-NEXT: (i32.ge_u
+ ;; CHECK_0-NEXT: (local.get $x)
+ ;; CHECK_0-NEXT: (i32.const 12345678)
+ ;; CHECK_0-NEXT: )
+ ;; CHECK_0-NEXT: (then
+ ;; CHECK_0-NEXT: (return
+ ;; CHECK_0-NEXT: (local.get $x)
+ ;; CHECK_0-NEXT: )
+ ;; CHECK_0-NEXT: )
+ ;; CHECK_0-NEXT: )
+ ;; CHECK_0-NEXT: (br $loop)
+ ;; CHECK_0-NEXT: )
+ ;; CHECK_0-NEXT: )
+ ;; CHECK_1: (func $pointless-work (type $1) (result i32)
+ ;; CHECK_1-NEXT: (local $0 i32)
+ ;; CHECK_1-NEXT: (loop $loop (result i32)
+ ;; CHECK_1-NEXT: (br_if $loop
+ ;; CHECK_1-NEXT: (i32.lt_u
+ ;; CHECK_1-NEXT: (local.tee $0
+ ;; CHECK_1-NEXT: (i32.add
+ ;; CHECK_1-NEXT: (local.get $0)
+ ;; CHECK_1-NEXT: (i32.const 1)
+ ;; CHECK_1-NEXT: )
+ ;; CHECK_1-NEXT: )
+ ;; CHECK_1-NEXT: (i32.const 12345678)
+ ;; CHECK_1-NEXT: )
+ ;; CHECK_1-NEXT: )
+ ;; CHECK_1-NEXT: (local.get $0)
+ ;; CHECK_1-NEXT: )
+ ;; CHECK_1-NEXT: )
+ ;; CHECK_3: (func $pointless-work (type $1) (; has Stack IR ;) (result i32)
+ ;; CHECK_3-NEXT: (local $0 i32)
+ ;; CHECK_3-NEXT: (loop $loop (result i32)
+ ;; CHECK_3-NEXT: (br_if $loop
+ ;; CHECK_3-NEXT: (i32.lt_u
+ ;; CHECK_3-NEXT: (local.tee $0
+ ;; CHECK_3-NEXT: (i32.add
+ ;; CHECK_3-NEXT: (local.get $0)
+ ;; CHECK_3-NEXT: (i32.const 1)
+ ;; CHECK_3-NEXT: )
+ ;; CHECK_3-NEXT: )
+ ;; CHECK_3-NEXT: (i32.const 12345678)
+ ;; CHECK_3-NEXT: )
+ ;; CHECK_3-NEXT: )
+ ;; CHECK_3-NEXT: (local.get $0)
+ ;; CHECK_3-NEXT: )
+ ;; CHECK_3-NEXT: )
+ ;; CHECK_s: (func $pointless-work (type $1) (; has Stack IR ;) (result i32)
+ ;; CHECK_s-NEXT: (local $0 i32)
+ ;; CHECK_s-NEXT: (loop $loop (result i32)
+ ;; CHECK_s-NEXT: (br_if $loop
+ ;; CHECK_s-NEXT: (i32.lt_u
+ ;; CHECK_s-NEXT: (local.tee $0
+ ;; CHECK_s-NEXT: (i32.add
+ ;; CHECK_s-NEXT: (local.get $0)
+ ;; CHECK_s-NEXT: (i32.const 1)
+ ;; CHECK_s-NEXT: )
+ ;; CHECK_s-NEXT: )
+ ;; CHECK_s-NEXT: (i32.const 12345678)
+ ;; CHECK_s-NEXT: )
+ ;; CHECK_s-NEXT: )
+ ;; CHECK_s-NEXT: (local.get $0)
+ ;; CHECK_s-NEXT: )
+ ;; CHECK_s-NEXT: )
+ ;; CHECK_O: (func $pointless-work (type $1) (; has Stack IR ;) (result i32)
+ ;; CHECK_O-NEXT: (local $0 i32)
+ ;; CHECK_O-NEXT: (loop $loop (result i32)
+ ;; CHECK_O-NEXT: (br_if $loop
+ ;; CHECK_O-NEXT: (i32.lt_u
+ ;; CHECK_O-NEXT: (local.tee $0
+ ;; CHECK_O-NEXT: (i32.add
+ ;; CHECK_O-NEXT: (local.get $0)
+ ;; CHECK_O-NEXT: (i32.const 1)
+ ;; CHECK_O-NEXT: )
+ ;; CHECK_O-NEXT: )
+ ;; CHECK_O-NEXT: (i32.const 12345678)
+ ;; CHECK_O-NEXT: )
+ ;; CHECK_O-NEXT: )
+ ;; CHECK_O-NEXT: (local.get $0)
+ ;; CHECK_O-NEXT: )
+ ;; CHECK_O-NEXT: )
+ (func $pointless-work (result i32)
+ (local $x i32)
+ ;; Some pointless work, with no side effects, that cannot be inlined. (The
+ ;; changes here are not important for this test.)
+ (loop $loop
+ (local.set $x
+ (i32.add
+ (local.get $x)
+ (i32.const 1)
+ )
+ )
+ (if
+ (i32.ge_u
+ (local.get $x)
+ (i32.const 12345678)
+ )
+ (then
+ (return
+ (local.get $x)
+ )
+ )
+ )
+ (br $loop)
+ )
+ )
+)