summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlon Zakai <azakai@google.com>2019-06-15 12:04:16 -0700
committerGitHub <noreply@github.com>2019-06-15 12:04:16 -0700
commit1cd34c211dffa66fa2f2e45f3f9291e8ad836e07 (patch)
tree74fc2c7c15872d2c23d8b7eed7865486069549ce /src
parent22ba24118ef04720e6c7605dbaf90b22cdba006f (diff)
downloadbinaryen-1cd34c211dffa66fa2f2e45f3f9291e8ad836e07.tar.gz
binaryen-1cd34c211dffa66fa2f2e45f3f9291e8ad836e07.tar.bz2
binaryen-1cd34c211dffa66fa2f2e45f3f9291e8ad836e07.zip
Bysyncify: async transform for wasm (#2172)
This adds a new pass, Bysyncify, which transforms code to allow unwind and rewinding the call stack and local state. This allows things like coroutines, turning synchronous code asynchronous, etc. The new pass file itself has a large comment on top with docs. So far the tests here seem to show this works, but this hasn't been tested heavily yet. My next step is to hook this up to emscripten as a replacement for asyncify/emterpreter, see emscripten-core/emscripten#8561 Note that this is completely usable by itself, so it could be useful for any language that needs coroutines etc., and not just ones using LLVM and/or emscripten. See docs on the ABI in the pass source.
Diffstat (limited to 'src')
-rw-r--r--src/ir/effects.h4
-rw-r--r--src/ir/module-utils.h57
-rw-r--r--src/pass.h7
-rw-r--r--src/passes/Bysyncify.cpp909
-rw-r--r--src/passes/CMakeLists.txt1
-rw-r--r--src/passes/Inlining.cpp2
-rw-r--r--src/passes/pass.cpp3
-rw-r--r--src/passes/passes.h1
-rw-r--r--src/wasm2js.h2
9 files changed, 981 insertions, 5 deletions
diff --git a/src/ir/effects.h b/src/ir/effects.h
index 7c64beafd..b7c420100 100644
--- a/src/ir/effects.h
+++ b/src/ir/effects.h
@@ -27,7 +27,7 @@ namespace wasm {
struct EffectAnalyzer
: public PostWalker<EffectAnalyzer, OverriddenVisitor<EffectAnalyzer>> {
- EffectAnalyzer(PassOptions& passOptions, Expression* ast = nullptr) {
+ EffectAnalyzer(const PassOptions& passOptions, Expression* ast = nullptr) {
ignoreImplicitTraps = passOptions.ignoreImplicitTraps;
debugInfo = passOptions.debugInfo;
if (ast) {
@@ -372,7 +372,7 @@ struct EffectAnalyzer
// Helpers
static bool
- canReorder(PassOptions& passOptions, Expression* a, Expression* b) {
+ canReorder(const PassOptions& passOptions, Expression* a, Expression* b) {
EffectAnalyzer aEffects(passOptions, a);
EffectAnalyzer bEffects(passOptions, b);
return !aEffects.invalidates(bEffects);
diff --git a/src/ir/module-utils.h b/src/ir/module-utils.h
index c13e9f1ca..88298fd43 100644
--- a/src/ir/module-utils.h
+++ b/src/ir/module-utils.h
@@ -19,6 +19,7 @@
#include "ir/find_all.h"
#include "ir/manipulation.h"
+#include "pass.h"
#include "wasm.h"
namespace wasm {
@@ -287,6 +288,62 @@ template<typename T> inline void iterDefinedEvents(Module& wasm, T visitor) {
}
}
+// Performs a parallel map on function in the module, emitting a map object
+// of function => result.
+// TODO: use in inlining and elsewhere
+template<typename T> struct ParallelFunctionMap {
+
+ typedef std::map<Function*, T> Map;
+ Map map;
+
+ typedef std::function<void(Function*, T&)> Func;
+
+ struct Info {
+ Map* map;
+ Func work;
+ };
+
+ ParallelFunctionMap(Module& wasm, Func work) {
+ // Fill in map, as we operate on it in parallel (each function to its own
+ // entry).
+ for (auto& func : wasm.functions) {
+ map[func.get()];
+ }
+
+ // Run on the imports first. TODO: parallelize this too
+ for (auto& func : wasm.functions) {
+ if (func->imported()) {
+ work(func.get(), map[func.get()]);
+ }
+ }
+
+ // Run on the implemented functions.
+ struct Mapper : public WalkerPass<PostWalker<Mapper>> {
+
+ bool isFunctionParallel() override { return true; }
+
+ Mapper(Info* info) : info(info) {}
+
+ Mapper* create() override { return new Mapper(info); }
+
+ void doWalkFunction(Function* curr) {
+ assert((*info->map).count(curr));
+ info->work(curr, (*info->map)[curr]);
+ }
+
+ private:
+ Info* info;
+ };
+
+ Info info = {&map, work};
+
+ PassRunner runner(&wasm);
+ runner.setIsNested(true);
+ runner.add<Mapper>(&info);
+ runner.run();
+ }
+};
+
} // namespace ModuleUtils
} // namespace wasm
diff --git a/src/pass.h b/src/pass.h
index 9f1be9a3b..0a9cc251f 100644
--- a/src/pass.h
+++ b/src/pass.h
@@ -127,6 +127,13 @@ struct PassOptions {
}
return arguments[key];
}
+
+ std::string getArgumentOrDefault(std::string key, std::string default_) {
+ if (arguments.count(key) == 0) {
+ return default_;
+ }
+ return arguments[key];
+ }
};
//
diff --git a/src/passes/Bysyncify.cpp b/src/passes/Bysyncify.cpp
new file mode 100644
index 000000000..ffbf6db1c
--- /dev/null
+++ b/src/passes/Bysyncify.cpp
@@ -0,0 +1,909 @@
+/*
+ * Copyright 2019 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Bysyncify: async/await style code transformation, that allows pausing
+// and resuming. This lets a language support synchronous operations in
+// an async manner, for example, you can do a blocking wait, and that will
+// be turned into code that unwinds the stack at the "blocking" operation,
+// then is able to rewind it back up when the actual async operation
+// comples, the so the code appears to have been running synchronously
+// all the while. Use cases for this include coroutines, python generators,
+// etc.
+//
+// The approach here is a third-generation design after Emscripten's original
+// Asyncify and then Emterpreter-Async approaches:
+//
+// * Asyncify rewrote control flow in LLVM IR. A problem is that this needs
+// to save all SSA registers as part of the local state, which can be
+// very costly. A further increase can happen because of phis that are
+// added because of control flow transformations. As a result we saw
+// pathological cases where the code size increase was unacceptable.
+// * Emterpreter-Async avoids any risk of code size increase by running it
+// in a little bytecode VM, which also makes pausing/resuming trivial.
+// Speed is the main downside here; however, the approach did prove that by
+// *selectively* emterpretifying only certain code, many projects can run
+// at full speed - often just the "main loop" must be emterpreted, while
+// high-speed code that it calls, and in which cannot be an async operation,
+// remain at full speed.
+//
+// Bysyncify's design learns from both of those:
+//
+// * The code rewrite is done in Binaryen, that is, at the wasm level. At
+// this level we will only need to save wasm locals, which is a much smaller
+// number than LLVM's SSA registers.
+// * We aim for low but non-zero overhead. Low overhead is very important
+// for obvious reasons, while Emterpreter-Async proved it is tolerable to
+// have *some* overhead, if the transform can be applied selectively.
+//
+// The specific transform implemented here is simpler than Asyncify but should
+// still have low overhead when properly optimized. Asyncify worked at the CFG
+// level and added branches there; Bysyncify on the other hand works on the
+// structured control flow of wasm and simply "skips over" code when rewinding
+// the stack, and jumps out when unwinding. The transformed code looks
+// conceptually like this:
+//
+// void foo(int x) {
+// // new prelude
+// if (rewinding) {
+// loadLocals();
+// }
+// // main body starts here
+// if (!rewinding) {
+// // some code we must skip while rewinding
+// x = x + 1;
+// x = x / 2;
+// }
+// // If rewinding, do this call only if it is the right one.
+// if (!rewinding or nextCall == 0) {
+// bar(x);
+// if (unwinding) {
+// noteUnWound(0);
+// saveLocals();
+// return;
+// }
+// }
+// if (!rewinding) {
+// // more code we must skip while rewinding
+// while (x & 7) {
+// x = x + 1;
+// }
+// }
+// [..]
+//
+// The general idea is that while rewinding we just "keep going forward",
+// skipping code we should not run. That is, instead of jumping directly
+// to the right place, we have ifs that skip along instead. The ifs for
+// rewinding and unwinding should be well-predicted, so the overhead should
+// not be very high. However, we do need to reach the right location via
+// such skipping, which means that in a very large function the rewind
+// takes some extra time. On the other hand, though, code that cannot
+// unwind or rewind (like that loop near the end) can run at full speed.
+// Overall, this should allow good performance with small overhead that is
+// mostly noticed at rewind time.
+//
+// After this pass is run a new i32 global "__bysyncify_state" is added, which
+// has the following values:
+//
+// 0: normal execution
+// 1: unwinding the stack
+// 2: rewinding the stack
+//
+// There is also "__bysyncify_data" which when rewinding and unwinding
+// contains a pointer to a data structure with the info needed to rewind
+// and unwind:
+//
+// { // offsets
+// i32 - current bysyncify stack location // 0
+// i32 - bysyncify stack end // 4
+// }
+//
+// The bysyncify stack is a representation of the call frame, as a list of
+// indexes of calls. In the example above, we saw index "0" for calling "bar"
+// from "foo". When unwinding, the indexes are added to the stack; when
+// rewinding, they are popped off; the current bysyncify stack location is
+// undated while doing both operations. The bysyncify stack is also used to
+// save locals. Note that the stack end location is provided, which is for
+// error detection.
+//
+// Note: all pointers are assumed to be 4-byte aligned.
+//
+// When you start an unwinding operation, you must set the initial fields
+// of the data structure, that is, set the current stack location to the
+// proper place, and the end to the proper end based on how much memory
+// you reserved. Note that bysyncify will grow the stack up.
+//
+// The pass will also create three functions that let you control unwinding
+// and rewinding:
+//
+// * bysyncify_start_unwind(data : i32): call this to start unwinding the
+// stack from the current location. "data" must point to a data
+// structure as described above (with fields containing valid data).
+//
+// * bysyncify_start_rewind(data : i32): call this to start rewinding the
+// stack vack up to the location stored in the provided data. This prepares
+// for the rewind; to start it, you must call the first function in the
+// call stack to be unwound.
+//
+// * bysyncify_stop_rewind(): call this to note that rewinding has
+// concluded, and normal execution can resume.
+//
+// These three functions are exported so that you can call them from the
+// outside. If you want to manage things from inside the wasm, then you
+// couldn't have called them before they were created by this pass. To work
+// around that, you can create imports to bysyncify.start_unwind,
+// bysyncify.start_rewind, and bysyncify.stop_rewind; if those exist when
+// this pass runs then it will turn those into direct calls to the functions
+// that it creates.
+//
+// To use this API, call bysyncify_start_unwind when you want to. The call
+// stack will then be unwound, and so execution will resume in the JS or
+// other host environment on the outside that called into wasm. Then when
+// you want to rewind, call bysyncify_start_rewind, and then call the same
+// initial function you called before, so that unwinding can begin. The
+// unwinding will reach the same function from which you started, since
+// we are recreating the call stack. At that point you should call
+// bysyncify_stop_rewind and then execution can resume normally.
+//
+// By default this pass assumes that any import may call any of the
+// exported bysyncify methods, that is, any import may start an unwind/rewind.
+// To customize this, you can provide an argument to wasm-opt (or another
+// tool that can run this pass),
+//
+// --pass-arg=bysyncify@module1.base1,module2.base2,module3.base3
+//
+// Each module.base in that comma-separated list will be considered to
+// be an import that can unwind/rewind, and all others are assumed not to
+// (aside from the bysyncify.* imports, which are always assumed to). To
+// say that no import (aside from bysyncify.*) can do so (that is, the
+// opposite of the default behavior), you can simply provide an import
+// that doesn't exist (say,
+// --pass-arg=bysyncify@no.imports
+//
+
+#include "ir/effects.h"
+#include "ir/literal-utils.h"
+#include "ir/module-utils.h"
+#include "ir/utils.h"
+#include "pass.h"
+#include "support/unique_deferring_queue.h"
+#include "wasm-builder.h"
+#include "wasm.h"
+
+namespace wasm {
+
+namespace {
+
+static const Name BYSYNCIFY_STATE = "__bysyncify_state";
+static const Name BYSYNCIFY_DATA = "__bysyncify_data";
+static const Name BYSYNCIFY_START_UNWIND = "bysyncify_start_unwind";
+static const Name BYSYNCIFY_START_REWIND = "bysyncify_start_rewind";
+static const Name BYSYNCIFY_STOP_REWIND = "bysyncify_stop_rewind";
+static const Name BYSYNCIFY_UNWIND = "__bysyncify_unwind";
+static const Name BYSYNCIFY = "bysyncify";
+static const Name START_UNWIND = "start_unwind";
+static const Name START_REWIND = "start_rewind";
+static const Name STOP_REWIND = "stop_rewind";
+static const Name BYSYNCIFY_GET_CALL_INDEX = "__bysyncify_get_call_index";
+static const Name BYSYNCIFY_CHECK_CALL_INDEX = "__bysyncify_check_call_index";
+
+// TODO: having just normal/unwind_or_rewind would decrease code
+// size, but make debugging harder
+enum class State { Normal = 0, Unwinding = 1, Rewinding = 2 };
+
+enum class DataOffset { BStackPos = 0, BStackEnd = 4 };
+
+const auto STACK_ALIGN = 4;
+
+// Analyze the entire module to see which calls may change the state, that
+// is, start an unwind or rewind), either in itself or in something called
+// by it.
+class ModuleAnalyzer {
+ Module& module;
+
+ struct Info {
+ bool canChangeState = false;
+ std::set<Function*> callsTo;
+ std::set<Function*> calledBy;
+ };
+
+ typedef std::map<Function*, Info> Map;
+ Map map;
+
+public:
+ ModuleAnalyzer(Module& module,
+ std::function<bool(Name, Name)> canImportChangeState)
+ : module(module) {
+ // Scan to see which functions can directly change the state.
+ // Also handle the bysyncify imports, removing them (as we will implement
+ // them later), and replace calls to them with calls to the later proper
+ // name.
+ ModuleUtils::ParallelFunctionMap<Info> scanner(
+ module, [&](Function* func, Info& info) {
+ if (func->imported()) {
+ // The bysyncify imports to start an unwind or rewind can definitely
+ // change the state.
+ if (func->module == BYSYNCIFY &&
+ (func->base == START_UNWIND || func->base == START_REWIND)) {
+ info.canChangeState = true;
+ } else {
+ info.canChangeState =
+ canImportChangeState(func->module, func->base);
+ }
+ return;
+ }
+ struct Walker : PostWalker<Walker> {
+ void visitCall(Call* curr) {
+ auto* target = module->getFunction(curr->target);
+ if (target->imported() && target->module == BYSYNCIFY) {
+ // Redirect the imports to the functions we'll add later.
+ if (target->base == START_UNWIND) {
+ curr->target = BYSYNCIFY_START_UNWIND;
+ } else if (target->base == START_REWIND) {
+ curr->target = BYSYNCIFY_START_REWIND;
+ } else if (target->base == STOP_REWIND) {
+ curr->target = BYSYNCIFY_STOP_REWIND;
+ // TODO: in theory, this does not change the state
+ } else {
+ Fatal() << "call to unidenfied bysyncify import: "
+ << target->base;
+ }
+ info->canChangeState = true;
+ return;
+ }
+ info->callsTo.insert(target);
+ }
+ void visitCallIndirect(CallIndirect* curr) {
+ // TODO optimize
+ info->canChangeState = true;
+ }
+ Info* info;
+ Module* module;
+ };
+ Walker walker;
+ walker.info = &info;
+ walker.module = &module;
+ walker.walk(func->body);
+ });
+ map.swap(scanner.map);
+
+ // Remove the bysyncify imports, if any.
+ for (auto& pair : map) {
+ auto* func = pair.first;
+ if (func->imported() && func->module == BYSYNCIFY) {
+ module.removeFunction(func->name);
+ }
+ }
+
+ // Find what is called by what.
+ for (auto& pair : map) {
+ auto* func = pair.first;
+ auto& info = pair.second;
+ for (auto* target : info.callsTo) {
+ map[target].calledBy.insert(func);
+ }
+ }
+
+ // Close over, finding all functions that can reach something that can
+ // change the state.
+ // The work queue contains an item we just learned can change the state.
+ UniqueDeferredQueue<Function*> work;
+ for (auto& func : module.functions) {
+ if (map[func.get()].canChangeState) {
+ work.push(func.get());
+ }
+ }
+ while (!work.empty()) {
+ auto* func = work.pop();
+ for (auto* caller : map[func].calledBy) {
+ if (!map[caller].canChangeState) {
+ map[caller].canChangeState = true;
+ work.push(caller);
+ }
+ }
+ }
+ }
+
+ bool canChangeState(Function* func) { return map[func].canChangeState; }
+
+ bool canChangeState(Expression* curr) {
+ // Look inside to see if we call any of the things we know can change the
+ // state.
+ // TODO: caching, this is O(N^2)
+ struct Walker : PostWalker<Walker> {
+ void visitCall(Call* curr) {
+ // We only implement these at the very end, but we know that they
+ // definitely change the state.
+ if (curr->target == BYSYNCIFY_START_UNWIND ||
+ curr->target == BYSYNCIFY_START_REWIND ||
+ curr->target == BYSYNCIFY_GET_CALL_INDEX ||
+ curr->target == BYSYNCIFY_CHECK_CALL_INDEX) {
+ canChangeState = true;
+ return;
+ }
+ // The target may not exist if it is one of our temporary intrinsics.
+ auto* target = module->getFunctionOrNull(curr->target);
+ if (target && (*map)[target].canChangeState) {
+ canChangeState = true;
+ }
+ }
+ void visitCallIndirect(CallIndirect* curr) {
+ // TODO optimize
+ canChangeState = true;
+ }
+ Module* module;
+ Map* map;
+ bool canChangeState = false;
+ };
+ Walker walker;
+ walker.module = &module;
+ walker.map = &map;
+ walker.walk(curr);
+ return walker.canChangeState;
+ }
+};
+
+// Checks if something performs a call: either a direct or indirect call,
+// and perhaps it is dropped or assigned to a local. This captures all the
+// cases of a call in flat IR.
+static bool doesCall(Expression* curr) {
+ if (auto* set = curr->dynCast<LocalSet>()) {
+ curr = set->value;
+ } else if (auto* drop = curr->dynCast<Drop>()) {
+ curr = drop->value;
+ }
+ return curr->is<Call>() || curr->is<CallIndirect>();
+}
+
+class BysyncifyBuilder : public Builder {
+public:
+ BysyncifyBuilder(Module& wasm) : Builder(wasm) {}
+
+ Expression* makeGetStackPos() {
+ return makeLoad(4,
+ false,
+ int32_t(DataOffset::BStackPos),
+ 4,
+ makeGlobalGet(BYSYNCIFY_DATA, i32),
+ i32);
+ }
+
+ Expression* makeIncStackPos(int32_t by) {
+ if (by == 0) {
+ return makeNop();
+ }
+ return makeStore(
+ 4,
+ int32_t(DataOffset::BStackPos),
+ 4,
+ makeGlobalGet(BYSYNCIFY_DATA, i32),
+ makeBinary(AddInt32, makeGetStackPos(), makeConst(Literal(by))),
+ i32);
+ }
+
+ Expression* makeStateCheck(State value) {
+ return makeBinary(EqInt32,
+ makeGlobalGet(BYSYNCIFY_STATE, i32),
+ makeConst(Literal(int32_t(value))));
+ }
+};
+
+// Instrument control flow, around calls and adding skips for rewinding.
+struct BysyncifyFlow : public Pass {
+ bool isFunctionParallel() override { return true; }
+
+ ModuleAnalyzer* analyzer;
+
+ BysyncifyFlow* create() override { return new BysyncifyFlow(analyzer); }
+
+ BysyncifyFlow(ModuleAnalyzer* analyzer) : analyzer(analyzer) {}
+
+ void
+ runOnFunction(PassRunner* runner, Module* module_, Function* func) override {
+ module = module_;
+ // If the function cannot change our state, we have nothing to do -
+ // we will never unwind or rewind the stack here.
+ if (!analyzer->canChangeState(func)) {
+ return;
+ }
+ // Rewrite the function body.
+ builder = make_unique<BysyncifyBuilder>(*module);
+ // Each function we enter will pop one from the stack, which is the index
+ // of the next call to make.
+ auto* block = builder->makeBlock(
+ {builder->makeIf(builder->makeStateCheck(
+ State::Rewinding), // TODO: such checks can be !normal
+ makeCallIndexPop()),
+ process(func->body)});
+ if (func->result != none) {
+ // Rewriting control flow may alter things; make sure the function ends in
+ // something valid (which the optimizer can remove later).
+ block->list.push_back(builder->makeUnreachable());
+ }
+ block->finalize();
+ func->body = block;
+ // Making things like returns conditional may alter types.
+ ReFinalize().walkFunctionInModule(func, module);
+ }
+
+private:
+ std::unique_ptr<BysyncifyBuilder> builder;
+
+ Module* module;
+
+ // Each call in the function has an index, noted during unwind and checked
+ // during rewind.
+ Index callIndex = 0;
+
+ Expression* process(Expression* curr) {
+ if (!analyzer->canChangeState(curr)) {
+ return makeMaybeSkip(curr);
+ }
+ // The IR is in flat form, which makes this much simpler. We basically
+ // need to add skips to avoid code when rewinding, and checks around calls
+ // with unwinding/rewinding support.
+ //
+ // Aside from that, we must "linearize" all control flow so that we can
+ // reach the right part when unwinding. For example, for an if we do this:
+ //
+ // if (condition()) {
+ // side1();
+ // } else {
+ // side2();
+ // }
+ // =>
+ // if (!rewinding) {
+ // temp = condition();
+ // }
+ // if (rewinding || temp) {
+ // side1();
+ // }
+ // if (rewinding || !temp) {
+ // side2();
+ // }
+ //
+ // This way we will linearly get through all the code in the function,
+ // if we are rewinding. In a similar way we skip over breaks, etc.; just
+ // "keep on truckin'".
+ //
+ // Note that temp locals added in this way are just normal locals, and in
+ // particular they are saved and loaded. That way if we resume from the
+ // first if arm we will avoid the second.
+ if (auto* block = curr->dynCast<Block>()) {
+ // At least one of our children may change the state. Clump them as
+ // necessary.
+ Index i = 0;
+ auto& list = block->list;
+ while (i < list.size()) {
+ if (analyzer->canChangeState(list[i])) {
+ list[i] = process(list[i]);
+ i++;
+ } else {
+ Index end = i + 1;
+ while (end < list.size() && !analyzer->canChangeState(list[end])) {
+ end++;
+ }
+ // We have a range of [i, end) in which the state cannot change,
+ // so all we need to do is skip it if rewinding.
+ if (end == i + 1) {
+ list[i] = makeMaybeSkip(list[i]);
+ } else {
+ auto* block = builder->makeBlock();
+ for (auto j = i; j < end; j++) {
+ block->list.push_back(list[j]);
+ }
+ block->finalize();
+ list[i] = makeMaybeSkip(block);
+ for (auto j = i + 1; j < end; j++) {
+ list[j] = builder->makeNop();
+ }
+ }
+ i = end;
+ }
+ }
+ return block;
+ } else if (auto* iff = curr->dynCast<If>()) {
+ // The state change cannot be in the condition due to flat form, so it
+ // must be in one of the children.
+ assert(!analyzer->canChangeState(iff->condition));
+ // We must linearize this, which means we pass through both arms if we
+ // are rewinding. This is pretty simple as in flat form the if condition
+ // is either a const or a get, so easily copyable.
+ // Start with the first arm, for which we reuse the original if.
+ auto* otherArm = iff->ifFalse;
+ iff->ifFalse = nullptr;
+ auto* originalCondition = iff->condition;
+ iff->condition = builder->makeBinary(
+ OrInt32, originalCondition, builder->makeStateCheck(State::Rewinding));
+ iff->ifTrue = process(iff->ifTrue);
+ iff->finalize();
+ if (!otherArm) {
+ return iff;
+ }
+ // Add support for the second arm as well.
+ auto* otherIf = builder->makeIf(
+ builder->makeBinary(
+ OrInt32,
+ builder->makeUnary(
+ EqZInt32, ExpressionManipulator::copy(originalCondition, *module)),
+ builder->makeStateCheck(State::Rewinding)),
+ process(otherArm));
+ otherIf->finalize();
+ return builder->makeSequence(iff, otherIf);
+ } else if (auto* loop = curr->dynCast<Loop>()) {
+ loop->body = process(loop->body);
+ return loop;
+ } else if (doesCall(curr)) {
+ return makeCallSupport(curr);
+ }
+ // We must handle all control flow above, and all things that can change
+ // the state, so there should be nothing that can reach here - add it
+ // earlier as necessary.
+ WASM_UNREACHABLE();
+ }
+
+ // Possibly skip some code, if rewinding.
+ Expression* makeMaybeSkip(Expression* curr) {
+ return builder->makeIf(builder->makeStateCheck(State::Normal), curr);
+ }
+
+ Expression* makeCallSupport(Expression* curr) {
+ // TODO: stop doing this after code can no longer reach a call that may
+ // change the state
+ assert(doesCall(curr));
+ assert(curr->type == none);
+ auto index = callIndex++;
+ // Execute the call, if either normal execution, or if rewinding and this
+ // is the right call to go into.
+ // TODO: we can read the next call index once in each function (but should
+ // avoid saving/restoring that local later)
+ return builder->makeIf(
+ builder->makeIf(builder->makeStateCheck(State::Normal),
+ builder->makeConst(Literal(int32_t(1))),
+ makeCallIndexPeek(index)),
+ builder->makeSequence(curr, makePossibleUnwind(index)));
+ }
+
+ Expression* makePossibleUnwind(Index index) {
+ // In this pass we emit an "unwind" as a call to a fake intrinsic. We
+ // will implement it in the later pass. (We can't create the unwind block
+ // target here, as the optimizer would remove it later; we can only add
+ // it when we add its contents, later.)
+ return builder->makeIf(
+ builder->makeStateCheck(State::Unwinding),
+ builder->makeCall(
+ BYSYNCIFY_UNWIND, {builder->makeConst(Literal(int32_t(index)))}, none));
+ }
+
+ Expression* makeCallIndexPeek(Index index) {
+ // Emit an intrinsic for this, as we store the index into a local, and
+ // don't want it to be seen by bysyncify itself.
+ return builder->makeCall(BYSYNCIFY_CHECK_CALL_INDEX,
+ {builder->makeConst(Literal(int32_t(index)))},
+ i32);
+ }
+
+ Expression* makeCallIndexPop() {
+ // Emit an intrinsic for this, as we store the index into a local, and
+ // don't want it to be seen by bysyncify itself.
+ return builder->makeCall(BYSYNCIFY_GET_CALL_INDEX, {}, none);
+ }
+};
+
+// Instrument local saving/restoring.
+struct BysyncifyLocals : public WalkerPass<PostWalker<BysyncifyLocals>> {
+ bool isFunctionParallel() override { return true; }
+
+ ModuleAnalyzer* analyzer;
+
+ BysyncifyLocals* create() override { return new BysyncifyLocals(analyzer); }
+
+ BysyncifyLocals(ModuleAnalyzer* analyzer) : analyzer(analyzer) {}
+
+ void visitCall(Call* curr) {
+ // Replace calls to the fake intrinsics.
+ if (curr->target == BYSYNCIFY_UNWIND) {
+ replaceCurrent(builder->makeBreak(BYSYNCIFY_UNWIND, curr->operands[0]));
+ } else if (curr->target == BYSYNCIFY_GET_CALL_INDEX) {
+ replaceCurrent(builder->makeSequence(
+ builder->makeIncStackPos(-4),
+ builder->makeLocalSet(
+ rewindIndex,
+ builder->makeLoad(4, false, 0, 4, builder->makeGetStackPos(), i32))));
+ } else if (curr->target == BYSYNCIFY_CHECK_CALL_INDEX) {
+ replaceCurrent(builder->makeBinary(
+ EqInt32,
+ builder->makeLocalGet(rewindIndex, i32),
+ builder->makeConst(
+ Literal(int32_t(curr->operands[0]->cast<Const>()->value.geti32())))));
+ }
+ }
+
+ void doWalkFunction(Function* func) {
+ // If the function cannot change our state, we have nothing to do -
+ // we will never unwind or rewind the stack here.
+ if (!analyzer->canChangeState(func->body)) {
+ return;
+ }
+ // Note the locals we want to preserve, before we add any more helpers.
+ numPreservableLocals = func->getNumLocals();
+ // The new function body has a prelude to load locals if rewinding,
+ // then the actual main body, which does all its unwindings by breaking
+ // to the unwind block, which then handles pushing the call index, as
+ // well as saving the locals.
+ // An index is needed for getting the unwinding and rewinding call indexes
+ // around TODO: can this be the same index?
+ auto unwindIndex = builder->addVar(func, i32);
+ rewindIndex = builder->addVar(func, i32);
+ // Rewrite the function body.
+ builder = make_unique<BysyncifyBuilder>(*getModule());
+ walk(func->body);
+ // After the normal function body, emit a barrier before the postamble.
+ Expression* barrier;
+ if (func->result == none) {
+ // The function may have ended without a return; ensure one.
+ barrier = builder->makeReturn();
+ } else {
+ // The function must have returned or hit an unreachable, but emit one
+ // to make possible bugs easier to figure out (as this should never be
+ // reached). The optimizer can remove this anyhow.
+ barrier = builder->makeUnreachable();
+ }
+ auto* newBody = builder->makeBlock(
+ {builder->makeIf(builder->makeStateCheck(State::Rewinding),
+ makeLocalLoading()),
+ builder->makeLocalSet(
+ unwindIndex,
+ builder->makeBlock(BYSYNCIFY_UNWIND,
+ builder->makeSequence(func->body, barrier))),
+ makeCallIndexPush(unwindIndex),
+ makeLocalSaving()});
+ if (func->result != none) {
+ // If we unwind, we must still "return" a value, even if it will be
+ // ignored on the outside.
+ newBody->list.push_back(
+ LiteralUtils::makeZero(func->result, *getModule()));
+ newBody->finalize(func->result);
+ }
+ func->body = newBody;
+ // Making things like returns conditional may alter types.
+ ReFinalize().walkFunctionInModule(func, getModule());
+ }
+
+private:
+ std::unique_ptr<BysyncifyBuilder> builder;
+
+ Index rewindIndex;
+ Index numPreservableLocals;
+
+ Expression* makeLocalLoading() {
+ if (numPreservableLocals == 0) {
+ return builder->makeNop();
+ }
+ auto* func = getFunction();
+ Index total = 0;
+ for (Index i = 0; i < numPreservableLocals; i++) {
+ auto type = func->getLocalType(i);
+ auto size = getTypeSize(type);
+ total += size;
+ }
+ auto* block = builder->makeBlock();
+ block->list.push_back(builder->makeIncStackPos(-total));
+ auto tempIndex = builder->addVar(func, i32);
+ block->list.push_back(
+ builder->makeLocalSet(tempIndex, builder->makeGetStackPos()));
+ Index offset = 0;
+ for (Index i = 0; i < numPreservableLocals; i++) {
+ auto type = func->getLocalType(i);
+ auto size = getTypeSize(type);
+ assert(size % STACK_ALIGN == 0);
+ // TODO: higher alignment?
+ block->list.push_back(builder->makeLocalSet(
+ i,
+ builder->makeLoad(size,
+ true,
+ offset,
+ STACK_ALIGN,
+ builder->makeLocalGet(tempIndex, i32),
+ type)));
+ offset += size;
+ }
+ block->finalize();
+ return block;
+ }
+
+ Expression* makeLocalSaving() {
+ if (numPreservableLocals == 0) {
+ return builder->makeNop();
+ }
+ auto* func = getFunction();
+ auto* block = builder->makeBlock();
+ auto tempIndex = builder->addVar(func, i32);
+ block->list.push_back(
+ builder->makeLocalSet(tempIndex, builder->makeGetStackPos()));
+ Index offset = 0;
+ for (Index i = 0; i < numPreservableLocals; i++) {
+ auto type = func->getLocalType(i);
+ auto size = getTypeSize(type);
+ assert(size % STACK_ALIGN == 0);
+ // TODO: higher alignment?
+ block->list.push_back(
+ builder->makeStore(size,
+ offset,
+ STACK_ALIGN,
+ builder->makeLocalGet(tempIndex, i32),
+ builder->makeLocalGet(i, type),
+ type));
+ offset += size;
+ }
+ block->list.push_back(builder->makeIncStackPos(offset));
+ block->finalize();
+ return block;
+ }
+
+ Expression* makeCallIndexPush(Index tempIndex) {
+ // TODO: add a check against the stack end here
+ return builder->makeSequence(
+ builder->makeStore(4,
+ 0,
+ 4,
+ builder->makeGetStackPos(),
+ builder->makeLocalGet(tempIndex, i32),
+ i32),
+ builder->makeIncStackPos(4));
+ }
+};
+
+} // anonymous namespace
+
+struct Bysyncify : public Pass {
+ void run(PassRunner* runner, Module* module) override {
+ bool optimize = runner->options.optimizeLevel > 0;
+ // Find which imports can change the state.
+ const char* ALL_IMPORTS_CAN_CHANGE_STATE = "__bysyncify_all_imports";
+ auto stateChangingImports = runner->options.getArgumentOrDefault(
+ "bysyncify", ALL_IMPORTS_CAN_CHANGE_STATE);
+ bool allImportsCanChangeState =
+ stateChangingImports == ALL_IMPORTS_CAN_CHANGE_STATE;
+ std::string separator = ",";
+ stateChangingImports = separator + stateChangingImports + separator;
+
+ // Scan the module.
+ ModuleAnalyzer analyzer(*module, [&](Name module, Name base) {
+ if (allImportsCanChangeState) {
+ return true;
+ }
+ std::string full = separator + module.str + '.' + base.str + separator;
+ return stateChangingImports.find(full) != std::string::npos;
+ });
+
+ // Add necessary globals before we emit code to use them.
+ addGlobals(module);
+
+ // Instrument the flow of code, adding code instrumentation and
+ // skips for when rewinding. We do this on flat IR so that it is
+ // practical to add code around each call, without affecting
+ // anything else.
+ {
+ PassRunner runner(module);
+ runner.add("flatten");
+ // Dce is useful here, since BysyncifyFlow makes control flow conditional,
+ // which may make unreachable code look reachable. We also do some other
+ // minimal optimization here in an unconditional way here, to counteract
+ // the flattening.
+ runner.add("dce");
+ runner.add("simplify-locals-nonesting");
+ runner.add("merge-blocks");
+ runner.add("vacuum");
+ runner.add<BysyncifyFlow>(&analyzer);
+ runner.setIsNested(true);
+ runner.setValidateGlobally(false);
+ runner.run();
+ }
+ // Next, add local saving/restoring logic. We optimize before doing this,
+ // to undo the extra code generated by flattening, and to arrive at the
+ // minimal amount of locals (which is important as we must save and
+ // restore those locals). We also and optimize after as well to simplify
+ // the code as much as possible.
+ {
+ PassRunner runner(module);
+ if (optimize) {
+ runner.addDefaultFunctionOptimizationPasses();
+ }
+ runner.add<BysyncifyLocals>(&analyzer);
+ if (optimize) {
+ runner.addDefaultFunctionOptimizationPasses();
+ }
+ runner.setIsNested(true);
+ runner.setValidateGlobally(false);
+ runner.run();
+ }
+ // Finally, add function support (that should not have been seen by
+ // the previous passes).
+ addFunctions(module);
+ }
+
+private:
+ void addGlobals(Module* module) {
+ Builder builder(*module);
+ module->addGlobal(builder.makeGlobal(BYSYNCIFY_STATE,
+ i32,
+ builder.makeConst(Literal(int32_t(0))),
+ Builder::Mutable));
+ module->addGlobal(builder.makeGlobal(BYSYNCIFY_DATA,
+ i32,
+ builder.makeConst(Literal(int32_t(0))),
+ Builder::Mutable));
+ }
+
+ void addFunctions(Module* module) {
+ Builder builder(*module);
+ auto makeFunction = [&](Name name,
+ bool setData,
+ State state,
+ Expression* extra) {
+ std::vector<Type> params;
+ if (setData) {
+ params.push_back(i32);
+ }
+ auto* body = builder.makeBlock();
+ if (setData) {
+ // Verify the data is valid.
+ auto* stackPos = builder.makeLoad(4,
+ false,
+ int32_t(DataOffset::BStackPos),
+ 4,
+ builder.makeLocalGet(0, i32),
+ i32);
+ auto* stackEnd = builder.makeLoad(4,
+ false,
+ int32_t(DataOffset::BStackEnd),
+ 4,
+ builder.makeLocalGet(0, i32),
+ i32);
+ body->list.push_back(
+ builder.makeIf(builder.makeBinary(GtUInt32, stackPos, stackEnd),
+ builder.makeUnreachable()));
+ }
+ body->list.push_back(builder.makeGlobalSet(
+ BYSYNCIFY_STATE, builder.makeConst(Literal(int32_t(state)))));
+ if (extra) {
+ body->list.push_back(extra);
+ }
+ body->finalize();
+ auto* func = builder.makeFunction(
+ name, std::move(params), none, std::vector<Type>{}, body);
+ module->addFunction(func);
+ module->addExport(builder.makeExport(name, name, ExternalKind::Function));
+ };
+
+ makeFunction(
+ BYSYNCIFY_START_UNWIND,
+ true,
+ State::Unwinding,
+ builder.makeGlobalSet(BYSYNCIFY_DATA, builder.makeLocalGet(0, i32)));
+ makeFunction(
+ BYSYNCIFY_START_REWIND,
+ true,
+ State::Rewinding,
+ builder.makeGlobalSet(BYSYNCIFY_DATA, builder.makeLocalGet(0, i32)));
+ makeFunction(BYSYNCIFY_STOP_REWIND, false, State::Normal, nullptr);
+ }
+};
+
+Pass* createBysyncifyPass() { return new Bysyncify(); }
+
+} // namespace wasm
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index 9120328d3..6a993c125 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -7,6 +7,7 @@ SET(passes_SOURCES
pass.cpp
AlignmentLowering.cpp
AvoidReinterprets.cpp
+ Bysyncify.cpp
CoalesceLocals.cpp
CodePushing.cpp
CodeFolding.cpp
diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp
index 2aa800d62..1e5089b97 100644
--- a/src/passes/Inlining.cpp
+++ b/src/passes/Inlining.cpp
@@ -160,8 +160,6 @@ struct Planner : public WalkerPass<PostWalker<Planner>> {
}
}
- void doWalkFunction(Function* func) { walk(func->body); }
-
private:
InliningState* state;
};
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index c29bb1e1a..b46a03b5e 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -77,6 +77,9 @@ void PassRegistry::registerPasses() {
registerPass("avoid-reinterprets",
"Tries to avoid reinterpret operations via more loads",
createAvoidReinterpretsPass);
+ registerPass("bysyncify",
+ "async/await style transform, allowing pausing and resuming",
+ createBysyncifyPass);
registerPass(
"dae", "removes arguments to calls in an lto-like manner", createDAEPass);
registerPass("dae-optimizing",
diff --git a/src/passes/passes.h b/src/passes/passes.h
index f7c8fae77..84aabf0e8 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -24,6 +24,7 @@ class Pass;
// All passes:
Pass* createAlignmentLoweringPass();
Pass* createAvoidReinterpretsPass();
+Pass* createBysyncifyPass();
Pass* createCoalesceLocalsPass();
Pass* createCoalesceLocalsWithLearningPass();
Pass* createCodeFoldingPass();
diff --git a/src/wasm2js.h b/src/wasm2js.h
index ccd54b35f..a14aad2a4 100644
--- a/src/wasm2js.h
+++ b/src/wasm2js.h
@@ -311,7 +311,7 @@ Ref Wasm2JSBuilder::processWasm(Module* wasm, Name funcName) {
runner.add("avoid-reinterprets");
}
// Finally, get the code into the flat form we need for wasm2js itself, and
- // optimize that a little in a way that keeps flat property.
+ // optimize that a little in a way that keeps that property.
runner.add("flatten");
// Regardless of optimization level, run some simple optimizations to undo
// some of the effects of flattening.