diff options
Diffstat (limited to 'src/passes/Bysyncify.cpp')
-rw-r--r-- | src/passes/Bysyncify.cpp | 909 |
1 files changed, 909 insertions, 0 deletions
diff --git a/src/passes/Bysyncify.cpp b/src/passes/Bysyncify.cpp new file mode 100644 index 000000000..ffbf6db1c --- /dev/null +++ b/src/passes/Bysyncify.cpp @@ -0,0 +1,909 @@ +/* + * Copyright 2019 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Bysyncify: async/await style code transformation, that allows pausing +// and resuming. This lets a language support synchronous operations in +// an async manner, for example, you can do a blocking wait, and that will +// be turned into code that unwinds the stack at the "blocking" operation, +// then is able to rewind it back up when the actual async operation +// comples, the so the code appears to have been running synchronously +// all the while. Use cases for this include coroutines, python generators, +// etc. +// +// The approach here is a third-generation design after Emscripten's original +// Asyncify and then Emterpreter-Async approaches: +// +// * Asyncify rewrote control flow in LLVM IR. A problem is that this needs +// to save all SSA registers as part of the local state, which can be +// very costly. A further increase can happen because of phis that are +// added because of control flow transformations. As a result we saw +// pathological cases where the code size increase was unacceptable. +// * Emterpreter-Async avoids any risk of code size increase by running it +// in a little bytecode VM, which also makes pausing/resuming trivial. +// Speed is the main downside here; however, the approach did prove that by +// *selectively* emterpretifying only certain code, many projects can run +// at full speed - often just the "main loop" must be emterpreted, while +// high-speed code that it calls, and in which cannot be an async operation, +// remain at full speed. +// +// Bysyncify's design learns from both of those: +// +// * The code rewrite is done in Binaryen, that is, at the wasm level. At +// this level we will only need to save wasm locals, which is a much smaller +// number than LLVM's SSA registers. +// * We aim for low but non-zero overhead. Low overhead is very important +// for obvious reasons, while Emterpreter-Async proved it is tolerable to +// have *some* overhead, if the transform can be applied selectively. +// +// The specific transform implemented here is simpler than Asyncify but should +// still have low overhead when properly optimized. Asyncify worked at the CFG +// level and added branches there; Bysyncify on the other hand works on the +// structured control flow of wasm and simply "skips over" code when rewinding +// the stack, and jumps out when unwinding. The transformed code looks +// conceptually like this: +// +// void foo(int x) { +// // new prelude +// if (rewinding) { +// loadLocals(); +// } +// // main body starts here +// if (!rewinding) { +// // some code we must skip while rewinding +// x = x + 1; +// x = x / 2; +// } +// // If rewinding, do this call only if it is the right one. +// if (!rewinding or nextCall == 0) { +// bar(x); +// if (unwinding) { +// noteUnWound(0); +// saveLocals(); +// return; +// } +// } +// if (!rewinding) { +// // more code we must skip while rewinding +// while (x & 7) { +// x = x + 1; +// } +// } +// [..] +// +// The general idea is that while rewinding we just "keep going forward", +// skipping code we should not run. That is, instead of jumping directly +// to the right place, we have ifs that skip along instead. The ifs for +// rewinding and unwinding should be well-predicted, so the overhead should +// not be very high. However, we do need to reach the right location via +// such skipping, which means that in a very large function the rewind +// takes some extra time. On the other hand, though, code that cannot +// unwind or rewind (like that loop near the end) can run at full speed. +// Overall, this should allow good performance with small overhead that is +// mostly noticed at rewind time. +// +// After this pass is run a new i32 global "__bysyncify_state" is added, which +// has the following values: +// +// 0: normal execution +// 1: unwinding the stack +// 2: rewinding the stack +// +// There is also "__bysyncify_data" which when rewinding and unwinding +// contains a pointer to a data structure with the info needed to rewind +// and unwind: +// +// { // offsets +// i32 - current bysyncify stack location // 0 +// i32 - bysyncify stack end // 4 +// } +// +// The bysyncify stack is a representation of the call frame, as a list of +// indexes of calls. In the example above, we saw index "0" for calling "bar" +// from "foo". When unwinding, the indexes are added to the stack; when +// rewinding, they are popped off; the current bysyncify stack location is +// undated while doing both operations. The bysyncify stack is also used to +// save locals. Note that the stack end location is provided, which is for +// error detection. +// +// Note: all pointers are assumed to be 4-byte aligned. +// +// When you start an unwinding operation, you must set the initial fields +// of the data structure, that is, set the current stack location to the +// proper place, and the end to the proper end based on how much memory +// you reserved. Note that bysyncify will grow the stack up. +// +// The pass will also create three functions that let you control unwinding +// and rewinding: +// +// * bysyncify_start_unwind(data : i32): call this to start unwinding the +// stack from the current location. "data" must point to a data +// structure as described above (with fields containing valid data). +// +// * bysyncify_start_rewind(data : i32): call this to start rewinding the +// stack vack up to the location stored in the provided data. This prepares +// for the rewind; to start it, you must call the first function in the +// call stack to be unwound. +// +// * bysyncify_stop_rewind(): call this to note that rewinding has +// concluded, and normal execution can resume. +// +// These three functions are exported so that you can call them from the +// outside. If you want to manage things from inside the wasm, then you +// couldn't have called them before they were created by this pass. To work +// around that, you can create imports to bysyncify.start_unwind, +// bysyncify.start_rewind, and bysyncify.stop_rewind; if those exist when +// this pass runs then it will turn those into direct calls to the functions +// that it creates. +// +// To use this API, call bysyncify_start_unwind when you want to. The call +// stack will then be unwound, and so execution will resume in the JS or +// other host environment on the outside that called into wasm. Then when +// you want to rewind, call bysyncify_start_rewind, and then call the same +// initial function you called before, so that unwinding can begin. The +// unwinding will reach the same function from which you started, since +// we are recreating the call stack. At that point you should call +// bysyncify_stop_rewind and then execution can resume normally. +// +// By default this pass assumes that any import may call any of the +// exported bysyncify methods, that is, any import may start an unwind/rewind. +// To customize this, you can provide an argument to wasm-opt (or another +// tool that can run this pass), +// +// --pass-arg=bysyncify@module1.base1,module2.base2,module3.base3 +// +// Each module.base in that comma-separated list will be considered to +// be an import that can unwind/rewind, and all others are assumed not to +// (aside from the bysyncify.* imports, which are always assumed to). To +// say that no import (aside from bysyncify.*) can do so (that is, the +// opposite of the default behavior), you can simply provide an import +// that doesn't exist (say, +// --pass-arg=bysyncify@no.imports +// + +#include "ir/effects.h" +#include "ir/literal-utils.h" +#include "ir/module-utils.h" +#include "ir/utils.h" +#include "pass.h" +#include "support/unique_deferring_queue.h" +#include "wasm-builder.h" +#include "wasm.h" + +namespace wasm { + +namespace { + +static const Name BYSYNCIFY_STATE = "__bysyncify_state"; +static const Name BYSYNCIFY_DATA = "__bysyncify_data"; +static const Name BYSYNCIFY_START_UNWIND = "bysyncify_start_unwind"; +static const Name BYSYNCIFY_START_REWIND = "bysyncify_start_rewind"; +static const Name BYSYNCIFY_STOP_REWIND = "bysyncify_stop_rewind"; +static const Name BYSYNCIFY_UNWIND = "__bysyncify_unwind"; +static const Name BYSYNCIFY = "bysyncify"; +static const Name START_UNWIND = "start_unwind"; +static const Name START_REWIND = "start_rewind"; +static const Name STOP_REWIND = "stop_rewind"; +static const Name BYSYNCIFY_GET_CALL_INDEX = "__bysyncify_get_call_index"; +static const Name BYSYNCIFY_CHECK_CALL_INDEX = "__bysyncify_check_call_index"; + +// TODO: having just normal/unwind_or_rewind would decrease code +// size, but make debugging harder +enum class State { Normal = 0, Unwinding = 1, Rewinding = 2 }; + +enum class DataOffset { BStackPos = 0, BStackEnd = 4 }; + +const auto STACK_ALIGN = 4; + +// Analyze the entire module to see which calls may change the state, that +// is, start an unwind or rewind), either in itself or in something called +// by it. +class ModuleAnalyzer { + Module& module; + + struct Info { + bool canChangeState = false; + std::set<Function*> callsTo; + std::set<Function*> calledBy; + }; + + typedef std::map<Function*, Info> Map; + Map map; + +public: + ModuleAnalyzer(Module& module, + std::function<bool(Name, Name)> canImportChangeState) + : module(module) { + // Scan to see which functions can directly change the state. + // Also handle the bysyncify imports, removing them (as we will implement + // them later), and replace calls to them with calls to the later proper + // name. + ModuleUtils::ParallelFunctionMap<Info> scanner( + module, [&](Function* func, Info& info) { + if (func->imported()) { + // The bysyncify imports to start an unwind or rewind can definitely + // change the state. + if (func->module == BYSYNCIFY && + (func->base == START_UNWIND || func->base == START_REWIND)) { + info.canChangeState = true; + } else { + info.canChangeState = + canImportChangeState(func->module, func->base); + } + return; + } + struct Walker : PostWalker<Walker> { + void visitCall(Call* curr) { + auto* target = module->getFunction(curr->target); + if (target->imported() && target->module == BYSYNCIFY) { + // Redirect the imports to the functions we'll add later. + if (target->base == START_UNWIND) { + curr->target = BYSYNCIFY_START_UNWIND; + } else if (target->base == START_REWIND) { + curr->target = BYSYNCIFY_START_REWIND; + } else if (target->base == STOP_REWIND) { + curr->target = BYSYNCIFY_STOP_REWIND; + // TODO: in theory, this does not change the state + } else { + Fatal() << "call to unidenfied bysyncify import: " + << target->base; + } + info->canChangeState = true; + return; + } + info->callsTo.insert(target); + } + void visitCallIndirect(CallIndirect* curr) { + // TODO optimize + info->canChangeState = true; + } + Info* info; + Module* module; + }; + Walker walker; + walker.info = &info; + walker.module = &module; + walker.walk(func->body); + }); + map.swap(scanner.map); + + // Remove the bysyncify imports, if any. + for (auto& pair : map) { + auto* func = pair.first; + if (func->imported() && func->module == BYSYNCIFY) { + module.removeFunction(func->name); + } + } + + // Find what is called by what. + for (auto& pair : map) { + auto* func = pair.first; + auto& info = pair.second; + for (auto* target : info.callsTo) { + map[target].calledBy.insert(func); + } + } + + // Close over, finding all functions that can reach something that can + // change the state. + // The work queue contains an item we just learned can change the state. + UniqueDeferredQueue<Function*> work; + for (auto& func : module.functions) { + if (map[func.get()].canChangeState) { + work.push(func.get()); + } + } + while (!work.empty()) { + auto* func = work.pop(); + for (auto* caller : map[func].calledBy) { + if (!map[caller].canChangeState) { + map[caller].canChangeState = true; + work.push(caller); + } + } + } + } + + bool canChangeState(Function* func) { return map[func].canChangeState; } + + bool canChangeState(Expression* curr) { + // Look inside to see if we call any of the things we know can change the + // state. + // TODO: caching, this is O(N^2) + struct Walker : PostWalker<Walker> { + void visitCall(Call* curr) { + // We only implement these at the very end, but we know that they + // definitely change the state. + if (curr->target == BYSYNCIFY_START_UNWIND || + curr->target == BYSYNCIFY_START_REWIND || + curr->target == BYSYNCIFY_GET_CALL_INDEX || + curr->target == BYSYNCIFY_CHECK_CALL_INDEX) { + canChangeState = true; + return; + } + // The target may not exist if it is one of our temporary intrinsics. + auto* target = module->getFunctionOrNull(curr->target); + if (target && (*map)[target].canChangeState) { + canChangeState = true; + } + } + void visitCallIndirect(CallIndirect* curr) { + // TODO optimize + canChangeState = true; + } + Module* module; + Map* map; + bool canChangeState = false; + }; + Walker walker; + walker.module = &module; + walker.map = ↦ + walker.walk(curr); + return walker.canChangeState; + } +}; + +// Checks if something performs a call: either a direct or indirect call, +// and perhaps it is dropped or assigned to a local. This captures all the +// cases of a call in flat IR. +static bool doesCall(Expression* curr) { + if (auto* set = curr->dynCast<LocalSet>()) { + curr = set->value; + } else if (auto* drop = curr->dynCast<Drop>()) { + curr = drop->value; + } + return curr->is<Call>() || curr->is<CallIndirect>(); +} + +class BysyncifyBuilder : public Builder { +public: + BysyncifyBuilder(Module& wasm) : Builder(wasm) {} + + Expression* makeGetStackPos() { + return makeLoad(4, + false, + int32_t(DataOffset::BStackPos), + 4, + makeGlobalGet(BYSYNCIFY_DATA, i32), + i32); + } + + Expression* makeIncStackPos(int32_t by) { + if (by == 0) { + return makeNop(); + } + return makeStore( + 4, + int32_t(DataOffset::BStackPos), + 4, + makeGlobalGet(BYSYNCIFY_DATA, i32), + makeBinary(AddInt32, makeGetStackPos(), makeConst(Literal(by))), + i32); + } + + Expression* makeStateCheck(State value) { + return makeBinary(EqInt32, + makeGlobalGet(BYSYNCIFY_STATE, i32), + makeConst(Literal(int32_t(value)))); + } +}; + +// Instrument control flow, around calls and adding skips for rewinding. +struct BysyncifyFlow : public Pass { + bool isFunctionParallel() override { return true; } + + ModuleAnalyzer* analyzer; + + BysyncifyFlow* create() override { return new BysyncifyFlow(analyzer); } + + BysyncifyFlow(ModuleAnalyzer* analyzer) : analyzer(analyzer) {} + + void + runOnFunction(PassRunner* runner, Module* module_, Function* func) override { + module = module_; + // If the function cannot change our state, we have nothing to do - + // we will never unwind or rewind the stack here. + if (!analyzer->canChangeState(func)) { + return; + } + // Rewrite the function body. + builder = make_unique<BysyncifyBuilder>(*module); + // Each function we enter will pop one from the stack, which is the index + // of the next call to make. + auto* block = builder->makeBlock( + {builder->makeIf(builder->makeStateCheck( + State::Rewinding), // TODO: such checks can be !normal + makeCallIndexPop()), + process(func->body)}); + if (func->result != none) { + // Rewriting control flow may alter things; make sure the function ends in + // something valid (which the optimizer can remove later). + block->list.push_back(builder->makeUnreachable()); + } + block->finalize(); + func->body = block; + // Making things like returns conditional may alter types. + ReFinalize().walkFunctionInModule(func, module); + } + +private: + std::unique_ptr<BysyncifyBuilder> builder; + + Module* module; + + // Each call in the function has an index, noted during unwind and checked + // during rewind. + Index callIndex = 0; + + Expression* process(Expression* curr) { + if (!analyzer->canChangeState(curr)) { + return makeMaybeSkip(curr); + } + // The IR is in flat form, which makes this much simpler. We basically + // need to add skips to avoid code when rewinding, and checks around calls + // with unwinding/rewinding support. + // + // Aside from that, we must "linearize" all control flow so that we can + // reach the right part when unwinding. For example, for an if we do this: + // + // if (condition()) { + // side1(); + // } else { + // side2(); + // } + // => + // if (!rewinding) { + // temp = condition(); + // } + // if (rewinding || temp) { + // side1(); + // } + // if (rewinding || !temp) { + // side2(); + // } + // + // This way we will linearly get through all the code in the function, + // if we are rewinding. In a similar way we skip over breaks, etc.; just + // "keep on truckin'". + // + // Note that temp locals added in this way are just normal locals, and in + // particular they are saved and loaded. That way if we resume from the + // first if arm we will avoid the second. + if (auto* block = curr->dynCast<Block>()) { + // At least one of our children may change the state. Clump them as + // necessary. + Index i = 0; + auto& list = block->list; + while (i < list.size()) { + if (analyzer->canChangeState(list[i])) { + list[i] = process(list[i]); + i++; + } else { + Index end = i + 1; + while (end < list.size() && !analyzer->canChangeState(list[end])) { + end++; + } + // We have a range of [i, end) in which the state cannot change, + // so all we need to do is skip it if rewinding. + if (end == i + 1) { + list[i] = makeMaybeSkip(list[i]); + } else { + auto* block = builder->makeBlock(); + for (auto j = i; j < end; j++) { + block->list.push_back(list[j]); + } + block->finalize(); + list[i] = makeMaybeSkip(block); + for (auto j = i + 1; j < end; j++) { + list[j] = builder->makeNop(); + } + } + i = end; + } + } + return block; + } else if (auto* iff = curr->dynCast<If>()) { + // The state change cannot be in the condition due to flat form, so it + // must be in one of the children. + assert(!analyzer->canChangeState(iff->condition)); + // We must linearize this, which means we pass through both arms if we + // are rewinding. This is pretty simple as in flat form the if condition + // is either a const or a get, so easily copyable. + // Start with the first arm, for which we reuse the original if. + auto* otherArm = iff->ifFalse; + iff->ifFalse = nullptr; + auto* originalCondition = iff->condition; + iff->condition = builder->makeBinary( + OrInt32, originalCondition, builder->makeStateCheck(State::Rewinding)); + iff->ifTrue = process(iff->ifTrue); + iff->finalize(); + if (!otherArm) { + return iff; + } + // Add support for the second arm as well. + auto* otherIf = builder->makeIf( + builder->makeBinary( + OrInt32, + builder->makeUnary( + EqZInt32, ExpressionManipulator::copy(originalCondition, *module)), + builder->makeStateCheck(State::Rewinding)), + process(otherArm)); + otherIf->finalize(); + return builder->makeSequence(iff, otherIf); + } else if (auto* loop = curr->dynCast<Loop>()) { + loop->body = process(loop->body); + return loop; + } else if (doesCall(curr)) { + return makeCallSupport(curr); + } + // We must handle all control flow above, and all things that can change + // the state, so there should be nothing that can reach here - add it + // earlier as necessary. + WASM_UNREACHABLE(); + } + + // Possibly skip some code, if rewinding. + Expression* makeMaybeSkip(Expression* curr) { + return builder->makeIf(builder->makeStateCheck(State::Normal), curr); + } + + Expression* makeCallSupport(Expression* curr) { + // TODO: stop doing this after code can no longer reach a call that may + // change the state + assert(doesCall(curr)); + assert(curr->type == none); + auto index = callIndex++; + // Execute the call, if either normal execution, or if rewinding and this + // is the right call to go into. + // TODO: we can read the next call index once in each function (but should + // avoid saving/restoring that local later) + return builder->makeIf( + builder->makeIf(builder->makeStateCheck(State::Normal), + builder->makeConst(Literal(int32_t(1))), + makeCallIndexPeek(index)), + builder->makeSequence(curr, makePossibleUnwind(index))); + } + + Expression* makePossibleUnwind(Index index) { + // In this pass we emit an "unwind" as a call to a fake intrinsic. We + // will implement it in the later pass. (We can't create the unwind block + // target here, as the optimizer would remove it later; we can only add + // it when we add its contents, later.) + return builder->makeIf( + builder->makeStateCheck(State::Unwinding), + builder->makeCall( + BYSYNCIFY_UNWIND, {builder->makeConst(Literal(int32_t(index)))}, none)); + } + + Expression* makeCallIndexPeek(Index index) { + // Emit an intrinsic for this, as we store the index into a local, and + // don't want it to be seen by bysyncify itself. + return builder->makeCall(BYSYNCIFY_CHECK_CALL_INDEX, + {builder->makeConst(Literal(int32_t(index)))}, + i32); + } + + Expression* makeCallIndexPop() { + // Emit an intrinsic for this, as we store the index into a local, and + // don't want it to be seen by bysyncify itself. + return builder->makeCall(BYSYNCIFY_GET_CALL_INDEX, {}, none); + } +}; + +// Instrument local saving/restoring. +struct BysyncifyLocals : public WalkerPass<PostWalker<BysyncifyLocals>> { + bool isFunctionParallel() override { return true; } + + ModuleAnalyzer* analyzer; + + BysyncifyLocals* create() override { return new BysyncifyLocals(analyzer); } + + BysyncifyLocals(ModuleAnalyzer* analyzer) : analyzer(analyzer) {} + + void visitCall(Call* curr) { + // Replace calls to the fake intrinsics. + if (curr->target == BYSYNCIFY_UNWIND) { + replaceCurrent(builder->makeBreak(BYSYNCIFY_UNWIND, curr->operands[0])); + } else if (curr->target == BYSYNCIFY_GET_CALL_INDEX) { + replaceCurrent(builder->makeSequence( + builder->makeIncStackPos(-4), + builder->makeLocalSet( + rewindIndex, + builder->makeLoad(4, false, 0, 4, builder->makeGetStackPos(), i32)))); + } else if (curr->target == BYSYNCIFY_CHECK_CALL_INDEX) { + replaceCurrent(builder->makeBinary( + EqInt32, + builder->makeLocalGet(rewindIndex, i32), + builder->makeConst( + Literal(int32_t(curr->operands[0]->cast<Const>()->value.geti32()))))); + } + } + + void doWalkFunction(Function* func) { + // If the function cannot change our state, we have nothing to do - + // we will never unwind or rewind the stack here. + if (!analyzer->canChangeState(func->body)) { + return; + } + // Note the locals we want to preserve, before we add any more helpers. + numPreservableLocals = func->getNumLocals(); + // The new function body has a prelude to load locals if rewinding, + // then the actual main body, which does all its unwindings by breaking + // to the unwind block, which then handles pushing the call index, as + // well as saving the locals. + // An index is needed for getting the unwinding and rewinding call indexes + // around TODO: can this be the same index? + auto unwindIndex = builder->addVar(func, i32); + rewindIndex = builder->addVar(func, i32); + // Rewrite the function body. + builder = make_unique<BysyncifyBuilder>(*getModule()); + walk(func->body); + // After the normal function body, emit a barrier before the postamble. + Expression* barrier; + if (func->result == none) { + // The function may have ended without a return; ensure one. + barrier = builder->makeReturn(); + } else { + // The function must have returned or hit an unreachable, but emit one + // to make possible bugs easier to figure out (as this should never be + // reached). The optimizer can remove this anyhow. + barrier = builder->makeUnreachable(); + } + auto* newBody = builder->makeBlock( + {builder->makeIf(builder->makeStateCheck(State::Rewinding), + makeLocalLoading()), + builder->makeLocalSet( + unwindIndex, + builder->makeBlock(BYSYNCIFY_UNWIND, + builder->makeSequence(func->body, barrier))), + makeCallIndexPush(unwindIndex), + makeLocalSaving()}); + if (func->result != none) { + // If we unwind, we must still "return" a value, even if it will be + // ignored on the outside. + newBody->list.push_back( + LiteralUtils::makeZero(func->result, *getModule())); + newBody->finalize(func->result); + } + func->body = newBody; + // Making things like returns conditional may alter types. + ReFinalize().walkFunctionInModule(func, getModule()); + } + +private: + std::unique_ptr<BysyncifyBuilder> builder; + + Index rewindIndex; + Index numPreservableLocals; + + Expression* makeLocalLoading() { + if (numPreservableLocals == 0) { + return builder->makeNop(); + } + auto* func = getFunction(); + Index total = 0; + for (Index i = 0; i < numPreservableLocals; i++) { + auto type = func->getLocalType(i); + auto size = getTypeSize(type); + total += size; + } + auto* block = builder->makeBlock(); + block->list.push_back(builder->makeIncStackPos(-total)); + auto tempIndex = builder->addVar(func, i32); + block->list.push_back( + builder->makeLocalSet(tempIndex, builder->makeGetStackPos())); + Index offset = 0; + for (Index i = 0; i < numPreservableLocals; i++) { + auto type = func->getLocalType(i); + auto size = getTypeSize(type); + assert(size % STACK_ALIGN == 0); + // TODO: higher alignment? + block->list.push_back(builder->makeLocalSet( + i, + builder->makeLoad(size, + true, + offset, + STACK_ALIGN, + builder->makeLocalGet(tempIndex, i32), + type))); + offset += size; + } + block->finalize(); + return block; + } + + Expression* makeLocalSaving() { + if (numPreservableLocals == 0) { + return builder->makeNop(); + } + auto* func = getFunction(); + auto* block = builder->makeBlock(); + auto tempIndex = builder->addVar(func, i32); + block->list.push_back( + builder->makeLocalSet(tempIndex, builder->makeGetStackPos())); + Index offset = 0; + for (Index i = 0; i < numPreservableLocals; i++) { + auto type = func->getLocalType(i); + auto size = getTypeSize(type); + assert(size % STACK_ALIGN == 0); + // TODO: higher alignment? + block->list.push_back( + builder->makeStore(size, + offset, + STACK_ALIGN, + builder->makeLocalGet(tempIndex, i32), + builder->makeLocalGet(i, type), + type)); + offset += size; + } + block->list.push_back(builder->makeIncStackPos(offset)); + block->finalize(); + return block; + } + + Expression* makeCallIndexPush(Index tempIndex) { + // TODO: add a check against the stack end here + return builder->makeSequence( + builder->makeStore(4, + 0, + 4, + builder->makeGetStackPos(), + builder->makeLocalGet(tempIndex, i32), + i32), + builder->makeIncStackPos(4)); + } +}; + +} // anonymous namespace + +struct Bysyncify : public Pass { + void run(PassRunner* runner, Module* module) override { + bool optimize = runner->options.optimizeLevel > 0; + // Find which imports can change the state. + const char* ALL_IMPORTS_CAN_CHANGE_STATE = "__bysyncify_all_imports"; + auto stateChangingImports = runner->options.getArgumentOrDefault( + "bysyncify", ALL_IMPORTS_CAN_CHANGE_STATE); + bool allImportsCanChangeState = + stateChangingImports == ALL_IMPORTS_CAN_CHANGE_STATE; + std::string separator = ","; + stateChangingImports = separator + stateChangingImports + separator; + + // Scan the module. + ModuleAnalyzer analyzer(*module, [&](Name module, Name base) { + if (allImportsCanChangeState) { + return true; + } + std::string full = separator + module.str + '.' + base.str + separator; + return stateChangingImports.find(full) != std::string::npos; + }); + + // Add necessary globals before we emit code to use them. + addGlobals(module); + + // Instrument the flow of code, adding code instrumentation and + // skips for when rewinding. We do this on flat IR so that it is + // practical to add code around each call, without affecting + // anything else. + { + PassRunner runner(module); + runner.add("flatten"); + // Dce is useful here, since BysyncifyFlow makes control flow conditional, + // which may make unreachable code look reachable. We also do some other + // minimal optimization here in an unconditional way here, to counteract + // the flattening. + runner.add("dce"); + runner.add("simplify-locals-nonesting"); + runner.add("merge-blocks"); + runner.add("vacuum"); + runner.add<BysyncifyFlow>(&analyzer); + runner.setIsNested(true); + runner.setValidateGlobally(false); + runner.run(); + } + // Next, add local saving/restoring logic. We optimize before doing this, + // to undo the extra code generated by flattening, and to arrive at the + // minimal amount of locals (which is important as we must save and + // restore those locals). We also and optimize after as well to simplify + // the code as much as possible. + { + PassRunner runner(module); + if (optimize) { + runner.addDefaultFunctionOptimizationPasses(); + } + runner.add<BysyncifyLocals>(&analyzer); + if (optimize) { + runner.addDefaultFunctionOptimizationPasses(); + } + runner.setIsNested(true); + runner.setValidateGlobally(false); + runner.run(); + } + // Finally, add function support (that should not have been seen by + // the previous passes). + addFunctions(module); + } + +private: + void addGlobals(Module* module) { + Builder builder(*module); + module->addGlobal(builder.makeGlobal(BYSYNCIFY_STATE, + i32, + builder.makeConst(Literal(int32_t(0))), + Builder::Mutable)); + module->addGlobal(builder.makeGlobal(BYSYNCIFY_DATA, + i32, + builder.makeConst(Literal(int32_t(0))), + Builder::Mutable)); + } + + void addFunctions(Module* module) { + Builder builder(*module); + auto makeFunction = [&](Name name, + bool setData, + State state, + Expression* extra) { + std::vector<Type> params; + if (setData) { + params.push_back(i32); + } + auto* body = builder.makeBlock(); + if (setData) { + // Verify the data is valid. + auto* stackPos = builder.makeLoad(4, + false, + int32_t(DataOffset::BStackPos), + 4, + builder.makeLocalGet(0, i32), + i32); + auto* stackEnd = builder.makeLoad(4, + false, + int32_t(DataOffset::BStackEnd), + 4, + builder.makeLocalGet(0, i32), + i32); + body->list.push_back( + builder.makeIf(builder.makeBinary(GtUInt32, stackPos, stackEnd), + builder.makeUnreachable())); + } + body->list.push_back(builder.makeGlobalSet( + BYSYNCIFY_STATE, builder.makeConst(Literal(int32_t(state))))); + if (extra) { + body->list.push_back(extra); + } + body->finalize(); + auto* func = builder.makeFunction( + name, std::move(params), none, std::vector<Type>{}, body); + module->addFunction(func); + module->addExport(builder.makeExport(name, name, ExternalKind::Function)); + }; + + makeFunction( + BYSYNCIFY_START_UNWIND, + true, + State::Unwinding, + builder.makeGlobalSet(BYSYNCIFY_DATA, builder.makeLocalGet(0, i32))); + makeFunction( + BYSYNCIFY_START_REWIND, + true, + State::Rewinding, + builder.makeGlobalSet(BYSYNCIFY_DATA, builder.makeLocalGet(0, i32))); + makeFunction(BYSYNCIFY_STOP_REWIND, false, State::Normal, nullptr); + } +}; + +Pass* createBysyncifyPass() { return new Bysyncify(); } + +} // namespace wasm |