summaryrefslogtreecommitdiff
path: root/src/passes/Bysyncify.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/passes/Bysyncify.cpp')
-rw-r--r--src/passes/Bysyncify.cpp166
1 files changed, 138 insertions, 28 deletions
diff --git a/src/passes/Bysyncify.cpp b/src/passes/Bysyncify.cpp
index 29b3cf242..e798be562 100644
--- a/src/passes/Bysyncify.cpp
+++ b/src/passes/Bysyncify.cpp
@@ -236,9 +236,54 @@ enum class DataOffset { BStackPos = 0, BStackEnd = 4 };
const auto STACK_ALIGN = 4;
+// A helper class for managing fake global names. Creates the globals and
+// provides mappings for using them.
+class GlobalHelper {
+ Module& module;
+
+public:
+ GlobalHelper(Module& module) : module(module) {
+ map[i32] = "bysyncify_fake_call_global_i32";
+ map[i64] = "bysyncify_fake_call_global_i64";
+ map[f32] = "bysyncify_fake_call_global_f32";
+ map[f64] = "bysyncify_fake_call_global_f64";
+ Builder builder(module);
+ for (auto& pair : map) {
+ auto type = pair.first;
+ auto name = pair.second;
+ rev[name] = type;
+ module.addGlobal(builder.makeGlobal(
+ name, type, LiteralUtils::makeZero(type, module), Builder::Mutable));
+ }
+ }
+
+ ~GlobalHelper() {
+ for (auto& pair : map) {
+ auto name = pair.second;
+ module.removeGlobal(name);
+ }
+ }
+
+ Name getName(Type type) { return map.at(type); }
+
+ Type getTypeOrNone(Name name) {
+ auto iter = rev.find(name);
+ if (iter != rev.end()) {
+ return iter->second;
+ }
+ return none;
+ }
+
+private:
+ std::map<Type, Name> map;
+ std::map<Name, Type> rev;
+};
+
// Analyze the entire module to see which calls may change the state, that
// is, start an unwind or rewind), either in itself or in something called
// by it.
+// Handles global module management, needed from the various parts of this
+// transformation.
class ModuleAnalyzer {
Module& module;
bool canIndirectChangeState;
@@ -268,7 +313,8 @@ public:
ModuleAnalyzer(Module& module,
std::function<bool(Name, Name)> canImportChangeState,
bool canIndirectChangeState)
- : module(module), canIndirectChangeState(canIndirectChangeState) {
+ : module(module), canIndirectChangeState(canIndirectChangeState),
+ globals(module) {
// Scan to see which functions can directly change the state.
// Also handle the bysyncify imports, removing them (as we will implement
// them later), and replace calls to them with calls to the later proper
@@ -429,6 +475,8 @@ public:
}
return walker.canChangeState;
}
+
+ GlobalHelper globals;
};
// Checks if something performs a call: either a direct or indirect call,
@@ -487,8 +535,9 @@ struct BysyncifyFlow : public Pass {
BysyncifyFlow(ModuleAnalyzer* analyzer) : analyzer(analyzer) {}
void
- runOnFunction(PassRunner* runner, Module* module_, Function* func) override {
+ runOnFunction(PassRunner* runner, Module* module_, Function* func_) override {
module = module_;
+ func = func_;
// If the function cannot change our state, we have nothing to do -
// we will never unwind or rewind the stack here.
if (!analyzer->needsInstrumentation(func)) {
@@ -518,6 +567,7 @@ private:
std::unique_ptr<BysyncifyBuilder> builder;
Module* module;
+ Function* func;
// Each call in the function has an index, noted during unwind and checked
// during rewind.
@@ -527,12 +577,15 @@ private:
if (!analyzer->canChangeState(curr)) {
return makeMaybeSkip(curr);
}
- // The IR is in flat form, which makes this much simpler. We basically
- // need to add skips to avoid code when rewinding, and checks around calls
- // with unwinding/rewinding support.
+ // The IR is in flat form, which makes this much simpler: there are no
+ // unnecessarily nested side effects or control flow, so we can add
+ // skips for rewinding in an easy manner, putting a single if around
+ // whole chunks of code. Also calls are separated out so that it is easy
+ // to add the necessary checks for them for unwinding/rewinding support.
//
// Aside from that, we must "linearize" all control flow so that we can
- // reach the right part when unwinding. For example, for an if we do this:
+ // reach the right part when rewinding, which is done by always skipping
+ // forward. For example, for an if we do this:
//
// if (condition()) {
// side1();
@@ -595,25 +648,28 @@ private:
// must be in one of the children.
assert(!analyzer->canChangeState(iff->condition));
// We must linearize this, which means we pass through both arms if we
- // are rewinding. This is pretty simple as in flat form the if condition
- // is either a const or a get, so easily copyable.
- // Start with the first arm, for which we reuse the original if.
- auto* otherArm = iff->ifFalse;
- iff->ifFalse = nullptr;
- auto* originalCondition = iff->condition;
+ // are rewinding.
+ if (!iff->ifFalse) {
+ iff->condition = builder->makeBinary(
+ OrInt32, iff->condition, builder->makeStateCheck(State::Rewinding));
+ iff->ifTrue = process(iff->ifTrue);
+ iff->finalize();
+ return iff;
+ }
+ auto conditionTemp = builder->addVar(func, i32);
+ iff->condition = builder->makeLocalTee(conditionTemp, iff->condition);
iff->condition = builder->makeBinary(
- OrInt32, originalCondition, builder->makeStateCheck(State::Rewinding));
+ OrInt32, iff->condition, builder->makeStateCheck(State::Rewinding));
iff->ifTrue = process(iff->ifTrue);
+ auto* otherArm = iff->ifFalse;
+ iff->ifFalse = nullptr;
iff->finalize();
- if (!otherArm) {
- return iff;
- }
// Add support for the second arm as well.
auto* otherIf = builder->makeIf(
builder->makeBinary(
OrInt32,
- builder->makeUnary(
- EqZInt32, ExpressionManipulator::copy(originalCondition, *module)),
+ builder->makeUnary(EqZInt32,
+ builder->makeLocalGet(conditionTemp, i32)),
builder->makeStateCheck(State::Rewinding)),
process(otherArm));
otherIf->finalize();
@@ -640,19 +696,39 @@ private:
// change the state
assert(doesCall(curr));
assert(curr->type == none);
+ // The case of a set is tricky: we must *not* execute the set when
+ // unwinding, since at that point we have a fake value for the return,
+ // and if we applied it to the local, it would end up saved and then
+ // potentially used later - and with coalescing, that may interfere
+ // with other things. Note also that at this point in the process we
+ // have not yet written the load saving/loading code, so the optimizer
+ // doesn't see that locals will have another use at the beginning and
+ // end of the function. We don't do that yet because we don't want it
+ // to force the final number of locals to be too high, but we also
+ // must be careful to never touch a local unnecessarily to avoid bugs.
+ // To implement this, write to a fake global; we'll fix it up after
+ // BysyncifyLocals locals adds local saving/restoring.
+ auto* set = curr->dynCast<LocalSet>();
+ if (set) {
+ auto name = analyzer->globals.getName(set->value->type);
+ curr = builder->makeGlobalSet(name, set->value);
+ set->value = builder->makeGlobalGet(name, set->value->type);
+ }
+ // Instrument the call itself (or, if a drop, the drop+call).
auto index = callIndex++;
// Execute the call, if either normal execution, or if rewinding and this
// is the right call to go into.
// TODO: we can read the next call index once in each function (but should
// avoid saving/restoring that local later)
- return builder->makeIf(
+ curr = builder->makeIf(
builder->makeIf(builder->makeStateCheck(State::Normal),
builder->makeConst(Literal(int32_t(1))),
makeCallIndexPeek(index)),
- builder->makeSequence(curr, makePossibleUnwind(index)));
+ builder->makeSequence(curr, makePossibleUnwind(index, set)));
+ return curr;
}
- Expression* makePossibleUnwind(Index index) {
+ Expression* makePossibleUnwind(Index index, Expression* ifNotUnwinding) {
// In this pass we emit an "unwind" as a call to a fake intrinsic. We
// will implement it in the later pass. (We can't create the unwind block
// target here, as the optimizer would remove it later; we can only add
@@ -660,7 +736,8 @@ private:
return builder->makeIf(
builder->makeStateCheck(State::Unwinding),
builder->makeCall(
- BYSYNCIFY_UNWIND, {builder->makeConst(Literal(int32_t(index)))}, none));
+ BYSYNCIFY_UNWIND, {builder->makeConst(Literal(int32_t(index)))}, none),
+ ifNotUnwinding);
}
Expression* makeCallIndexPeek(Index index) {
@@ -707,6 +784,29 @@ struct BysyncifyLocals : public WalkerPass<PostWalker<BysyncifyLocals>> {
}
}
+ void visitGlobalSet(GlobalSet* curr) {
+ auto type = analyzer->globals.getTypeOrNone(curr->name);
+ if (type != none) {
+ replaceCurrent(
+ builder->makeLocalSet(getFakeCallLocal(type), curr->value));
+ }
+ }
+
+ void visitGlobalGet(GlobalGet* curr) {
+ auto type = analyzer->globals.getTypeOrNone(curr->name);
+ if (type != none) {
+ replaceCurrent(builder->makeLocalGet(getFakeCallLocal(type), type));
+ }
+ }
+
+ Index getFakeCallLocal(Type type) {
+ auto iter = fakeCallLocals.find(type);
+ if (iter != fakeCallLocals.end()) {
+ return iter->second;
+ }
+ return fakeCallLocals[type] = builder->addVar(getFunction(), type);
+ }
+
void doWalkFunction(Function* func) {
// If the function cannot change our state, we have nothing to do -
// we will never unwind or rewind the stack here.
@@ -763,6 +863,7 @@ private:
Index rewindIndex;
Index numPreservableLocals;
+ std::map<Type, Index> fakeCallLocals;
Expression* makeLocalLoading() {
if (numPreservableLocals == 0) {
@@ -884,13 +985,22 @@ struct Bysyncify : public Pass {
PassRunner runner(module);
runner.add("flatten");
// Dce is useful here, since BysyncifyFlow makes control flow conditional,
- // which may make unreachable code look reachable. We also do some other
- // minimal optimization here in an unconditional way here, to counteract
- // the flattening.
+ // which may make unreachable code look reachable. It also lets us ignore
+ // unreachable code here.
runner.add("dce");
- runner.add("simplify-locals-nonesting");
- runner.add("merge-blocks");
- runner.add("vacuum");
+ if (optimize) {
+ // Optimizing before BsyncifyFlow is crucial, especially coalescing,
+ // because the flow changes add many branches, break up if-elses, etc.,
+ // all of which extend the live ranges of locals. In other words, it is
+ // not possible to coalesce well afterwards.
+ runner.add("simplify-locals-nonesting");
+ runner.add("reorder-locals");
+ runner.add("coalesce-locals");
+ runner.add("simplify-locals-nonesting");
+ runner.add("reorder-locals");
+ runner.add("merge-blocks");
+ runner.add("vacuum");
+ }
runner.add<BysyncifyFlow>(&analyzer);
runner.setIsNested(true);
runner.setValidateGlobally(false);