diff options
author | Alon Zakai <azakai@google.com> | 2022-02-03 14:23:49 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-02-03 22:23:49 +0000 |
commit | 880c765ab9a4124f708da57329dcbe07c1ca9fa3 (patch) | |
tree | 0b2d7f53b816155253e6fa469ca9e58f8e5c7f56 | |
parent | e6f15747d1e3557cfff87c3149e91e3dbd0ff6c7 (diff) | |
download | binaryen-880c765ab9a4124f708da57329dcbe07c1ca9fa3.tar.gz binaryen-880c765ab9a4124f708da57329dcbe07c1ca9fa3.tar.bz2 binaryen-880c765ab9a4124f708da57329dcbe07c1ca9fa3.zip |
[Wasm GC] [ctor-eval] Evaluate and serialize GC data (#4491)
This ended up simpler than I thought. We can simply emit global and
local data as we go, creating globals as necessary to contain GC data,
and referring to them using global.get later. That will ensure that
data identity works (things referring to the same object in the interpreter
will refer to the same object when the wasm is loaded). In more detail,
each live GC item is created in a "defining global", a global that is
immutable and of the precise type of that data. Then we just read from
that location in any place that wants to refer to that data. That is,
something like
function foo() {
  var x = Bar(10);
  var y = Bar(20);
  var z = x;
  z.value++; // first object now contains 11
  ...
}
will be evalled into something like
var define$0 = Bar(11); // note the ++ has taken effect here
var define$1 = Bar(20);
function foo() {
  var x = define$0;
  var y = define$1;
  var z = define$0;
  ...
}
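This PR should handle everything except "cycles", that is, GC data that at
runtime ends up forming a loop (an object that directly or indirectly refers
to itself). For now such a cycle is detected during serialization and reported
as a fatal error; handling it properly is left for later work (not sure how
urgent it is to fix).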
-rw-r--r-- | src/tools/wasm-ctor-eval.cpp | 173 | ||||
-rw-r--r-- | test/ctor-eval/gc-2.wast | 54 | ||||
-rw-r--r-- | test/ctor-eval/gc-2.wast.ctors | 1 | ||||
-rw-r--r-- | test/ctor-eval/gc-2.wast.out | 29 | ||||
-rw-r--r-- | test/ctor-eval/gc-array.wast | 45 | ||||
-rw-r--r-- | test/ctor-eval/gc-array.wast.ctors | 1 | ||||
-rw-r--r-- | test/ctor-eval/gc-array.wast.out | 27 | ||||
-rw-r--r-- | test/ctor-eval/gc.wast | 65 | ||||
-rw-r--r-- | test/ctor-eval/gc.wast.ctors | 1 | ||||
-rw-r--r-- | test/ctor-eval/gc.wast.out | 41 |
10 files changed, 433 insertions, 4 deletions
diff --git a/src/tools/wasm-ctor-eval.cpp b/src/tools/wasm-ctor-eval.cpp index 0bcf42f9c..59e54d4f7 100644 --- a/src/tools/wasm-ctor-eval.cpp +++ b/src/tools/wasm-ctor-eval.cpp @@ -33,6 +33,7 @@ #include "pass.h" #include "support/colors.h" #include "support/file.h" +#include "support/small_set.h" #include "support/string.h" #include "tool-options.h" #include "wasm-builder.h" @@ -157,6 +158,8 @@ struct CtorEvalExternalInterface : EvallingModuleRunner::ExternalInterface { // Called when we want to apply the current state of execution to the Module. // Until this is called the Module is never changed. void applyToModule() { + clearApplyState(); + // If nothing was ever written to memory then there is nothing to update. if (!memory.empty()) { applyMemoryToModule(); @@ -404,6 +407,22 @@ private: return ret; } + // Clear the state of the operation of applying the interpreter's runtime + // information into the module. + // + // This happens each time we apply contents to the module, which is basically + // once per ctor function, but can be more fine-grained also if we execute a + // line at a time. + void clearApplyState() { + // The process of allocating "defining globals" begins here, from scratch + // each time (things live before may no longer be). + definingGlobals.clear(); + + // When we start to apply the state there should be no previous state left + // over. + assert(seenDataStack.empty()); + } + void applyMemoryToModule() { // Memory must have already been flattened into the standard form: one // segment at offset 0, or none. @@ -421,11 +440,157 @@ private: segment.data = memory; } + // Serializing GC data requires more work than linear memory, because + // allocations have an identity, and they are created using struct.new / + // array.new, which we must emit in a proper location in the wasm. 
This + // affects how we serialize globals, which can contain GC data, and also, we + // use globals to store GC data, so overall the process of computing the + // globals is where most of the GC logic ends up. + // + // The general idea for handling GC data is as follows: After evaluating the + // code, we end up with some live allocations in the interpreter, which we + // need to somehow serialize into the wasm module. We will put each such live + // GC data item into its own "defining global", a global whose purpose is to + // create and store that data. Each such global is immutable, and has the + // exact type of the data, for simplicity. Every other reference to that GC + // data in the interpreter's memory can then be serialized by simply emitting + // a global.get of that defining global. void applyGlobalsToModule() { Builder builder(*wasm); - for (const auto& [name, value] : instance->globals) { - wasm->getGlobal(name)->init = builder.makeConstantExpression(value); + + if (!wasm->features.hasGC()) { + // Without GC, we can simply serialize the globals in place as they are. + for (const auto& [name, values] : instance->globals) { + wasm->getGlobal(name)->init = getSerialization(values); + } + return; + } + + // We need to emit the "defining globals" of GC data before the existing + // globals, as the normal ones may refer to them. We do this by removing all + // the existing globals, and then adding them one by one, during which time + // we call getSerialization() for their init expressions. If their init + // refers to GC data, then we will allocate a defining global for that data, + // and refer to it. Put another way, we place the existing globals back into + // the module one at a time, adding their dependencies as we go. + auto oldGlobals = std::move(wasm->globals); + wasm->updateMaps(); + + for (auto& oldGlobal : oldGlobals) { + // Serialize the global's value.
While doing so, pass in the name of this + // global, as we may be able to reuse the global as the defining global + // for the value. See getSerialization() for more details. + Name name; + if (!oldGlobal->mutable_ && oldGlobal->type == oldGlobal->init->type) { + // This has the properties we need of a defining global - immutable and + // of the precise type - so use it. + name = oldGlobal->name; + } + + // If there is a value here to serialize, do so. (If there is no value, + // then this global was added after the interpreter initialized the + // module, which means it is a new global we've added since; we don't need + // to do anything for such a global - if it is needed it will show up as a + // dependency of something, and be emitted at the right time and place.) + auto iter = instance->globals.find(oldGlobal->name); + if (iter != instance->globals.end()) { + oldGlobal->init = getSerialization(iter->second, name); + wasm->addGlobal(std::move(oldGlobal)); + } + } + } + +public: + // Maps each GC data in the interpreter to its defining global: the global in + // which it is created, and then all other users of it can just global.get + // that. + std::unordered_map<GCData*, Name> definingGlobals; + + // The data we have seen so far on the stack. This is used to guard against + // infinite recursion, which would otherwise happen if there is a cycle among + // the live objects, which we don't handle yet. + // + // Pick a constant of 2 here to handle the common case of an object with a + // reference to another object that is already in a defining global. + SmallSet<GCData*, 2> seenDataStack; + + // If |possibleDefiningGlobal| is provided, it is the name of a global that we + // are in the init expression of, and which can be reused as defining global, + // if the other conditions are suitable. 
+ Expression* getSerialization(const Literal& value, + Name possibleDefiningGlobal = Name()) { + Builder builder(*wasm); + + if (!value.isData()) { + // This can be handled normally. + return builder.makeConstantExpression(value); + } + + // This is GC data, which we must handle in a more careful way. + auto* data = value.getGCData().get(); + if (!data) { + // This is a null, so simply emit one. + return builder.makeRefNull(value.type); } + + // There was actual GC data allocated here. + auto type = value.type; + auto& definingGlobal = definingGlobals[data]; + if (!definingGlobal.is()) { + // This is the first usage of this allocation. Generate a struct.new / + // array.new for it. + auto& values = value.getGCData()->values; + std::vector<Expression*> args; + + // The initial values for this allocation may themselves be GC + // allocations. Recurse and add globals as necessary. + // TODO: Handle cycles. That will require code in the start function. For + // now, just error if we detect an infinite recursion. + if (seenDataStack.count(data)) { + Fatal() << "Cycle in live GC data, which we cannot serialize yet."; + } + seenDataStack.insert(data); + for (auto& value : values) { + args.push_back(getSerialization(value)); + } + seenDataStack.erase(data); + + Expression* init; + auto heapType = type.getHeapType(); + // TODO: handle rtts if we need them + if (heapType.isStruct()) { + init = builder.makeStructNew(heapType, args); + } else if (heapType.isArray()) { + // TODO: for repeated identical values, can use ArrayNew + init = builder.makeArrayInit(heapType, args); + } else { + WASM_UNREACHABLE("bad gc type"); + } + + if (possibleDefiningGlobal.is()) { + // No need to allocate a new global, as we are in the definition of + // one. Just return the initialization expression, which will be + // placed in that global's |init| field, and first note this as the + // defining global. 
+ definingGlobal = possibleDefiningGlobal; + return init; + } + + // Allocate a new defining global. + auto name = Names::getValidGlobalName(*wasm, "ctor-eval$global"); + wasm->addGlobal(builder.makeGlobal(name, type, init, Builder::Immutable)); + definingGlobal = name; + } + + // Refer to this GC allocation by reading from the global that is + // designated to contain it. + return builder.makeGlobalGet(definingGlobal, value.type); + } + + Expression* getSerialization(const Literals& values, + Name possibleDefiningGlobal = Name()) { + assert(values.size() == 1); + return getSerialization(values[0], possibleDefiningGlobal); } }; @@ -573,7 +738,7 @@ EvalCtorOutcome evalCtor(EvallingModuleRunner& instance, for (Index i = 0; i < copyFunc->getNumLocals(); i++) { auto value = appliedLocals[i]; localSets.push_back( - builder.makeLocalSet(i, builder.makeConstantExpression(value))); + builder.makeLocalSet(i, interface.getSerialization(value))); } // Put the local sets at the front of the block. We know there must be a @@ -666,7 +831,7 @@ void evalCtors(Module& wasm, if (func->getResults() == Type::none) { copyFunc->body = Builder(wasm).makeNop(); } else { - copyFunc->body = Builder(wasm).makeConstantExpression(*outcome); + copyFunc->body = interface.getSerialization(*outcome); } wasm.getExport(exp->name)->value = copyName; } diff --git a/test/ctor-eval/gc-2.wast b/test/ctor-eval/gc-2.wast new file mode 100644 index 000000000..fed45abac --- /dev/null +++ b/test/ctor-eval/gc-2.wast @@ -0,0 +1,54 @@ +(module + (type $struct (struct_subtype (field i32) data)) + + (import "import" "import" (func $import (param anyref))) + + ;; This struct is created in an immutable global, but it has the wrong type. + ;; We will create a new defining global for it that has the proper type, and + ;; read from it here. (This is necessary as when the global is used elsewhere + ;; we want to get the right type from the global.get.) 
+ (global $global1 (ref any) + (struct.new $struct + (i32.const 1337) + ) + ) + + ;; Test reordering of globals. This global will be written a value that is + ;; actually defined after it. To handle that, we must create it earlier than + ;; this global. + (global $global2 (mut (ref null $struct)) + (ref.null $struct) + ) + + ;; This global is perfect to be a defining global (immutable, right type), but + ;; because of an earlier use, we will end up defining it earlier on, and + ;; reading it here. + (global $global3 (ref $struct) + (struct.new $struct + (i32.const 9999) + ) + ) + + (func "test1" + (global.set $global2 + (global.get $global3) + ) + ) + + (func "keepalive" (result i32) + (select + (struct.get $struct 0 + (ref.cast_static $struct + (global.get $global1) + ) + ) + (struct.get $struct 0 + (global.get $global2) + ) + (struct.get $struct 0 + (global.get $global3) + ) + ) + ) +) + diff --git a/test/ctor-eval/gc-2.wast.ctors b/test/ctor-eval/gc-2.wast.ctors new file mode 100644 index 000000000..a5bce3fd2 --- /dev/null +++ b/test/ctor-eval/gc-2.wast.ctors @@ -0,0 +1 @@ +test1 diff --git a/test/ctor-eval/gc-2.wast.out b/test/ctor-eval/gc-2.wast.out new file mode 100644 index 000000000..ce30a27e7 --- /dev/null +++ b/test/ctor-eval/gc-2.wast.out @@ -0,0 +1,29 @@ +(module + (type $struct (struct (field i32))) + (type $none_=>_i32 (func (result i32))) + (global $ctor-eval$global (ref $struct) (struct.new $struct + (i32.const 1337) + )) + (global $global1 (ref any) (global.get $ctor-eval$global)) + (global $ctor-eval$global_0 (ref $struct) (struct.new $struct + (i32.const 9999) + )) + (global $global2 (mut (ref null $struct)) (global.get $ctor-eval$global_0)) + (global $global3 (ref $struct) (global.get $ctor-eval$global_0)) + (export "keepalive" (func $1)) + (func $1 (result i32) + (select + (struct.get $struct 0 + (ref.cast_static $struct + (global.get $global1) + ) + ) + (struct.get $struct 0 + (global.get $global2) + ) + (struct.get $struct 0 + (global.get 
$global3) + ) + ) + ) +) diff --git a/test/ctor-eval/gc-array.wast b/test/ctor-eval/gc-array.wast new file mode 100644 index 000000000..8f6731821 --- /dev/null +++ b/test/ctor-eval/gc-array.wast @@ -0,0 +1,45 @@ +(module + (type $array (array (mut i32))) + + (import "import" "import" (func $import (param anyref))) + + ;; This global will remain as it is. + (global $global1 (ref $array) + (array.init_static $array + (i32.const 10) + (i32.const 20) + (i32.const 30) + (i32.const 40) + ) + ) + + (global $global2 (ref $array) + (array.init_static $array + (i32.const 42) + ;; This location will be written with a new value, 1337 + (i32.const 0) + ) + ) + + (func "test1" + (array.set $array + (global.get $global2) + (i32.const 1) + (i32.const 1337) + ) + ) + + (func "keepalive" (result i32) + (i32.add + (array.get $array + (global.get $global1) + (i32.const 0) + ) + (array.get $array + (global.get $global2) + (i32.const 0) + ) + ) + ) +) + diff --git a/test/ctor-eval/gc-array.wast.ctors b/test/ctor-eval/gc-array.wast.ctors new file mode 100644 index 000000000..a5bce3fd2 --- /dev/null +++ b/test/ctor-eval/gc-array.wast.ctors @@ -0,0 +1 @@ +test1 diff --git a/test/ctor-eval/gc-array.wast.out b/test/ctor-eval/gc-array.wast.out new file mode 100644 index 000000000..d78eba852 --- /dev/null +++ b/test/ctor-eval/gc-array.wast.out @@ -0,0 +1,27 @@ +(module + (type $array (array (mut i32))) + (type $none_=>_i32 (func (result i32))) + (global $global1 (ref $array) (array.init_static $array + (i32.const 10) + (i32.const 20) + (i32.const 30) + (i32.const 40) + )) + (global $global2 (ref $array) (array.init_static $array + (i32.const 42) + (i32.const 1337) + )) + (export "keepalive" (func $1)) + (func $1 (result i32) + (i32.add + (array.get $array + (global.get $global1) + (i32.const 0) + ) + (array.get $array + (global.get $global2) + (i32.const 0) + ) + ) + ) +) diff --git a/test/ctor-eval/gc.wast b/test/ctor-eval/gc.wast new file mode 100644 index 000000000..0449b6140 --- /dev/null 
+++ b/test/ctor-eval/gc.wast @@ -0,0 +1,65 @@ +(module + (type $struct (struct_subtype (field i32) data)) + + (import "import" "import" (func $import (param anyref))) + + ;; Create a GC object in a global. We can keep the struct.new here even after + ;; evalling (we should not create an extra, unneeded global, and read from + ;; that). + (global $global1 (ref $struct) + (struct.new $struct + (i32.const 1337) + ) + ) + + ;; After evalling we should see this refer to a struct with contents 42, and + ;; not 41, which is overridden, see "test1". We also should not see any code + ;; that creates an object with 41, as that is no longer live. + ;; + ;; Note that we will not simply do a struct.new in this global, as it is + ;; mutable, and we only use immutable globals as defining globals for values, + ;; so a new (immutable) global will appear, and we will read from it. + (global $global2 (mut (ref null $struct)) (ref.null $struct)) + + (func "test1" + ;; Leave the first local as null, which we should handle properly (we will + ;; end up emitting nothing and still using the default null value). + (local $temp1 (ref null $struct)) + (local $temp2 (ref null $struct)) + + (global.set $global2 + (struct.new $struct + (i32.const 41) + ) + ) + (global.set $global2 + (struct.new $struct + (i32.const 42) + ) + ) + + ;; Write a value to this local. A struct with value 99 will be created in a + ;; global, and referred to here. + (local.set $temp2 + (struct.new $struct + (i32.const 99) + ) + ) + + ;; Stop evalling here at the import. 
+ (call $import (local.get $temp1)) + (call $import (local.get $temp2)) + ) + + (func "keepalive" (result i32) + (i32.add + (struct.get $struct 0 + (global.get $global1) + ) + (struct.get $struct 0 + (global.get $global2) + ) + ) + ) +) + diff --git a/test/ctor-eval/gc.wast.ctors b/test/ctor-eval/gc.wast.ctors new file mode 100644 index 000000000..a5bce3fd2 --- /dev/null +++ b/test/ctor-eval/gc.wast.ctors @@ -0,0 +1 @@ +test1 diff --git a/test/ctor-eval/gc.wast.out b/test/ctor-eval/gc.wast.out new file mode 100644 index 000000000..b926b5ad4 --- /dev/null +++ b/test/ctor-eval/gc.wast.out @@ -0,0 +1,41 @@ +(module + (type $struct (struct (field i32))) + (type $anyref_=>_none (func (param anyref))) + (type $none_=>_i32 (func (result i32))) + (type $none_=>_none (func)) + (import "import" "import" (func $import (param anyref))) + (global $global1 (ref $struct) (struct.new $struct + (i32.const 1337) + )) + (global $ctor-eval$global (ref $struct) (struct.new $struct + (i32.const 42) + )) + (global $global2 (mut (ref null $struct)) (global.get $ctor-eval$global)) + (global $ctor-eval$global_0 (ref $struct) (struct.new $struct + (i32.const 99) + )) + (export "test1" (func $0_0)) + (export "keepalive" (func $1)) + (func $1 (result i32) + (i32.add + (struct.get $struct 0 + (global.get $global1) + ) + (struct.get $struct 0 + (global.get $global2) + ) + ) + ) + (func $0_0 + (local $0 (ref null $struct)) + (local.set $0 + (global.get $ctor-eval$global_0) + ) + (call $import + (ref.null $struct) + ) + (call $import + (local.get $0) + ) + ) +) |