diff options
-rw-r--r-- | src/tools/wasm-ctor-eval.cpp | 173 | ||||
-rw-r--r-- | test/ctor-eval/gc-2.wast | 54 | ||||
-rw-r--r-- | test/ctor-eval/gc-2.wast.ctors | 1 | ||||
-rw-r--r-- | test/ctor-eval/gc-2.wast.out | 29 | ||||
-rw-r--r-- | test/ctor-eval/gc-array.wast | 45 | ||||
-rw-r--r-- | test/ctor-eval/gc-array.wast.ctors | 1 | ||||
-rw-r--r-- | test/ctor-eval/gc-array.wast.out | 27 | ||||
-rw-r--r-- | test/ctor-eval/gc.wast | 65 | ||||
-rw-r--r-- | test/ctor-eval/gc.wast.ctors | 1 | ||||
-rw-r--r-- | test/ctor-eval/gc.wast.out | 41 |
10 files changed, 433 insertions, 4 deletions
diff --git a/src/tools/wasm-ctor-eval.cpp b/src/tools/wasm-ctor-eval.cpp index 0bcf42f9c..59e54d4f7 100644 --- a/src/tools/wasm-ctor-eval.cpp +++ b/src/tools/wasm-ctor-eval.cpp @@ -33,6 +33,7 @@ #include "pass.h" #include "support/colors.h" #include "support/file.h" +#include "support/small_set.h" #include "support/string.h" #include "tool-options.h" #include "wasm-builder.h" @@ -157,6 +158,8 @@ struct CtorEvalExternalInterface : EvallingModuleRunner::ExternalInterface { // Called when we want to apply the current state of execution to the Module. // Until this is called the Module is never changed. void applyToModule() { + clearApplyState(); + // If nothing was ever written to memory then there is nothing to update. if (!memory.empty()) { applyMemoryToModule(); @@ -404,6 +407,22 @@ private: return ret; } + // Clear the state of the operation of applying the interpreter's runtime + // information into the module. + // + // This happens each time we apply contents to the module, which is basically + // once per ctor function, but can be more fine-grained also if we execute a + // line at a time. + void clearApplyState() { + // The process of allocating "defining globals" begins here, from scratch + // each time (things live before may no longer be). + definingGlobals.clear(); + + // When we start to apply the state there should be no previous state left + // over. + assert(seenDataStack.empty()); + } + void applyMemoryToModule() { // Memory must have already been flattened into the standard form: one // segment at offset 0, or none. @@ -421,11 +440,157 @@ private: segment.data = memory; } + // Serializing GC data requires more work than linear memory, because + // allocations have an identity, and they are created using struct.new / + // array.new, which we must emit in a proper location in the wasm. This + // affects how we serialize globals, which can contain GC data, and also, we + // use globals to store GC data, so overall the process of computing the + // globals is where most of the GC logic ends up. + // + // The general idea for handling GC data is as follows: After evaluating the + // code, we end up with some live allocations in the interpreter, which we + // need to somehow serialize into the wasm module. We will put each such live + // GC data item into its own "defining global", a global whose purpose is to + // create and store that data. Each such global is immutable, and has the + // exact type of the data, for simplicity. Every other reference to that GC + // data in the interpreter's memory can then be serialized by simply emitting + // a global.get of that defining global. void applyGlobalsToModule() { Builder builder(*wasm); - for (const auto& [name, value] : instance->globals) { - wasm->getGlobal(name)->init = builder.makeConstantExpression(value); + + if (!wasm->features.hasGC()) { + // Without GC, we can simply serialize the globals in place as they are. + for (const auto& [name, values] : instance->globals) { + wasm->getGlobal(name)->init = getSerialization(values); + } + return; + } + + // We need to emit the "defining globals" of GC data before the existing + // globals, as the normal ones may refer to them. We do this by removing all + // the existing globals, and then adding them one by one, during which time + // we call getSerialization() for their init expressions. If their init + // refes to GC data, then we will allocate a defining global for that data, + // and refer to it. Put another way, we place the existing globals back into + // the module one at a time, adding their dependencies as we go. + auto oldGlobals = std::move(wasm->globals); + wasm->updateMaps(); + + for (auto& oldGlobal : oldGlobals) { + // Serialize the global's value. While doing so, pass in the name of this + // global, as we may be able to reuse the global as the defining global + // for the value. See getSerialization() for more details. + Name name; + if (!oldGlobal->mutable_ && oldGlobal->type == oldGlobal->init->type) { + // This has the properties we need of a defining global - immutable and + // of the precise type - so use it. + name = oldGlobal->name; + } + + // If there is a value here to serialize, do so. (If there is no value, + // then this global was added after the interpreter initialized the + // module, which means it is a new global we've added since; we don't need + // to do anything for such a global - if it is needed it will show up as a + // dependency of something, and be emitted at the right time and place.) + auto iter = instance->globals.find(oldGlobal->name); + if (iter != instance->globals.end()) { + oldGlobal->init = getSerialization(iter->second, name); + wasm->addGlobal(std::move(oldGlobal)); + } + } + } + +public: + // Maps each GC data in the interpreter to its defining global: the global in + // which it is created, and then all other users of it can just global.get + // that. + std::unordered_map<GCData*, Name> definingGlobals; + + // The data we have seen so far on the stack. This is used to guard against + // infinite recursion, which would otherwise happen if there is a cycle among + // the live objects, which we don't handle yet. + // + // Pick a constant of 2 here to handle the common case of an object with a + // reference to another object that is already in a defining global. + SmallSet<GCData*, 2> seenDataStack; + + // If |possibleDefiningGlobal| is provided, it is the name of a global that we + // are in the init expression of, and which can be reused as defining global, + // if the other conditions are suitable. + Expression* getSerialization(const Literal& value, + Name possibleDefiningGlobal = Name()) { + Builder builder(*wasm); + + if (!value.isData()) { + // This can be handled normally. + return builder.makeConstantExpression(value); + } + + // This is GC data, which we must handle in a more careful way. + auto* data = value.getGCData().get(); + if (!data) { + // This is a null, so simply emit one. + return builder.makeRefNull(value.type); } + + // There was actual GC data allocated here. + auto type = value.type; + auto& definingGlobal = definingGlobals[data]; + if (!definingGlobal.is()) { + // This is the first usage of this allocation. Generate a struct.new / + // array.new for it. + auto& values = value.getGCData()->values; + std::vector<Expression*> args; + + // The initial values for this allocation may themselves be GC + // allocations. Recurse and add globals as necessary. + // TODO: Handle cycles. That will require code in the start function. For + // now, just error if we detect an infinite recursion. + if (seenDataStack.count(data)) { + Fatal() << "Cycle in live GC data, which we cannot serialize yet."; + } + seenDataStack.insert(data); + for (auto& value : values) { + args.push_back(getSerialization(value)); + } + seenDataStack.erase(data); + + Expression* init; + auto heapType = type.getHeapType(); + // TODO: handle rtts if we need them + if (heapType.isStruct()) { + init = builder.makeStructNew(heapType, args); + } else if (heapType.isArray()) { + // TODO: for repeated identical values, can use ArrayNew + init = builder.makeArrayInit(heapType, args); + } else { + WASM_UNREACHABLE("bad gc type"); + } + + if (possibleDefiningGlobal.is()) { + // No need to allocate a new global, as we are in the definition of + // one. Just return the initialization expression, which will be + // placed in that global's |init| field, and first note this as the + // defining global. + definingGlobal = possibleDefiningGlobal; + return init; + } + + // Allocate a new defining global. + auto name = Names::getValidGlobalName(*wasm, "ctor-eval$global"); + wasm->addGlobal(builder.makeGlobal(name, type, init, Builder::Immutable)); + definingGlobal = name; + } + + // Refer to this GC allocation by reading from the global that is + // designated to contain it. + return builder.makeGlobalGet(definingGlobal, value.type); + } + + Expression* getSerialization(const Literals& values, + Name possibleDefiningGlobal = Name()) { + assert(values.size() == 1); + return getSerialization(values[0], possibleDefiningGlobal); } }; @@ -573,7 +738,7 @@ EvalCtorOutcome evalCtor(EvallingModuleRunner& instance, for (Index i = 0; i < copyFunc->getNumLocals(); i++) { auto value = appliedLocals[i]; localSets.push_back( - builder.makeLocalSet(i, builder.makeConstantExpression(value))); + builder.makeLocalSet(i, interface.getSerialization(value))); } // Put the local sets at the front of the block. We know there must be a @@ -666,7 +831,7 @@ void evalCtors(Module& wasm, if (func->getResults() == Type::none) { copyFunc->body = Builder(wasm).makeNop(); } else { - copyFunc->body = Builder(wasm).makeConstantExpression(*outcome); + copyFunc->body = interface.getSerialization(*outcome); } wasm.getExport(exp->name)->value = copyName; } diff --git a/test/ctor-eval/gc-2.wast b/test/ctor-eval/gc-2.wast new file mode 100644 index 000000000..fed45abac --- /dev/null +++ b/test/ctor-eval/gc-2.wast @@ -0,0 +1,54 @@ +(module + (type $struct (struct_subtype (field i32) data)) + + (import "import" "import" (func $import (param anyref))) + + ;; This struct is created in an immutable global, but it has the wrong type. + ;; We will create a new defining global for it that has the proper type, and + ;; read from it here. (This is necessary as when the global is used elsewhere + ;; we want to get the right type from the global.get.) + (global $global1 (ref any) + (struct.new $struct + (i32.const 1337) + ) + ) + + ;; Test reordering of globals. This global will be written a value that is + ;; actually defined after it. To handle that, we must create it earlier than + ;; this global. + (global $global2 (mut (ref null $struct)) + (ref.null $struct) + ) + + ;; This global is perfect to be a defining global (immutable, right type), but + ;; because of an earlier use, we will end up defining it earlier on, and + ;; reading it here. + (global $global3 (ref $struct) + (struct.new $struct + (i32.const 9999) + ) + ) + + (func "test1" + (global.set $global2 + (global.get $global3) + ) + ) + + (func "keepalive" (result i32) + (select + (struct.get $struct 0 + (ref.cast_static $struct + (global.get $global1) + ) + ) + (struct.get $struct 0 + (global.get $global2) + ) + (struct.get $struct 0 + (global.get $global3) + ) + ) + ) +) + diff --git a/test/ctor-eval/gc-2.wast.ctors b/test/ctor-eval/gc-2.wast.ctors new file mode 100644 index 000000000..a5bce3fd2 --- /dev/null +++ b/test/ctor-eval/gc-2.wast.ctors @@ -0,0 +1 @@ +test1 diff --git a/test/ctor-eval/gc-2.wast.out b/test/ctor-eval/gc-2.wast.out new file mode 100644 index 000000000..ce30a27e7 --- /dev/null +++ b/test/ctor-eval/gc-2.wast.out @@ -0,0 +1,29 @@ +(module + (type $struct (struct (field i32))) + (type $none_=>_i32 (func (result i32))) + (global $ctor-eval$global (ref $struct) (struct.new $struct + (i32.const 1337) + )) + (global $global1 (ref any) (global.get $ctor-eval$global)) + (global $ctor-eval$global_0 (ref $struct) (struct.new $struct + (i32.const 9999) + )) + (global $global2 (mut (ref null $struct)) (global.get $ctor-eval$global_0)) + (global $global3 (ref $struct) (global.get $ctor-eval$global_0)) + (export "keepalive" (func $1)) + (func $1 (result i32) + (select + (struct.get $struct 0 + (ref.cast_static $struct + (global.get $global1) + ) + ) + (struct.get $struct 0 + (global.get $global2) + ) + (struct.get $struct 0 + (global.get $global3) + ) + ) + ) +) diff --git a/test/ctor-eval/gc-array.wast b/test/ctor-eval/gc-array.wast new file mode 100644 index 000000000..8f6731821 --- /dev/null +++ b/test/ctor-eval/gc-array.wast @@ -0,0 +1,45 @@ +(module + (type $array (array (mut i32))) + + (import "import" "import" (func $import (param anyref))) + + ;; This global will remain as it is. + (global $global1 (ref $array) + (array.init_static $array + (i32.const 10) + (i32.const 20) + (i32.const 30) + (i32.const 40) + ) + ) + + (global $global2 (ref $array) + (array.init_static $array + (i32.const 42) + ;; This location will be written with a new value, 1337 + (i32.const 0) + ) + ) + + (func "test1" + (array.set $array + (global.get $global2) + (i32.const 1) + (i32.const 1337) + ) + ) + + (func "keepalive" (result i32) + (i32.add + (array.get $array + (global.get $global1) + (i32.const 0) + ) + (array.get $array + (global.get $global2) + (i32.const 0) + ) + ) + ) +) + diff --git a/test/ctor-eval/gc-array.wast.ctors b/test/ctor-eval/gc-array.wast.ctors new file mode 100644 index 000000000..a5bce3fd2 --- /dev/null +++ b/test/ctor-eval/gc-array.wast.ctors @@ -0,0 +1 @@ +test1 diff --git a/test/ctor-eval/gc-array.wast.out b/test/ctor-eval/gc-array.wast.out new file mode 100644 index 000000000..d78eba852 --- /dev/null +++ b/test/ctor-eval/gc-array.wast.out @@ -0,0 +1,27 @@ +(module + (type $array (array (mut i32))) + (type $none_=>_i32 (func (result i32))) + (global $global1 (ref $array) (array.init_static $array + (i32.const 10) + (i32.const 20) + (i32.const 30) + (i32.const 40) + )) + (global $global2 (ref $array) (array.init_static $array + (i32.const 42) + (i32.const 1337) + )) + (export "keepalive" (func $1)) + (func $1 (result i32) + (i32.add + (array.get $array + (global.get $global1) + (i32.const 0) + ) + (array.get $array + (global.get $global2) + (i32.const 0) + ) + ) + ) +) diff --git a/test/ctor-eval/gc.wast b/test/ctor-eval/gc.wast new file mode 100644 index 000000000..0449b6140 --- /dev/null +++ b/test/ctor-eval/gc.wast @@ -0,0 +1,65 @@ +(module + (type $struct (struct_subtype (field i32) data)) + + (import "import" "import" (func $import (param anyref))) + + ;; Create a GC object in a global. We can keep the struct.new here even after + ;; evalling (we should not create an extra, unneeded global, and read from + ;; that). + (global $global1 (ref $struct) + (struct.new $struct + (i32.const 1337) + ) + ) + + ;; After evalling we should see this refer to a struct with contents 42, and + ;; not 41, which is overridden, see "test1". We also should not see any code + ;; that creates an object with 41, as that is no longer live. + ;; + ;; Note that we will not simply do a struct.new in this global, as it is + ;; mutable, and we only use immutable globals as defining globals for values, + ;; so a new (immutable) global will appear, and we will read from it. + (global $global2 (mut (ref null $struct)) (ref.null $struct)) + + (func "test1" + ;; Leave the first local as null, which we should handle properly (we will + ;; end up emitting nothing and still using the default null value). + (local $temp1 (ref null $struct)) + (local $temp2 (ref null $struct)) + + (global.set $global2 + (struct.new $struct + (i32.const 41) + ) + ) + (global.set $global2 + (struct.new $struct + (i32.const 42) + ) + ) + + ;; Write a value to this local. A struct with value 99 will be created in a + ;; global, and referred to here. + (local.set $temp2 + (struct.new $struct + (i32.const 99) + ) + ) + + ;; Stop evalling here at the import. + (call $import (local.get $temp1)) + (call $import (local.get $temp2)) + ) + + (func "keepalive" (result i32) + (i32.add + (struct.get $struct 0 + (global.get $global1) + ) + (struct.get $struct 0 + (global.get $global2) + ) + ) + ) +) + diff --git a/test/ctor-eval/gc.wast.ctors b/test/ctor-eval/gc.wast.ctors new file mode 100644 index 000000000..a5bce3fd2 --- /dev/null +++ b/test/ctor-eval/gc.wast.ctors @@ -0,0 +1 @@ +test1 diff --git a/test/ctor-eval/gc.wast.out b/test/ctor-eval/gc.wast.out new file mode 100644 index 000000000..b926b5ad4 --- /dev/null +++ b/test/ctor-eval/gc.wast.out @@ -0,0 +1,41 @@ +(module + (type $struct (struct (field i32))) + (type $anyref_=>_none (func (param anyref))) + (type $none_=>_i32 (func (result i32))) + (type $none_=>_none (func)) + (import "import" "import" (func $import (param anyref))) + (global $global1 (ref $struct) (struct.new $struct + (i32.const 1337) + )) + (global $ctor-eval$global (ref $struct) (struct.new $struct + (i32.const 42) + )) + (global $global2 (mut (ref null $struct)) (global.get $ctor-eval$global)) + (global $ctor-eval$global_0 (ref $struct) (struct.new $struct + (i32.const 99) + )) + (export "test1" (func $0_0)) + (export "keepalive" (func $1)) + (func $1 (result i32) + (i32.add + (struct.get $struct 0 + (global.get $global1) + ) + (struct.get $struct 0 + (global.get $global2) + ) + ) + ) + (func $0_0 + (local $0 (ref null $struct)) + (local.set $0 + (global.get $ctor-eval$global_0) + ) + (call $import + (ref.null $struct) + ) + (call $import + (local.get $0) + ) + ) +) |