summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/tools/wasm-ctor-eval.cpp 173
-rw-r--r-- test/ctor-eval/gc-2.wast 54
-rw-r--r-- test/ctor-eval/gc-2.wast.ctors 1
-rw-r--r-- test/ctor-eval/gc-2.wast.out 29
-rw-r--r-- test/ctor-eval/gc-array.wast 45
-rw-r--r-- test/ctor-eval/gc-array.wast.ctors 1
-rw-r--r-- test/ctor-eval/gc-array.wast.out 27
-rw-r--r-- test/ctor-eval/gc.wast 65
-rw-r--r-- test/ctor-eval/gc.wast.ctors 1
-rw-r--r-- test/ctor-eval/gc.wast.out 41
10 files changed, 433 insertions, 4 deletions
diff --git a/src/tools/wasm-ctor-eval.cpp b/src/tools/wasm-ctor-eval.cpp
index 0bcf42f9c..59e54d4f7 100644
--- a/src/tools/wasm-ctor-eval.cpp
+++ b/src/tools/wasm-ctor-eval.cpp
@@ -33,6 +33,7 @@
#include "pass.h"
#include "support/colors.h"
#include "support/file.h"
+#include "support/small_set.h"
#include "support/string.h"
#include "tool-options.h"
#include "wasm-builder.h"
@@ -157,6 +158,8 @@ struct CtorEvalExternalInterface : EvallingModuleRunner::ExternalInterface {
// Called when we want to apply the current state of execution to the Module.
// Until this is called the Module is never changed.
void applyToModule() {
+ clearApplyState();
+
// If nothing was ever written to memory then there is nothing to update.
if (!memory.empty()) {
applyMemoryToModule();
@@ -404,6 +407,22 @@ private:
return ret;
}
+ // Reset the bookkeeping used while applying the interpreter's runtime
+ // information into the module.
+ //
+ // This happens each time we apply contents to the module, which is basically
+ // once per ctor function, but can be more fine-grained if we execute a line
+ // at a time.
+ void clearApplyState() {
+ // The process of allocating "defining globals" begins here, from scratch
+ // each time (data that was live before may no longer be live).
+ definingGlobals.clear();
+
+ // When we start to apply the state there should be no previous state left
+ // over: entries are only present while a serialization is recursing.
+ assert(seenDataStack.empty());
+ }
+
void applyMemoryToModule() {
// Memory must have already been flattened into the standard form: one
// segment at offset 0, or none.
@@ -421,11 +440,157 @@ private:
segment.data = memory;
}
+ // Serializing GC data requires more work than linear memory, because
+ // allocations have an identity, and they are created using struct.new /
+ // array.new, which we must emit in a proper location in the wasm. This
+ // affects how we serialize globals, which can contain GC data, and also, we
+ // use globals to store GC data, so overall the process of computing the
+ // globals is where most of the GC logic ends up.
+ //
+ // The general idea for handling GC data is as follows: After evaluating the
+ // code, we end up with some live allocations in the interpreter, which we
+ // need to somehow serialize into the wasm module. We put each such live GC
+ // data item into its own "defining global", a global whose purpose is to
+ // create and store that data. Each such global is immutable, and has the
+ // exact type of the data, for simplicity. Every other reference to that GC
+ // data in the interpreter's state can then be serialized by simply emitting
+ // a global.get of that defining global.
void applyGlobalsToModule() {
Builder builder(*wasm);
- for (const auto& [name, value] : instance->globals) {
- wasm->getGlobal(name)->init = builder.makeConstantExpression(value);
+
+ if (!wasm->features.hasGC()) {
+ // Without GC, we can simply serialize the globals in place as they are.
+ for (const auto& [name, values] : instance->globals) {
+ wasm->getGlobal(name)->init = getSerialization(values);
+ }
+ return;
+ }
+
+ // We need to emit the "defining globals" of GC data before the existing
+ // globals, as the normal ones may refer to them. We do this by removing all
+ // the existing globals, and then adding them one by one, during which time
+ // we call getSerialization() for their init expressions. If their init
+ // refers to GC data, then we will allocate a defining global for that data,
+ // and refer to it. Put another way, we place the existing globals back into
+ // the module one at a time, adding their dependencies as we go.
+ auto oldGlobals = std::move(wasm->globals);
+ wasm->updateMaps();
+
+ for (auto& oldGlobal : oldGlobals) {
+ // Serialize the global's value. While doing so, pass in the name of this
+ // global, as we may be able to reuse the global as the defining global
+ // for the value. See getSerialization() for more details.
+ Name name;
+ if (!oldGlobal->mutable_ && oldGlobal->type == oldGlobal->init->type) {
+ // Immutable and of the precise type: exactly what a defining global needs,
+ // so use it. NOTE(review): reads init unchecked; confirm it is never null.
+ name = oldGlobal->name;
+ }
+
+ // If there is a value here to serialize, do so. (If there is no value,
+ // then this global was added after the interpreter initialized the
+ // module, which means it is a new global we've added since; such a global
+ // is not re-added here - if it is needed it will show up as a dependency
+ // of something, and be emitted at the right time and place.)
+ auto iter = instance->globals.find(oldGlobal->name);
+ if (iter != instance->globals.end()) {
+ oldGlobal->init = getSerialization(iter->second, name);
+ wasm->addGlobal(std::move(oldGlobal));
+ }
+ }
+ }
+
+public:
+ // Maps each GC allocation in the interpreter to the name of its defining
+ // global: the global in which it is created, and which all other users of it
+ // can just global.get. Emptied by clearApplyState().
+ std::unordered_map<GCData*, Name> definingGlobals;
+
+ // The data whose fields getSerialization() is currently serializing (added
+ // before recursing, erased after). This is used to guard against infinite
+ // recursion from a cycle among the live objects, which we don't handle yet.
+ //
+ // Pick a constant of 2 here to handle the common case of an object with a
+ // reference to another object that is already in a defining global.
+ SmallSet<GCData*, 2> seenDataStack;
+
+ // Returns an expression that recreates |value| in the module. If
+ // |possibleDefiningGlobal| is provided, it is the name of a global whose init
+ // we are computing, and which may be reused as the defining global.
+ Expression* getSerialization(const Literal& value,
+ Name possibleDefiningGlobal = Name()) {
+ Builder builder(*wasm);
+
+ if (!value.isData()) {
+ // Not GC data: a plain constant expression suffices.
+ return builder.makeConstantExpression(value);
+ }
+
+ // This is GC data, which we must handle in a more careful way.
+ auto* data = value.getGCData().get();
+ if (!data) {
+ // This is a null, so simply emit one.
+ return builder.makeRefNull(value.type);
}
+
+ // Real GC data. operator[] adds a default (unset) Name on the first visit.
+ auto type = value.type;
+ auto& definingGlobal = definingGlobals[data];
+ if (!definingGlobal.is()) {
+ // This is the first usage of this allocation. Generate a struct.new /
+ // array.new for it.
+ auto& values = value.getGCData()->values;
+ std::vector<Expression*> args;
+
+ // Field values may themselves be GC allocations: recurse, adding globals
+ // as necessary. The loop variable |value| shadows the parameter there.
+ // TODO: Handle cycles. That will require code in the start function. For
+ // now, just error if we detect an infinite recursion.
+ if (seenDataStack.count(data)) {
+ Fatal() << "Cycle in live GC data, which we cannot serialize yet.";
+ }
+ seenDataStack.insert(data);
+ for (auto& value : values) {
+ args.push_back(getSerialization(value));
+ }
+ seenDataStack.erase(data);
+
+ Expression* init;
+ auto heapType = type.getHeapType();
+ // TODO: handle rtts if we need them
+ if (heapType.isStruct()) {
+ init = builder.makeStructNew(heapType, args);
+ } else if (heapType.isArray()) {
+ // TODO: for repeated identical values, can use ArrayNew
+ init = builder.makeArrayInit(heapType, args);
+ } else {
+ WASM_UNREACHABLE("bad gc type");
+ }
+
+ if (possibleDefiningGlobal.is()) {
+ // No need to allocate a new global, as we are in the definition of
+ // one. Record that global as the defining global, then return the
+ // initialization expression for the caller to place in that global's
+ // |init| field. (Nested data above never takes this path.)
+ definingGlobal = possibleDefiningGlobal;
+ return init;
+ }
+
+ // Allocate a new immutable defining global of the value's type.
+ auto name = Names::getValidGlobalName(*wasm, "ctor-eval$global");
+ wasm->addGlobal(builder.makeGlobal(name, type, init, Builder::Immutable));
+ definingGlobal = name;
+ }
+
+ // Refer to this GC allocation by reading from the global that is
+ // designated to contain it.
+ return builder.makeGlobalGet(definingGlobal, value.type);
+ }
+
+ Expression* getSerialization(const Literals& values,
+ Name possibleDefiningGlobal = Name()) {
+ assert(values.size() == 1); // Exactly one value is expected; multivalue would trip this.
+ return getSerialization(values[0], possibleDefiningGlobal); // Delegate to the Literal overload.
}
};
@@ -573,7 +738,7 @@ EvalCtorOutcome evalCtor(EvallingModuleRunner& instance,
for (Index i = 0; i < copyFunc->getNumLocals(); i++) {
auto value = appliedLocals[i];
localSets.push_back(
- builder.makeLocalSet(i, builder.makeConstantExpression(value)));
+ builder.makeLocalSet(i, interface.getSerialization(value)));
}
// Put the local sets at the front of the block. We know there must be a
@@ -666,7 +831,7 @@ void evalCtors(Module& wasm,
if (func->getResults() == Type::none) {
copyFunc->body = Builder(wasm).makeNop();
} else {
- copyFunc->body = Builder(wasm).makeConstantExpression(*outcome);
+ copyFunc->body = interface.getSerialization(*outcome);
}
wasm.getExport(exp->name)->value = copyName;
}
diff --git a/test/ctor-eval/gc-2.wast b/test/ctor-eval/gc-2.wast
new file mode 100644
index 000000000..fed45abac
--- /dev/null
+++ b/test/ctor-eval/gc-2.wast
@@ -0,0 +1,54 @@
+(module
+ (type $struct (struct_subtype (field i32) data))
+
+ (import "import" "import" (func $import (param anyref)))
+
+ ;; This struct is created in an immutable global, but it has the wrong type.
+ ;; We will create a new defining global for it that has the proper type, and
+ ;; read from it here. (This is necessary as when the global is used elsewhere
+ ;; we want to get the right type from the global.get.)
+ (global $global1 (ref any)
+ (struct.new $struct
+ (i32.const 1337)
+ )
+ )
+
+ ;; Test reordering of globals. This global will be written a value that is
+ ;; actually defined after it. To handle that, we must create that value's
+ ;; defining global earlier than this global.
+ (global $global2 (mut (ref null $struct))
+ (ref.null $struct)
+ )
+
+ ;; This global is perfect to be a defining global (immutable, right type), but
+ ;; because of an earlier use, we will end up defining it earlier on, and
+ ;; reading it here.
+ (global $global3 (ref $struct)
+ (struct.new $struct
+ (i32.const 9999)
+ )
+ )
+
+ (func "test1"
+ (global.set $global2
+ (global.get $global3)
+ )
+ )
+
+ (func "keepalive" (result i32)
+ (select
+ (struct.get $struct 0
+ (ref.cast_static $struct
+ (global.get $global1)
+ )
+ )
+ (struct.get $struct 0
+ (global.get $global2)
+ )
+ (struct.get $struct 0
+ (global.get $global3)
+ )
+ )
+ )
+)
+
diff --git a/test/ctor-eval/gc-2.wast.ctors b/test/ctor-eval/gc-2.wast.ctors
new file mode 100644
index 000000000..a5bce3fd2
--- /dev/null
+++ b/test/ctor-eval/gc-2.wast.ctors
@@ -0,0 +1 @@
+test1
diff --git a/test/ctor-eval/gc-2.wast.out b/test/ctor-eval/gc-2.wast.out
new file mode 100644
index 000000000..ce30a27e7
--- /dev/null
+++ b/test/ctor-eval/gc-2.wast.out
@@ -0,0 +1,29 @@
+(module
+ (type $struct (struct (field i32)))
+ (type $none_=>_i32 (func (result i32)))
+ (global $ctor-eval$global (ref $struct) (struct.new $struct
+ (i32.const 1337)
+ ))
+ (global $global1 (ref any) (global.get $ctor-eval$global))
+ (global $ctor-eval$global_0 (ref $struct) (struct.new $struct
+ (i32.const 9999)
+ ))
+ (global $global2 (mut (ref null $struct)) (global.get $ctor-eval$global_0))
+ (global $global3 (ref $struct) (global.get $ctor-eval$global_0))
+ (export "keepalive" (func $1))
+ (func $1 (result i32)
+ (select
+ (struct.get $struct 0
+ (ref.cast_static $struct
+ (global.get $global1)
+ )
+ )
+ (struct.get $struct 0
+ (global.get $global2)
+ )
+ (struct.get $struct 0
+ (global.get $global3)
+ )
+ )
+ )
+)
diff --git a/test/ctor-eval/gc-array.wast b/test/ctor-eval/gc-array.wast
new file mode 100644
index 000000000..8f6731821
--- /dev/null
+++ b/test/ctor-eval/gc-array.wast
@@ -0,0 +1,45 @@
+(module
+ (type $array (array (mut i32)))
+
+ (import "import" "import" (func $import (param anyref)))
+
+ ;; This global will remain as it is.
+ (global $global1 (ref $array)
+ (array.init_static $array
+ (i32.const 10)
+ (i32.const 20)
+ (i32.const 30)
+ (i32.const 40)
+ )
+ )
+
+ (global $global2 (ref $array)
+ (array.init_static $array
+ (i32.const 42)
+ ;; This location will be written with a new value, 1337
+ (i32.const 0)
+ )
+ )
+
+ (func "test1"
+ (array.set $array
+ (global.get $global2)
+ (i32.const 1)
+ (i32.const 1337)
+ )
+ )
+
+ (func "keepalive" (result i32)
+ (i32.add
+ (array.get $array
+ (global.get $global1)
+ (i32.const 0)
+ )
+ (array.get $array
+ (global.get $global2)
+ (i32.const 0)
+ )
+ )
+ )
+)
+
diff --git a/test/ctor-eval/gc-array.wast.ctors b/test/ctor-eval/gc-array.wast.ctors
new file mode 100644
index 000000000..a5bce3fd2
--- /dev/null
+++ b/test/ctor-eval/gc-array.wast.ctors
@@ -0,0 +1 @@
+test1
diff --git a/test/ctor-eval/gc-array.wast.out b/test/ctor-eval/gc-array.wast.out
new file mode 100644
index 000000000..d78eba852
--- /dev/null
+++ b/test/ctor-eval/gc-array.wast.out
@@ -0,0 +1,27 @@
+(module
+ (type $array (array (mut i32)))
+ (type $none_=>_i32 (func (result i32)))
+ (global $global1 (ref $array) (array.init_static $array
+ (i32.const 10)
+ (i32.const 20)
+ (i32.const 30)
+ (i32.const 40)
+ ))
+ (global $global2 (ref $array) (array.init_static $array
+ (i32.const 42)
+ (i32.const 1337)
+ ))
+ (export "keepalive" (func $1))
+ (func $1 (result i32)
+ (i32.add
+ (array.get $array
+ (global.get $global1)
+ (i32.const 0)
+ )
+ (array.get $array
+ (global.get $global2)
+ (i32.const 0)
+ )
+ )
+ )
+)
diff --git a/test/ctor-eval/gc.wast b/test/ctor-eval/gc.wast
new file mode 100644
index 000000000..0449b6140
--- /dev/null
+++ b/test/ctor-eval/gc.wast
@@ -0,0 +1,65 @@
+(module
+ (type $struct (struct_subtype (field i32) data))
+
+ (import "import" "import" (func $import (param anyref)))
+
+ ;; Create a GC object in a global. We can keep the struct.new here even after
+ ;; evalling (we should not create an extra, unneeded global, and read from
+ ;; that).
+ (global $global1 (ref $struct)
+ (struct.new $struct
+ (i32.const 1337)
+ )
+ )
+
+ ;; After evalling we should see this refer to a struct with contents 42, and
+ ;; not 41, which is overridden, see "test1". We also should not see any code
+ ;; that creates an object with 41, as that is no longer live.
+ ;;
+ ;; Note that we will not simply do a struct.new in this global, as it is
+ ;; mutable, and we only use immutable globals as defining globals for values,
+ ;; so a new (immutable) global will appear, and we will read from it.
+ (global $global2 (mut (ref null $struct)) (ref.null $struct))
+
+ (func "test1"
+ ;; Leave the first local as null, which we should handle properly (we will
+ ;; end up emitting nothing and still using the default null value).
+ (local $temp1 (ref null $struct))
+ (local $temp2 (ref null $struct))
+
+ (global.set $global2
+ (struct.new $struct
+ (i32.const 41)
+ )
+ )
+ (global.set $global2
+ (struct.new $struct
+ (i32.const 42)
+ )
+ )
+
+ ;; Write a value to this local. A struct with value 99 will be created in a
+ ;; global, and referred to here.
+ (local.set $temp2
+ (struct.new $struct
+ (i32.const 99)
+ )
+ )
+
+ ;; Stop evalling here at the import.
+ (call $import (local.get $temp1))
+ (call $import (local.get $temp2))
+ )
+
+ (func "keepalive" (result i32)
+ (i32.add
+ (struct.get $struct 0
+ (global.get $global1)
+ )
+ (struct.get $struct 0
+ (global.get $global2)
+ )
+ )
+ )
+)
+
diff --git a/test/ctor-eval/gc.wast.ctors b/test/ctor-eval/gc.wast.ctors
new file mode 100644
index 000000000..a5bce3fd2
--- /dev/null
+++ b/test/ctor-eval/gc.wast.ctors
@@ -0,0 +1 @@
+test1
diff --git a/test/ctor-eval/gc.wast.out b/test/ctor-eval/gc.wast.out
new file mode 100644
index 000000000..b926b5ad4
--- /dev/null
+++ b/test/ctor-eval/gc.wast.out
@@ -0,0 +1,41 @@
+(module
+ (type $struct (struct (field i32)))
+ (type $anyref_=>_none (func (param anyref)))
+ (type $none_=>_i32 (func (result i32)))
+ (type $none_=>_none (func))
+ (import "import" "import" (func $import (param anyref)))
+ (global $global1 (ref $struct) (struct.new $struct
+ (i32.const 1337)
+ ))
+ (global $ctor-eval$global (ref $struct) (struct.new $struct
+ (i32.const 42)
+ ))
+ (global $global2 (mut (ref null $struct)) (global.get $ctor-eval$global))
+ (global $ctor-eval$global_0 (ref $struct) (struct.new $struct
+ (i32.const 99)
+ ))
+ (export "test1" (func $0_0))
+ (export "keepalive" (func $1))
+ (func $1 (result i32)
+ (i32.add
+ (struct.get $struct 0
+ (global.get $global1)
+ )
+ (struct.get $struct 0
+ (global.get $global2)
+ )
+ )
+ )
+ (func $0_0
+ (local $0 (ref null $struct))
+ (local.set $0
+ (global.get $ctor-eval$global_0)
+ )
+ (call $import
+ (ref.null $struct)
+ )
+ (call $import
+ (local.get $0)
+ )
+ )
+)