diff options
-rw-r--r-- | src/passes/ConstantFieldPropagation.cpp | 139 | ||||
-rw-r--r-- | test/lit/passes/cfp.wast | 297 |
2 files changed, 354 insertions, 82 deletions
diff --git a/src/passes/ConstantFieldPropagation.cpp b/src/passes/ConstantFieldPropagation.cpp index 621ceec2a..f73a9b380 100644 --- a/src/passes/ConstantFieldPropagation.cpp +++ b/src/passes/ConstantFieldPropagation.cpp @@ -169,6 +169,11 @@ struct StructValues : public std::vector<PossibleConstantValues> { assert(index < size()); return std::vector<PossibleConstantValues>::operator[](index); } + + const PossibleConstantValues& operator[](size_t index) const { + assert(index < size()); + return std::vector<PossibleConstantValues>::operator[](index); + } }; // Map of types to information about the values their fields can take. @@ -206,13 +211,21 @@ struct StructValuesMap : public std::unordered_map<HeapType, StructValues> { using FunctionStructValuesMap = std::unordered_map<Function*, StructValuesMap>; // Scan each function to note all its writes to struct fields. +// +// We track information from struct.new and struct.set separately, because in +// struct.new we know more about the type - we know the actual exact type being +// written to, and not just that it is of a subtype of the instruction's type, +// which helps later. struct Scanner : public WalkerPass<PostWalker<Scanner>> { bool isFunctionParallel() override { return true; } - Pass* create() override { return new Scanner(functionInfos); } + Pass* create() override { + return new Scanner(functionNewInfos, functionSetInfos); + } - Scanner(FunctionStructValuesMap& functionInfos) - : functionInfos(functionInfos) {} + Scanner(FunctionStructValuesMap& functionNewInfos, + FunctionStructValuesMap& functionSetInfos) + : functionNewInfos(functionNewInfos), functionSetInfos(functionSetInfos) {} void visitStructNew(StructNew* curr) { auto type = curr->type; @@ -222,7 +235,7 @@ struct Scanner : public WalkerPass<PostWalker<Scanner>> { // Note writes to all the fields of the struct. auto heapType = type.getHeapType(); - auto& values = getStructValues(heapType); + auto& values = functionNewInfos[getFunction()][heapType]; auto& fields = heapType.getStruct().fields; for (Index i = 0; i < fields.size(); i++) { auto& fieldValues = values[i]; @@ -242,15 +255,13 @@ struct Scanner : public WalkerPass<PostWalker<Scanner>> { // Note a write to this field of the struct. auto heapType = type.getHeapType(); - noteExpression(curr->value, getStructValues(heapType)[curr->index]); + auto& values = functionSetInfos[getFunction()][heapType]; + noteExpression(curr->value, values[curr->index]); } private: - FunctionStructValuesMap& functionInfos; - - StructValues& getStructValues(HeapType type) { - return functionInfos[getFunction()][type]; - } + FunctionStructValuesMap& functionNewInfos; + FunctionStructValuesMap& functionSetInfos; // Note a value, checking whether it is a constant or not. void noteExpression(Expression* expr, PossibleConstantValues& info) { @@ -348,28 +359,34 @@ struct ConstantFieldPropagation : public Pass { } // Find and analyze all writes inside each function. - FunctionStructValuesMap functionInfos; + FunctionStructValuesMap functionNewInfos, functionSetInfos; for (auto& func : module->functions) { // Initialize the data for each function, so that we can operate on this // structure in parallel without modifying it. - functionInfos[func.get()]; + functionNewInfos[func.get()]; + functionSetInfos[func.get()]; } - Scanner scanner(functionInfos); + Scanner scanner(functionNewInfos, functionSetInfos); scanner.run(runner, module); scanner.walkModuleCode(module); // Combine the data from the functions. - StructValuesMap combinedInfos; - for (auto& kv : functionInfos) { - StructValuesMap& infos = kv.second; - for (auto& kv : infos) { - auto type = kv.first; - auto& info = kv.second; - for (Index i = 0; i < info.size(); i++) { - combinedInfos[type][i].combine(info[i]); + auto combine = [](const FunctionStructValuesMap& functionInfos, + StructValuesMap& combinedInfos) { + for (auto& kv : functionInfos) { + const StructValuesMap& infos = kv.second; + for (auto& kv : infos) { + auto type = kv.first; + auto& info = kv.second; + for (Index i = 0; i < info.size(); i++) { + combinedInfos[type][i].combine(info[i]); + } } } - } + }; + StructValuesMap combinedNewInfos, combinedSetInfos; + combine(functionNewInfos, combinedNewInfos); + combine(functionSetInfos, combinedSetInfos); // Handle subtyping. |combinedInfo| so far contains data that represents // each struct.new and struct.set's operation on the struct type used in @@ -398,44 +415,64 @@ struct ConstantFieldPropagation : public Pass { // efficient, we therefore propagate information about the possible values // in each field to both subtypes and supertypes. // - // TODO: Model struct.new separately from struct.set. With new we actually - // do know the specific type being written to, which means a get is - // only relevant for a new if the get is of a subtype. That means we - // only need to propagate values from new to subtypes. + // struct.new on the other hand knows exactly what type is being written to, + // and so given a get of $A and a new of $B, the new is relevant for the get + // iff $A is a subtype of $B, so we only need to propagate in one direction + // there, to supertypes. // // TODO: A topological sort could avoid repeated work here perhaps. + SubTypes subTypes(*module); - UniqueDeferredQueue<HeapType> work; - for (auto& kv : combinedInfos) { - auto type = kv.first; - work.push(type); - } - while (!work.empty()) { - auto type = work.pop(); - auto& infos = combinedInfos[type]; - - // Propagate shared fields to the supertype. - HeapType superType; - if (type.getSuperType(superType)) { - auto& superInfos = combinedInfos[superType]; - auto& superFields = superType.getStruct().fields; - for (Index i = 0; i < superFields.size(); i++) { - if (superInfos[i].combine(infos[i])) { - work.push(superType); + + auto propagate = [&subTypes](StructValuesMap& combinedInfos, + bool toSubTypes) { + UniqueDeferredQueue<HeapType> work; + for (auto& kv : combinedInfos) { + auto type = kv.first; + work.push(type); + } + while (!work.empty()) { + auto type = work.pop(); + auto& infos = combinedInfos[type]; + + // Propagate shared fields to the supertype. + HeapType superType; + if (type.getSuperType(superType)) { + auto& superInfos = combinedInfos[superType]; + auto& superFields = superType.getStruct().fields; + for (Index i = 0; i < superFields.size(); i++) { + if (superInfos[i].combine(infos[i])) { + work.push(superType); + } } } - } - // Propagate shared fields to the subtypes. - auto numFields = type.getStruct().fields.size(); - for (auto subType : subTypes.getSubTypes(type)) { - auto& subInfos = combinedInfos[subType]; - for (Index i = 0; i < numFields; i++) { - if (subInfos[i].combine(infos[i])) { - work.push(subType); + if (toSubTypes) { + // Propagate shared fields to the subtypes. + auto numFields = type.getStruct().fields.size(); + for (auto subType : subTypes.getSubTypes(type)) { + auto& subInfos = combinedInfos[subType]; + for (Index i = 0; i < numFields; i++) { + if (subInfos[i].combine(infos[i])) { + work.push(subType); + } + } } } } + }; + propagate(combinedNewInfos, false); + propagate(combinedSetInfos, true); + + // Combine both sources of information to the final information that gets + // care about. + StructValuesMap combinedInfos = std::move(combinedNewInfos); + for (auto& kv : combinedSetInfos) { + auto type = kv.first; + auto& info = kv.second; + for (Index i = 0; i < info.size(); i++) { + combinedInfos[type][i].combine(info[i]); + } } // Optimize. diff --git a/test/lit/passes/cfp.wast b/test/lit/passes/cfp.wast index 684e5cf98..fce78d126 100644 --- a/test/lit/passes/cfp.wast +++ b/test/lit/passes/cfp.wast @@ -553,9 +553,7 @@ ;; Subtyping: Create a supertype and get a subtype. As we never create a ;; subtype, the get must trap anyhow (the reference it receives can -;; only by null in this closed world). Since the only possible writes -;; to the field are of the value 10, we will optimize to that. (But -;; as it will trap anyhow, that doesn't really matter.) +;; only be null in this closed world). (module ;; CHECK: (type $none_=>_none (func)) @@ -582,6 +580,61 @@ ) ;; CHECK: (func $get ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null $substruct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $get + (drop + (struct.get $substruct 0 + (ref.null $substruct) + ) + ) + ) +) + +;; As above, but in addition to a new of $struct also add a set. The set could +;; in principle be relevant for the get as far as this pass knows, and so we +;; will optimize the result to the only possible value. (In practice, though, +;; it will trap anyhow.) +(module + ;; CHECK: (type $struct (struct (field (mut i32)))) + (type $struct (struct (mut i32))) + ;; CHECK: (type $none_=>_none (func)) + + ;; CHECK: (type $substruct (struct (field (mut i32))) (extends $struct)) + (type $substruct (struct (mut i32)) (extends $struct)) + + ;; CHECK: (func $create + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.new_with_rtt $struct + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (rtt.canon $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $struct 0 + ;; CHECK-NEXT: (ref.null $struct) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $create + (drop + (struct.new_with_rtt $struct + (i32.const 10) + (rtt.canon $struct) + ) + ) + (struct.set $struct 0 + (ref.null $struct) + (i32.const 10) + ) + ) + ;; CHECK: (func $get + ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (block (result i32) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (ref.as_non_null @@ -771,17 +824,15 @@ ) ;; Subtyping: Create both a subtype and a supertype, with different constants -;; for the shared field, but get from the subtype. As the field is -;; shared between the types, we cannot optimize here: we model the -;; struct.new as a set, and if there is a set of the supertype, then -;; it might be to a reference of the subtype. -;; TODO: model new and set separately +;; for the shared field, but get from the subtype. The field is +;; shared between the types, but we only create the substruct with +;; one value, so we can optimize. (module + ;; CHECK: (type $none_=>_none (func)) + ;; CHECK: (type $substruct (struct (field i32) (field f64)) (extends $struct)) (type $substruct (struct i32 f64) (extends $struct)) - ;; CHECK: (type $none_=>_none (func)) - ;; CHECK: (type $struct (struct (field i32))) (type $struct (struct i32)) @@ -817,6 +868,75 @@ ) ;; CHECK: (func $get ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (ref.null $substruct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $get + (drop + (struct.get $substruct 0 + (ref.null $substruct) + ) + ) + ) +) + +;; As above, but add a set of $struct. The set prevents the optimization. +(module + ;; CHECK: (type $struct (struct (field (mut i32)))) + (type $struct (struct (mut i32))) + + ;; CHECK: (type $substruct (struct (field (mut i32)) (field f64)) (extends $struct)) + (type $substruct (struct (mut i32) f64) (extends $struct)) + + ;; CHECK: (type $none_=>_none (func)) + + ;; CHECK: (func $create + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.new_with_rtt $struct + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (rtt.canon $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $struct 0 + ;; CHECK-NEXT: (ref.null $struct) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.new_with_rtt $substruct + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: (f64.const 3.14159) + ;; CHECK-NEXT: (rtt.canon $substruct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $create + (drop + (struct.new_with_rtt $struct + (i32.const 10) + (rtt.canon $struct) + ) + ) + (struct.set $struct 0 + (ref.null $struct) + (i32.const 10) + ) + (drop + (struct.new_with_rtt $substruct + (i32.const 20) + (f64.const 3.14159) + (rtt.canon $substruct) + ) + ) + ) + ;; CHECK: (func $get + ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (struct.get $substruct 0 ;; CHECK-NEXT: (ref.null $substruct) ;; CHECK-NEXT: ) @@ -967,22 +1087,17 @@ ;; Multi-level subtyping with conflicts. The even-numbered fields will get ;; different values in the sub-most type. Create the top and bottom types, but -;; not the middle one. Any field which receives more than one value, no matter -;; where, cannot be optimized, which leaves the odd fields as optimizable, as -;; well as fields only present in the sub-most class anyhow (if they have a -;; constant value in the single construction there). -;; (See notes earlier on the limitations of our modeling new and set in the same -;; way.) +;; not the middle one. (module ;; CHECK: (type $struct3 (struct (field i32) (field i32) (field f64) (field f64) (field anyref) (field anyref)) (extends $struct2)) (type $struct3 (struct i32 i32 f64 f64 anyref anyref) (extends $struct2)) - ;; CHECK: (type $struct2 (struct (field i32) (field i32) (field f64) (field f64)) (extends $struct1)) - (type $struct2 (struct i32 i32 f64 f64) (extends $struct1)) - ;; CHECK: (type $struct1 (struct (field i32) (field i32))) (type $struct1 (struct i32 i32)) + ;; CHECK: (type $struct2 (struct (field i32) (field i32) (field f64) (field f64)) (extends $struct1)) + (type $struct2 (struct i32 i32 f64 f64) (extends $struct1)) + ;; CHECK: (type $anyref_=>_none (func (param anyref))) ;; CHECK: (type $none_=>_none (func)) @@ -1055,8 +1170,13 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (struct.get $struct2 1 - ;; CHECK-NEXT: (ref.null $struct2) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (ref.null $struct2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 999) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop @@ -1090,8 +1210,13 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (struct.get $struct3 1 - ;; CHECK-NEXT: (ref.null $struct3) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (ref.null $struct3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 999) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop @@ -1196,15 +1321,116 @@ ) ;; Multi-level subtyping with a different value in the middle of the chain. We -;; cannot optimize any of these (for the very last field in the subtype, if we -;; modeled new separately from set then we could, as mentioned earlier). +;; can only optimize $struct3. (module - ;; CHECK: (type $struct1 (struct (field i32))) - (type $struct1 (struct i32)) - ;; CHECK: (type $struct2 (struct (field i32) (field f64)) (extends $struct1)) - (type $struct2 (struct i32 f64) (extends $struct1)) - ;; CHECK: (type $struct3 (struct (field i32) (field f64) (field anyref)) (extends $struct2)) - (type $struct3 (struct i32 f64 anyref) (extends $struct2)) + ;; CHECK: (type $struct1 (struct (field (mut i32)))) + (type $struct1 (struct (mut i32))) + ;; CHECK: (type $struct2 (struct (field (mut i32)) (field f64)) (extends $struct1)) + (type $struct2 (struct (mut i32) f64) (extends $struct1)) + ;; CHECK: (type $none_=>_none (func)) + + ;; CHECK: (type $struct3 (struct (field (mut i32)) (field f64) (field anyref)) (extends $struct2)) + (type $struct3 (struct (mut i32) f64 anyref) (extends $struct2)) + + ;; CHECK: (func $create + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.new_with_rtt $struct1 + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (rtt.canon $struct1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.new_with_rtt $struct2 + ;; CHECK-NEXT: (i32.const 9999) + ;; CHECK-NEXT: (f64.const 0) + ;; CHECK-NEXT: (rtt.canon $struct2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.new_with_rtt $struct3 + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (f64.const 0) + ;; CHECK-NEXT: (ref.null any) + ;; CHECK-NEXT: (rtt.canon $struct3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $create + (drop + (struct.new_with_rtt $struct1 + (i32.const 10) + (rtt.canon $struct1) + ) + ) + (drop + (struct.new_with_rtt $struct2 + (i32.const 9999) ;; use a different value here + (f64.const 0) + (rtt.canon $struct2) + ) + ) + (drop + (struct.new_with_rtt $struct3 + (i32.const 10) + (f64.const 0) + (ref.null any) + (rtt.canon $struct3) + ) + ) + ) + ;; CHECK: (func $get + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $struct1 0 + ;; CHECK-NEXT: (ref.null $struct1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $struct2 0 + ;; CHECK-NEXT: (ref.null $struct2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (ref.null $struct3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $get + ;; Get field 0 in all the types. + (drop + (struct.get $struct1 0 + (ref.null $struct1) + ) + ) + (drop + (struct.get $struct2 0 + (ref.null $struct2) + ) + ) + (drop + (struct.get $struct3 0 + (ref.null $struct3) + ) + ) + ) +) + +;; As above, but add not just a new of the middle class with a different value +;; but also a set. That prevents all optimizations. +(module + ;; CHECK: (type $struct2 (struct (field (mut i32)) (field f64)) (extends $struct1)) + (type $struct2 (struct (mut i32) f64) (extends $struct1)) + + ;; CHECK: (type $struct1 (struct (field (mut i32)))) + (type $struct1 (struct (mut i32))) + + ;; CHECK: (type $struct3 (struct (field (mut i32)) (field f64) (field anyref)) (extends $struct2)) + (type $struct3 (struct (mut i32) f64 anyref) (extends $struct2)) ;; CHECK: (type $none_=>_none (func)) @@ -1222,6 +1448,10 @@ ;; CHECK-NEXT: (rtt.canon $struct2) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $struct2 0 + ;; CHECK-NEXT: (ref.null $struct2) + ;; CHECK-NEXT: (i32.const 9999) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (struct.new_with_rtt $struct3 ;; CHECK-NEXT: (i32.const 10) @@ -1245,6 +1475,11 @@ (rtt.canon $struct2) ) ) + (struct.set $struct2 0 + (ref.null $struct2) + (i32.const 9999) ;; use a different value here + (f64.const 0) + ) (drop (struct.new_with_rtt $struct3 (i32.const 10) |