Fix a fuzz issue with scanning heap read types (#5184)

If a heap type only ever appears as the result of a read, we must include it in the analysis in ModuleUtils, even though it isn't written in the binary format. Otherwise analyses using ModuleUtils can error on not finding all types in the list of types. Fixes #5180
author: Alon Zakai <azakai@google.com> 2022-11-01 09:34:33 -0700
committer: GitHub <noreply@github.com> 2022-11-01 09:34:33 -0700
commit: 5e0ab66800de5d376429fcd281bd4cd1e41d8ed5 (patch)
tree: a28a6e12fbb790d2cf994f289e54e983f1a0e8d6
parent: 1fa6fcfebbec9e45bcdc481f19757ac14cda4699 (diff)
download: binaryen-5e0ab66800de5d376429fcd281bd4cd1e41d8ed5.tar.gz
binaryen-5e0ab66800de5d376429fcd281bd4cd1e41d8ed5.tar.bz2
binaryen-5e0ab66800de5d376429fcd281bd4cd1e41d8ed5.zip
2 files changed, 58 insertions, 1 deletions
diff --git a/src/ir/module-utils.cpp b/src/ir/module-utils.cpp
index 5044d303c..744f62682 100644
--- a/src/ir/module-utils.cpp
+++ b/src/ir/module-utils.cpp
@@ -40,6 +40,11 @@ struct Counts : public InsertOrderedMap<HeapType, size_t> {
       (*this)[type];
     }
   }
+  void include(Type type) {
+    for (HeapType ht : type.getHeapTypeChildren()) {
+      include(ht);
+    }
+  }
 };
 
 struct CodeScanner
@@ -73,13 +78,20 @@ struct CodeScanner
       }
     } else if (auto* get = curr->dynCast<StructGet>()) {
       counts.note(get->ref->type);
+      // If the type we read is a reference type then we must include it. It is
+      // not written in the binary format, so it doesn't need to be counted, but
+      // it does need to be taken into account in the IR (this may be the only
+      // place this type appears in the entire binary, and we must scan all
+      // types as the analyses that use us depend on that).
+      counts.include(get->type);
     } else if (auto* set = curr->dynCast<StructSet>()) {
       counts.note(set->ref->type);
     } else if (auto* get = curr->dynCast<ArrayGet>()) {
       counts.note(get->ref->type);
+      // See note on StructGet above.
+      counts.include(get->type);
     } else if (auto* set = curr->dynCast<ArraySet>()) {
       counts.note(set->ref->type);
-
     } else if (Properties::isControlFlowStructure(curr)) {
       if (curr->type.isTuple()) {
         // TODO: Allow control flow to have input types as well
diff --git a/test/lit/passes/signature-refining_gto.wat b/test/lit/passes/signature-refining_gto.wat
new file mode 100644
index 000000000..183ce7d54
--- /dev/null
+++ b/test/lit/passes/signature-refining_gto.wat
@@ -0,0 +1,45 @@
+;; RUN: foreach %s %t wasm-opt --nominal --signature-refining --gto --roundtrip -all -S -o - | filecheck %s
+
+(module
+ ;; The type $A should not be emitted at all (see below).
+ ;; CHECK-NOT: (type $A
+ (type $A (struct_subtype (field (mut (ref null $A))) data))
+
+ ;; CHECK:      (type $ref|none|_=>_none (func_subtype (param (ref none)) func))
+
+ ;; CHECK:      (type $funcref_i32_=>_none (func_subtype (param funcref i32) func))
+
+ ;; CHECK:      (func $struct.get (type $ref|none|_=>_none) (param $0 (ref none))
+ ;; CHECK-NEXT:  (drop
+ ;; CHECK-NEXT:   (local.get $0)
+ ;; CHECK-NEXT:  )
+ ;; CHECK-NEXT:  (unreachable)
+ ;; CHECK-NEXT: )
+ (func $struct.get (param $0 (ref $A))
+  ;; This function is always called with a null, so the parameter type will be
+  ;; refined to that. After doing so, the struct.get will be reading from a
+  ;; null type, and we should avoid erroring on that in --gto which scans all
+  ;; the heap types and updates them. Likewise, we should not error during
+  ;; roundtrip which also scans heap types.
+  (drop
+   (struct.get $A 0
+    (local.get $0)
+   )
+  )
+ )
+
+ ;; CHECK:      (func $caller (type $funcref_i32_=>_none) (param $0 funcref) (param $1 i32)
+ ;; CHECK-NEXT:  (call $struct.get
+ ;; CHECK-NEXT:   (ref.as_non_null
+ ;; CHECK-NEXT:    (ref.null none)
+ ;; CHECK-NEXT:   )
+ ;; CHECK-NEXT:  )
+ ;; CHECK-NEXT: )
+ (func $caller (param $0 funcref) (param $1 i32)
+  (call $struct.get
+   (ref.as_non_null
+    (ref.null none)
+   )
+  )
+ )
+)
author	Alon Zakai <azakai@google.com>	2022-11-01 09:34:33 -0700
committer	GitHub <noreply@github.com>	2022-11-01 09:34:33 -0700
commit	5e0ab66800de5d376429fcd281bd4cd1e41d8ed5 (patch)
tree	a28a6e12fbb790d2cf994f289e54e983f1a0e8d6
parent	1fa6fcfebbec9e45bcdc481f19757ac14cda4699 (diff)
download	binaryen-5e0ab66800de5d376429fcd281bd4cd1e41d8ed5.tar.gz binaryen-5e0ab66800de5d376429fcd281bd4cd1e41d8ed5.tar.bz2 binaryen-5e0ab66800de5d376429fcd281bd4cd1e41d8ed5.zip