wasm-decompile: name data sections with their content (#1309)

This is a fun way to give somewhat meaningful names to sections containing strings. In the case of pure binary sections this likely generates random characters, but that's not any worse than the current generated names.
author: Wouter van Oortmerssen <aardappel@gmail.com> 2020-01-23 15:50:33 -0800
committer: GitHub <noreply@github.com> 2020-01-23 15:50:33 -0800
commit: 90cec44b193b95b71cf1ea34137cbf6e939b7144 (patch)
tree: 555bdbb682e7cbb3c5c4b217dc8d9c992e81ad3f /src/decompiler-naming.h
parent: 5926d542ee382ccf1c693921f42aec767270295b (diff)
download: wabt-90cec44b193b95b71cf1ea34137cbf6e939b7144.tar.gz
wabt-90cec44b193b95b71cf1ea34137cbf6e939b7144.tar.bz2
wabt-90cec44b193b95b71cf1ea34137cbf6e939b7144.zip
1 files changed, 44 insertions, 0 deletions
diff --git a/src/decompiler-naming.h b/src/decompiler-naming.h
index 786c3ebb..41e0e3cb 100644
--- a/src/decompiler-naming.h
+++ b/src/decompiler-naming.h
@@ -110,6 +110,47 @@ void RenameToIdentifiers(std::vector<T*>& things, BindingHash& bh,
   }
 }
 
+enum {
+  // Name length bounds, "d_" prefix included. A bit arbitrary, change at will.
+  min_content_identifier_size = 7,
+  max_content_identifier_size = 30
+};
+
+// Renames data segments to "d_" + the alphanumeric/underscore characters in
+// their contents, updating bh; short or clashing names are left untouched.
+void RenameToContents(std::vector<DataSegment*>& segs, BindingHash& bh) {
+  std::string s;
+  // By reference: the binding index below is &seg's offset within segs.
+  for (auto& seg : segs) {
+    s = "d_";
+    for (auto c : seg->data) {
+      if (isalnum(c) || c == '_') {
+        s += static_cast<char>(c);
+      }
+      if (s.size() >= max_content_identifier_size) {
+        // Truncate very long names, since those make for hard to format
+        // output. Fairly long is fine: data segment references are rare.
+        break;
+      }
+    }
+    if (s.size() < min_content_identifier_size) {
+      // A minimum is useful: few printable characters in a data section is
+      // probably a sign of binary, where those characters mean little.
+      continue;
+    }
+    // We could disambiguate like RenameToIdentifiers and GenerateNames do,
+    // but a clash here is likely a sign of not very meaningful binary data,
+    // so it is easier to just keep the original generated name.
+    if (bh.count(s) != 0) {
+      continue;
+    }
+    // Remove original entry.
+    bh.erase(seg->name);
+    seg->name = s;
+    bh.emplace(s, Binding(static_cast<Index>(&seg - &segs[0])));
+  }
+}
+
 // Function names may contain arbitrary C++ syntax, so we want to
 // filter those to look like identifiers. A function name may be set
 // by a name section (applied in ReadBinaryIr, called before this function)
@@ -119,6 +160,7 @@ void RenameToIdentifiers(std::vector<T*>& things, BindingHash& bh,
 // this function).
 // To not have to add too many decompiler-specific code into those systems
 // (using a callback??) we instead rename everything here.
+// Also do data section renaming here.
 void RenameAll(Module& module) {
   // We also filter common C++ keywords/STL idents that make for huge
   // identifiers.
@@ -144,6 +186,8 @@ void RenameAll(Module& module) {
   // Also do this for some other kinds of names.
   RenameToIdentifiers(module.globals, module.global_bindings, nullptr);
   RenameToIdentifiers(module.tables, module.table_bindings, nullptr);
+
+  RenameToContents(module.data_segments, module.data_segment_bindings);
 }
 
 }  // namespace wabt
author	Wouter van Oortmerssen <aardappel@gmail.com>	2020-01-23 15:50:33 -0800
committer	GitHub <noreply@github.com>	2020-01-23 15:50:33 -0800
commit	90cec44b193b95b71cf1ea34137cbf6e939b7144 (patch)
tree	555bdbb682e7cbb3c5c4b217dc8d9c992e81ad3f /src/decompiler-naming.h
parent	5926d542ee382ccf1c693921f42aec767270295b (diff)
download	wabt-90cec44b193b95b71cf1ea34137cbf6e939b7144.tar.gz wabt-90cec44b193b95b71cf1ea34137cbf6e939b7144.tar.bz2 wabt-90cec44b193b95b71cf1ea34137cbf6e939b7144.zip