summaryrefslogtreecommitdiff
path: root/src/decompiler-naming.h
diff options
context:
space:
mode:
author: Wouter van Oortmerssen <aardappel@gmail.com> 2020-01-23 15:50:33 -0800
committer: GitHub <noreply@github.com> 2020-01-23 15:50:33 -0800
commit: 90cec44b193b95b71cf1ea34137cbf6e939b7144 (patch)
tree: 555bdbb682e7cbb3c5c4b217dc8d9c992e81ad3f /src/decompiler-naming.h
parent: 5926d542ee382ccf1c693921f42aec767270295b (diff)
downloadwabt-90cec44b193b95b71cf1ea34137cbf6e939b7144.tar.gz
wabt-90cec44b193b95b71cf1ea34137cbf6e939b7144.tar.bz2
wabt-90cec44b193b95b71cf1ea34137cbf6e939b7144.zip
wasm-decompile: name data sections with their content (#1309)
This is a fun way to give somewhat meaningful names to sections containing strings. In the case of pure binary sections this likely generates random characters, but that's not any worse than the current generated names.
Diffstat (limited to 'src/decompiler-naming.h')
-rw-r--r--src/decompiler-naming.h44
1 files changed, 44 insertions, 0 deletions
diff --git a/src/decompiler-naming.h b/src/decompiler-naming.h
index 786c3ebb..41e0e3cb 100644
--- a/src/decompiler-naming.h
+++ b/src/decompiler-naming.h
@@ -110,6 +110,47 @@ void RenameToIdentifiers(std::vector<T*>& things, BindingHash& bh,
}
}
+enum {
+ // Length bounds for content-derived data segment names. These are a bit arbitrary, change at will.
+ min_content_identifier_size = 7,  // Candidates shorter than this (counting the "d_" prefix) are not used.
+ max_content_identifier_size = 30  // Candidates stop growing once they reach this length.
+};
+
+// Renames data segments after their own contents: the new name is "d_"
+// followed by the alphanumeric/underscore bytes of the segment's data,
+// truncated at max_content_identifier_size. A segment keeps its existing name
+// when the candidate is shorter than min_content_identifier_size or is already
+// present in `bh`. On a rename, the old entry in `bh` is erased and a new
+// binding for the segment's index in `segs` is inserted under the new name.
+//
+// Defined `inline` because this is a non-template function living in a header.
+inline void RenameToContents(std::vector<DataSegment*>& segs, BindingHash& bh) {
+  std::string s;
+  // Iterate by index: the binding below needs the segment's position in
+  // `segs`, and taking the address of a by-value loop variable does not
+  // yield it.
+  for (size_t i = 0; i < segs.size(); ++i) {
+    DataSegment* seg = segs[i];
+    s = "d_";
+    for (auto c : seg->data) {
+      if (isalnum(c) || c == '_') {
+        s += static_cast<char>(c);
+      }
+      if (s.size() >= max_content_identifier_size) {
+        // We truncate any very long names, since those make for hard to
+        // format output. They can be somewhat long though, since data segment
+        // references tend to not occur that often.
+        break;
+      }
+    }
+    if (s.size() < min_content_identifier_size) {
+      // It is useful to have a minimum, since if there are few printable
+      // characters in a data section, that is probably a sign of binary, and
+      // those few characters are not going to be very significant.
+      continue;
+    }
+    // We could do the same disambiguation as RenameToIdentifiers and
+    // GenerateNames do, but if we come up with a clashing name here it is
+    // likely a sign of not very meaningful binary data, so it is easier to
+    // just keep the original generated name in that case.
+    if (bh.count(s) != 0) {
+      continue;
+    }
+    // Remove original entry.
+    bh.erase(seg->name);
+    seg->name = s;
+    bh.emplace(s, Binding(static_cast<Index>(i)));
+  }
+}
+
// Function names may contain arbitrary C++ syntax, so we want to
// filter those to look like identifiers. A function name may be set
// by a name section (applied in ReadBinaryIr, called before this function)
@@ -119,6 +160,7 @@ void RenameToIdentifiers(std::vector<T*>& things, BindingHash& bh,
// this function).
// To not have to add too much decompiler-specific code into those systems
// (using a callback??) we instead rename everything here.
+// Also do data section renaming here.
void RenameAll(Module& module) {
// We also filter common C++ keywords/STL idents that make for huge
// identifiers.
@@ -144,6 +186,8 @@ void RenameAll(Module& module) {
// Also do this for some other kinds of names.
RenameToIdentifiers(module.globals, module.global_bindings, nullptr);
RenameToIdentifiers(module.tables, module.table_bindings, nullptr);
+
+ RenameToContents(module.data_segments, module.data_segment_bindings);
}
} // namespace wabt