diff options
author | Wouter van Oortmerssen <aardappel@gmail.com> | 2020-01-23 15:50:33 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-01-23 15:50:33 -0800 |
commit | 90cec44b193b95b71cf1ea34137cbf6e939b7144 (patch) | |
tree | 555bdbb682e7cbb3c5c4b217dc8d9c992e81ad3f /src/decompiler-naming.h | |
parent | 5926d542ee382ccf1c693921f42aec767270295b (diff) | |
download | wabt-90cec44b193b95b71cf1ea34137cbf6e939b7144.tar.gz wabt-90cec44b193b95b71cf1ea34137cbf6e939b7144.tar.bz2 wabt-90cec44b193b95b71cf1ea34137cbf6e939b7144.zip |
wasm-decompile: name data sections with their content (#1309)
This is a fun way to give somewhat meaningful names to sections
containing strings. In the case of pure binary sections this likely
generates random characters, but that's not any worse than the
current generated names.
Diffstat (limited to 'src/decompiler-naming.h')
-rw-r--r-- | src/decompiler-naming.h | 44 |
1 files changed, 44 insertions, 0 deletions
diff --git a/src/decompiler-naming.h b/src/decompiler-naming.h
index 786c3ebb..41e0e3cb 100644
--- a/src/decompiler-naming.h
+++ b/src/decompiler-naming.h
@@ -110,6 +110,47 @@ void RenameToIdentifiers(std::vector<T*>& things, BindingHash& bh,
   }
 }
 
+enum {
+  // This is a bit arbitrary, change at will.
+  min_content_identifier_size = 7,
+  max_content_identifier_size = 30
+};
+
+// Renames data segments to "d_" followed by the identifier characters
+// (alphanumerics and '_') found in their contents, updating `bh` to match.
+inline void RenameToContents(std::vector<DataSegment*>& segs, BindingHash& bh) {
+  std::string s;
+  // Must be a reference: `&seg - &segs[0]` below needs seg's slot in segs.
+  for (auto& seg : segs) {
+    s = "d_";
+    for (auto c : seg->data) {
+      if (isalnum(c) || c == '_') {
+        s += static_cast<char>(c);
+      }
+      if (s.size() >= max_content_identifier_size) {
+        // Truncate very long names, since those make for hard to format
+        // output. Somewhat long is fine: segment references tend to be rare.
+        break;
+      }
+    }
+    if (s.size() < min_content_identifier_size) {
+      // Require a minimum: very few identifier characters probably means
+      // binary data, and those few characters won't be very significant.
+      continue;
+    }
+    // We could disambiguate like RenameToIdentifiers and GenerateNames do, but
+    // a clashing name is likely a sign of not very meaningful binary data, so
+    // it is easier to just keep the original generated name in that case.
+    if (bh.count(s) != 0) {
+      continue;
+    }
+    // Remove original entry.
+    bh.erase(seg->name);
+    seg->name = s;
+    bh.emplace(s, Binding(static_cast<Index>(&seg - &segs[0])));
+  }
+}
+
 // Function names may contain arbitrary C++ syntax, so we want to
 // filter those to look like identifiers. A function name may be set
 // by a name section (applied in ReadBinaryIr, called before this function)
@@ -119,6 +160,7 @@ void RenameToIdentifiers(std::vector<T*>& things, BindingHash& bh,
 // this function).
 // To not have to add too many decompiler-specific code into those systems
 // (using a callback??) we instead rename everything here.
+// Also do data section renaming here.
 void RenameAll(Module& module) {
   // We also filter common C++ keywords/STL idents that make for huge
   // identifiers.
@@ -144,6 +186,8 @@ void RenameAll(Module& module) {
   // Also do this for some other kinds of names.
   RenameToIdentifiers(module.globals, module.global_bindings, nullptr);
   RenameToIdentifiers(module.tables, module.table_bindings, nullptr);
+
+  RenameToContents(module.data_segments, module.data_segment_bindings);
 }
 
 }  // namespace wabt