diff options
author | Alon Zakai <azakai@google.com> | 2019-07-26 13:45:48 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-07-26 13:45:48 -0700 |
commit | ccd95f8f5725a8d52557772b3081875babda312f (patch) | |
tree | 18ec755e0f6efc11a6835100d4f7a50b21f2ba64 /src | |
parent | 6ca3f4c80d57bd92c18f028828b889f5c74e10e9 (diff) | |
download | binaryen-ccd95f8f5725a8d52557772b3081875babda312f.tar.gz binaryen-ccd95f8f5725a8d52557772b3081875babda312f.tar.bz2 binaryen-ccd95f8f5725a8d52557772b3081875babda312f.zip |
Asyncify: whitelist and blacklist support (#2264)
The blacklist means "functions here are to be ignored and not instrumented, we can assume they never unwind." The whitelist means "only these functions, and no others, can unwind." I had hoped such lists would not be necessary, since Asyncify's overhead is much smaller than the old Asyncify and Emterpreter, but as projects have noticed, the overhead to size and speed is still significant. The lists give power users a way to reduce any unnecessary overhead.
A slightly tricky thing is escaping of names: we escape names from the names section (see #2261 #1646). The lists arrive in human-readable format, so we escape them before comparing to the internal escaped names. To enable that I refactored wasm-binary a little bit to provide the escaping logic, cc @yurydelendik
If both lists are specified, an error is shown (since that is meaningless). If a name appears in a list that is not in the module, we show a warning, which will hopefully help people debug typos etc. I had hoped to make this an error, but the problem is that due to inlining etc. a single list will not always work for both unoptimized and optimized builds (a function may vanish when optimizing, due to duplicate function elimination or inlining).
Fixes #2218.
Diffstat (limited to 'src')
-rw-r--r-- | src/passes/Asyncify.cpp | 113 | ||||
-rw-r--r-- | src/wasm-binary.h | 1 | ||||
-rw-r--r-- | src/wasm/wasm-binary.cpp | 8 |
3 files changed, 102 insertions, 20 deletions
diff --git a/src/passes/Asyncify.cpp b/src/passes/Asyncify.cpp index 98b4425b1..7d6dd9f76 100644 --- a/src/passes/Asyncify.cpp +++ b/src/passes/Asyncify.cpp @@ -212,6 +212,24 @@ // If that is true for your codebase, then this can be extremely useful // as otherwise it looks like any indirect call can go to a lot of places. // +// --pass-arg=asyncify-blacklist@name1,name2,name3 +// +// If the blacklist is provided, then the functions in it will not be +// instrumented even if it looks like they need to. This can be useful +// if you know things the whole-program analysis doesn't, like if you +// know certain indirect calls are safe and won't unwind. But if you +// get the list wrong things will break (and in a production build user +// input might reach code paths you missed during testing, so it's hard +// to know you got this right), so this is not recommended unless you +// really know what are doing, and need to optimize every bit of speed +// and size. +// +// --pass-arg=asyncify-whitelist@name1,name2,name3 +// +// If the whitelist is provided, then only the functions in the list +// will be instrumented. Like the blacklist, getting this wrong will +// break your application. +// #include "ir/effects.h" #include "ir/literal-utils.h" @@ -327,9 +345,15 @@ class ModuleAnalyzer { public: ModuleAnalyzer(Module& module, std::function<bool(Name, Name)> canImportChangeState, - bool canIndirectChangeState) + bool canIndirectChangeState, + const String::Split& blacklistInput, + const String::Split& whitelistInput) : module(module), canIndirectChangeState(canIndirectChangeState), globals(module) { + + blacklist.insert(blacklistInput.begin(), blacklistInput.end()); + whitelist.insert(whitelistInput.begin(), whitelistInput.end()); + // Scan to see which functions can directly change the state. // Also handle the asyncify imports, removing them (as we will implement // them later), and replace calls to them with calls to the later proper @@ -388,11 +412,13 @@ public: } Info* info; Module* module; + ModuleAnalyzer* analyzer; bool canIndirectChangeState; }; Walker walker; walker.info = &info; walker.module = &module; + walker.analyzer = this; walker.canIndirectChangeState = canIndirectChangeState; walker.walk(func->body); @@ -406,6 +432,13 @@ public: map.swap(scanner.map); + // Functions in the blacklist are assumed to not change the state. + for (auto& name : blacklist) { + if (auto* func = module.getFunctionOrNull(name)) { + map[func].canChangeState = false; + } + } + // Remove the asyncify imports, if any. for (auto& pair : map) { auto* func = pair.first; @@ -435,12 +468,22 @@ public: while (!work.empty()) { auto* func = work.pop(); for (auto* caller : map[func].calledBy) { - if (!map[caller].canChangeState && !map[caller].isBottomMostRuntime) { + if (!map[caller].canChangeState && !map[caller].isBottomMostRuntime && + !blacklist.count(caller->name)) { map[caller].canChangeState = true; work.push(caller); } } } + + if (!whitelist.empty()) { + // Only the functions in the whitelist can change the state. + for (auto& func : module.functions) { + if (!func->imported()) { + map[func.get()].canChangeState = whitelist.count(func->name) > 0; + } + } + } } bool needsInstrumentation(Function* func) { @@ -481,6 +524,7 @@ public: // TODO optimize the other case, at least by type } Module* module; + ModuleAnalyzer* analyzer; Map* map; bool canIndirectChangeState; bool canChangeState = false; @@ -488,6 +532,7 @@ public: }; Walker walker; walker.module = &module; + walker.analyzer = this; walker.map = ↦ walker.canIndirectChangeState = canIndirectChangeState; walker.walk(curr); @@ -498,6 +543,8 @@ public: } GlobalHelper globals; + std::set<Name> blacklist; + std::set<Name> whitelist; }; // Checks if something performs a call: either a direct or indirect call, @@ -987,23 +1034,57 @@ struct Asyncify : public Pass { String::Split listedImports(stateChangingImports, ","); auto ignoreIndirect = runner->options.getArgumentOrDefault("asyncify-ignore-indirect", ""); + String::Split blacklist( + runner->options.getArgumentOrDefault("asyncify-blacklist", ""), ","); + String::Split whitelist( + runner->options.getArgumentOrDefault("asyncify-whitelist", ""), ","); + + // The lists contain human-readable strings. Turn them into the internal + // escaped names for later comparisons + auto processList = [module](String::Split& list, const std::string& which) { + for (auto& name : list) { + auto escaped = WasmBinaryBuilder::escape(name); + auto* func = module->getFunctionOrNull(escaped); + if (!func) { + std::cerr << "warning: Asyncify " << which + << "list contained a non-existing function name: " << name + << " (" << escaped << ")\n"; + } else if (func->imported()) { + Fatal() << "Asyncify " << which + << "list contained an imported function name (use the import " + "list for imports): " + << name << '\n'; + } + name = escaped.str; + } + }; + processList(blacklist, "black"); + processList(whitelist, "white"); + + if (!blacklist.empty() && !whitelist.empty()) { + Fatal() << "It makes no sense to use both a blacklist and a whitelist " + "with asyncify."; + } + + auto canImportChangeState = [&](Name module, Name base) { + if (allImportsCanChangeState) { + return true; + } + std::string full = std::string(module.str) + '.' + base.str; + for (auto& listedImport : listedImports) { + if (String::wildcardMatch(listedImport, full)) { + return true; + } + } + return false; + }; // Scan the module. ModuleAnalyzer analyzer(*module, - [&](Name module, Name base) { - if (allImportsCanChangeState) { - return true; - } - std::string full = - std::string(module.str) + '.' + base.str; - for (auto& listedImport : listedImports) { - if (String::wildcardMatch(listedImport, full)) { - return true; - } - } - return false; - }, - ignoreIndirect == ""); + canImportChangeState, + ignoreIndirect == "", + blacklist, + whitelist); // Add necessary globals before we emit code to use them. addGlobals(module); diff --git a/src/wasm-binary.h b/src/wasm-binary.h index e1eb5733c..f58a10cf7 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1174,6 +1174,7 @@ public: void readEvents(); + static Name escape(Name name); void readNames(size_t); void readFeatures(size_t); diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 3d9a1ba9b..039cef5e5 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -2054,13 +2054,13 @@ static char formatNibble(int nibble) { return nibble < 10 ? '0' + nibble : 'a' - 10 + nibble; } -static void escapeName(Name& name) { +Name WasmBinaryBuilder::escape(Name name) { bool allIdChars = true; for (const char* p = name.str; allIdChars && *p; p++) { allIdChars = isIdChar(*p); } if (allIdChars) { - return; + return name; } // encode name, if at least one non-idchar (per WebAssembly spec) was found std::string escaped; @@ -2075,7 +2075,7 @@ static void escapeName(Name& name) { escaped.push_back(formatNibble(ch >> 4)); escaped.push_back(formatNibble(ch & 15)); } - name = escaped; + return escaped; } void WasmBinaryBuilder::readNames(size_t payloadLen) { @@ -2098,7 +2098,7 @@ void WasmBinaryBuilder::readNames(size_t payloadLen) { for (size_t i = 0; i < num; i++) { auto index = getU32LEB(); auto rawName = getInlineString(); - escapeName(rawName); + rawName = escape(rawName); auto name = rawName; // De-duplicate names by appending .1, .2, etc. for (int i = 1; !usedNames.insert(name).second; ++i) { |