author     Alon Zakai <azakai@google.com>            2024-07-25 11:16:45 -0700
committer  GitHub <noreply@github.com>               2024-07-25 11:16:45 -0700
commit     9cc1cb1ca66e89cbe7b7b5b52897f3bee3ee422c (patch)
tree       632a9c57123fc25374d981acb2cb40600e3ea4f0 /scripts/benchmarking/bench.js
parent     d903dd30f6426b8eb07605cae01baf4158364e2d (diff)
Cost analysis: Remove "Unacceptable" hack (#6782)
We marked various expressions as having cost "Unacceptable", fixed at 100, to
ensure we never moved them out of an If arm, etc. Giving them such a high
cost avoids that problem - the cost is higher than the limit we have for moving
code from conditional to unconditional execution - but it also means the total
cost is unrealistic. For example, a function with one such instruction plus an
add (cost 1) would end up with cost 101, and removing the add would look
insignificant, which causes issues for anything that wants to compare costs
(like Monomorphization).
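
To make the distortion concrete, here is a minimal sketch in plain JavaScript,
using only the numbers from the example above (the real comparison happens
inside Binaryen's cost analysis, not in code like this):

  // Old scheme: "Unacceptable" is a fixed cost of 100.
  const UNACCEPTABLE = 100;
  const ADD = 1;

  // A function with one such instruction plus an add costs 101; removing the
  // add looks like a ~1% improvement, i.e. insignificant.
  console.log((UNACCEPTABLE + ADD - UNACCEPTABLE) / (UNACCEPTABLE + ADD)); // ~0.0099

  // With a realistic cost (say 5, as for casts below), the same removal is a
  // substantial ~17% improvement.
  const CAST = 5;
  console.log((CAST + ADD - CAST) / (CAST + ADD)); // ~0.167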
To fix this, adjust some costs. The main change here is to give casts a cost of 5.
I measured this in depth (see the attached benchmark scripts), and it looks
clear that in both V8 and SpiderMonkey the cost of a cast is high enough that
it is not worth turning an if with a ref.test arm into a select (which would
always execute the test).
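
For intuition, a rough JavaScript analogue of that transform (wasm select
evaluates both operands unconditionally; this sketch only shows the shape of
the trade-off, not the actual optimizer logic):

  // If form: the cast-like test runs only when the condition is true, as in
  //   (if (result i32) (local.get $cond)
  //     (then (ref.test (ref $A) (local.get $x))) (else (i32.const 0))).
  function ifForm(cond, test, x) {
    return cond ? test(x) : 0;
  }

  // Select form: branch-free, but the test now runs on every execution, as in
  //   (select (ref.test ...) (i32.const 0) (local.get $cond)).
  function selectForm(cond, test, x) {
    const t = test(x); // executed whether or not cond is true
    return cond ? t : 0;
  }

With a cast cost of 5, always paying for the test outweighs the branch that
select removes, so the transform is not worthwhile.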
Other costs adjusted here matter a lot less, because they are on operations
that have side effects and so the optimizer will anyhow not move them from
conditional to unconditional execution, but I tried to make them a bit more
realistic while I was removing "Unacceptable":
* Give most atomic operations the cost of 10 we've been using for atomic
loads/stores. Wait and notify should perhaps be costlier, but it seems more
relevant to assume fast switching here.
* Give growth operations a cost of 20, and throw operations a cost of 10. These
numbers are entirely made up, as I am not even sure how to measure them in
a useful way (but, again, this should not matter much as they have side
effects). The sketch after this list collects the adjusted values.
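
A sketch of the values mentioned above, in one place (plain JavaScript for
illustration; the real table lives in Binaryen's C++ cost analysis and covers
many more operations):

  const costs = {
    cast: 5,         // e.g. ref.test; measured against the if->select threshold
    atomicLoad: 10,  // pre-existing cost for atomic loads/stores,
    atomicStore: 10, // now also used for most other atomic operations
    atomicRMW: 10,
    wait: 10,        // arguably should be costlier; see the note above
    notify: 10,
    grow: 20,        // growth operations: an admittedly made-up number
    throw: 10,       // throw operations: likewise made up
  };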
Diffstat (limited to 'scripts/benchmarking/bench.js')
-rw-r--r--  scripts/benchmarking/bench.js  159
1 file changed, 159 insertions, 0 deletions
diff --git a/scripts/benchmarking/bench.js b/scripts/benchmarking/bench.js
new file mode 100644
index 000000000..d1284a241
--- /dev/null
+++ b/scripts/benchmarking/bench.js
@@ -0,0 +1,159 @@
+
+// Benchmarking script. This runs on compiled bench.wat and prints out timings.
+//
+// Usage:
+//
+//  * wasm-opt scripts/benchmarking/bench.wat -all --inline-functions-with-loops --always-inline-max-function-size=1000 --inlining --precompute-propagate --optimize-instructions --inlining --simplify-locals --coalesce-locals --vacuum --remove-unused-module-elements -o bench.wasm -g
+//  * Inspect the optimized wasm to see that inlining etc. worked properly
+//    (we rely on inlining to let us write bench.wat in a short/simple form, and
+//    we use very specific optimizations in order to not optimize away the
+//    differences we care about).
+//  * d8 bench.js -- bench.wasm
+//    etc.
+//
+
+// Shell integration.
+if (typeof console === 'undefined') {
+  console = { log: print };
+}
+var tempRet0;
+var binary;
+if (typeof process === 'object' && typeof require === 'function' /* node.js detection */) {
+  var args = process.argv.slice(2);
+  binary = require('fs').readFileSync(args[0]);
+  if (!binary.buffer) binary = new Uint8Array(binary);
+} else {
+  var args;
+  if (typeof scriptArgs != 'undefined') {
+    args = scriptArgs;
+  } else if (typeof arguments != 'undefined') {
+    args = arguments;
+  }
+  if (typeof readbuffer === 'function') {
+    binary = new Uint8Array(readbuffer(args[0]));
+  } else {
+    binary = read(args[0], 'binary');
+  }
+}
+
+// Create the wasm.
+const module = new WebAssembly.Module(binary);
+const instance = new WebAssembly.Instance(module, {});
+const exports = instance.exports;
+
+// Create the benchmarkers.
+function makeBenchmarker(name) {
+  return {
+    name: name,
+    func: exports[name],
+    time: 0,
+    sum: 0,
+    iters: 0,
+  };
+}
+
+const benchmarkers = [
+  makeBenchmarker('len'),
+  makeBenchmarker('and'),
+  makeBenchmarker('iff-both'),
+  makeBenchmarker('or'),
+  makeBenchmarker('iff-either'),
+  makeBenchmarker('select'),
+  makeBenchmarker('iff-nextor'),
+  makeBenchmarker('select-three'),
+  makeBenchmarker('iff-three'),
+];
+
+// We'll call the benchmark functions in random orders. Random orders avoid any
+// interaction between the benchmarks from causing bias in the results.
+//
+// An alternative to randomly ordering the benchmarks in each iteration would be
+// to fully benchmark one, then do the next, so there are large amounts of time
+// between them, but that also allows them to become more like microbenchmarks
+// where the branch predictor etc. might display very favorable behavior.
+// Interleaving them makes things slightly more realistic.
+//
+// If we have too many benchmarks then eventually computing all orders ahead of
+// time will not work, but so long as we can, it is faster this way rather than
+// to compute random orders on the fly as we go.
+function makeOrders(prefix) {
+  // Given a prefix of an order, like [] or [0, 3], return all the possible
+  // orders beginning with that prefix.
+
+  // We cannot repeat anything already seen.
+  const seen = new Set();
+  for (var x of prefix) {
+    seen.add(x);
+  }
+
+  // Starting from the prefix, extend it by one item in all valid ways.
+  const extensions = [];
+  for (var i = 0; i < benchmarkers.length; i++) {
+    if (!seen.has(i)) {
+      extensions.push(prefix.concat(i));
+    }
+  }
+
+  if (prefix.length == benchmarkers.length - 1) {
+    // The extensions are complete orders; stop the recursion.
+    return extensions;
+  }
+
+  // Recursively generate the full orders.
+  const ret = [];
+  for (var extension of extensions) {
+    for (var order of makeOrders(extension)) {
+      ret.push(order);
+    }
+  }
+  return ret;
+}
+
+const orders = makeOrders([]);
+
+// Params.
+const M = 10000000;
+const N = 100;
+
+console.log('iters       :', M);
+console.log('list len    :', N);
+console.log('benchmarkers:', benchmarkers.length);
+console.log('orderings   :', orders.length);
+
+// Create a long linked list of objects of both type $A and $B.
+var list = null;
+for (var i = 0; i < N; i++) {
+  list = Math.random() < 0.5 ? exports.makeA(list) : exports.makeB(list);
+}
+
+console.log('benchmarking...');
+
+// Call the benchmark functions.
+
+for (var i = 0; i < M; i++) {
+  const order = orders[Math.floor(Math.random() * orders.length)];
+  for (var k = 0; k < benchmarkers.length; k++) {
+    const benchmarker = benchmarkers[order[k]];
+    const start = performance.now();
+    const result = benchmarker.func(list);
+    benchmarker.time += performance.now() - start;
+    benchmarker.sum += result;
+    benchmarker.iters++;
+  }
+}
+
+for (var benchmarker of benchmarkers) {
+  if (benchmarker.iters != M) {
+    throw 'wat';
+  }
+}
+
+console.log();
+for (var benchmarker of benchmarkers) {
+  console.log(`${benchmarker.name} time: \t${benchmarker.time}`)
+}
+console.log();
+for (var benchmarker of benchmarkers) {
+  console.log(`${benchmarker.name} mean sum: \t${benchmarker.sum / M}`)
+}
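
One practical note on the precomputed orderings: with the 9 benchmarkers above,
makeOrders([]) materializes all 9! = 362,880 permutations up front, which is
what the comment about having "too many benchmarks" warns about. A quick sketch
of the growth (not part of the script):

  // The number of precomputed orders grows factorially with the number of
  // benchmarkers, so a handful more would make the eager approach infeasible.
  function countOrders(n) {
    let total = 1;
    for (let i = 2; i <= n; i++) {
      total *= i;
    }
    return total;
  }

  console.log(countOrders(9));  // 362880, what the script prints as 'orderings'
  console.log(countOrders(12)); // 479001600, nearly half a billion orders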