diff options
author | Alon Zakai <azakai@google.com> | 2023-10-24 14:37:52 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-24 14:37:52 -0700 |
commit | ec8220f4aa556ce39145db13eddd84855b11f76c (patch) | |
tree | d994013922dcd35e59f043df83899e6325f36102 /src | |
parent | ba04e395508fc3414b952287d7e918d20361087e (diff) | |
download | binaryen-ec8220f4aa556ce39145db13eddd84855b11f76c.tar.gz binaryen-ec8220f4aa556ce39145db13eddd84855b11f76c.tar.bz2 binaryen-ec8220f4aa556ce39145db13eddd84855b11f76c.zip |
Fix unreachable code in LocalGraph by making it imprecise there (#6048)
Followup to #6046 - the fuzzer found we missed handling the case of the entry itself
being unreachable, or of an unreachable loop later. Properly identifying unreachable
code requires a flow analysis, unfortunately, so this PR gives up on that and instead
allows LocalGraph to be imprecise in unreachable code. That avoids adding any
overhead, but does mean the IR may be slightly confusing when debugging. It does
not have any optimization downsides, however, as it only affects unreachable code.
Also add a dump() impl in that file which helps debugging.
Diffstat (limited to 'src')
-rw-r--r-- | src/ir/LocalGraph.cpp | 28 | ||||
-rw-r--r-- | src/ir/local-graph.h | 8 | ||||
-rw-r--r-- | src/passes/Precompute.cpp | 14 |
3 files changed, 29 insertions, 21 deletions
diff --git a/src/ir/LocalGraph.cpp b/src/ir/LocalGraph.cpp index 7865bc404..beef635b1 100644 --- a/src/ir/LocalGraph.cpp +++ b/src/ir/LocalGraph.cpp @@ -31,6 +31,10 @@ struct Info { std::vector<Expression*> actions; // for each index, the last local.set for it std::unordered_map<Index, LocalSet*> lastSets; + + void dump(Function* func) { + std::cout << " info: " << actions.size() << " actions\n"; + } }; // flow helper class. flows the gets to their sets @@ -106,10 +110,6 @@ struct Flower : public CFGWalker<Flower, Visitor<Flower>, Info> { auto numLocals = func->getNumLocals(); std::vector<FlowBlock*> work; - // Track if we have unreachable code anywhere, as if we do that may inhibit - // certain optimizations below. - bool hasUnreachable = false; - // Convert input blocks (basicBlocks) into more efficient flow blocks to // improve memory access. std::vector<FlowBlock> flowBlocks; @@ -120,11 +120,6 @@ struct Flower : public CFGWalker<Flower, Visitor<Flower>, Info> { for (Index i = 0; i < basicBlocks.size(); ++i) { auto* block = basicBlocks[i].get(); basicToFlowMap[block] = &flowBlocks[i]; - // Check for unreachable code. Note we ignore the entry block (index 0) as - // that is always reached when we are called. - if (i != 0 && block->in.empty()) { - hasUnreachable = true; - } } // We note which local indexes have local.sets, as that can help us @@ -200,19 +195,16 @@ struct Flower : public CFGWalker<Flower, Visitor<Flower>, Info> { if (gets.empty()) { continue; } - if (!hasUnreachable && !hasSet[index]) { + if (!hasSet[index]) { // This local index has no sets, so we know all gets will end up // reaching the entry block. Do that here as an optimization to avoid // flowing through the (potentially very many) blocks in the function. // - // Note that we must check for unreachable code in this function, as - // if there is any then we would not be precise: in that case, the - // gets may either have the entry value, or no value at all. It would - // be safe to mark the entry value in that case anyhow (as it only - // matters in unreachable code), but to keep the IR consistent and to - // avoid confusion when debugging, simply do not optimize if - // there is anything unreachable (which will not happen normally, as - // DCE should run before passes that use this utility). + // Note that we may be in unreachable code, and if so, we might add + // the entry values when they are not actually relevant. That is, we + // are not precise in the case of unreachable code. This can be + // confusing when debugging, but it does not have any downside for + // optimization (since unreachable code should be removed anyhow). for (auto* get : gets) { getSetses[get].insert(nullptr); } diff --git a/src/ir/local-graph.h b/src/ir/local-graph.h index 8cc92d0fd..fd9306d5c 100644 --- a/src/ir/local-graph.h +++ b/src/ir/local-graph.h @@ -30,6 +30,14 @@ namespace wasm { // (see the SSA pass for actually creating new local indexes based // on this). // +// Note that this is not guaranteed to be precise in unreachable code. That is, +// we do not make the effort to represent the exact sets for each get, and may +// overestimate them (specifically, we may mark the entry value as possible, +// even if unreachability prevents that; doing so helps simplify and optimize +// the code, which we think is worthwhile for the possible annoyance in +// debugging etc.; and it has no downside for optimization, since unreachable +// code will be removed anyhow). +// struct LocalGraph { // main API diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index cddf8785f..a08848810 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -442,10 +442,18 @@ private: if (getFunction()->isVar(get->index)) { auto localType = getFunction()->getLocalType(get->index); if (localType.isNonNullable()) { - Fatal() << "Non-nullable local accessing the default value in " - << getFunction()->name << " (" << get->index << ')'; + // This is a non-nullable local that seems to read the default + // value at the function entry. This is either an internal error + // or a case of unreachable code; the latter is possible as + // LocalGraph is not precise in unreachable code. + // + // We cannot set zeros here (as applying them, even in + // unreachable code, would not validate), so just mark this as + // a hopeless case to ignore. + values = {}; + } else { + curr = Literal::makeZeros(localType); } - curr = Literal::makeZeros(localType); } else { // it's a param, so it's hopeless values = {}; |