diff options
author | Thomas Lively <tlively@google.com> | 2023-05-12 11:27:29 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-12 09:27:29 -0700 |
commit | 64e5a99ee014af3a138a36f5a87addfacfc95f0c (patch) | |
tree | a322095dfe09f1b361964682f56bc5c6f963e46a /src | |
parent | 7d5d24f400019ff023b65ccdb3c1d8d07ebb00a5 (diff) | |
download | binaryen-64e5a99ee014af3a138a36f5a87addfacfc95f0c.tar.gz binaryen-64e5a99ee014af3a138a36f5a87addfacfc95f0c.tar.bz2 binaryen-64e5a99ee014af3a138a36f5a87addfacfc95f0c.zip |
[analysis] Add a new iterable CFG utility (#5712)
Add a new "analysis" source directory that will contain the source for a new
static program analysis framework. To start the framework, add a CFG utility
that provides convenient iterators for iterating through the basic blocks of the
CFG as well as the predecessors, successors, and contents of each block.
The new CFGs are constructed using the existing CFGWalker, but they are
different in that the new utility is meant to provide a usable representation of
a CFG whereas CFGWalker is meant to allow collecting arbitrary information about
each basic block in a CFG.
For testing and debugging purposes, add `print` methods to CFGs and basic
blocks. This requires exposing the ability to print expression contents
excluding children, which was something we previously did only for StackIR.
Also add a new gtest file with a test for constructing and printing a CFG. The
test reveals some strange properties of the current CFG construction, including
empty blocks and strange placement of `loop` instructions, but fixing these
problems is left as future work.
Diffstat (limited to 'src')
-rw-r--r-- | src/analysis/CMakeLists.txt | 6 | ||||
-rw-r--r-- | src/analysis/cfg-impl.h | 140 | ||||
-rw-r--r-- | src/analysis/cfg.cpp | 106 | ||||
-rw-r--r-- | src/analysis/cfg.h | 79 | ||||
-rw-r--r-- | src/passes/Print.cpp | 11 | ||||
-rw-r--r-- | src/wasm.h | 9 |
6 files changed, 351 insertions, 0 deletions
diff --git a/src/analysis/CMakeLists.txt b/src/analysis/CMakeLists.txt new file mode 100644 index 000000000..ea0494859 --- /dev/null +++ b/src/analysis/CMakeLists.txt @@ -0,0 +1,6 @@ +file(GLOB analysis_HEADERS *.h) +set(analysis_SOURCES + cfg.cpp + ${analysis_HEADERS} +) +add_library(analysis OBJECT ${analysis_SOURCES}) diff --git a/src/analysis/cfg-impl.h b/src/analysis/cfg-impl.h new file mode 100644 index 000000000..cf348fa09 --- /dev/null +++ b/src/analysis/cfg-impl.h @@ -0,0 +1,140 @@ +/* + * Copyright 2023 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef wasm_analysis_cfg_impl_h +#define wasm_analysis_cfg_impl_h + +#include "cfg.h" + +namespace wasm::analysis { + +// An iterator over a sequence of contiguous pointers (represented as a pointer +// to a pointer in the sequence) that dereferences the pointed-to pointer. +// TODO: Move this to its own public header if there is ever another use for it. +template<typename T> struct _indirect_ptr_iterator { + using iterator_category = std::random_access_iterator_tag; + using value_type = T; + using different_type = off_t; + using reference = const T&; + using pointer = const T*; + + const T* const* ptr; + + const T& operator*() const { return **ptr; } + + const T& operator[](int n) const { return **(ptr + n); } + + _indirect_ptr_iterator& operator+=(int n) { + ptr += n; + return *this; + } + + _indirect_ptr_iterator& operator-=(int n) { + ptr -= n; + return *this; + } + + _indirect_ptr_iterator& operator++() { return *this += 1; } + + _indirect_ptr_iterator operator++(int) { + _indirect_ptr_iterator it = *this; + ++(*this); + return it; + } + + _indirect_ptr_iterator& operator--() { return *this -= 1; } + + _indirect_ptr_iterator operator--(int) { + _indirect_ptr_iterator it = *this; + --(*this); + return it; + } + + _indirect_ptr_iterator operator+(int n) const { + _indirect_ptr_iterator it = *this; + it += n; + return it; + } + + _indirect_ptr_iterator operator-(int n) const { + _indirect_ptr_iterator it = *this; + it -= n; + return it; + } + + bool operator==(const _indirect_ptr_iterator& other) const { + return ptr == other.ptr; + } + + bool operator!=(const _indirect_ptr_iterator& other) const { + return !(*this == other); + } + + bool operator<(const _indirect_ptr_iterator& other) const { + return ptr < other.ptr; + } + + bool operator>(const _indirect_ptr_iterator& other) const { + return ptr > other.ptr; + } + + bool operator<=(const _indirect_ptr_iterator& other) const { + return ptr <= other.ptr; + } + + bool operator>=(const _indirect_ptr_iterator& other) const { + return ptr >= other.ptr; + } +}; + +template<typename T> +_indirect_ptr_iterator<T> operator+(int n, + const _indirect_ptr_iterator<T>& it) { + return it + n; +} + +// Wraps a vector of pointers and provides dereferencing iterators for it. +template<typename T> struct _indirect_ptr_vec { + using iterator = _indirect_ptr_iterator<T>; + + const std::vector<T*>& vec; + + _indirect_ptr_vec(const std::vector<T*>& vec) : vec(vec) {} + + iterator begin() const { return {&vec.data()[0]}; } + iterator end() const { return {&vec.data()[vec.size()]}; } +}; + +struct BasicBlock::Predecessors : _indirect_ptr_vec<BasicBlock> { + Predecessors(const BasicBlock& block) + : _indirect_ptr_vec(block.predecessors) {} +}; + +struct BasicBlock::Successors : _indirect_ptr_vec<BasicBlock> { + Successors(const BasicBlock& block) : _indirect_ptr_vec(block.successors) {} +}; + +inline BasicBlock::Predecessors BasicBlock::preds() const { + return Predecessors(*this); +} + +inline BasicBlock::Successors BasicBlock::succs() const { + return Successors(*this); +} + +} // namespace wasm::analysis + +#endif // wasm_analysis_cfg_impl_h diff --git a/src/analysis/cfg.cpp b/src/analysis/cfg.cpp new file mode 100644 index 000000000..7d770776d --- /dev/null +++ b/src/analysis/cfg.cpp @@ -0,0 +1,106 @@ +/* + * Copyright 2023 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <unordered_map> + +#include "analysis/cfg.h" +#include "cfg/cfg-traversal.h" +#include "wasm-stack.h" + +namespace wasm::analysis { + +CFG CFG::fromFunction(Function* func) { + struct CFGBuilder : CFGWalker<CFGBuilder, + UnifiedExpressionVisitor<CFGBuilder>, + std::vector<Expression*>> { + void visitExpression(Expression* curr) { + if (currBasicBlock) { + currBasicBlock->contents.push_back(curr); + } + } + }; + + CFGBuilder builder; + builder.walkFunction(func); + + size_t numBlocks = builder.basicBlocks.size(); + + CFG cfg; + cfg.blocks = std::vector<BasicBlock>(numBlocks); + + // From here the addresses of the new basic blocks are stable. + std::unordered_map<CFGBuilder::BasicBlock*, BasicBlock*> oldToNewBlocks; + for (size_t i = 0; i < numBlocks; ++i) { + oldToNewBlocks[builder.basicBlocks[i].get()] = &cfg.blocks[i]; + } + + for (size_t i = 0; i < numBlocks; ++i) { + auto& oldBlock = *builder.basicBlocks[i]; + auto& newBlock = cfg.blocks[i]; + newBlock.index = i; + newBlock.insts = std::move(oldBlock.contents); + newBlock.predecessors.reserve(oldBlock.in.size()); + for (auto* oldPred : oldBlock.in) { + newBlock.predecessors.push_back(oldToNewBlocks.at(oldPred)); + } + newBlock.successors.reserve(oldBlock.out.size()); + for (auto* oldSucc : oldBlock.out) { + newBlock.successors.push_back(oldToNewBlocks.at(oldSucc)); + } + } + + // Move-construct a new CFG to get mandatory copy elision, preserving basic + // block addresses through the return. + return CFG(std::move(cfg)); +} + +void CFG::print(std::ostream& os, Module* wasm) const { + size_t start = 0; + for (auto& block : *this) { + if (&block != &*begin()) { + os << '\n'; + } + block.print(os, wasm, start); + start += block.size(); + } +} + +void BasicBlock::print(std::ostream& os, Module* wasm, size_t start) const { + os << ";; preds: ["; + for (auto& pred : preds()) { + if (&pred != &*preds().begin()) { + os << ", "; + } + os << pred.index; + } + os << "], succs: ["; + + for (auto& succ : succs()) { + if (&succ != &*succs().begin()) { + os << ", "; + } + os << succ.index; + } + os << "]\n"; + + os << index << ":\n"; + size_t instIndex = start; + for (auto* inst : *this) { + os << " " << instIndex++ << ": " << ShallowExpression{inst, wasm} << '\n'; + } +} + +} // namespace wasm::analysis diff --git a/src/analysis/cfg.h b/src/analysis/cfg.h new file mode 100644 index 000000000..3b980f410 --- /dev/null +++ b/src/analysis/cfg.h @@ -0,0 +1,79 @@ +/* + * Copyright 2023 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// A generic CFG / basic block utility. Unlike the utilities in src/cfg/, this +// is a generic representation of a CFG rather than a generic builder of +// CFG-like objects. It lives here in src/analysis/ because it is primarily +// meant for use in the static analysis framework. Other Binaryen code will find +// it more idiomatic to use the utilities in src/cfg/. + +#ifndef wasm_analysis_cfg_h +#define wasm_analysis_cfg_h + +#include <iostream> +#include <vector> + +#include "wasm.h" + +namespace wasm::analysis { + +struct BasicBlock; + +struct CFG { + // Iterate through basic blocks. + using iterator = std::vector<BasicBlock>::const_iterator; + iterator begin() const { return blocks.cbegin(); } + iterator end() const { return blocks.cend(); } + size_t size() const { return blocks.size(); } + + static CFG fromFunction(Function* func); + + void print(std::ostream& os, Module* wasm = nullptr) const; + +private: + std::vector<BasicBlock> blocks; + friend BasicBlock; +}; + +struct BasicBlock { + using iterator = std::vector<Expression*>::const_iterator; + + // Iterate through instructions. + iterator begin() const { return insts.cbegin(); } + iterator end() const { return insts.cend(); } + size_t size() const { return insts.size(); } + + // Iterables for predecessor and successor blocks. + struct Predecessors; + struct Successors; + Predecessors preds() const; + Successors succs() const; + + void print(std::ostream& os, Module* wasm = nullptr, size_t start = 0) const; + +private: + Index index; + std::vector<Expression*> insts; + std::vector<BasicBlock*> predecessors; + std::vector<BasicBlock*> successors; + friend CFG; +}; + +} // namespace wasm::analysis + +#include "cfg-impl.h" + +#endif // wasm_analysis_cfg_h diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 76fd53759..0bddd338b 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -3776,6 +3776,17 @@ std::ostream& operator<<(std::ostream& o, wasm::ModuleExpression pair) { return wasm::printExpression(pair.second, o, false, false, &pair.first); } +std::ostream& operator<<(std::ostream& o, wasm::ShallowExpression expression) { + if (expression.module) { + wasm::PrintExpressionContents printer(expression.module, nullptr, o); + printer.visit(expression.expr); + } else { + wasm::PrintExpressionContents printer(nullptr, o); + printer.visit(expression.expr); + } + return o; +} + std::ostream& operator<<(std::ostream& o, wasm::StackInst& inst) { return wasm::printStackInst(&inst, o); } diff --git a/src/wasm.h b/src/wasm.h index 474497847..727c9ca30 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -2308,8 +2308,16 @@ public: void clearDebugInfo(); }; +// Utility for printing an expression with named types. using ModuleExpression = std::pair<Module&, Expression*>; +// Utility for printing only the top level of an expression. Named types will be +// used if `module` is non-null. +struct ShallowExpression { + Expression* expr; + Module* module = nullptr; +}; + } // namespace wasm namespace std { @@ -2322,6 +2330,7 @@ template<> struct hash<wasm::Address> { std::ostream& operator<<(std::ostream& o, wasm::Module& module); std::ostream& operator<<(std::ostream& o, wasm::Expression& expression); std::ostream& operator<<(std::ostream& o, wasm::ModuleExpression pair); +std::ostream& operator<<(std::ostream& o, wasm::ShallowExpression expression); std::ostream& operator<<(std::ostream& o, wasm::StackInst& inst); std::ostream& operator<<(std::ostream& o, wasm::StackIR& ir); |