diff options
-rw-r--r-- | CMakeLists.txt | 8 | ||||
-rw-r--r-- | src/analysis/CMakeLists.txt | 6 | ||||
-rw-r--r-- | src/analysis/cfg-impl.h | 140 | ||||
-rw-r--r-- | src/analysis/cfg.cpp | 106 | ||||
-rw-r--r-- | src/analysis/cfg.h | 79 | ||||
-rw-r--r-- | src/passes/Print.cpp | 11 | ||||
-rw-r--r-- | src/wasm.h | 9 | ||||
-rw-r--r-- | test/gtest/CMakeLists.txt | 1 | ||||
-rw-r--r-- | test/gtest/cfg.cpp | 84 |
9 files changed, 441 insertions, 3 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 27d9744b2..699a765ce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -359,6 +359,7 @@ add_subdirectory(src/emscripten-optimizer) add_subdirectory(src/passes) add_subdirectory(src/support) add_subdirectory(src/wasm) +add_subdirectory(src/analysis) if(BUILD_TOOLS) # Build binaryen tools @@ -383,7 +384,8 @@ set(binaryen_objs $<TARGET_OBJECTS:emscripten-optimizer> $<TARGET_OBJECTS:ir> $<TARGET_OBJECTS:cfg> - $<TARGET_OBJECTS:support>) + $<TARGET_OBJECTS:support> + $<TARGET_OBJECTS:analysis>) if(BUILD_LLVM_DWARF) SET(binaryen_objs ${binaryen_objs} $<TARGET_OBJECTS:llvm_dwarf>) @@ -430,7 +432,7 @@ if(EMSCRIPTEN) # binaryen.js WebAssembly variant add_executable(binaryen_wasm ${binaryen_emscripten_SOURCES}) - target_link_libraries(binaryen_wasm wasm asmjs emscripten-optimizer passes ir cfg support wasm) + target_link_libraries(binaryen_wasm wasm asmjs emscripten-optimizer passes ir cfg support analysis wasm) target_link_libraries(binaryen_wasm "-sFILESYSTEM") target_link_libraries(binaryen_wasm "-sEXPORT_NAME=Binaryen") target_link_libraries(binaryen_wasm "-sNODERAWFS=0") @@ -451,7 +453,7 @@ if(EMSCRIPTEN) # binaryen.js JavaScript variant add_executable(binaryen_js ${binaryen_emscripten_SOURCES}) - target_link_libraries(binaryen_js wasm asmjs emscripten-optimizer passes ir cfg support wasm) + target_link_libraries(binaryen_js wasm asmjs emscripten-optimizer passes ir cfg support analysis wasm) target_link_libraries(binaryen_js "-sWASM=0") target_link_libraries(binaryen_js "-sWASM_ASYNC_COMPILATION=0") if(${CMAKE_CXX_COMPILER_VERSION} STREQUAL "6.0.1") diff --git a/src/analysis/CMakeLists.txt b/src/analysis/CMakeLists.txt new file mode 100644 index 000000000..ea0494859 --- /dev/null +++ b/src/analysis/CMakeLists.txt @@ -0,0 +1,6 @@ +file(GLOB analysis_HEADERS *.h) +set(analysis_SOURCES + cfg.cpp + ${analysis_HEADERS} +) +add_library(analysis OBJECT ${analysis_SOURCES}) diff --git a/src/analysis/cfg-impl.h b/src/analysis/cfg-impl.h new file mode 100644 index 000000000..cf348fa09 --- /dev/null +++ b/src/analysis/cfg-impl.h @@ -0,0 +1,140 @@ +/* + * Copyright 2023 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef wasm_analysis_cfg_impl_h +#define wasm_analysis_cfg_impl_h + +#include "cfg.h" + +namespace wasm::analysis { + +// An iterator over a sequence of contiguous pointers (represented as a pointer +// to a pointer in the sequence) that dereferences the pointed-to pointer. +// TODO: Move this to its own public header if there is ever another use for it. +template<typename T> struct _indirect_ptr_iterator { + using iterator_category = std::random_access_iterator_tag; + using value_type = T; + using different_type = off_t; + using reference = const T&; + using pointer = const T*; + + const T* const* ptr; + + const T& operator*() const { return **ptr; } + + const T& operator[](int n) const { return **(ptr + n); } + + _indirect_ptr_iterator& operator+=(int n) { + ptr += n; + return *this; + } + + _indirect_ptr_iterator& operator-=(int n) { + ptr -= n; + return *this; + } + + _indirect_ptr_iterator& operator++() { return *this += 1; } + + _indirect_ptr_iterator operator++(int) { + _indirect_ptr_iterator it = *this; + ++(*this); + return it; + } + + _indirect_ptr_iterator& operator--() { return *this -= 1; } + + _indirect_ptr_iterator operator--(int) { + _indirect_ptr_iterator it = *this; + --(*this); + return it; + } + + _indirect_ptr_iterator operator+(int n) const { + _indirect_ptr_iterator it = *this; + it += n; + return it; + } + + _indirect_ptr_iterator operator-(int n) const { + _indirect_ptr_iterator it = *this; + it -= n; + return it; + } + + bool operator==(const _indirect_ptr_iterator& other) const { + return ptr == other.ptr; + } + + bool operator!=(const _indirect_ptr_iterator& other) const { + return !(*this == other); + } + + bool operator<(const _indirect_ptr_iterator& other) const { + return ptr < other.ptr; + } + + bool operator>(const _indirect_ptr_iterator& other) const { + return ptr > other.ptr; + } + + bool operator<=(const _indirect_ptr_iterator& other) const { + return ptr <= other.ptr; + } + + bool operator>=(const _indirect_ptr_iterator& other) const { + return ptr >= other.ptr; + } +}; + +template<typename T> +_indirect_ptr_iterator<T> operator+(int n, + const _indirect_ptr_iterator<T>& it) { + return it + n; +} + +// Wraps a vector of pointers and provides dereferencing iterators for it. +template<typename T> struct _indirect_ptr_vec { + using iterator = _indirect_ptr_iterator<T>; + + const std::vector<T*>& vec; + + _indirect_ptr_vec(const std::vector<T*>& vec) : vec(vec) {} + + iterator begin() const { return {&vec.data()[0]}; } + iterator end() const { return {&vec.data()[vec.size()]}; } +}; + +struct BasicBlock::Predecessors : _indirect_ptr_vec<BasicBlock> { + Predecessors(const BasicBlock& block) + : _indirect_ptr_vec(block.predecessors) {} +}; + +struct BasicBlock::Successors : _indirect_ptr_vec<BasicBlock> { + Successors(const BasicBlock& block) : _indirect_ptr_vec(block.successors) {} +}; + +inline BasicBlock::Predecessors BasicBlock::preds() const { + return Predecessors(*this); +} + +inline BasicBlock::Successors BasicBlock::succs() const { + return Successors(*this); +} + +} // namespace wasm::analysis + +#endif // wasm_analysis_cfg_impl_h diff --git a/src/analysis/cfg.cpp b/src/analysis/cfg.cpp new file mode 100644 index 000000000..7d770776d --- /dev/null +++ b/src/analysis/cfg.cpp @@ -0,0 +1,106 @@ +/* + * Copyright 2023 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <unordered_map> + +#include "analysis/cfg.h" +#include "cfg/cfg-traversal.h" +#include "wasm-stack.h" + +namespace wasm::analysis { + +CFG CFG::fromFunction(Function* func) { + struct CFGBuilder : CFGWalker<CFGBuilder, + UnifiedExpressionVisitor<CFGBuilder>, + std::vector<Expression*>> { + void visitExpression(Expression* curr) { + if (currBasicBlock) { + currBasicBlock->contents.push_back(curr); + } + } + }; + + CFGBuilder builder; + builder.walkFunction(func); + + size_t numBlocks = builder.basicBlocks.size(); + + CFG cfg; + cfg.blocks = std::vector<BasicBlock>(numBlocks); + + // From here the addresses of the new basic blocks are stable. + std::unordered_map<CFGBuilder::BasicBlock*, BasicBlock*> oldToNewBlocks; + for (size_t i = 0; i < numBlocks; ++i) { + oldToNewBlocks[builder.basicBlocks[i].get()] = &cfg.blocks[i]; + } + + for (size_t i = 0; i < numBlocks; ++i) { + auto& oldBlock = *builder.basicBlocks[i]; + auto& newBlock = cfg.blocks[i]; + newBlock.index = i; + newBlock.insts = std::move(oldBlock.contents); + newBlock.predecessors.reserve(oldBlock.in.size()); + for (auto* oldPred : oldBlock.in) { + newBlock.predecessors.push_back(oldToNewBlocks.at(oldPred)); + } + newBlock.successors.reserve(oldBlock.out.size()); + for (auto* oldSucc : oldBlock.out) { + newBlock.successors.push_back(oldToNewBlocks.at(oldSucc)); + } + } + + // Move-construct a new CFG to get mandatory copy elision, preserving basic + // block addresses through the return. + return CFG(std::move(cfg)); +} + +void CFG::print(std::ostream& os, Module* wasm) const { + size_t start = 0; + for (auto& block : *this) { + if (&block != &*begin()) { + os << '\n'; + } + block.print(os, wasm, start); + start += block.size(); + } +} + +void BasicBlock::print(std::ostream& os, Module* wasm, size_t start) const { + os << ";; preds: ["; + for (auto& pred : preds()) { + if (&pred != &*preds().begin()) { + os << ", "; + } + os << pred.index; + } + os << "], succs: ["; + + for (auto& succ : succs()) { + if (&succ != &*succs().begin()) { + os << ", "; + } + os << succ.index; + } + os << "]\n"; + + os << index << ":\n"; + size_t instIndex = start; + for (auto* inst : *this) { + os << " " << instIndex++ << ": " << ShallowExpression{inst, wasm} << '\n'; + } +} + +} // namespace wasm::analysis diff --git a/src/analysis/cfg.h b/src/analysis/cfg.h new file mode 100644 index 000000000..3b980f410 --- /dev/null +++ b/src/analysis/cfg.h @@ -0,0 +1,79 @@ +/* + * Copyright 2023 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// A generic CFG / basic block utility. Unlike the utilities in src/cfg/, this +// is a generic representation of a CFG rather than a generic builder of +// CFG-like objects. It lives here in src/analysis/ because it is primarily +// meant for use in the static analysis framework. Other Binaryen code will find +// it more idiomatic to use the utilities in src/cfg/. + +#ifndef wasm_analysis_cfg_h +#define wasm_analysis_cfg_h + +#include <iostream> +#include <vector> + +#include "wasm.h" + +namespace wasm::analysis { + +struct BasicBlock; + +struct CFG { + // Iterate through basic blocks. + using iterator = std::vector<BasicBlock>::const_iterator; + iterator begin() const { return blocks.cbegin(); } + iterator end() const { return blocks.cend(); } + size_t size() const { return blocks.size(); } + + static CFG fromFunction(Function* func); + + void print(std::ostream& os, Module* wasm = nullptr) const; + +private: + std::vector<BasicBlock> blocks; + friend BasicBlock; +}; + +struct BasicBlock { + using iterator = std::vector<Expression*>::const_iterator; + + // Iterate through instructions. + iterator begin() const { return insts.cbegin(); } + iterator end() const { return insts.cend(); } + size_t size() const { return insts.size(); } + + // Iterables for predecessor and successor blocks. + struct Predecessors; + struct Successors; + Predecessors preds() const; + Successors succs() const; + + void print(std::ostream& os, Module* wasm = nullptr, size_t start = 0) const; + +private: + Index index; + std::vector<Expression*> insts; + std::vector<BasicBlock*> predecessors; + std::vector<BasicBlock*> successors; + friend CFG; +}; + +} // namespace wasm::analysis + +#include "cfg-impl.h" + +#endif // wasm_analysis_cfg_h diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 76fd53759..0bddd338b 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -3776,6 +3776,17 @@ std::ostream& operator<<(std::ostream& o, wasm::ModuleExpression pair) { return wasm::printExpression(pair.second, o, false, false, &pair.first); } +std::ostream& operator<<(std::ostream& o, wasm::ShallowExpression expression) { + if (expression.module) { + wasm::PrintExpressionContents printer(expression.module, nullptr, o); + printer.visit(expression.expr); + } else { + wasm::PrintExpressionContents printer(nullptr, o); + printer.visit(expression.expr); + } + return o; +} + std::ostream& operator<<(std::ostream& o, wasm::StackInst& inst) { return wasm::printStackInst(&inst, o); } diff --git a/src/wasm.h b/src/wasm.h index 474497847..727c9ca30 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -2308,8 +2308,16 @@ public: void clearDebugInfo(); }; +// Utility for printing an expression with named types. using ModuleExpression = std::pair<Module&, Expression*>; +// Utility for printing only the top level of an expression. Named types will be +// used if `module` is non-null. +struct ShallowExpression { + Expression* expr; + Module* module = nullptr; +}; + } // namespace wasm namespace std { @@ -2322,6 +2330,7 @@ template<> struct hash<wasm::Address> { std::ostream& operator<<(std::ostream& o, wasm::Module& module); std::ostream& operator<<(std::ostream& o, wasm::Expression& expression); std::ostream& operator<<(std::ostream& o, wasm::ModuleExpression pair); +std::ostream& operator<<(std::ostream& o, wasm::ShallowExpression expression); std::ostream& operator<<(std::ostream& o, wasm::StackInst& inst); std::ostream& operator<<(std::ostream& o, wasm::StackIR& ir); diff --git a/test/gtest/CMakeLists.txt b/test/gtest/CMakeLists.txt index 21b7ad4ad..27b7be621 100644 --- a/test/gtest/CMakeLists.txt +++ b/test/gtest/CMakeLists.txt @@ -2,6 +2,7 @@ include_directories(../../third_party/googletest/googletest/include) include_directories(../../src/wasm) set(unittest_SOURCES + cfg.cpp dfa_minimization.cpp possible-contents.cpp type-builder.cpp diff --git a/test/gtest/cfg.cpp b/test/gtest/cfg.cpp new file mode 100644 index 000000000..d4400acc7 --- /dev/null +++ b/test/gtest/cfg.cpp @@ -0,0 +1,84 @@ +#include "analysis/cfg.h" +#include "support/colors.h" +#include "wasm-s-parser.h" +#include "wasm.h" +#include "gtest/gtest.h" + +using namespace wasm; +using namespace wasm::analysis; + +TEST(CFGTest, Print) { + auto moduleText = R"wasm( + (module + (func $foo + (drop + (i32.const 0) + ) + (drop + (if (result i32) + (i32.const 1) + (block + (loop $loop + (br_if $loop + (i32.const 2) + ) + ) + (i32.const 3) + ) + (return + (i32.const 4) + ) + ) + ) + ) + ) + )wasm"; + + auto cfgText = R"cfg(;; preds: [], succs: [1, 5] +0: + 0: i32.const 0 + 1: drop + 2: i32.const 1 + +;; preds: [0], succs: [2] +1: + +;; preds: [1, 2], succs: [3, 2] +2: + 3: i32.const 2 + 4: br_if $loop + +;; preds: [2], succs: [4] +3: + 5: loop $loop + +;; preds: [3], succs: [6] +4: + 6: i32.const 3 + 7: block + +;; preds: [0], succs: [] +5: + 8: i32.const 4 + 9: return + +;; preds: [4], succs: [] +6: + 10: drop + 11: block +)cfg"; + + Module wasm; + SExpressionParser parser(moduleText); + SExpressionWasmBuilder builder(wasm, *(*parser.root)[0], IRProfile::Normal); + + CFG cfg = CFG::fromFunction(wasm.getFunction("foo")); + + bool colors = Colors::isEnabled(); + Colors::setEnabled(false); + std::stringstream ss; + cfg.print(ss); + Colors::setEnabled(colors); + + EXPECT_EQ(ss.str(), cfgText); +} |