summaryrefslogtreecommitdiff
path: root/src/passes/stringify-walker.h
blob: 6f50f58d40a8656a639b6650294bddc3f15b4220 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
/*
 * Copyright 2023 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef wasm_passes_stringify_walker_h
#define wasm_passes_stringify_walker_h

#include "ir/iteration.h"
#include "ir/module-utils.h"
#include "ir/stack-utils.h"
#include "ir/utils.h"
#include "support/suffix_tree.h"
#include "wasm-ir-builder.h"
#include "wasm-traversal.h"
#include <queue>

namespace wasm {

/*
 * This walker does a normal postorder traversal except that it defers
 * traversing the contents of control flow structures it encounters. This
 * effectively un-nests control flow.
 *
 * For example, the below (contrived) wat:
 * 1 : (block
 * 2 :   (if
 * 3 :     (i32.const 0)
 * 4 :     (then (return (i32.const 1)))
 * 5 :     (else (return (i32.const 0)))))
 * 6 :   (drop
 * 7 :     (i32.add
 * 8 :       (i32.const 20)
 * 9 :       (i32.const 10)))
 * 10:   (if
 * 11:     (i32.const 1)
 * 12:     (then (return (i32.const 2)))
 * 14: )
 * Would have its expressions visited in the following order (based on line
 * number):
 * 1, 3, 2, 8, 9, 7, 6, 11, 10, 4, 5, 12
 *
 * Of note:
 *   - The visits to if-True on line 4 and if-False on line 5 are deferred until
 *     after the rest of the siblings of the if expression on line 2 are
 *     visited.
 *   - The if-condition (i32.const 0) on line 3 is visited before the if
 *     expression on line 2. Similarly, the if-condition (i32.const 1) on line
 *     11 is visited before the if expression on line 10.
 *   - The add (line 7) binary operator's left and right children (lines 8 - 9)
 *     are visited first as they need to be on the stack before the add
 *     operation is executed.
 */

template<typename SubType>
struct StringifyWalker
  : public PostWalker<SubType, UnifiedExpressionVisitor<SubType>> {

  using Super = PostWalker<SubType, UnifiedExpressionVisitor<SubType>>;

  struct SeparatorReason {
    struct FuncStart {
      Function* func;
    };

    struct BlockStart {
      Block* block;
    };

    struct IfStart {
      If* iff;
    };

    struct ElseStart {};

    struct LoopStart {
      Loop* loop;
    };

    struct TryBodyStart {};

    struct TryCatchStart {};

    struct End {
      Expression* curr;
    };
    using Separator = std::variant<FuncStart,
                                   BlockStart,
                                   IfStart,
                                   ElseStart,
                                   LoopStart,
                                   TryBodyStart,
                                   TryCatchStart,
                                   End>;

    Separator reason;

    SeparatorReason(Separator reason) : reason(reason) {}

    static SeparatorReason makeFuncStart(Function* func) {
      return SeparatorReason(FuncStart{func});
    }
    static SeparatorReason makeBlockStart(Block* block) {
      return SeparatorReason(BlockStart{block});
    }
    static SeparatorReason makeIfStart(If* iff) {
      return SeparatorReason(IfStart{iff});
    }
    static SeparatorReason makeElseStart() {
      return SeparatorReason(ElseStart{});
    }
    static SeparatorReason makeLoopStart(Loop* loop) {
      return SeparatorReason(LoopStart{loop});
    }
    static SeparatorReason makeTryCatchStart() {
      return SeparatorReason(TryCatchStart{});
    }
    static SeparatorReason makeTryBodyStart() {
      return SeparatorReason(TryBodyStart{});
    }
    static SeparatorReason makeEnd() { return SeparatorReason(End{}); }
    FuncStart* getFuncStart() { return std::get_if<FuncStart>(&reason); }
    BlockStart* getBlockStart() { return std::get_if<BlockStart>(&reason); }
    IfStart* getIfStart() { return std::get_if<IfStart>(&reason); }
    ElseStart* getElseStart() { return std::get_if<ElseStart>(&reason); }
    LoopStart* getLoopStart() { return std::get_if<LoopStart>(&reason); }
    TryBodyStart* getTryBodyStart() {
      return std::get_if<TryBodyStart>(&reason);
    }
    TryCatchStart* getTryCatchStart() {
      return std::get_if<TryCatchStart>(&reason);
    }
    End* getEnd() { return std::get_if<End>(&reason); }
  };

  friend std::ostream&
  operator<<(std::ostream& o,
             typename StringifyWalker::SeparatorReason reason) {
    if (reason.getFuncStart()) {
      return o << "Func Start";
    } else if (reason.getBlockStart()) {
      return o << "Block Start";
    } else if (reason.getIfStart()) {
      return o << "If Start";
    } else if (reason.getElseStart()) {
      return o << "Else Start";
    } else if (reason.getLoopStart()) {
      return o << "Loop Start";
    } else if (reason.getTryBodyStart()) {
      return o << "Try Body Start";
    } else if (reason.getTryCatchStart()) {
      return o << "Try Catch Start";
    } else if (reason.getEnd()) {
      return o << "End";
    }

    return o << "~~~Undefined in operator<< overload~~~";
  }

  // To ensure control flow children are walked consistently during outlining,
  // we push a copy of the control flow expression. This avoids an issue where
  // control flow no longer points to the same expression after being
  // outlined into a new function.
  std::queue<Expression*> controlFlowQueue;

  /*
   * To initiate the walk, subclasses should call walkModule with a pointer to
   * the wasm module.
   *
   * Member functions addUniqueSymbol and visitExpression are provided as
   * extension points for subclasses. These functions will be called at
   * appropriate points during the walk and should be implemented by subclasses.
   */
  void visitExpression(Expression* curr);
  void addUniqueSymbol(SeparatorReason reason);

  void doWalkModule(Module* module);
  void doWalkFunction(Function* func);
  static void scan(SubType* self, Expression** currp);
  static void doVisitExpression(SubType* self, Expression** currp);

private:
  void dequeueControlFlow();
};

} // namespace wasm

#include "stringify-walker-impl.h"

namespace wasm {

// This custom hasher conforms to std::hash<Key>. Its purpose is to provide
// a custom hash for if expressions, so the if-condition of the if expression is
// not included in the hash for the if expression. This is needed because in the
// binary format, the if-condition comes before and is consumed by the if. To
// match the binary format, we hash the if condition before and separately from
// the rest of the if expression.
struct StringifyHasher {
  size_t operator()(Expression* curr) const;
};

// This custom equator conforms to std::equal_to<Key>. Similar to
// StringifyHasher, it's purpose is to not include the if-condition when
// evaluating the equality of two if expressions.
struct StringifyEquator {
  bool operator()(Expression* lhs, Expression* rhs) const;
};

struct HashStringifyWalker : public StringifyWalker<HashStringifyWalker> {
  // After calling walkModule, this vector contains the result of encoding a
  // wasm module as a string of uint32_t values. Each value represents either an
  // Expression or a separator to mark the end of control flow.
  std::vector<uint32_t> hashString;
  // A monotonic counter used to ensure that unique expressions in the
  // module are assigned a unique value in the hashString.
  uint32_t nextVal = 0;
  // A monotonic counter used to ensure that each separator in the
  // module is assigned a unique value in the hashString.
  int32_t nextSeparatorVal = -1;
  // Contains a mapping of expression pointer to value to ensure we
  // use the same value for matching expressions. A custom hasher and
  // equator is provided in order to separate out evaluation of the if-condition
  // when evaluating if expressions.
  std::unordered_map<Expression*, uint32_t, StringifyHasher, StringifyEquator>
    exprToCounter;
  std::vector<Expression*> exprs;

  void addUniqueSymbol(SeparatorReason reason);
  void visitExpression(Expression* curr);
  // Converts the idx from relative to the beginning of the program to
  // relative to its enclosing function, and returns the name of its function.
  std::pair<uint32_t, Name> makeRelative(uint32_t idx) const;

private:
  // Contains the indices that mark the start of each function.
  std::set<uint32_t> funcIndices;
  // Maps the start idx of each function to the function name.
  std::map<uint32_t, Name> idxToFuncName;
};

struct OutliningSequence {
  unsigned startIdx;
  unsigned endIdx;
  Name func;

  OutliningSequence(unsigned startIdx, unsigned endIdx, Name func)
    : startIdx(startIdx), endIdx(endIdx), func(func) {}
};

using Substrings = std::vector<SuffixTree::RepeatedSubstring>;

// Functions that filter vectors of SuffixTree::RepeatedSubstring
struct StringifyProcessor {
  static Substrings repeatSubstrings(std::vector<uint32_t>& hashString);
  static Substrings dedupe(const Substrings& substrings);
  // Filter is the general purpose function backing subsequent filter functions.
  // It can be used directly, but generally prefer a wrapper function
  // to encapsulate your condition and make it available for tests.
  static Substrings filter(const Substrings& substrings,
                           const std::vector<Expression*>& exprs,
                           std::function<bool(const Expression*)> condition);
  static Substrings filterLocalSets(const Substrings& substrings,
                                    const std::vector<Expression*>& exprs);
  static Substrings filterLocalGets(const Substrings& substrings,
                                    const std::vector<Expression*>& exprs);
  static Substrings filterBranches(const Substrings& substrings,
                                   const std::vector<Expression*>& exprs);
};

} // namespace wasm

#endif // wasm_passes_stringify_walker_h