1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
|
/*
* Copyright 2023 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef wasm_passes_stringify_walker_h
#define wasm_passes_stringify_walker_h
#include "ir/iteration.h"
#include "ir/module-utils.h"
#include "ir/utils.h"
#include "support/suffix_tree.h"
#include "wasm-traversal.h"
#include <queue>
namespace wasm {
/*
* This walker does a normal postorder traversal except that it defers
* traversing the contents of control flow structures it encounters. This
* effectively un-nests control flow.
*
* For example, the below (contrived) wat:
* 1 : (block
* 2 : (if
* 3 : (i32.const 0)
* 4 : (then (return (i32.const 1)))
* 5 : (else (return (i32.const 0)))))
* 6 : (drop
* 7 : (i32.add
* 8 : (i32.const 20)
* 9 : (i32.const 10)))
* 10: (if
* 11: (i32.const 1)
* 12: (then (return (i32.const 2)))
* 14: )
* Would have its expressions visited in the following order (based on line
* number):
* 1, 3, 2, 8, 9, 7, 6, 11, 10, 4, 5, 12
*
* Of note:
* - The visits to if-True on line 4 and if-False on line 5 are deferred until
* after the rest of the siblings of the if expression on line 2 are visited
* - The if-condition (i32.const 0) on line 3 is visited before the if
* expression on line 2. Similarly, the if-condition (i32.const 1) on line
* 11 is visited before the if expression on line 10.
* - The add (line 7) binary operator's left and right children (lines 8 - 9)
* are visited first as they need to be on the stack before the add
* operation is executed
*/
template<typename SubType>
struct StringifyWalker
: public PostWalker<SubType, UnifiedExpressionVisitor<SubType>> {
using Super = PostWalker<SubType, UnifiedExpressionVisitor<SubType>>;
struct SeparatorReason {
struct FuncStart {
Function* func;
};
struct BlockStart {
Block* curr;
};
struct IfStart {
If* iff;
};
struct ElseStart {
If* iff;
};
struct LoopStart {
Loop* loop;
};
struct TryBodyStart {};
struct TryCatchStart {};
struct End {
Expression* curr;
};
using Separator = std::variant<FuncStart,
BlockStart,
IfStart,
ElseStart,
LoopStart,
TryBodyStart,
TryCatchStart,
End>;
Separator reason;
SeparatorReason(Separator reason) : reason(reason) {}
static SeparatorReason makeFuncStart(Function* func) {
return SeparatorReason(FuncStart{func});
}
static SeparatorReason makeBlockStart(Block* block) {
return SeparatorReason(BlockStart{block});
}
static SeparatorReason makeIfStart(If* iff) {
return SeparatorReason(IfStart{iff});
}
static SeparatorReason makeElseStart(If* iff) {
return SeparatorReason(ElseStart{iff});
}
static SeparatorReason makeLoopStart(Loop* loop) {
return SeparatorReason(LoopStart{loop});
}
static SeparatorReason makeTryCatchStart() {
return SeparatorReason(TryCatchStart{});
}
static SeparatorReason makeTryBodyStart() {
return SeparatorReason(TryBodyStart{});
}
static SeparatorReason makeEnd() { return SeparatorReason(End{}); }
bool isFuncStart() { return std::get_if<FuncStart>(&reason); }
bool isBlockStart() { return std::get_if<BlockStart>(&reason); }
bool isIfStart() { return std::get_if<IfStart>(&reason); }
bool isElseStart() { return std::get_if<ElseStart>(&reason); }
bool isLoopStart() { return std::get_if<LoopStart>(&reason); }
bool isTryBodyStart() { return std::get_if<TryBodyStart>(&reason); }
bool isTryCatchStart() { return std::get_if<TryCatchStart>(&reason); }
bool isEnd() { return std::get_if<End>(&reason); }
};
friend std::ostream&
operator<<(std::ostream& o,
typename StringifyWalker::SeparatorReason reason) {
if (reason.isFuncStart()) {
return o << "Func Start";
} else if (reason.isBlockStart()) {
return o << "Block Start";
} else if (reason.isIfStart()) {
return o << "If Start";
} else if (reason.isElseStart()) {
return o << "Else Start";
} else if (reason.isLoopStart()) {
return o << "Loop Start";
} else if (reason.isTryBodyStart()) {
return o << "Try Body Start";
} else if (reason.isTryCatchStart()) {
return o << "Try Catch Start";
} else if (reason.isEnd()) {
return o << "End";
}
return o << "~~~Undefined in operator<< overload~~~";
}
std::queue<Expression**> controlFlowQueue;
/*
* To initiate the walk, subclasses should call walkModule with a pointer to
* the wasm module.
*
* Member functions addUniqueSymbol and visitExpression are provided as
* extension points for subclasses. These functions will be called at
* appropriate points during the walk and should be implemented by subclasses.
*/
void visitExpression(Expression* curr);
void addUniqueSymbol(SeparatorReason reason);
void doWalkModule(Module* module);
void doWalkFunction(Function* func);
void walk(Expression* curr);
static void scan(SubType* self, Expression** currp);
static void doVisitExpression(SubType* self, Expression** currp);
private:
void dequeueControlFlow();
};
} // namespace wasm
#include "stringify-walker-impl.h"
namespace wasm {
// This custom hasher conforms to std::hash<Key>. Its purpose is to provide
// a custom hash for if expressions, so the if-condition of the if expression is
// not included in the hash for the if expression. This is needed because in the
// binary format, the if-condition comes before and is consumed by the if. To
// match the binary format, we hash the if condition before and separately from
// the rest of the if expression.
struct StringifyHasher {
size_t operator()(Expression* curr) const;
};
// This custom equator conforms to std::equal_to<Key>. Similar to
// StringifyHasher, it's purpose is to not include the if-condition when
// evaluating the equality of two if expressions.
struct StringifyEquator {
bool operator()(Expression* lhs, Expression* rhs) const;
};
struct HashStringifyWalker : public StringifyWalker<HashStringifyWalker> {
// After calling walkModule, this vector contains the result of encoding a
// wasm module as a string of uint32_t values. Each value represents either an
// Expression or a separator to mark the end of control flow.
std::vector<uint32_t> hashString;
// A monotonic counter used to ensure that unique expressions in the
// module are assigned a unique value in the hashString
uint32_t nextVal = 0;
// A monotonic counter used to ensure that each separator in the
// module is assigned a unique value in the hashString
int32_t nextSeparatorVal = -1;
// Contains a mapping of expression pointer to value to ensure we
// use the same value for matching expressions. A custom hasher and
// equator is provided in order to separate out evaluation of the if-condition
// when evaluating if expressions.
std::unordered_map<Expression*, uint32_t, StringifyHasher, StringifyEquator>
exprToCounter;
void addUniqueSymbol(SeparatorReason reason);
void visitExpression(Expression* curr);
};
// Functions that filter vectors of SuffixTree::RepeatedSubstring
struct StringifyProcessor {
static std::vector<SuffixTree::RepeatedSubstring>
dedupe(const std::vector<SuffixTree::RepeatedSubstring>);
};
} // namespace wasm
#endif // wasm_passes_stringify_walker_h
|