summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlon Zakai <alonzakai@gmail.com>2018-05-30 16:51:12 -0700
committerGitHub <noreply@github.com>2018-05-30 16:51:12 -0700
commit0b6a1f5aa243d8542c086d4274e43bf411d63d59 (patch)
treea0ccd69bb8c698ae4a8315fc30ab35ee0c4636d7
parent706b3f6c6cc95d688a5f97e9d82e15df6d04d54d (diff)
downloadbinaryen-0b6a1f5aa243d8542c086d4274e43bf411d63d59.tar.gz
binaryen-0b6a1f5aa243d8542c086d4274e43bf411d63d59.tar.bz2
binaryen-0b6a1f5aa243d8542c086d4274e43bf411d63d59.zip
Optimize validation of many nested blocks (#1576)
On the testcase from https://github.com/tweag/asterius/issues/19#issuecomment-393052653 this makes us almost 3x faster, and use 25% less memory. The main improvement here is to simplify and optimize the data structures the validator uses to validate br targets: use unordered maps, and use one less of them. Also some speedups from using that map more effectively (use of iterators to avoid multiple lookups). Also move the duplicate-node checks to the internal IR validation section, which makes more sense anyhow (it's not wasm validation, it's internal IR validation, which like the check for stale internal types, we do only if debugging).
-rw-r--r--src/emscripten-optimizer/istring.h3
-rw-r--r--src/wasm/wasm-validator.cpp93
-rw-r--r--test/example/c-api-kitchen-sink.c16
-rw-r--r--test/example/c-api-kitchen-sink.txt10
4 files changed, 50 insertions, 72 deletions
diff --git a/src/emscripten-optimizer/istring.h b/src/emscripten-optimizer/istring.h
index 8c77c4499..467ae682d 100644
--- a/src/emscripten-optimizer/istring.h
+++ b/src/emscripten-optimizer/istring.h
@@ -159,8 +159,7 @@ namespace std {
template <> struct hash<cashew::IString> : public unary_function<cashew::IString, size_t> {
size_t operator()(const cashew::IString& str) const {
- size_t hash = size_t(str.str);
- return hash = ((hash << 5) + hash) ^ 5381; /* (hash * 33) ^ c */
+ return std::hash<size_t>{}(size_t(str.str));
}
};
diff --git a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp
index 7246a15a6..9b5384efe 100644
--- a/src/wasm/wasm-validator.cpp
+++ b/src/wasm/wasm-validator.cpp
@@ -176,20 +176,27 @@ struct FunctionValidator : public WalkerPass<PostWalker<FunctionValidator>> {
FunctionValidator(ValidationInfo* info) : info(*info) {}
struct BreakInfo {
+ enum {
+ UnsetArity = Index(-1),
+ PoisonArity = Index(-2)
+ };
+
Type type;
Index arity;
- BreakInfo() {}
+ BreakInfo() : arity(UnsetArity) {}
BreakInfo(Type type, Index arity) : type(type), arity(arity) {}
+
+ bool hasBeenSet() {
+ // Compare to the impossible value.
+ return arity != UnsetArity;
+ }
};
- std::map<Name, Expression*> breakTargets;
- std::map<Expression*, BreakInfo> breakInfos;
+ std::unordered_map<Name, BreakInfo> breakInfos;
Type returnType = unreachable; // type used in returns
- std::set<Name> labelNames; // Binaryen IR requires that label names must be unique - IR generators must ensure that
-
- std::unordered_set<Expression*> seenExpressions; // expressions must not appear twice
+ std::unordered_set<Name> labelNames; // Binaryen IR requires that label names must be unique - IR generators must ensure that
void noteLabelName(Name name);
@@ -198,14 +205,14 @@ public:
static void visitPreBlock(FunctionValidator* self, Expression** currp) {
auto* curr = (*currp)->cast<Block>();
- if (curr->name.is()) self->breakTargets[curr->name] = curr;
+ if (curr->name.is()) self->breakInfos[curr->name];
}
void visitBlock(Block* curr);
static void visitPreLoop(FunctionValidator* self, Expression** currp) {
auto* curr = (*currp)->cast<Loop>();
- if (curr->name.is()) self->breakTargets[curr->name] = curr;
+ if (curr->name.is()) self->breakInfos[curr->name];
}
void visitLoop(Loop* curr);
@@ -285,16 +292,19 @@ private:
void FunctionValidator::noteLabelName(Name name) {
if (!name.is()) return;
- shouldBeTrue(labelNames.find(name) == labelNames.end(), name, "names in Binaryen IR must be unique - IR generators must ensure that");
- labelNames.insert(name);
+ bool inserted;
+ std::tie(std::ignore, inserted) = labelNames.insert(name);
+ shouldBeTrue(inserted, name, "names in Binaryen IR must be unique - IR generators must ensure that");
}
void FunctionValidator::visitBlock(Block* curr) {
// if we are break'ed to, then the value must be right for us
if (curr->name.is()) {
noteLabelName(curr->name);
- if (breakInfos.count(curr) > 0) {
- auto& info = breakInfos[curr];
+ auto iter = breakInfos.find(curr->name);
+ assert(iter != breakInfos.end()); // we set it ourselves
+ auto& info = iter->second;
+ if (info.hasBeenSet()) {
if (isConcreteType(curr->type)) {
shouldBeTrue(info.arity != 0, curr, "break arities must be > 0 if block has a value");
} else {
@@ -307,7 +317,7 @@ void FunctionValidator::visitBlock(Block* curr) {
if (isConcreteType(curr->type) && info.arity && info.type != unreachable) {
shouldBeEqual(curr->type, info.type, curr, "block+breaks must have right type if breaks have arity");
}
- shouldBeTrue(info.arity != Index(-1), curr, "break arities must match");
+ shouldBeTrue(info.arity != BreakInfo::PoisonArity, curr, "break arities must match");
if (curr->list.size() > 0) {
auto last = curr->list.back()->type;
if (isConcreteType(last) && info.type != unreachable) {
@@ -318,7 +328,7 @@ void FunctionValidator::visitBlock(Block* curr) {
}
}
}
- breakTargets.erase(curr->name);
+ breakInfos.erase(iter);
}
if (curr->list.size() > 1) {
for (Index i = 0; i < curr->list.size() - 1; i++) {
@@ -347,11 +357,13 @@ void FunctionValidator::visitBlock(Block* curr) {
void FunctionValidator::visitLoop(Loop* curr) {
if (curr->name.is()) {
noteLabelName(curr->name);
- breakTargets.erase(curr->name);
- if (breakInfos.count(curr) > 0) {
- auto& info = breakInfos[curr];
+ auto iter = breakInfos.find(curr->name);
+ assert(iter != breakInfos.end()); // we set it ourselves
+ auto& info = iter->second;
+ if (info.hasBeenSet()) {
shouldBeEqual(info.arity, Index(0), curr, "breaks to a loop cannot pass a value");
}
+ breakInfos.erase(iter);
}
if (curr->type == none) {
shouldBeFalse(isConcreteType(curr->body->type), curr, "bad body for a loop that has no value");
@@ -394,12 +406,12 @@ void FunctionValidator::noteBreak(Name name, Expression* value, Expression* curr
shouldBeUnequal(valueType, none, curr, "breaks must have a valid value");
arity = 1;
}
- if (!shouldBeTrue(breakTargets.count(name) > 0, curr, "all break targets must be valid")) return;
- auto* target = breakTargets[name];
- if (breakInfos.count(target) == 0) {
- breakInfos[target] = BreakInfo(valueType, arity);
+ auto iter = breakInfos.find(name);
+ if (!shouldBeTrue(iter != breakInfos.end(), curr, "all break targets must be valid")) return;
+ auto& info = iter->second;
+ if (!info.hasBeenSet()) {
+ info = BreakInfo(valueType, arity);
} else {
- auto& info = breakInfos[target];
if (info.type == unreachable) {
info.type = valueType;
} else if (valueType != unreachable) {
@@ -408,7 +420,7 @@ void FunctionValidator::noteBreak(Name name, Expression* value, Expression* curr
}
}
if (arity != info.arity) {
- info.arity = Index(-1); // a poison value
+ info.arity = BreakInfo::PoisonArity;
}
}
}
@@ -810,7 +822,7 @@ void FunctionValidator::visitFunction(Function* curr) {
if (returnType != unreachable) {
shouldBeEqual(curr->result, returnType, curr->body, "function result must match, if function has returns");
}
- shouldBeTrue(breakTargets.empty(), curr->body, "all named break targets must exist");
+ shouldBeTrue(breakInfos.empty(), curr->body, "all named break targets must exist");
returnType = unreachable;
labelNames.clear();
// if function has a named type, it must match up with the function's params and result
@@ -819,24 +831,6 @@ void FunctionValidator::visitFunction(Function* curr) {
shouldBeTrue(ft->params == curr->params, curr->name, "function params must match its declared type");
shouldBeTrue(ft->result == curr->result, curr->name, "function result must match its declared type");
}
- // expressions must not be seen more than once
- struct Walker : public PostWalker<Walker, UnifiedExpressionVisitor<Walker>> {
- std::unordered_set<Expression*>& seen;
- std::vector<Expression*> dupes;
-
- Walker(std::unordered_set<Expression*>& seen) : seen(seen) {}
-
- void visitExpression(Expression* curr) {
- bool inserted;
- std::tie(std::ignore, inserted) = seen.insert(curr);
- if (!inserted) dupes.push_back(curr);
- }
- };
- Walker walker(seenExpressions);
- walker.walk(curr->body);
- for (auto* bad : walker.dupes) {
- info.fail("expression seen more than once in the tree", bad, getFunction());
- }
}
static bool checkOffset(Expression* curr, Address add, Address max) {
@@ -890,9 +884,12 @@ static void validateBinaryenIR(Module& wasm, ValidationInfo& info) {
struct BinaryenIRValidator : public PostWalker<BinaryenIRValidator, UnifiedExpressionVisitor<BinaryenIRValidator>> {
ValidationInfo& info;
+ std::unordered_set<Expression*> seen;
+
BinaryenIRValidator(ValidationInfo& info) : info(info) {}
void visitExpression(Expression* curr) {
+ auto scope = getFunction() ? getFunction()->name : Name("(global scope)");
// check if a node type is 'stale', i.e., we forgot to finalize() the node.
auto oldType = curr->type;
ReFinalizeNode().visit(curr);
@@ -907,11 +904,19 @@ static void validateBinaryenIR(Module& wasm, ValidationInfo& info) {
// ok for it to be either i32 or unreachable.
if (!(isConcreteType(oldType) && newType == unreachable)) {
std::ostringstream ss;
- ss << "stale type found in " << (getFunction() ? getFunction()->name : Name("(global scope)")) << " on " << curr << "\n(marked as " << printType(oldType) << ", should be " << printType(newType) << ")\n";
+ ss << "stale type found in " << scope << " on " << curr << "\n(marked as " << printType(oldType) << ", should be " << printType(newType) << ")\n";
info.fail(ss.str(), curr, getFunction());
}
curr->type = oldType;
}
+ // check if a node is a duplicate - expressions must not be seen more than once
+ bool inserted;
+ std::tie(std::ignore, inserted) = seen.insert(curr);
+ if (!inserted) {
+ std::ostringstream ss;
+ ss << "expression seen more than once in the tree in " << scope << " on " << curr << '\n';
+ info.fail(ss.str(), curr, getFunction());
+ }
}
};
BinaryenIRValidator binaryenIRValidator(info);
@@ -952,7 +957,7 @@ static void validateExports(Module& module, ValidationInfo& info) {
}
}
}
- std::set<Name> exportNames;
+ std::unordered_set<Name> exportNames;
for (auto& exp : module.exports) {
Name name = exp->value;
if (exp->kind == ExternalKind::Function) {
diff --git a/test/example/c-api-kitchen-sink.c b/test/example/c-api-kitchen-sink.c
index 8c41ea124..aaf6aa3fe 100644
--- a/test/example/c-api-kitchen-sink.c
+++ b/test/example/c-api-kitchen-sink.c
@@ -569,22 +569,6 @@ void test_nonvalid() {
BinaryenModuleDispose(module);
}
- // validation failure due to duplicate nodes
- {
- BinaryenModuleRef module = BinaryenModuleCreate();
-
- BinaryenFunctionTypeRef v = BinaryenAddFunctionType(module, "i", BinaryenTypeInt32(), NULL, 0);
- BinaryenType localTypes[] = { };
- BinaryenExpressionRef num = makeInt32(module, 1234);
- BinaryenFunctionRef func = BinaryenAddFunction(module, "func", v, NULL, 0,
- BinaryenBinary(module, BinaryenTypeInt32(), num, num) // incorrectly use num twice
- );
-
- BinaryenModulePrint(module);
- printf("validation: %d\n", BinaryenModuleValidate(module));
-
- BinaryenModuleDispose(module);
- }
}
void test_tracing() {
diff --git a/test/example/c-api-kitchen-sink.txt b/test/example/c-api-kitchen-sink.txt
index 15a0ebe37..5d6b270bf 100644
--- a/test/example/c-api-kitchen-sink.txt
+++ b/test/example/c-api-kitchen-sink.txt
@@ -1070,16 +1070,6 @@ module loaded from binary form:
)
)
validation: 0
-(module
- (type $i (func (result i32)))
- (func $func (; 0 ;) (type $i) (result i32)
- (i32.sub
- (i32.const 1234)
- (i32.const 1234)
- )
- )
-)
-validation: 0
// beginning a Binaryen API trace
#include <math.h>
#include <map>