diff options
author | Thomas Lively <7121787+tlively@users.noreply.github.com> | 2020-04-13 14:02:58 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-04-13 14:02:58 -0700 |
commit | c16bfeebb5879e9512f2bbf7d611b3b1e0be7dee (patch) | |
tree | b31308a321e869d2bf64ddbffc0a59e91df80d62 /src | |
parent | c45ae16497ea76ad24982813cace1927565b0d45 (diff) | |
download | binaryen-c16bfeebb5879e9512f2bbf7d611b3b1e0be7dee.tar.gz binaryen-c16bfeebb5879e9512f2bbf7d611b3b1e0be7dee.tar.bz2 binaryen-c16bfeebb5879e9512f2bbf7d611b3b1e0be7dee.zip |
Use direct pointers as Type IDs (#2745)
Instead of using indices into the global interned type table. This
means that a lock is *never* needed to access an expanded Type. The
Type lock is now only acquired when a complex Type is created. On a
real-world wasm2js workload this improves wall clock time by 23% on my
machine with 72 cores and makes traffic on the Type lock entirely
insignificant.
**Before**
72 cores
real 0m6.914s
user 184.014s
sys 0m3.995s
1 core
real 0m25.903s
user 0m25.658s
sys 0m0.253s
**After**
72 cores
real 5.349s
user 70.309s
sys 9.691s
1 core
real 25.859s
user 25.615s
sys 0.253s
Diffstat (limited to 'src')
-rw-r--r-- | src/binaryen-c.cpp | 115 | ||||
-rw-r--r-- | src/binaryen-c.h | 2 | ||||
-rw-r--r-- | src/passes/LocalCSE.cpp | 2 | ||||
-rw-r--r-- | src/wasm-type.h | 28 | ||||
-rw-r--r-- | src/wasm/wasm-type.cpp | 123 |
5 files changed, 155 insertions, 115 deletions
diff --git a/src/binaryen-c.cpp b/src/binaryen-c.cpp index 84612d0e4..2ea4bd87f 100644 --- a/src/binaryen-c.cpp +++ b/src/binaryen-c.cpp @@ -128,6 +128,7 @@ void traceNameOrNULL(const char* name, std::ostream& out = std::cout) { } } +std::map<BinaryenType, size_t> types; std::map<BinaryenExpressionRef, size_t> expressions; std::map<BinaryenFunctionRef, size_t> functions; std::map<BinaryenGlobalRef, size_t> globals; @@ -135,6 +136,71 @@ std::map<BinaryenEventRef, size_t> events; std::map<BinaryenExportRef, size_t> exports; std::map<RelooperBlockRef, size_t> relooperBlocks; +static bool isBasicAPIType(BinaryenType type) { + return type == BinaryenTypeAuto() || type <= Type::_last_value_type; +} + +static const char* basicAPITypeFunction(BinaryenType type) { + if (type == BinaryenTypeAuto()) { + return "BinaryenTypeAuto()"; + } + switch (type) { + case Type::none: + return "BinaryenTypeNone()"; + case Type::i32: + return "BinaryenTypeInt32()"; + case Type::i64: + return "BinaryenTypeInt64()"; + case Type::f32: + return "BinaryenTypeFloat32()"; + case Type::f64: + return "BinaryenTypeFloat64()"; + case Type::v128: + return "BinaryenTypeVec128()"; + case Type::funcref: + return "BinaryenTypeFuncref()"; + case Type::anyref: + return "BinaryenTypeAnyref()"; + case Type::nullref: + return "BinaryenTypeNullref()"; + case Type::exnref: + return "BinaryenTypeExnref()"; + case Type::unreachable: + return "BinaryenTypeUnreachable()"; + default: + WASM_UNREACHABLE("unexpected type"); + } +} + +struct TypeArg { + BinaryenType type; + TypeArg(BinaryenType type) : type(type){}; +}; + +std::ostream& operator<<(std::ostream& os, TypeArg t) { + if (isBasicAPIType(t.type)) { + return os << basicAPITypeFunction(t.type); + } else { + auto it = types.find(t.type); + assert(it != types.end()); + return os << "types[" << it->second << "]"; + } +} + +size_t noteType(BinaryenType type) { + // Basic types can be trivially rematerialized at every use + assert(!isBasicAPIType(type)); + // Unlike expressions, the same type can be created multiple times + auto it = types.find(type); + if (it != types.end()) { + return it->second; + } else { + auto id = types.size(); + types[type] = id; + return id; + } +} + size_t noteExpression(BinaryenExpressionRef expression) { auto id = expressions.size(); assert(expressions.find(expression) == expressions.end()); @@ -153,6 +219,11 @@ void printArg(std::ostream& setup, std::ostream& out, T arg) { } template<> +void printArg(std::ostream& setup, std::ostream& out, BinaryenType arg) { + out << TypeArg(arg); +} + +template<> void printArg(std::ostream& setup, std::ostream& out, BinaryenExpressionRef arg) { @@ -170,15 +241,6 @@ void printArg(std::ostream& setup, std::ostream& out, StringLit arg) { } template<> -void printArg(std::ostream& setup, std::ostream& out, BinaryenType arg) { - if (arg == BinaryenTypeAuto()) { - out << "BinaryenTypeAuto()"; - } else { - out << arg; - } -} - -template<> void printArg(std::ostream& setup, std::ostream& out, BinaryenLiteral arg) { switch (arg.type) { case Type::i32: @@ -308,7 +370,7 @@ BinaryenType BinaryenTypeAnyref(void) { return Type::anyref; } BinaryenType BinaryenTypeNullref(void) { return Type::nullref; } BinaryenType BinaryenTypeExnref(void) { return Type::exnref; } BinaryenType BinaryenTypeUnreachable(void) { return Type::unreachable; } -BinaryenType BinaryenTypeAuto(void) { return uint32_t(-1); } +BinaryenType BinaryenTypeAuto(void) { return uintptr_t(-1); } BinaryenType BinaryenTypeCreate(BinaryenType* types, uint32_t numTypes) { std::vector<Type> typeVec; @@ -318,19 +380,20 @@ BinaryenType BinaryenTypeCreate(BinaryenType* types, uint32_t numTypes) { } Type result(typeVec); - if (tracing) { + if (tracing && !isBasicAPIType(result.getID())) { + auto id = noteType(result.getID()); std::string array = getTemp(); std::cout << " {\n"; std::cout << " BinaryenType " << array << "[] = {"; for (size_t i = 0; i < numTypes; ++i) { - std::cout << uint32_t(types[i]); + std::cout << basicAPITypeFunction(types[i]); if (i < numTypes - 1) { std::cout << ", "; } } std::cout << "};\n"; - std::cout << " BinaryenTypeCreate(" << array << ", " << numTypes - << "); // " << result.getID() << "\n"; + std::cout << " types[" << id << "] = BinaryenTypeCreate(" << array + << ", " << numTypes << ");\n"; std::cout << " }\n"; } @@ -534,12 +597,14 @@ BinaryenModuleRef BinaryenModuleCreate(void) { void BinaryenModuleDispose(BinaryenModuleRef module) { if (tracing) { std::cout << " BinaryenModuleDispose(the_module);\n"; + std::cout << " types.clear();\n"; std::cout << " expressions.clear();\n"; std::cout << " functions.clear();\n"; std::cout << " globals.clear();\n"; std::cout << " events.clear();\n"; std::cout << " exports.clear();\n"; std::cout << " relooperBlocks.clear();\n"; + types.clear(); expressions.clear(); functions.clear(); globals.clear(); @@ -3230,7 +3295,7 @@ BinaryenFunctionRef BinaryenAddFunction(BinaryenModuleRef module, if (i > 0) { std::cout << ", "; } - std::cout << varTypes[i]; + std::cout << TypeArg(varTypes[i]); } if (numVarTypes == 0) { // ensure the array is not empty, otherwise a compiler error on VS @@ -3241,8 +3306,9 @@ BinaryenFunctionRef BinaryenAddFunction(BinaryenModuleRef module, functions[ret] = id; std::cout << " functions[" << id << "] = BinaryenAddFunction(the_module, \"" << name << "\", " - << params << ", " << results << ", varTypes, " << numVarTypes - << ", expressions[" << expressions[body] << "]);\n"; + << TypeArg(params) << ", " << TypeArg(results) << ", varTypes, " + << numVarTypes << ", expressions[" << expressions[body] + << "]);\n"; std::cout << " }\n"; } @@ -3314,7 +3380,7 @@ BinaryenGlobalRef BinaryenAddGlobal(BinaryenModuleRef module, auto id = globals.size(); globals[ret] = id; std::cout << " globals[" << id << "] = BinaryenAddGlobal(the_module, \"" - << name << "\", " << type << ", " << int(mutable_) + << name << "\", " << TypeArg(type) << ", " << int(mutable_) << ", expressions[" << expressions[init] << "]);\n"; } @@ -3352,7 +3418,8 @@ BinaryenEventRef BinaryenAddEvent(BinaryenModuleRef module, BinaryenType results) { if (tracing) { std::cout << " BinaryenAddEvent(the_module, \"" << name << "\", " - << attribute << ", " << params << ", " << results << ");\n"; + << attribute << ", " << TypeArg(params) << ", " + << TypeArg(results) << ");\n"; } auto* wasm = (Module*)module; @@ -3395,7 +3462,8 @@ void BinaryenAddFunctionImport(BinaryenModuleRef module, if (tracing) { std::cout << " BinaryenAddFunctionImport(the_module, \"" << internalName << "\", \"" << externalModuleName << "\", \"" << externalBaseName - << "\", " << params << ", " << results << ");\n"; + << "\", " << TypeArg(params) << ", " << TypeArg(results) + << ");\n"; } ret->name = internalName; @@ -3448,7 +3516,7 @@ void BinaryenAddGlobalImport(BinaryenModuleRef module, if (tracing) { std::cout << " BinaryenAddGlobalImport(the_module, \"" << internalName << "\", \"" << externalModuleName << "\", \"" << externalBaseName - << "\", " << globalType << ", " << mutable_ << ");\n"; + << "\", " << TypeArg(globalType) << ", " << mutable_ << ");\n"; } ret->name = internalName; @@ -3471,8 +3539,8 @@ void BinaryenAddEventImport(BinaryenModuleRef module, if (tracing) { std::cout << " BinaryenAddEventImport(the_module, \"" << internalName << "\", \"" << externalModuleName << "\", \"" << externalBaseName - << "\", " << attribute << ", " << params << ", " << results - << ");\n"; + << "\", " << attribute << ", " << TypeArg(params) << ", " + << TypeArg(results) << ");\n"; } ret->name = internalName; @@ -4895,6 +4963,7 @@ void BinaryenSetAPITracing(int on) { "#include <map>\n" "#include \"binaryen-c.h\"\n" "int main() {\n" + " std::map<size_t, BinaryenType> types;\n" " std::map<size_t, BinaryenExpressionRef> expressions;\n" " std::map<size_t, BinaryenFunctionRef> functions;\n" " std::map<size_t, BinaryenGlobalRef> globals;\n" diff --git a/src/binaryen-c.h b/src/binaryen-c.h index 33faab45d..4387fb638 100644 --- a/src/binaryen-c.h +++ b/src/binaryen-c.h @@ -90,7 +90,7 @@ typedef uint32_t BinaryenIndex; // Core types (call to get the value of each; you can cache them, they // never change) -typedef uint32_t BinaryenType; +typedef uintptr_t BinaryenType; BINARYEN_API BinaryenType BinaryenTypeNone(void); BINARYEN_API BinaryenType BinaryenTypeInt32(void); diff --git a/src/passes/LocalCSE.cpp b/src/passes/LocalCSE.cpp index d6717a1ed..81a82d1c6 100644 --- a/src/passes/LocalCSE.cpp +++ b/src/passes/LocalCSE.cpp @@ -63,7 +63,7 @@ struct LocalCSE : public WalkerPass<LinearExecutionWalker<LocalCSE>> { struct UsableHasher { HashType operator()(const Usable value) const { - return rehash(value.hashed.hash, value.localType.getID()); + return rehash(uint64_t(value.hashed.hash), value.localType.getID()); } }; diff --git a/src/wasm-type.h b/src/wasm-type.h index 05cfeaef5..6e74f0338 100644 --- a/src/wasm-type.h +++ b/src/wasm-type.h @@ -24,11 +24,11 @@ namespace wasm { class Type { - // enough for the limit of 1000 function arguments - static constexpr unsigned SIZE_BITS = 10; - static constexpr unsigned ID_BITS = 32 - SIZE_BITS; - unsigned id : ID_BITS; - unsigned _size : SIZE_BITS; + // The `id` uniquely represents each type, so type equality is just a + // comparison of the ids. For basic types the `id` is just the `ValueType` + // enum value below, and for constructed types the `id` is the address of the + // canonical representation of the type, making lookups cheap for all types. + uintptr_t id; void init(const std::vector<Type>&); public: @@ -44,20 +44,16 @@ public: anyref, nullref, exnref, + _last_value_type = exnref }; -private: - // Not in the enum because we don't want to have to have a case for it - static constexpr uint32_t last_value_type = exnref; - -public: Type() = default; // ValueType can be implicitly upgraded to Type - constexpr Type(ValueType id) : id(id), _size(id == none ? 0 : 1){}; + constexpr Type(ValueType id) : id(id){}; // But converting raw uint32_t is more dangerous, so make it explicit - explicit Type(uint32_t id); + explicit Type(uint64_t id) : id(id){}; // Construct from lists of elementary types Type(std::initializer_list<Type> types); @@ -68,8 +64,10 @@ public: const std::vector<Type>& expand() const; // Predicates - constexpr bool isSingle() const { return id >= i32 && id <= last_value_type; } - constexpr bool isMulti() const { return id > last_value_type; } + constexpr bool isSingle() const { + return id >= i32 && id <= _last_value_type; + } + constexpr bool isMulti() const { return id > _last_value_type; } constexpr bool isConcrete() const { return id >= i32; } constexpr bool isInteger() const { return id == i32 || id == i64; } constexpr bool isFloat() const { return id == f32 || id == f64; } @@ -91,7 +89,7 @@ public: bool hasVector() { return hasPredicate<&Type::isVector>(); } bool hasRef() { return hasPredicate<&Type::isRef>(); } - constexpr uint32_t getID() const { return id; } + constexpr uint64_t getID() const { return id; } constexpr ValueType getSingle() const { assert(!isMulti() && "Unexpected multivalue type"); return static_cast<ValueType>(id); diff --git a/src/wasm/wasm-type.cpp b/src/wasm/wasm-type.cpp index 30bd74e6a..624a42182 100644 --- a/src/wasm/wasm-type.cpp +++ b/src/wasm/wasm-type.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include <array> #include <cassert> #include <shared_mutex> #include <sstream> @@ -27,7 +28,7 @@ template<> class std::hash<std::vector<wasm::Type>> { public: size_t operator()(const std::vector<wasm::Type>& types) const { - uint32_t res = wasm::rehash(0, uint32_t(types.size())); + uint64_t res = wasm::rehash(0, uint32_t(types.size())); for (auto t : types) { res = wasm::rehash(res, t.getID()); } @@ -36,44 +37,39 @@ public: }; size_t std::hash<wasm::Type>::operator()(const wasm::Type& type) const { - return std::hash<uint32_t>{}(type.getID()); + return std::hash<uint64_t>{}(type.getID()); } size_t std::hash<wasm::Signature>:: operator()(const wasm::Signature& sig) const { - return std::hash<uint64_t>{}(uint64_t(sig.params.getID()) << 32 | - uint64_t(sig.results.getID())); + return wasm::rehash(uint64_t(std::hash<uint64_t>{}(sig.params.getID())), + uint64_t(std::hash<uint64_t>{}(sig.results.getID()))); } namespace wasm { namespace { -// TODO: switch to std::shared_mutex in C++17 -std::shared_timed_mutex mutex; - -std::vector<std::unique_ptr<std::vector<Type>>> typeLists = [] { - std::vector<std::unique_ptr<std::vector<Type>>> lists; - - auto add = [&](std::initializer_list<Type> types) { - return lists.push_back(std::make_unique<std::vector<Type>>(types)); - }; - - add({}); - add({Type::unreachable}); - add({Type::i32}); - add({Type::i64}); - add({Type::f32}); - add({Type::f64}); - add({Type::v128}); - add({Type::funcref}); - add({Type::anyref}); - add({Type::nullref}); - add({Type::exnref}); - return lists; -}(); - -std::unordered_map<std::vector<Type>, uint32_t> indices = { +std::mutex mutex; + +std::array<std::vector<Type>, Type::_last_value_type + 1> basicTypes = { + std::vector<Type>{}, + {Type::unreachable}, + {Type::i32}, + {Type::i64}, + {Type::f32}, + {Type::f64}, + {Type::v128}, + {Type::funcref}, + {Type::anyref}, + {Type::nullref}, + {Type::exnref}}; + +// Track unique_ptrs for constructed types to avoid leaks +std::vector<std::unique_ptr<std::vector<Type>>> constructedTypes; + +// Maps from type vectors to the canonical Type ID +std::unordered_map<std::vector<Type>, uintptr_t> indices = { {{}, Type::none}, {{Type::unreachable}, Type::unreachable}, {{Type::i32}, Type::i32}, @@ -96,39 +92,25 @@ void Type::init(const std::vector<Type>& types) { } #endif - if (types.size() >= (1 << SIZE_BITS)) { - WASM_UNREACHABLE("Type too large"); + if (types.size() == 0) { + id = none; + return; } - _size = types.size(); - - auto lookup = [&]() { - auto indexIt = indices.find(types); - if (indexIt != indices.end()) { - id = indexIt->second; - return true; - } else { - return false; - } - }; - - { - // Try to look up previously interned type - std::shared_lock<std::shared_timed_mutex> lock(mutex); - if (lookup()) { - return; - } + if (types.size() == 1) { + *this = types[0]; + return; } - { - // Add a new type if it hasn't been added concurrently - std::lock_guard<std::shared_timed_mutex> lock(mutex); - if (lookup()) { - return; - } - if (typeLists.size() >= (1 << ID_BITS)) { - WASM_UNREACHABLE("Too many types!"); - } - id = typeLists.size(); - typeLists.push_back(std::make_unique<std::vector<Type>>(types)); + + // Add a new type if it hasn't been added concurrently + std::lock_guard<std::mutex> lock(mutex); + auto indexIt = indices.find(types); + if (indexIt != indices.end()) { + id = indexIt->second; + } else { + auto vec = std::make_unique<std::vector<Type>>(types); + id = uintptr_t(vec.get()); + constructedTypes.push_back(std::move(vec)); + assert(id > _last_value_type); indices[types] = id; } } @@ -137,23 +119,14 @@ Type::Type(std::initializer_list<Type> types) { init(types); } Type::Type(const std::vector<Type>& types) { init(types); } -Type::Type(uint32_t _id) { - if (_id <= last_value_type) { - *this = Type(static_cast<ValueType>(_id)); - } else { - id = _id; - // Unknown complex type; look up the size - std::shared_lock<std::shared_timed_mutex> lock(mutex); - _size = typeLists[id]->size(); - } -} - -size_t Type::size() const { return _size; } +size_t Type::size() const { return expand().size(); } const std::vector<Type>& Type::expand() const { - std::shared_lock<std::shared_timed_mutex> lock(mutex); - assert(id < typeLists.size()); - return *typeLists[id].get(); + if (id <= _last_value_type) { + return basicTypes[id]; + } else { + return *(std::vector<Type>*)id; + } } bool Type::operator<(const Type& other) const { |