author     Alon Zakai <alonzakai@gmail.com>   2016-04-24 09:09:04 -0700
committer  Alon Zakai <alonzakai@gmail.com>   2016-04-24 09:09:04 -0700
commit     2cc2b8288f0179b8e506d420dd27fada5652e0c3 (patch)
tree       3a80059fae25fe88bb784e3737ac6114198a9c24 /src/mixed_arena.h
parent     e9349f082af7d3057aa475a76a58cba7adac2b21 (diff)
allow allocations on side threads (#365)
Diffstat (limited to 'src/mixed_arena.h')
-rw-r--r--  src/mixed_arena.h  74
1 file changed, 67 insertions(+), 7 deletions(-)
diff --git a/src/mixed_arena.h b/src/mixed_arena.h
index d03612df5..89c47485c 100644
--- a/src/mixed_arena.h
+++ b/src/mixed_arena.h
@@ -17,24 +17,82 @@
 #ifndef wasm_mixed_arena_h
 #define wasm_mixed_arena_h
 
+#include <atomic>
 #include <cassert>
+#include <memory>
+#include <mutex>
+#include <thread>
 #include <vector>
 
-#include "support/threads.h"
-
 //
 // Arena allocation for mixed-type data.
 //
+// Arena-style bump allocation is important for two reasons: First, so that
+// allocation is quick, and second, so that allocated items are close together,
+// which is cache-friendy. Arena allocation is also useful for a minor third
+// reason which is to make freeing all the items in an arena very quick.
+//
+// Each WebAssembly Module has an arena allocator, which should be used
+// for all of its AST nodes and so forth. When the Module is destroyed, the
+// entire arena is cleaned up.
+//
+// When allocating an object in an arena, the object's proper constructor
+// is called. Note that destructors are not called, because to make the
+// arena simple and fast we do not track internal allocations inside it
+// (and we can also avoid the need for virtual destructors).
+//
+// In general, optimization passes avoid allocation as much as possible.
+// Many passes only remove or modify nodes anyhow, others can often
+// reuse nodes that are being optimized out. This keeps things
+// cache-friendly, and also makes the operations trivially thread-safe.
+// In the rare case that a pass does need to allocate, and it is a
+// parallel pass (so multiple threads might access the allocator),
+// the MixedArena instance will notice if it is on a different thread
+// than that arena's original thread, and will perform the allocation
+// in a side arena for that other thread. This is done in a transparent
+// way to the outside; as a result, it is always safe to allocate using
+// a MixedArena, no matter which thread you are on. Allocations will
+// of course be fastest on the original thread for the arena.
+//
 
 struct MixedArena {
+  // fast bump allocation
   std::vector<char*> chunks;
   int index; // in last chunk
 
+  // multithreaded allocation - each arena is valid on a specific thread.
+  // if we are on the wrong thread, we safely look in the linked
+  // list of next, adding an allocator if necessary
+  // TODO: we don't really need locking here, atomics could suffice
+  std::thread::id threadId;
+  std::mutex mutex;
+  std::atomic<MixedArena*> next;
+
+  MixedArena() {
+    threadId = std::this_thread::get_id();
+    next.store(nullptr);
+  }
+
   template<class T>
   T* alloc() {
-    // this structure should not be modified by multiple threads at once.
-    assert(!wasm::ThreadPool::isRunning());
-
+    // the bump allocator data should not be modified by multiple threads at once.
+    if (std::this_thread::get_id() != threadId) {
+      // use a double-checked locking pattern.
+      MixedArena* temp = next.load(std::memory_order_relaxed);
+      std::atomic_thread_fence(std::memory_order_acquire);
+      if (!temp) {
+        // prepare to carefully add a new arena, with our thread id, as the next
+        std::lock_guard<std::mutex> lock(mutex);
+        // note that we may have waited on the lock, while someone was creating next
+        temp = next.load(std::memory_order_relaxed);
+        if (!temp) {
+          temp = new MixedArena();
+          std::atomic_thread_fence(std::memory_order_release);
+          next.store(temp, std::memory_order_relaxed);
+        }
+      }
+      return temp->alloc<T>();
+    }
     const size_t CHUNK = 10000;
     size_t currSize = (sizeof(T) + 7) & (-8); // same alignment as malloc TODO optimize?
     assert(currSize < CHUNK);
@@ -49,8 +107,10 @@ struct MixedArena {
   }
 
   void clear() {
-    assert(!wasm::ThreadPool::isRunning());
-
+    std::lock_guard<std::mutex> lock(mutex);
+    if (next.load()) {
+      next.load()->clear();
+    }
     for (char* chunk : chunks) {
       delete[] chunk;
     }
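As a reader aid, here is a minimal usage sketch; it is not part of the commit. It assumes the patched src/mixed_arena.h is on the include path, and the Item struct and the worker thread are hypothetical, chosen only to show the same-thread fast path and the transparent side-arena path that this patch adds.

```cpp
// Usage sketch (not from the commit): "Item" is a hypothetical payload type,
// and we assume the patched mixed_arena.h is on the include path.
#include <thread>

#include "mixed_arena.h"

struct Item {
  int value = 0;
};

int main() {
  MixedArena arena; // records this thread as the arena's original thread

  // Fast path: same thread as the arena, plain bump allocation.
  Item* a = arena.alloc<Item>();
  a->value = 1;

  // Wrong-thread path: alloc() notices the thread-id mismatch and forwards to
  // a lazily created side arena for the worker thread, so the caller needs no
  // external locking.
  std::thread worker([&]() {
    Item* b = arena.alloc<Item>();
    b->value = 2;
  });
  worker.join();

  // clear() frees every chunk at once, and after this patch also clears any
  // side arenas reached through next; Item destructors are never run.
  arena.clear();
  return 0;
}
```

The double-checked locking in alloc() keeps the cross-thread path cheap once the side arena exists: a relaxed load plus an acquire fence is all it costs, and the mutex is only taken the first time another thread allocates, or when two such threads race to create the side arena.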