diff options
author | Shravan Narayan <shravanrn@gmail.com> | 2024-06-26 11:30:44 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-26 09:30:44 -0700 |
commit | 0e871afa4aaac9fe0b1f00cb42a59be666657a06 (patch) | |
tree | 22c449953033d0ea98200d9117c11419054a762e /src | |
parent | f820d171654de2dcb8cbf7078b4c98336c8e3c69 (diff) | |
download | wabt-0e871afa4aaac9fe0b1f00cb42a59be666657a06.tar.gz wabt-0e871afa4aaac9fe0b1f00cb42a59be666657a06.tar.bz2 wabt-0e871afa4aaac9fe0b1f00cb42a59be666657a06.zip |
wasm2c: Segue optimization for modules with a single unshared memory (#2395)
Diffstat (limited to 'src')
-rw-r--r-- | src/c-writer.cc | 64 | ||||
-rw-r--r-- | src/prebuilt/wasm2c_source_declarations.cc | 121 | ||||
-rw-r--r-- | src/template/wasm2c.declarations.c | 80 |
3 files changed, 233 insertions, 32 deletions
diff --git a/src/c-writer.cc b/src/c-writer.cc index cd0ee1fd..1a091957 100644 --- a/src/c-writer.cc +++ b/src/c-writer.cc @@ -308,6 +308,7 @@ class CWriter { void Indent(int size = INDENT_SIZE); void Dedent(int size = INDENT_SIZE); + void NonIndented(std::function<void()> func); void WriteIndent(); void WriteData(const char* src, size_t size); void Writef(const char* format, ...); @@ -402,6 +403,9 @@ class CWriter { void WriteElemInitializerDecls(); void WriteElemInitializers(); void WriteElemTableInit(bool, const ElemSegment*, const Table*); + bool IsSingleUnsharedMemory(); + void InstallSegueBase(Memory* memory, bool save_old_value); + void RestoreSegueBase(); void WriteExports(CWriterPhase); void WriteTailCallExports(CWriterPhase); void WriteInitDecl(); @@ -1021,6 +1025,13 @@ void CWriter::Dedent(int size) { assert(indent_ >= 0); } +void CWriter::NonIndented(std::function<void()> func) { + int copy = indent_; + indent_ = 0; + func(); + indent_ = copy; +} + void CWriter::WriteIndent() { static char s_indent[] = " " @@ -1479,6 +1490,11 @@ std::string CWriter::GenerateHeaderGuard() const { void CWriter::WriteSourceTop() { Write(s_source_includes); Write(Newline(), "#include \"", header_name_, "\"", Newline()); + + if (IsSingleUnsharedMemory()) { + Write("#define IS_SINGLE_UNSHARED_MEMORY 1", Newline()); + } + Write(s_source_declarations, Newline()); if (module_->features_used.simd) { @@ -2425,6 +2441,28 @@ void CWriter::WriteElemTableInit(bool active_initialization, Write(");", Newline()); } +bool CWriter::IsSingleUnsharedMemory() { + return module_->memories.size() == 1 && + !module_->memories[0]->page_limits.is_shared; +} + +void CWriter::InstallSegueBase(Memory* memory, bool save_old_value) { + NonIndented([&] { Write("#if WASM_RT_USE_SEGUE", Newline()); }); + if (save_old_value) { + Write("uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE();", Newline()); + } + auto primary_memory = + ExternalInstanceRef(ModuleFieldType::Memory, memory->name); + Write("WASM_RT_SEGUE_WRITE_BASE(", primary_memory, ".data);", Newline()); + NonIndented([&] { Write("#endif", Newline()); }); +} + +void CWriter::RestoreSegueBase() { + NonIndented([&] { Write("#if WASM_RT_USE_SEGUE", Newline()); }); + Write("WASM_RT_SEGUE_WRITE_BASE(segue_saved_base);", Newline()); + NonIndented([&] { Write("#endif", Newline()); }); +} + void CWriter::WriteExports(CWriterPhase kind) { if (module_->exports.empty()) return; @@ -2500,8 +2538,14 @@ void CWriter::WriteExports(CWriterPhase kind) { switch (export_->kind) { case ExternalKind::Func: { Write(OpenBrace()); - if (func_->GetNumResults() > 0) { - Write("return "); + if (IsSingleUnsharedMemory()) { + InstallSegueBase(module_->memories[0], true /* save_old_value */); + } + auto num_results = func_->GetNumResults(); + if (num_results > 1) { + Write(func_->decl.sig.result_types, " ret = "); + } else if (num_results == 1) { + Write(func_->GetResultType(0), " ret = "); } Write(ExternalRef(ModuleFieldType::Func, internal_name), "("); @@ -2513,6 +2557,12 @@ void CWriter::WriteExports(CWriterPhase kind) { Write("instance"); } WriteParamSymbols(index_to_name); + if (IsSingleUnsharedMemory()) { + RestoreSegueBase(); + } + if (num_results > 0) { + Write("return ret;", Newline()); + } Write(CloseBrace(), Newline()); local_sym_map_.clear(); @@ -2611,6 +2661,9 @@ void CWriter::WriteInit() { } if (!module_->memories.empty()) { Write("init_memories(instance);", Newline()); + if (IsSingleUnsharedMemory()) { + InstallSegueBase(module_->memories[0], true /* save_old_value */); + } } if (!module_->tables.empty() && !module_->elem_segments.empty()) { Write("init_elem_instances(instance);", Newline()); @@ -2631,6 +2684,10 @@ void CWriter::WriteInit() { } Write(Newline()); } + + if (IsSingleUnsharedMemory()) { + RestoreSegueBase(); + } Write(CloseBrace(), Newline()); } @@ -3733,6 +3790,9 @@ void CWriter::Write(const ExprList& exprs) { Write(StackVar(0), " = ", func, "(", ExternalInstancePtr(ModuleFieldType::Memory, memory->name), ", ", StackVar(0), ");", Newline()); + if (IsSingleUnsharedMemory()) { + InstallSegueBase(module_->memories[0], false /* save_old_value */); + } break; } diff --git a/src/prebuilt/wasm2c_source_declarations.cc b/src/prebuilt/wasm2c_source_declarations.cc index ac0629e4..11aac524 100644 --- a/src/prebuilt/wasm2c_source_declarations.cc +++ b/src/prebuilt/wasm2c_source_declarations.cc @@ -40,6 +40,70 @@ R"w2c_template(#define MEM_ADDR(mem, addr, n) &(mem)->data[addr] R"w2c_template(#endif )w2c_template" R"w2c_template( +#ifndef WASM_RT_USE_SEGUE +)w2c_template" +R"w2c_template(// Memory functions can use the segue optimization if allowed. The segue +)w2c_template" +R"w2c_template(// optimization uses x86 segments to point to a linear memory. We use this +)w2c_template" +R"w2c_template(// optimization when: +)w2c_template" +R"w2c_template(// +)w2c_template" +R"w2c_template(// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE +)w2c_template" +R"w2c_template(// (2) on x86_64 without WABT_BIG_ENDIAN enabled +)w2c_template" +R"w2c_template(// (3) the Wasm module uses a single unshared imported or exported memory +)w2c_template" +R"w2c_template(// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces" +)w2c_template" +R"w2c_template(// for accessing pointers, and supports memcpy on pointers with custom +)w2c_template" +R"w2c_template(// "address namespaces". GCC does not support the memcpy requirement, so +)w2c_template" +R"w2c_template(// this leaves only clang for now. +)w2c_template" +R"w2c_template(// (5) The OS doesn't replace the segment register on context switch which +)w2c_template" +R"w2c_template(// eliminates windows for now +)w2c_template" +R"w2c_template(#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \ +)w2c_template" +R"w2c_template( (defined(__x86_64__) || defined(_M_X64)) && IS_SINGLE_UNSHARED_MEMORY && \ +)w2c_template" +R"w2c_template( __clang__ && __has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32) +)w2c_template" +R"w2c_template(#define WASM_RT_USE_SEGUE 1 +)w2c_template" +R"w2c_template(#else +)w2c_template" +R"w2c_template(#define WASM_RT_USE_SEGUE 0 +)w2c_template" +R"w2c_template(#endif +)w2c_template" +R"w2c_template(#endif +)w2c_template" +R"w2c_template( +#if WASM_RT_USE_SEGUE +)w2c_template" +R"w2c_template(// POSIX uses FS for TLS, GS is free +)w2c_template" +R"w2c_template(#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64() +)w2c_template" +R"w2c_template(#define WASM_RT_SEGUE_WRITE_BASE(base) \ +)w2c_template" +R"w2c_template( __builtin_ia32_wrgsbase64((uintptr_t)base) +)w2c_template" +R"w2c_template(#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr) +)w2c_template" +R"w2c_template(#else +)w2c_template" +R"w2c_template(#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n) +)w2c_template" +R"w2c_template(#endif +)w2c_template" +R"w2c_template( #define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0) )w2c_template" R"w2c_template( @@ -124,13 +188,38 @@ R"w2c_template( TRAP(OOB); R"w2c_template(#endif )w2c_template" R"w2c_template( +#if WASM_RT_USE_SEGUE && WASM_RT_SANITY_CHECKS +)w2c_template" +R"w2c_template(#include <stdio.h> +)w2c_template" +R"w2c_template(#define WASM_RT_CHECK_BASE(mem) \ +)w2c_template" +R"w2c_template( if (((uintptr_t)((mem)->data)) != ((uintptr_t)WASM_RT_SEGUE_READ_BASE())) { \ +)w2c_template" +R"w2c_template( puts("Segment register mismatch\n"); \ +)w2c_template" +R"w2c_template( abort(); \ +)w2c_template" +R"w2c_template( } +)w2c_template" +R"w2c_template(#else +)w2c_template" +R"w2c_template(#define WASM_RT_CHECK_BASE(mem) +)w2c_template" +R"w2c_template(#endif +)w2c_template" +R"w2c_template( #if WASM_RT_MEMCHECK_GUARD_PAGES )w2c_template" -R"w2c_template(#define MEMCHECK(mem, a, t) +R"w2c_template(#define MEMCHECK(mem, a, t) WASM_RT_CHECK_BASE(mem); )w2c_template" R"w2c_template(#else )w2c_template" -R"w2c_template(#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t)) +R"w2c_template(#define MEMCHECK(mem, a, t) \ +)w2c_template" +R"w2c_template( WASM_RT_CHECK_BASE(mem); \ +)w2c_template" +R"w2c_template( RANGE_CHECK(mem, a, sizeof(t)) )w2c_template" R"w2c_template(#endif )w2c_template" @@ -204,32 +293,36 @@ R"w2c_template( load_data(MEM_ADDR(&m, o, s), i, s); \ R"w2c_template( } while (0) )w2c_template" R"w2c_template( -#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ +#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ +)w2c_template" +R"w2c_template( static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ )w2c_template" -R"w2c_template( static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ +R"w2c_template( MEMCHECK(mem, addr, t1); \ )w2c_template" -R"w2c_template( MEMCHECK(mem, addr, t1); \ +R"w2c_template( t1 result; \ )w2c_template" -R"w2c_template( t1 result; \ +R"w2c_template( wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \ )w2c_template" -R"w2c_template( wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \ +R"w2c_template( sizeof(t1)); \ )w2c_template" -R"w2c_template( force_read(result); \ +R"w2c_template( force_read(result); \ )w2c_template" -R"w2c_template( return (t3)(t2)result; \ +R"w2c_template( return (t3)(t2)result; \ )w2c_template" R"w2c_template( } )w2c_template" R"w2c_template( -#define DEFINE_STORE(name, t1, t2) \ +#define DEFINE_STORE(name, t1, t2) \ +)w2c_template" +R"w2c_template( static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ )w2c_template" -R"w2c_template( static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ +R"w2c_template( MEMCHECK(mem, addr, t1); \ )w2c_template" -R"w2c_template( MEMCHECK(mem, addr, t1); \ +R"w2c_template( t1 wrapped = (t1)value; \ )w2c_template" -R"w2c_template( t1 wrapped = (t1)value; \ +R"w2c_template( wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \ )w2c_template" -R"w2c_template( wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \ +R"w2c_template( sizeof(t1)); \ )w2c_template" R"w2c_template( } )w2c_template" diff --git a/src/template/wasm2c.declarations.c b/src/template/wasm2c.declarations.c index 6399affe..5261a25b 100644 --- a/src/template/wasm2c.declarations.c +++ b/src/template/wasm2c.declarations.c @@ -20,6 +20,39 @@ #define MEM_ADDR(mem, addr, n) &(mem)->data[addr] #endif +#ifndef WASM_RT_USE_SEGUE +// Memory functions can use the segue optimization if allowed. The segue +// optimization uses x86 segments to point to a linear memory. We use this +// optimization when: +// +// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE +// (2) on x86_64 without WABT_BIG_ENDIAN enabled +// (3) the Wasm module uses a single unshared imported or exported memory +// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces" +// for accessing pointers, and supports memcpy on pointers with custom +// "address namespaces". GCC does not support the memcpy requirement, so +// this leaves only clang for now. +// (5) The OS doesn't replace the segment register on context switch which +// eliminates windows for now +#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \ + (defined(__x86_64__) || defined(_M_X64)) && IS_SINGLE_UNSHARED_MEMORY && \ + __clang__ && __has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32) +#define WASM_RT_USE_SEGUE 1 +#else +#define WASM_RT_USE_SEGUE 0 +#endif +#endif + +#if WASM_RT_USE_SEGUE +// POSIX uses FS for TLS, GS is free +#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64() +#define WASM_RT_SEGUE_WRITE_BASE(base) \ + __builtin_ia32_wrgsbase64((uintptr_t)base) +#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr) +#else +#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n) +#endif + #define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0) #if WASM_RT_STACK_DEPTH_COUNT @@ -67,10 +100,23 @@ static inline bool func_types_eq(const wasm_rt_func_type_t a, TRAP(OOB); #endif +#if WASM_RT_USE_SEGUE && WASM_RT_SANITY_CHECKS +#include <stdio.h> +#define WASM_RT_CHECK_BASE(mem) \ + if (((uintptr_t)((mem)->data)) != ((uintptr_t)WASM_RT_SEGUE_READ_BASE())) { \ + puts("Segment register mismatch\n"); \ + abort(); \ + } +#else +#define WASM_RT_CHECK_BASE(mem) +#endif + #if WASM_RT_MEMCHECK_GUARD_PAGES -#define MEMCHECK(mem, a, t) +#define MEMCHECK(mem, a, t) WASM_RT_CHECK_BASE(mem); #else -#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t)) +#define MEMCHECK(mem, a, t) \ + WASM_RT_CHECK_BASE(mem); \ + RANGE_CHECK(mem, a, sizeof(t)) #endif #ifdef __GNUC__ @@ -109,20 +155,22 @@ static inline void load_data(void* dest, const void* src, size_t n) { load_data(MEM_ADDR(&m, o, s), i, s); \ } while (0) -#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ - static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ - MEMCHECK(mem, addr, t1); \ - t1 result; \ - wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \ - force_read(result); \ - return (t3)(t2)result; \ - } - -#define DEFINE_STORE(name, t1, t2) \ - static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ - MEMCHECK(mem, addr, t1); \ - t1 wrapped = (t1)value; \ - wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \ +#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ + static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ + MEMCHECK(mem, addr, t1); \ + t1 result; \ + wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \ + sizeof(t1)); \ + force_read(result); \ + return (t3)(t2)result; \ + } + +#define DEFINE_STORE(name, t1, t2) \ + static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ + MEMCHECK(mem, addr, t1); \ + t1 wrapped = (t1)value; \ + wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \ + sizeof(t1)); \ } DEFINE_LOAD(i32_load, u32, u32, u32, FORCE_READ_INT) |