summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Shravan Narayan <shravanrn@gmail.com> 2024-06-26 11:30:44 -0500
committer GitHub <noreply@github.com> 2024-06-26 09:30:44 -0700
commit 0e871afa4aaac9fe0b1f00cb42a59be666657a06 (patch)
tree 22c449953033d0ea98200d9117c11419054a762e /src
parent f820d171654de2dcb8cbf7078b4c98336c8e3c69 (diff)
download wabt-0e871afa4aaac9fe0b1f00cb42a59be666657a06.tar.gz
wabt-0e871afa4aaac9fe0b1f00cb42a59be666657a06.tar.bz2
wabt-0e871afa4aaac9fe0b1f00cb42a59be666657a06.zip
wasm2c: Segue optimization for modules with a single unshared memory (#2395)
Diffstat (limited to 'src')
-rw-r--r-- src/c-writer.cc 64
-rw-r--r-- src/prebuilt/wasm2c_source_declarations.cc 121
-rw-r--r-- src/template/wasm2c.declarations.c 80
3 files changed, 233 insertions, 32 deletions
diff --git a/src/c-writer.cc b/src/c-writer.cc
index cd0ee1fd..1a091957 100644
--- a/src/c-writer.cc
+++ b/src/c-writer.cc
@@ -308,6 +308,7 @@ class CWriter {
void Indent(int size = INDENT_SIZE);
void Dedent(int size = INDENT_SIZE);
+ void NonIndented(std::function<void()> func);
void WriteIndent();
void WriteData(const char* src, size_t size);
void Writef(const char* format, ...);
@@ -402,6 +403,9 @@ class CWriter {
void WriteElemInitializerDecls();
void WriteElemInitializers();
void WriteElemTableInit(bool, const ElemSegment*, const Table*);
+ bool IsSingleUnsharedMemory();
+ void InstallSegueBase(Memory* memory, bool save_old_value);
+ void RestoreSegueBase();
void WriteExports(CWriterPhase);
void WriteTailCallExports(CWriterPhase);
void WriteInitDecl();
@@ -1021,6 +1025,13 @@ void CWriter::Dedent(int size) {
assert(indent_ >= 0);
}
+void CWriter::NonIndented(std::function<void()> func) {
+  const int saved_indent = indent_;  // remember the indentation in effect
+  indent_ = 0;                       // run func with the indent level zeroed
+  func();
+  indent_ = saved_indent;            // put the previous indentation back
+}
+
void CWriter::WriteIndent() {
static char s_indent[] =
" "
@@ -1479,6 +1490,11 @@ std::string CWriter::GenerateHeaderGuard() const {
void CWriter::WriteSourceTop() {
Write(s_source_includes);
Write(Newline(), "#include \"", header_name_, "\"", Newline());
+
+ if (IsSingleUnsharedMemory()) {
+ Write("#define IS_SINGLE_UNSHARED_MEMORY 1", Newline());
+ }
+
Write(s_source_declarations, Newline());
if (module_->features_used.simd) {
@@ -2425,6 +2441,28 @@ void CWriter::WriteElemTableInit(bool active_initialization,
Write(");", Newline());
}
+bool CWriter::IsSingleUnsharedMemory() {
+  const auto& mems = module_->memories;  // true only for one memory, not shared
+  return mems.size() == 1 && !mems[0]->page_limits.is_shared;
+}
+
+void CWriter::InstallSegueBase(Memory* memory, bool save_old_value) {
+  // Emits, under "#if WASM_RT_USE_SEGUE", a base write of memory's .data.
+  NonIndented([this] { Write("#if WASM_RT_USE_SEGUE", Newline()); });
+  if (save_old_value) {  // also emit a local holding the base read beforehand
+    Write("uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE();", Newline());
+  }
+  Write("WASM_RT_SEGUE_WRITE_BASE(",
+        ExternalInstanceRef(ModuleFieldType::Memory, memory->name), ".data);", Newline());
+  NonIndented([this] { Write("#endif", Newline()); });
+}
+
+void CWriter::RestoreSegueBase() {
+  NonIndented([this] { Write("#if WASM_RT_USE_SEGUE", Newline()); });
+  Write("WASM_RT_SEGUE_WRITE_BASE(segue_saved_base);", Newline());  // emit a base write from segue_saved_base
+  NonIndented([this] { Write("#endif", Newline()); });
+}
+
void CWriter::WriteExports(CWriterPhase kind) {
if (module_->exports.empty())
return;
@@ -2500,8 +2538,14 @@ void CWriter::WriteExports(CWriterPhase kind) {
switch (export_->kind) {
case ExternalKind::Func: {
Write(OpenBrace());
- if (func_->GetNumResults() > 0) {
- Write("return ");
+ if (IsSingleUnsharedMemory()) {
+ InstallSegueBase(module_->memories[0], true /* save_old_value */);
+ }
+ auto num_results = func_->GetNumResults();
+ if (num_results > 1) {
+ Write(func_->decl.sig.result_types, " ret = ");
+ } else if (num_results == 1) {
+ Write(func_->GetResultType(0), " ret = ");
}
Write(ExternalRef(ModuleFieldType::Func, internal_name), "(");
@@ -2513,6 +2557,12 @@ void CWriter::WriteExports(CWriterPhase kind) {
Write("instance");
}
WriteParamSymbols(index_to_name);
+ if (IsSingleUnsharedMemory()) {
+ RestoreSegueBase();
+ }
+ if (num_results > 0) {
+ Write("return ret;", Newline());
+ }
Write(CloseBrace(), Newline());
local_sym_map_.clear();
@@ -2611,6 +2661,9 @@ void CWriter::WriteInit() {
}
if (!module_->memories.empty()) {
Write("init_memories(instance);", Newline());
+ if (IsSingleUnsharedMemory()) {
+ InstallSegueBase(module_->memories[0], true /* save_old_value */);
+ }
}
if (!module_->tables.empty() && !module_->elem_segments.empty()) {
Write("init_elem_instances(instance);", Newline());
@@ -2631,6 +2684,10 @@ void CWriter::WriteInit() {
}
Write(Newline());
}
+
+ if (IsSingleUnsharedMemory()) {
+ RestoreSegueBase();
+ }
Write(CloseBrace(), Newline());
}
@@ -3733,6 +3790,9 @@ void CWriter::Write(const ExprList& exprs) {
Write(StackVar(0), " = ", func, "(",
ExternalInstancePtr(ModuleFieldType::Memory, memory->name), ", ",
StackVar(0), ");", Newline());
+ if (IsSingleUnsharedMemory()) {
+ InstallSegueBase(module_->memories[0], false /* save_old_value */);
+ }
break;
}
diff --git a/src/prebuilt/wasm2c_source_declarations.cc b/src/prebuilt/wasm2c_source_declarations.cc
index ac0629e4..11aac524 100644
--- a/src/prebuilt/wasm2c_source_declarations.cc
+++ b/src/prebuilt/wasm2c_source_declarations.cc
@@ -40,6 +40,70 @@ R"w2c_template(#define MEM_ADDR(mem, addr, n) &(mem)->data[addr]
R"w2c_template(#endif
)w2c_template"
R"w2c_template(
+#ifndef WASM_RT_USE_SEGUE
+)w2c_template"
+R"w2c_template(// Memory functions can use the segue optimization if allowed. The segue
+)w2c_template"
+R"w2c_template(// optimization uses x86 segments to point to a linear memory. We use this
+)w2c_template"
+R"w2c_template(// optimization when:
+)w2c_template"
+R"w2c_template(//
+)w2c_template"
+R"w2c_template(// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE
+)w2c_template"
+R"w2c_template(// (2) on x86_64 without WABT_BIG_ENDIAN enabled
+)w2c_template"
+R"w2c_template(// (3) the Wasm module uses a single unshared imported or exported memory
+)w2c_template"
+R"w2c_template(// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces"
+)w2c_template"
+R"w2c_template(// for accessing pointers, and supports memcpy on pointers with custom
+)w2c_template"
+R"w2c_template(// "address namespaces". GCC does not support the memcpy requirement, so
+)w2c_template"
+R"w2c_template(// this leaves only clang for now.
+)w2c_template"
+R"w2c_template(// (5) The OS doesn't replace the segment register on context switch which
+)w2c_template"
+R"w2c_template(// eliminates windows for now
+)w2c_template"
+R"w2c_template(#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \
+)w2c_template"
+R"w2c_template( (defined(__x86_64__) || defined(_M_X64)) && IS_SINGLE_UNSHARED_MEMORY && \
+)w2c_template"
+R"w2c_template( __clang__ && __has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32)
+)w2c_template"
+R"w2c_template(#define WASM_RT_USE_SEGUE 1
+)w2c_template"
+R"w2c_template(#else
+)w2c_template"
+R"w2c_template(#define WASM_RT_USE_SEGUE 0
+)w2c_template"
+R"w2c_template(#endif
+)w2c_template"
+R"w2c_template(#endif
+)w2c_template"
+R"w2c_template(
+#if WASM_RT_USE_SEGUE
+)w2c_template"
+R"w2c_template(// POSIX uses FS for TLS, GS is free
+)w2c_template"
+R"w2c_template(#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64()
+)w2c_template"
+R"w2c_template(#define WASM_RT_SEGUE_WRITE_BASE(base) \
+)w2c_template"
+R"w2c_template( __builtin_ia32_wrgsbase64((uintptr_t)base)
+)w2c_template"
+R"w2c_template(#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr)
+)w2c_template"
+R"w2c_template(#else
+)w2c_template"
+R"w2c_template(#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n)
+)w2c_template"
+R"w2c_template(#endif
+)w2c_template"
+R"w2c_template(
#define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0)
)w2c_template"
R"w2c_template(
@@ -124,13 +188,38 @@ R"w2c_template( TRAP(OOB);
R"w2c_template(#endif
)w2c_template"
R"w2c_template(
+#if WASM_RT_USE_SEGUE && WASM_RT_SANITY_CHECKS
+)w2c_template"
+R"w2c_template(#include <stdio.h>
+)w2c_template"
+R"w2c_template(#define WASM_RT_CHECK_BASE(mem) \
+)w2c_template"
+R"w2c_template( if (((uintptr_t)((mem)->data)) != ((uintptr_t)WASM_RT_SEGUE_READ_BASE())) { \
+)w2c_template"
+R"w2c_template( puts("Segment register mismatch\n"); \
+)w2c_template"
+R"w2c_template( abort(); \
+)w2c_template"
+R"w2c_template( }
+)w2c_template"
+R"w2c_template(#else
+)w2c_template"
+R"w2c_template(#define WASM_RT_CHECK_BASE(mem)
+)w2c_template"
+R"w2c_template(#endif
+)w2c_template"
+R"w2c_template(
#if WASM_RT_MEMCHECK_GUARD_PAGES
)w2c_template"
-R"w2c_template(#define MEMCHECK(mem, a, t)
+R"w2c_template(#define MEMCHECK(mem, a, t) WASM_RT_CHECK_BASE(mem);
)w2c_template"
R"w2c_template(#else
)w2c_template"
-R"w2c_template(#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t))
+R"w2c_template(#define MEMCHECK(mem, a, t) \
+)w2c_template"
+R"w2c_template( WASM_RT_CHECK_BASE(mem); \
+)w2c_template"
+R"w2c_template( RANGE_CHECK(mem, a, sizeof(t))
)w2c_template"
R"w2c_template(#endif
)w2c_template"
@@ -204,32 +293,36 @@ R"w2c_template( load_data(MEM_ADDR(&m, o, s), i, s); \
R"w2c_template( } while (0)
)w2c_template"
R"w2c_template(
-#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
+#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
+)w2c_template"
+R"w2c_template( static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
)w2c_template"
-R"w2c_template( static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
+R"w2c_template( MEMCHECK(mem, addr, t1); \
)w2c_template"
-R"w2c_template( MEMCHECK(mem, addr, t1); \
+R"w2c_template( t1 result; \
)w2c_template"
-R"w2c_template( t1 result; \
+R"w2c_template( wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \
)w2c_template"
-R"w2c_template( wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \
+R"w2c_template( sizeof(t1)); \
)w2c_template"
-R"w2c_template( force_read(result); \
+R"w2c_template( force_read(result); \
)w2c_template"
-R"w2c_template( return (t3)(t2)result; \
+R"w2c_template( return (t3)(t2)result; \
)w2c_template"
R"w2c_template( }
)w2c_template"
R"w2c_template(
-#define DEFINE_STORE(name, t1, t2) \
+#define DEFINE_STORE(name, t1, t2) \
+)w2c_template"
+R"w2c_template( static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
)w2c_template"
-R"w2c_template( static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
+R"w2c_template( MEMCHECK(mem, addr, t1); \
)w2c_template"
-R"w2c_template( MEMCHECK(mem, addr, t1); \
+R"w2c_template( t1 wrapped = (t1)value; \
)w2c_template"
-R"w2c_template( t1 wrapped = (t1)value; \
+R"w2c_template( wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \
)w2c_template"
-R"w2c_template( wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \
+R"w2c_template( sizeof(t1)); \
)w2c_template"
R"w2c_template( }
)w2c_template"
diff --git a/src/template/wasm2c.declarations.c b/src/template/wasm2c.declarations.c
index 6399affe..5261a25b 100644
--- a/src/template/wasm2c.declarations.c
+++ b/src/template/wasm2c.declarations.c
@@ -20,6 +20,39 @@
#define MEM_ADDR(mem, addr, n) &(mem)->data[addr]
#endif
+#ifndef WASM_RT_USE_SEGUE
+// Memory functions can use the segue optimization if allowed. The segue
+// optimization uses x86 segments to point to a linear memory. We use this
+// optimization when:
+//
+// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE
+// (2) on x86_64 without WABT_BIG_ENDIAN enabled
+// (3) the Wasm module uses a single unshared imported or exported memory
+// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces"
+// for accessing pointers, and supports memcpy on pointers with custom
+// "address namespaces". GCC does not support the memcpy requirement, so
+// this leaves only clang for now.
+// (5) The OS doesn't replace the segment register on context switch which
+// eliminates windows for now
+#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \
+ (defined(__x86_64__) || defined(_M_X64)) && IS_SINGLE_UNSHARED_MEMORY && \
+ __clang__ && __has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32)
+#define WASM_RT_USE_SEGUE 1
+#else
+#define WASM_RT_USE_SEGUE 0
+#endif
+#endif
+
+#if WASM_RT_USE_SEGUE
+// POSIX uses FS for TLS, GS is free
+#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64()
+#define WASM_RT_SEGUE_WRITE_BASE(base) \
+ __builtin_ia32_wrgsbase64((uintptr_t)base)
+#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr)
+#else
+#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n)
+#endif
+
#define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0)
#if WASM_RT_STACK_DEPTH_COUNT
@@ -67,10 +100,23 @@ static inline bool func_types_eq(const wasm_rt_func_type_t a,
TRAP(OOB);
#endif
+#if WASM_RT_USE_SEGUE && WASM_RT_SANITY_CHECKS
+#include <stdio.h>
+#define WASM_RT_CHECK_BASE(mem) \
+ if (((uintptr_t)((mem)->data)) != ((uintptr_t)WASM_RT_SEGUE_READ_BASE())) { \
+ puts("Segment register mismatch\n"); \
+ abort(); \
+ }
+#else
+#define WASM_RT_CHECK_BASE(mem)
+#endif
+
#if WASM_RT_MEMCHECK_GUARD_PAGES
-#define MEMCHECK(mem, a, t)
+#define MEMCHECK(mem, a, t) WASM_RT_CHECK_BASE(mem);
#else
-#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t))
+#define MEMCHECK(mem, a, t) \
+ WASM_RT_CHECK_BASE(mem); \
+ RANGE_CHECK(mem, a, sizeof(t))
#endif
#ifdef __GNUC__
@@ -109,20 +155,22 @@ static inline void load_data(void* dest, const void* src, size_t n) {
load_data(MEM_ADDR(&m, o, s), i, s); \
} while (0)
-#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
- static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
- MEMCHECK(mem, addr, t1); \
- t1 result; \
- wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \
- force_read(result); \
- return (t3)(t2)result; \
- }
-
-#define DEFINE_STORE(name, t1, t2) \
- static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
- MEMCHECK(mem, addr, t1); \
- t1 wrapped = (t1)value; \
- wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \
+#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
+ static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
+ MEMCHECK(mem, addr, t1); \
+ t1 result; \
+ wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \
+ sizeof(t1)); \
+ force_read(result); \
+ return (t3)(t2)result; \
+ }
+
+#define DEFINE_STORE(name, t1, t2) \
+ static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
+ MEMCHECK(mem, addr, t1); \
+ t1 wrapped = (t1)value; \
+ wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \
+ sizeof(t1)); \
}
DEFINE_LOAD(i32_load, u32, u32, u32, FORCE_READ_INT)