summary refs log tree commit diff
diff options
context:
space:
mode:
author Shravan Narayan <shravanrn@gmail.com> 2024-06-26 11:30:44 -0500
committer GitHub <noreply@github.com> 2024-06-26 09:30:44 -0700
commit 0e871afa4aaac9fe0b1f00cb42a59be666657a06 (patch)
tree 22c449953033d0ea98200d9117c11419054a762e
parent f820d171654de2dcb8cbf7078b4c98336c8e3c69 (diff)
download wabt-0e871afa4aaac9fe0b1f00cb42a59be666657a06.tar.gz
wabt-0e871afa4aaac9fe0b1f00cb42a59be666657a06.tar.bz2
wabt-0e871afa4aaac9fe0b1f00cb42a59be666657a06.zip
wasm2c: Segue optimization for modules with a single unshared memory (#2395)
-rw-r--r-- .github/workflows/build.yml 3
-rw-r--r-- src/c-writer.cc 64
-rw-r--r-- src/prebuilt/wasm2c_source_declarations.cc 121
-rw-r--r-- src/template/wasm2c.declarations.c 80
-rw-r--r-- test/wasm2c/add.txt 79
-rw-r--r-- test/wasm2c/check-imports.txt 84
-rw-r--r-- test/wasm2c/export-names.txt 119
-rw-r--r-- test/wasm2c/hello.txt 91
-rw-r--r-- test/wasm2c/minimal.txt 80
-rw-r--r-- test/wasm2c/tail-calls.txt 80
-rw-r--r-- wasm2c/README.md 39
-rw-r--r-- wasm2c/benchmarks/dhrystone/.gitignore 5
-rw-r--r-- wasm2c/benchmarks/dhrystone/Makefile 38
-rwxr-xr-x wasm2c/benchmarks/dhrystone/dhrystone.wasm bin 0 -> 33999 bytes
-rw-r--r-- wasm2c/benchmarks/dhrystone/main.c 265
-rw-r--r-- wasm2c/benchmarks/dhrystone/src/README.md 23
-rw-r--r-- wasm2c/benchmarks/dhrystone/src/dhry.h 306
-rw-r--r-- wasm2c/benchmarks/dhrystone/src/dhry_1.c 485
-rw-r--r-- wasm2c/benchmarks/dhrystone/src/dhry_2.c 187
-rw-r--r-- wasm2c/examples/fac/fac.c 71
-rw-r--r-- wasm2c/wasm-rt.h 20
21 files changed, 2105 insertions, 135 deletions
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 21604429..5932a2c4 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -177,7 +177,8 @@ jobs:
runs-on: ubuntu-latest
env:
USE_NINJA: "1"
- WASM2C_CFLAGS: "-DWASM_RT_USE_MMAP=1 -DWASM_RT_SKIP_SIGNAL_RECOVERY=1 -DWASM_RT_NONCONFORMING_UNCHECKED_STACK_EXHAUSTION=1 -DWASM2C_TEST_EMBEDDER_SIGNAL_HANDLING"
+ WASM2C_CC: "clang"
+ WASM2C_CFLAGS: "-DWASM_RT_USE_MMAP=1 -DWASM_RT_SKIP_SIGNAL_RECOVERY=1 -DWASM_RT_NONCONFORMING_UNCHECKED_STACK_EXHAUSTION=1 -DWASM2C_TEST_EMBEDDER_SIGNAL_HANDLING -DWASM_RT_ALLOW_SEGUE=1 -mfsgsbase -DWASM_RT_SANITY_CHECKS=1 -Wno-pass-failed"
steps:
- uses: actions/setup-python@v1
with:
diff --git a/src/c-writer.cc b/src/c-writer.cc
index cd0ee1fd..1a091957 100644
--- a/src/c-writer.cc
+++ b/src/c-writer.cc
@@ -308,6 +308,7 @@ class CWriter {
void Indent(int size = INDENT_SIZE);
void Dedent(int size = INDENT_SIZE);
+ void NonIndented(std::function<void()> func);
void WriteIndent();
void WriteData(const char* src, size_t size);
void Writef(const char* format, ...);
@@ -402,6 +403,9 @@ class CWriter {
void WriteElemInitializerDecls();
void WriteElemInitializers();
void WriteElemTableInit(bool, const ElemSegment*, const Table*);
+ bool IsSingleUnsharedMemory();
+ void InstallSegueBase(Memory* memory, bool save_old_value);
+ void RestoreSegueBase();
void WriteExports(CWriterPhase);
void WriteTailCallExports(CWriterPhase);
void WriteInitDecl();
@@ -1021,6 +1025,13 @@ void CWriter::Dedent(int size) {
assert(indent_ >= 0);
}
+void CWriter::NonIndented(std::function<void()> func) {
+ int copy = indent_;
+ indent_ = 0;
+ func();
+ indent_ = copy;
+}
+
void CWriter::WriteIndent() {
static char s_indent[] =
" "
@@ -1479,6 +1490,11 @@ std::string CWriter::GenerateHeaderGuard() const {
void CWriter::WriteSourceTop() {
Write(s_source_includes);
Write(Newline(), "#include \"", header_name_, "\"", Newline());
+
+ if (IsSingleUnsharedMemory()) {
+ Write("#define IS_SINGLE_UNSHARED_MEMORY 1", Newline());
+ }
+
Write(s_source_declarations, Newline());
if (module_->features_used.simd) {
@@ -2425,6 +2441,28 @@ void CWriter::WriteElemTableInit(bool active_initialization,
Write(");", Newline());
}
+bool CWriter::IsSingleUnsharedMemory() {
+ return module_->memories.size() == 1 &&
+ !module_->memories[0]->page_limits.is_shared;
+}
+
+void CWriter::InstallSegueBase(Memory* memory, bool save_old_value) {
+ NonIndented([&] { Write("#if WASM_RT_USE_SEGUE", Newline()); });
+ if (save_old_value) {
+ Write("uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE();", Newline());
+ }
+ auto primary_memory =
+ ExternalInstanceRef(ModuleFieldType::Memory, memory->name);
+ Write("WASM_RT_SEGUE_WRITE_BASE(", primary_memory, ".data);", Newline());
+ NonIndented([&] { Write("#endif", Newline()); });
+}
+
+void CWriter::RestoreSegueBase() {
+ NonIndented([&] { Write("#if WASM_RT_USE_SEGUE", Newline()); });
+ Write("WASM_RT_SEGUE_WRITE_BASE(segue_saved_base);", Newline());
+ NonIndented([&] { Write("#endif", Newline()); });
+}
+
void CWriter::WriteExports(CWriterPhase kind) {
if (module_->exports.empty())
return;
@@ -2500,8 +2538,14 @@ void CWriter::WriteExports(CWriterPhase kind) {
switch (export_->kind) {
case ExternalKind::Func: {
Write(OpenBrace());
- if (func_->GetNumResults() > 0) {
- Write("return ");
+ if (IsSingleUnsharedMemory()) {
+ InstallSegueBase(module_->memories[0], true /* save_old_value */);
+ }
+ auto num_results = func_->GetNumResults();
+ if (num_results > 1) {
+ Write(func_->decl.sig.result_types, " ret = ");
+ } else if (num_results == 1) {
+ Write(func_->GetResultType(0), " ret = ");
}
Write(ExternalRef(ModuleFieldType::Func, internal_name), "(");
@@ -2513,6 +2557,12 @@ void CWriter::WriteExports(CWriterPhase kind) {
Write("instance");
}
WriteParamSymbols(index_to_name);
+ if (IsSingleUnsharedMemory()) {
+ RestoreSegueBase();
+ }
+ if (num_results > 0) {
+ Write("return ret;", Newline());
+ }
Write(CloseBrace(), Newline());
local_sym_map_.clear();
@@ -2611,6 +2661,9 @@ void CWriter::WriteInit() {
}
if (!module_->memories.empty()) {
Write("init_memories(instance);", Newline());
+ if (IsSingleUnsharedMemory()) {
+ InstallSegueBase(module_->memories[0], true /* save_old_value */);
+ }
}
if (!module_->tables.empty() && !module_->elem_segments.empty()) {
Write("init_elem_instances(instance);", Newline());
@@ -2631,6 +2684,10 @@ void CWriter::WriteInit() {
}
Write(Newline());
}
+
+ if (IsSingleUnsharedMemory()) {
+ RestoreSegueBase();
+ }
Write(CloseBrace(), Newline());
}
@@ -3733,6 +3790,9 @@ void CWriter::Write(const ExprList& exprs) {
Write(StackVar(0), " = ", func, "(",
ExternalInstancePtr(ModuleFieldType::Memory, memory->name), ", ",
StackVar(0), ");", Newline());
+ if (IsSingleUnsharedMemory()) {
+ InstallSegueBase(module_->memories[0], false /* save_old_value */);
+ }
break;
}
diff --git a/src/prebuilt/wasm2c_source_declarations.cc b/src/prebuilt/wasm2c_source_declarations.cc
index ac0629e4..11aac524 100644
--- a/src/prebuilt/wasm2c_source_declarations.cc
+++ b/src/prebuilt/wasm2c_source_declarations.cc
@@ -40,6 +40,70 @@ R"w2c_template(#define MEM_ADDR(mem, addr, n) &(mem)->data[addr]
R"w2c_template(#endif
)w2c_template"
R"w2c_template(
+#ifndef WASM_RT_USE_SEGUE
+)w2c_template"
+R"w2c_template(// Memory functions can use the segue optimization if allowed. The segue
+)w2c_template"
+R"w2c_template(// optimization uses x86 segments to point to a linear memory. We use this
+)w2c_template"
+R"w2c_template(// optimization when:
+)w2c_template"
+R"w2c_template(//
+)w2c_template"
+R"w2c_template(// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE
+)w2c_template"
+R"w2c_template(// (2) on x86_64 without WABT_BIG_ENDIAN enabled
+)w2c_template"
+R"w2c_template(// (3) the Wasm module uses a single unshared imported or exported memory
+)w2c_template"
+R"w2c_template(// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces"
+)w2c_template"
+R"w2c_template(// for accessing pointers, and supports memcpy on pointers with custom
+)w2c_template"
+R"w2c_template(// "address namespaces". GCC does not support the memcpy requirement, so
+)w2c_template"
+R"w2c_template(// this leaves only clang for now.
+)w2c_template"
+R"w2c_template(// (5) The OS doesn't replace the segment register on context switch which
+)w2c_template"
+R"w2c_template(// eliminates windows for now
+)w2c_template"
+R"w2c_template(#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \
+)w2c_template"
+R"w2c_template( (defined(__x86_64__) || defined(_M_X64)) && IS_SINGLE_UNSHARED_MEMORY && \
+)w2c_template"
+R"w2c_template( __clang__ && __has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32)
+)w2c_template"
+R"w2c_template(#define WASM_RT_USE_SEGUE 1
+)w2c_template"
+R"w2c_template(#else
+)w2c_template"
+R"w2c_template(#define WASM_RT_USE_SEGUE 0
+)w2c_template"
+R"w2c_template(#endif
+)w2c_template"
+R"w2c_template(#endif
+)w2c_template"
+R"w2c_template(
+#if WASM_RT_USE_SEGUE
+)w2c_template"
+R"w2c_template(// POSIX uses FS for TLS, GS is free
+)w2c_template"
+R"w2c_template(#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64()
+)w2c_template"
+R"w2c_template(#define WASM_RT_SEGUE_WRITE_BASE(base) \
+)w2c_template"
+R"w2c_template( __builtin_ia32_wrgsbase64((uintptr_t)base)
+)w2c_template"
+R"w2c_template(#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr)
+)w2c_template"
+R"w2c_template(#else
+)w2c_template"
+R"w2c_template(#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n)
+)w2c_template"
+R"w2c_template(#endif
+)w2c_template"
+R"w2c_template(
#define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0)
)w2c_template"
R"w2c_template(
@@ -124,13 +188,38 @@ R"w2c_template( TRAP(OOB);
R"w2c_template(#endif
)w2c_template"
R"w2c_template(
+#if WASM_RT_USE_SEGUE && WASM_RT_SANITY_CHECKS
+)w2c_template"
+R"w2c_template(#include <stdio.h>
+)w2c_template"
+R"w2c_template(#define WASM_RT_CHECK_BASE(mem) \
+)w2c_template"
+R"w2c_template( if (((uintptr_t)((mem)->data)) != ((uintptr_t)WASM_RT_SEGUE_READ_BASE())) { \
+)w2c_template"
+R"w2c_template( puts("Segment register mismatch\n"); \
+)w2c_template"
+R"w2c_template( abort(); \
+)w2c_template"
+R"w2c_template( }
+)w2c_template"
+R"w2c_template(#else
+)w2c_template"
+R"w2c_template(#define WASM_RT_CHECK_BASE(mem)
+)w2c_template"
+R"w2c_template(#endif
+)w2c_template"
+R"w2c_template(
#if WASM_RT_MEMCHECK_GUARD_PAGES
)w2c_template"
-R"w2c_template(#define MEMCHECK(mem, a, t)
+R"w2c_template(#define MEMCHECK(mem, a, t) WASM_RT_CHECK_BASE(mem);
)w2c_template"
R"w2c_template(#else
)w2c_template"
-R"w2c_template(#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t))
+R"w2c_template(#define MEMCHECK(mem, a, t) \
+)w2c_template"
+R"w2c_template( WASM_RT_CHECK_BASE(mem); \
+)w2c_template"
+R"w2c_template( RANGE_CHECK(mem, a, sizeof(t))
)w2c_template"
R"w2c_template(#endif
)w2c_template"
@@ -204,32 +293,36 @@ R"w2c_template( load_data(MEM_ADDR(&m, o, s), i, s); \
R"w2c_template( } while (0)
)w2c_template"
R"w2c_template(
-#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
+#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
+)w2c_template"
+R"w2c_template( static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
)w2c_template"
-R"w2c_template( static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
+R"w2c_template( MEMCHECK(mem, addr, t1); \
)w2c_template"
-R"w2c_template( MEMCHECK(mem, addr, t1); \
+R"w2c_template( t1 result; \
)w2c_template"
-R"w2c_template( t1 result; \
+R"w2c_template( wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \
)w2c_template"
-R"w2c_template( wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \
+R"w2c_template( sizeof(t1)); \
)w2c_template"
-R"w2c_template( force_read(result); \
+R"w2c_template( force_read(result); \
)w2c_template"
-R"w2c_template( return (t3)(t2)result; \
+R"w2c_template( return (t3)(t2)result; \
)w2c_template"
R"w2c_template( }
)w2c_template"
R"w2c_template(
-#define DEFINE_STORE(name, t1, t2) \
+#define DEFINE_STORE(name, t1, t2) \
+)w2c_template"
+R"w2c_template( static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
)w2c_template"
-R"w2c_template( static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
+R"w2c_template( MEMCHECK(mem, addr, t1); \
)w2c_template"
-R"w2c_template( MEMCHECK(mem, addr, t1); \
+R"w2c_template( t1 wrapped = (t1)value; \
)w2c_template"
-R"w2c_template( t1 wrapped = (t1)value; \
+R"w2c_template( wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \
)w2c_template"
-R"w2c_template( wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \
+R"w2c_template( sizeof(t1)); \
)w2c_template"
R"w2c_template( }
)w2c_template"
diff --git a/src/template/wasm2c.declarations.c b/src/template/wasm2c.declarations.c
index 6399affe..5261a25b 100644
--- a/src/template/wasm2c.declarations.c
+++ b/src/template/wasm2c.declarations.c
@@ -20,6 +20,39 @@
#define MEM_ADDR(mem, addr, n) &(mem)->data[addr]
#endif
+#ifndef WASM_RT_USE_SEGUE
+// Memory functions can use the segue optimization if allowed. The segue
+// optimization uses x86 segments to point to a linear memory. We use this
+// optimization when:
+//
+// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE
+// (2) on x86_64 without WABT_BIG_ENDIAN enabled
+// (3) the Wasm module uses a single unshared imported or exported memory
+// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces"
+// for accessing pointers, and supports memcpy on pointers with custom
+// "address namespaces". GCC does not support the memcpy requirement, so
+// this leaves only clang for now.
+// (5) The OS doesn't replace the segment register on context switch which
+// eliminates windows for now
+#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \
+ (defined(__x86_64__) || defined(_M_X64)) && IS_SINGLE_UNSHARED_MEMORY && \
+ __clang__ && __has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32)
+#define WASM_RT_USE_SEGUE 1
+#else
+#define WASM_RT_USE_SEGUE 0
+#endif
+#endif
+
+#if WASM_RT_USE_SEGUE
+// POSIX uses FS for TLS, GS is free
+#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64()
+#define WASM_RT_SEGUE_WRITE_BASE(base) \
+ __builtin_ia32_wrgsbase64((uintptr_t)base)
+#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr)
+#else
+#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n)
+#endif
+
#define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0)
#if WASM_RT_STACK_DEPTH_COUNT
@@ -67,10 +100,23 @@ static inline bool func_types_eq(const wasm_rt_func_type_t a,
TRAP(OOB);
#endif
+#if WASM_RT_USE_SEGUE && WASM_RT_SANITY_CHECKS
+#include <stdio.h>
+#define WASM_RT_CHECK_BASE(mem) \
+ if (((uintptr_t)((mem)->data)) != ((uintptr_t)WASM_RT_SEGUE_READ_BASE())) { \
+ puts("Segment register mismatch\n"); \
+ abort(); \
+ }
+#else
+#define WASM_RT_CHECK_BASE(mem)
+#endif
+
#if WASM_RT_MEMCHECK_GUARD_PAGES
-#define MEMCHECK(mem, a, t)
+#define MEMCHECK(mem, a, t) WASM_RT_CHECK_BASE(mem);
#else
-#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t))
+#define MEMCHECK(mem, a, t) \
+ WASM_RT_CHECK_BASE(mem); \
+ RANGE_CHECK(mem, a, sizeof(t))
#endif
#ifdef __GNUC__
@@ -109,20 +155,22 @@ static inline void load_data(void* dest, const void* src, size_t n) {
load_data(MEM_ADDR(&m, o, s), i, s); \
} while (0)
-#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
- static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
- MEMCHECK(mem, addr, t1); \
- t1 result; \
- wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \
- force_read(result); \
- return (t3)(t2)result; \
- }
-
-#define DEFINE_STORE(name, t1, t2) \
- static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
- MEMCHECK(mem, addr, t1); \
- t1 wrapped = (t1)value; \
- wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \
+#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
+ static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
+ MEMCHECK(mem, addr, t1); \
+ t1 result; \
+ wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \
+ sizeof(t1)); \
+ force_read(result); \
+ return (t3)(t2)result; \
+ }
+
+#define DEFINE_STORE(name, t1, t2) \
+ static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
+ MEMCHECK(mem, addr, t1); \
+ t1 wrapped = (t1)value; \
+ wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \
+ sizeof(t1)); \
}
DEFINE_LOAD(i32_load, u32, u32, u32, FORCE_READ_INT)
diff --git a/test/wasm2c/add.txt b/test/wasm2c/add.txt
index d250c4c0..ffb27f59 100644
--- a/test/wasm2c/add.txt
+++ b/test/wasm2c/add.txt
@@ -87,6 +87,39 @@ u32 w2c_test_add(w2c_test*, u32, u32);
#define MEM_ADDR(mem, addr, n) &(mem)->data[addr]
#endif
+#ifndef WASM_RT_USE_SEGUE
+// Memory functions can use the segue optimization if allowed. The segue
+// optimization uses x86 segments to point to a linear memory. We use this
+// optimization when:
+//
+// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE
+// (2) on x86_64 without WABT_BIG_ENDIAN enabled
+// (3) the Wasm module uses a single unshared imported or exported memory
+// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces"
+// for accessing pointers, and supports memcpy on pointers with custom
+// "address namespaces". GCC does not support the memcpy requirement, so
+// this leaves only clang for now.
+// (5) The OS doesn't replace the segment register on context switch which
+// eliminates windows for now
+#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \
+ (defined(__x86_64__) || defined(_M_X64)) && IS_SINGLE_UNSHARED_MEMORY && \
+ __clang__ && __has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32)
+#define WASM_RT_USE_SEGUE 1
+#else
+#define WASM_RT_USE_SEGUE 0
+#endif
+#endif
+
+#if WASM_RT_USE_SEGUE
+// POSIX uses FS for TLS, GS is free
+#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64()
+#define WASM_RT_SEGUE_WRITE_BASE(base) \
+ __builtin_ia32_wrgsbase64((uintptr_t)base)
+#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr)
+#else
+#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n)
+#endif
+
#define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0)
#if WASM_RT_STACK_DEPTH_COUNT
@@ -134,10 +167,23 @@ static inline bool func_types_eq(const wasm_rt_func_type_t a,
TRAP(OOB);
#endif
+#if WASM_RT_USE_SEGUE && WASM_RT_SANITY_CHECKS
+#include <stdio.h>
+#define WASM_RT_CHECK_BASE(mem) \
+ if (((uintptr_t)((mem)->data)) != ((uintptr_t)WASM_RT_SEGUE_READ_BASE())) { \
+ puts("Segment register mismatch\n"); \
+ abort(); \
+ }
+#else
+#define WASM_RT_CHECK_BASE(mem)
+#endif
+
#if WASM_RT_MEMCHECK_GUARD_PAGES
-#define MEMCHECK(mem, a, t)
+#define MEMCHECK(mem, a, t) WASM_RT_CHECK_BASE(mem);
#else
-#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t))
+#define MEMCHECK(mem, a, t) \
+ WASM_RT_CHECK_BASE(mem); \
+ RANGE_CHECK(mem, a, sizeof(t))
#endif
#ifdef __GNUC__
@@ -176,20 +222,22 @@ static inline void load_data(void* dest, const void* src, size_t n) {
load_data(MEM_ADDR(&m, o, s), i, s); \
} while (0)
-#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
- static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
- MEMCHECK(mem, addr, t1); \
- t1 result; \
- wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \
- force_read(result); \
- return (t3)(t2)result; \
+#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
+ static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
+ MEMCHECK(mem, addr, t1); \
+ t1 result; \
+ wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \
+ sizeof(t1)); \
+ force_read(result); \
+ return (t3)(t2)result; \
}
-#define DEFINE_STORE(name, t1, t2) \
- static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
- MEMCHECK(mem, addr, t1); \
- t1 wrapped = (t1)value; \
- wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \
+#define DEFINE_STORE(name, t1, t2) \
+ static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
+ MEMCHECK(mem, addr, t1); \
+ t1 wrapped = (t1)value; \
+ wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \
+ sizeof(t1)); \
}
DEFINE_LOAD(i32_load, u32, u32, u32, FORCE_READ_INT)
@@ -704,7 +752,8 @@ FUNC_TYPE_T(w2c_test_t0) = "\x92\xfb\x6a\xdf\x49\x07\x0a\x83\xbe\x08\x02\x68\xcd
/* export: 'add' */
u32 w2c_test_add(w2c_test* instance, u32 var_p0, u32 var_p1) {
- return w2c_test_add_0(instance, var_p0, var_p1);
+ u32 ret = w2c_test_add_0(instance, var_p0, var_p1);
+ return ret;
}
void wasm2c_test_instantiate(w2c_test* instance) {
diff --git a/test/wasm2c/check-imports.txt b/test/wasm2c/check-imports.txt
index 3da1741e..caa5dc24 100644
--- a/test/wasm2c/check-imports.txt
+++ b/test/wasm2c/check-imports.txt
@@ -88,6 +88,7 @@ extern const u8 wasm2c_test_is64_env_0x5F_linear_memory;
#endif
#include "wasm.h"
+#define IS_SINGLE_UNSHARED_MEMORY 1
// Computes a pointer to an object of the given size in a little-endian memory.
//
@@ -110,6 +111,39 @@ extern const u8 wasm2c_test_is64_env_0x5F_linear_memory;
#define MEM_ADDR(mem, addr, n) &(mem)->data[addr]
#endif
+#ifndef WASM_RT_USE_SEGUE
+// Memory functions can use the segue optimization if allowed. The segue
+// optimization uses x86 segments to point to a linear memory. We use this
+// optimization when:
+//
+// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE
+// (2) on x86_64 without WABT_BIG_ENDIAN enabled
+// (3) the Wasm module uses a single unshared imported or exported memory
+// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces"
+// for accessing pointers, and supports memcpy on pointers with custom
+// "address namespaces". GCC does not support the memcpy requirement, so
+// this leaves only clang for now.
+// (5) The OS doesn't replace the segment register on context switch which
+// eliminates windows for now
+#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \
+ (defined(__x86_64__) || defined(_M_X64)) && IS_SINGLE_UNSHARED_MEMORY && \
+ __clang__ && __has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32)
+#define WASM_RT_USE_SEGUE 1
+#else
+#define WASM_RT_USE_SEGUE 0
+#endif
+#endif
+
+#if WASM_RT_USE_SEGUE
+// POSIX uses FS for TLS, GS is free
+#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64()
+#define WASM_RT_SEGUE_WRITE_BASE(base) \
+ __builtin_ia32_wrgsbase64((uintptr_t)base)
+#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr)
+#else
+#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n)
+#endif
+
#define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0)
#if WASM_RT_STACK_DEPTH_COUNT
@@ -157,10 +191,23 @@ static inline bool func_types_eq(const wasm_rt_func_type_t a,
TRAP(OOB);
#endif
+#if WASM_RT_USE_SEGUE && WASM_RT_SANITY_CHECKS
+#include <stdio.h>
+#define WASM_RT_CHECK_BASE(mem) \
+ if (((uintptr_t)((mem)->data)) != ((uintptr_t)WASM_RT_SEGUE_READ_BASE())) { \
+ puts("Segment register mismatch\n"); \
+ abort(); \
+ }
+#else
+#define WASM_RT_CHECK_BASE(mem)
+#endif
+
#if WASM_RT_MEMCHECK_GUARD_PAGES
-#define MEMCHECK(mem, a, t)
+#define MEMCHECK(mem, a, t) WASM_RT_CHECK_BASE(mem);
#else
-#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t))
+#define MEMCHECK(mem, a, t) \
+ WASM_RT_CHECK_BASE(mem); \
+ RANGE_CHECK(mem, a, sizeof(t))
#endif
#ifdef __GNUC__
@@ -199,20 +246,22 @@ static inline void load_data(void* dest, const void* src, size_t n) {
load_data(MEM_ADDR(&m, o, s), i, s); \
} while (0)
-#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
- static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
- MEMCHECK(mem, addr, t1); \
- t1 result; \
- wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \
- force_read(result); \
- return (t3)(t2)result; \
+#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
+ static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
+ MEMCHECK(mem, addr, t1); \
+ t1 result; \
+ wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \
+ sizeof(t1)); \
+ force_read(result); \
+ return (t3)(t2)result; \
}
-#define DEFINE_STORE(name, t1, t2) \
- static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
- MEMCHECK(mem, addr, t1); \
- t1 wrapped = (t1)value; \
- wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \
+#define DEFINE_STORE(name, t1, t2) \
+ static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
+ MEMCHECK(mem, addr, t1); \
+ t1 wrapped = (t1)value; \
+ wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \
+ sizeof(t1)); \
}
DEFINE_LOAD(i32_load, u32, u32, u32, FORCE_READ_INT)
@@ -759,7 +808,14 @@ void wasm2c_test_instantiate(w2c_test* instance, struct w2c_env* w2c_env_instanc
init_instance_import(instance, w2c_env_instance);
init_tables(instance);
init_memories(instance);
+#if WASM_RT_USE_SEGUE
+ uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE();
+ WASM_RT_SEGUE_WRITE_BASE((*instance->w2c_env_0x5F_linear_memory).data);
+#endif
init_elem_instances(instance);
+#if WASM_RT_USE_SEGUE
+ WASM_RT_SEGUE_WRITE_BASE(segue_saved_base);
+#endif
}
void wasm2c_test_free(w2c_test* instance) {
diff --git a/test/wasm2c/export-names.txt b/test/wasm2c/export-names.txt
index ed9772b9..7c4e6eda 100644
--- a/test/wasm2c/export-names.txt
+++ b/test/wasm2c/export-names.txt
@@ -88,6 +88,7 @@ void w2c_test_0xE20x9D0xA40xEF0xB80x8F(w2c_test*);
#endif
#include "wasm.h"
+#define IS_SINGLE_UNSHARED_MEMORY 1
// Computes a pointer to an object of the given size in a little-endian memory.
//
@@ -110,6 +111,39 @@ void w2c_test_0xE20x9D0xA40xEF0xB80x8F(w2c_test*);
#define MEM_ADDR(mem, addr, n) &(mem)->data[addr]
#endif
+#ifndef WASM_RT_USE_SEGUE
+// Memory functions can use the segue optimization if allowed. The segue
+// optimization uses x86 segments to point to a linear memory. We use this
+// optimization when:
+//
+// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE
+// (2) on x86_64 without WABT_BIG_ENDIAN enabled
+// (3) the Wasm module uses a single unshared imported or exported memory
+// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces"
+// for accessing pointers, and supports memcpy on pointers with custom
+// "address namespaces". GCC does not support the memcpy requirement, so
+// this leaves only clang for now.
+// (5) The OS doesn't replace the segment register on context switch which
+// eliminates windows for now
+#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \
+ (defined(__x86_64__) || defined(_M_X64)) && IS_SINGLE_UNSHARED_MEMORY && \
+ __clang__ && __has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32)
+#define WASM_RT_USE_SEGUE 1
+#else
+#define WASM_RT_USE_SEGUE 0
+#endif
+#endif
+
+#if WASM_RT_USE_SEGUE
+// POSIX uses FS for TLS, GS is free
+#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64()
+#define WASM_RT_SEGUE_WRITE_BASE(base) \
+ __builtin_ia32_wrgsbase64((uintptr_t)base)
+#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr)
+#else
+#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n)
+#endif
+
#define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0)
#if WASM_RT_STACK_DEPTH_COUNT
@@ -157,10 +191,23 @@ static inline bool func_types_eq(const wasm_rt_func_type_t a,
TRAP(OOB);
#endif
+#if WASM_RT_USE_SEGUE && WASM_RT_SANITY_CHECKS
+#include <stdio.h>
+#define WASM_RT_CHECK_BASE(mem) \
+ if (((uintptr_t)((mem)->data)) != ((uintptr_t)WASM_RT_SEGUE_READ_BASE())) { \
+ puts("Segment register mismatch\n"); \
+ abort(); \
+ }
+#else
+#define WASM_RT_CHECK_BASE(mem)
+#endif
+
#if WASM_RT_MEMCHECK_GUARD_PAGES
-#define MEMCHECK(mem, a, t)
+#define MEMCHECK(mem, a, t) WASM_RT_CHECK_BASE(mem);
#else
-#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t))
+#define MEMCHECK(mem, a, t) \
+ WASM_RT_CHECK_BASE(mem); \
+ RANGE_CHECK(mem, a, sizeof(t))
#endif
#ifdef __GNUC__
@@ -199,20 +246,22 @@ static inline void load_data(void* dest, const void* src, size_t n) {
load_data(MEM_ADDR(&m, o, s), i, s); \
} while (0)
-#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
- static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
- MEMCHECK(mem, addr, t1); \
- t1 result; \
- wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \
- force_read(result); \
- return (t3)(t2)result; \
+#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
+ static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
+ MEMCHECK(mem, addr, t1); \
+ t1 result; \
+ wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \
+ sizeof(t1)); \
+ force_read(result); \
+ return (t3)(t2)result; \
}
-#define DEFINE_STORE(name, t1, t2) \
- static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
- MEMCHECK(mem, addr, t1); \
- t1 wrapped = (t1)value; \
- wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \
+#define DEFINE_STORE(name, t1, t2) \
+ static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
+ MEMCHECK(mem, addr, t1); \
+ t1 wrapped = (t1)value; \
+ wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \
+ sizeof(t1)); \
}
DEFINE_LOAD(i32_load, u32, u32, u32, FORCE_READ_INT)
@@ -730,27 +779,62 @@ static void init_memories(w2c_test* instance) {
/* export: '' */
void w2c_test_(w2c_test* instance) {
+#if WASM_RT_USE_SEGUE
+ uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE();
+ WASM_RT_SEGUE_WRITE_BASE((*instance->w2c_0x5Cmodule_import0x200x2A0x2F).data);
+#endif
w2c_test__0(instance);
+#if WASM_RT_USE_SEGUE
+ WASM_RT_SEGUE_WRITE_BASE(segue_saved_base);
+#endif
}
/* export: '*\2F' */
void w2c_test_0x2A0x2F(w2c_test* instance) {
+#if WASM_RT_USE_SEGUE
+ uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE();
+ WASM_RT_SEGUE_WRITE_BASE((*instance->w2c_0x5Cmodule_import0x200x2A0x2F).data);
+#endif
w2c_test__0(instance);
+#if WASM_RT_USE_SEGUE
+ WASM_RT_SEGUE_WRITE_BASE(segue_saved_base);
+#endif
}
/* export: '\3F\3F\2F' */
void w2c_test_0x3F0x3F0x2F(w2c_test* instance) {
+#if WASM_RT_USE_SEGUE
+ uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE();
+ WASM_RT_SEGUE_WRITE_BASE((*instance->w2c_0x5Cmodule_import0x200x2A0x2F).data);
+#endif
w2c_test__0(instance);
+#if WASM_RT_USE_SEGUE
+ WASM_RT_SEGUE_WRITE_BASE(segue_saved_base);
+#endif
}
/* export: '\0A' */
void w2c_test_0x0A(w2c_test* instance) {
+#if WASM_RT_USE_SEGUE
+ uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE();
+ WASM_RT_SEGUE_WRITE_BASE((*instance->w2c_0x5Cmodule_import0x200x2A0x2F).data);
+#endif
w2c_test__0(instance);
+#if WASM_RT_USE_SEGUE
+ WASM_RT_SEGUE_WRITE_BASE(segue_saved_base);
+#endif
}
/* export: '\E2\9D\A4\EF\B8\8F' */
void w2c_test_0xE20x9D0xA40xEF0xB80x8F(w2c_test* instance) {
+#if WASM_RT_USE_SEGUE
+ uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE();
+ WASM_RT_SEGUE_WRITE_BASE((*instance->w2c_0x5Cmodule_import0x200x2A0x2F).data);
+#endif
w2c_test__0(instance);
+#if WASM_RT_USE_SEGUE
+ WASM_RT_SEGUE_WRITE_BASE(segue_saved_base);
+#endif
}
static void init_instance_import(w2c_test* instance, struct w2c_0x5Cmodule* w2c_0x5Cmodule_instance) {
@@ -765,6 +849,13 @@ void wasm2c_test_instantiate(w2c_test* instance, struct w2c_0x5Cmodule* w2c_0x5C
assert(wasm_rt_is_initialized());
init_instance_import(instance, w2c_0x5Cmodule_instance);
init_memories(instance);
+#if WASM_RT_USE_SEGUE
+ uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE();
+ WASM_RT_SEGUE_WRITE_BASE((*instance->w2c_0x5Cmodule_import0x200x2A0x2F).data);
+#endif
+#if WASM_RT_USE_SEGUE
+ WASM_RT_SEGUE_WRITE_BASE(segue_saved_base);
+#endif
}
void wasm2c_test_free(w2c_test* instance) {
diff --git a/test/wasm2c/hello.txt b/test/wasm2c/hello.txt
index d56c7216..77608b6c 100644
--- a/test/wasm2c/hello.txt
+++ b/test/wasm2c/hello.txt
@@ -96,6 +96,7 @@ void w2c_test_0x5Fstart(w2c_test*);
#endif
#include "wasm.h"
+#define IS_SINGLE_UNSHARED_MEMORY 1
// Computes a pointer to an object of the given size in a little-endian memory.
//
@@ -118,6 +119,39 @@ void w2c_test_0x5Fstart(w2c_test*);
#define MEM_ADDR(mem, addr, n) &(mem)->data[addr]
#endif
+#ifndef WASM_RT_USE_SEGUE
+// Memory functions can use the segue optimization if allowed. The segue
+// optimization uses x86 segments to point to a linear memory. We use this
+// optimization when:
+//
+// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE
+// (2) on x86_64 without WABT_BIG_ENDIAN enabled
+// (3) the Wasm module uses a single unshared imported or exported memory
+// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces"
+// for accessing pointers, and supports memcpy on pointers with custom
+// "address namespaces". GCC does not support the memcpy requirement, so
+// this leaves only clang for now.
+// (5) The OS doesn't replace the segment register on context switch which
+// eliminates windows for now
+#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \
+ (defined(__x86_64__) || defined(_M_X64)) && IS_SINGLE_UNSHARED_MEMORY && \
+ __clang__ && __has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32)
+#define WASM_RT_USE_SEGUE 1
+#else
+#define WASM_RT_USE_SEGUE 0
+#endif
+#endif
+
+#if WASM_RT_USE_SEGUE
+// POSIX uses FS for TLS, GS is free
+#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64()
+#define WASM_RT_SEGUE_WRITE_BASE(base) \
+ __builtin_ia32_wrgsbase64((uintptr_t)base)
+#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr)
+#else
+#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n)
+#endif
+
#define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0)
#if WASM_RT_STACK_DEPTH_COUNT
@@ -165,10 +199,23 @@ static inline bool func_types_eq(const wasm_rt_func_type_t a,
TRAP(OOB);
#endif
+#if WASM_RT_USE_SEGUE && WASM_RT_SANITY_CHECKS
+#include <stdio.h>
+#define WASM_RT_CHECK_BASE(mem) \
+ if (((uintptr_t)((mem)->data)) != ((uintptr_t)WASM_RT_SEGUE_READ_BASE())) { \
+ puts("Segment register mismatch\n"); \
+ abort(); \
+ }
+#else
+#define WASM_RT_CHECK_BASE(mem)
+#endif
+
#if WASM_RT_MEMCHECK_GUARD_PAGES
-#define MEMCHECK(mem, a, t)
+#define MEMCHECK(mem, a, t) WASM_RT_CHECK_BASE(mem);
#else
-#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t))
+#define MEMCHECK(mem, a, t) \
+ WASM_RT_CHECK_BASE(mem); \
+ RANGE_CHECK(mem, a, sizeof(t))
#endif
#ifdef __GNUC__
@@ -207,20 +254,22 @@ static inline void load_data(void* dest, const void* src, size_t n) {
load_data(MEM_ADDR(&m, o, s), i, s); \
} while (0)
-#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
- static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
- MEMCHECK(mem, addr, t1); \
- t1 result; \
- wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \
- force_read(result); \
- return (t3)(t2)result; \
+#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
+ static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
+ MEMCHECK(mem, addr, t1); \
+ t1 result; \
+ wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \
+ sizeof(t1)); \
+ force_read(result); \
+ return (t3)(t2)result; \
}
-#define DEFINE_STORE(name, t1, t2) \
- static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
- MEMCHECK(mem, addr, t1); \
- t1 wrapped = (t1)value; \
- wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \
+#define DEFINE_STORE(name, t1, t2) \
+ static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
+ MEMCHECK(mem, addr, t1); \
+ t1 wrapped = (t1)value; \
+ wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \
+ sizeof(t1)); \
}
DEFINE_LOAD(i32_load, u32, u32, u32, FORCE_READ_INT)
@@ -767,7 +816,14 @@ wasm_rt_memory_t* w2c_test_memory(w2c_test* instance) {
/* export: '_start' */
void w2c_test_0x5Fstart(w2c_test* instance) {
+#if WASM_RT_USE_SEGUE
+ uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE();
+ WASM_RT_SEGUE_WRITE_BASE(instance->w2c_memory.data);
+#endif
w2c_test_0x5Fstart_0(instance);
+#if WASM_RT_USE_SEGUE
+ WASM_RT_SEGUE_WRITE_BASE(segue_saved_base);
+#endif
}
static void init_instance_import(w2c_test* instance, struct w2c_wasi__snapshot__preview1* w2c_wasi__snapshot__preview1_instance) {
@@ -779,8 +835,15 @@ void wasm2c_test_instantiate(w2c_test* instance, struct w2c_wasi__snapshot__prev
init_instance_import(instance, w2c_wasi__snapshot__preview1_instance);
init_tables(instance);
init_memories(instance);
+#if WASM_RT_USE_SEGUE
+ uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE();
+ WASM_RT_SEGUE_WRITE_BASE(instance->w2c_memory.data);
+#endif
init_elem_instances(instance);
init_data_instances(instance);
+#if WASM_RT_USE_SEGUE
+ WASM_RT_SEGUE_WRITE_BASE(segue_saved_base);
+#endif
}
void wasm2c_test_free(w2c_test* instance) {
diff --git a/test/wasm2c/minimal.txt b/test/wasm2c/minimal.txt
index 9199efb8..e22e3662 100644
--- a/test/wasm2c/minimal.txt
+++ b/test/wasm2c/minimal.txt
@@ -81,6 +81,39 @@ wasm_rt_func_type_t wasm2c_test_get_func_type(uint32_t param_count, uint32_t res
#define MEM_ADDR(mem, addr, n) &(mem)->data[addr]
#endif
+#ifndef WASM_RT_USE_SEGUE
+// Memory functions can use the segue optimization if allowed. The segue
+// optimization uses x86 segments to point to a linear memory. We use this
+// optimization when:
+//
+// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE
+// (2) on x86_64 without WABT_BIG_ENDIAN enabled
+// (3) the Wasm module uses a single unshared imported or exported memory
+// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces"
+// for accessing pointers, and supports memcpy on pointers with custom
+// "address namespaces". GCC does not support the memcpy requirement, so
+// this leaves only clang for now.
+// (5) The OS doesn't replace the segment register on context switch which
+// eliminates windows for now
+#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \
+ (defined(__x86_64__) || defined(_M_X64)) && IS_SINGLE_UNSHARED_MEMORY && \
+ __clang__ && __has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32)
+#define WASM_RT_USE_SEGUE 1
+#else
+#define WASM_RT_USE_SEGUE 0
+#endif
+#endif
+
+#if WASM_RT_USE_SEGUE
+// POSIX uses FS for TLS, GS is free
+#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64()
+#define WASM_RT_SEGUE_WRITE_BASE(base) \
+ __builtin_ia32_wrgsbase64((uintptr_t)base)
+#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr)
+#else
+#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n)
+#endif
+
#define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0)
#if WASM_RT_STACK_DEPTH_COUNT
@@ -128,10 +161,23 @@ static inline bool func_types_eq(const wasm_rt_func_type_t a,
TRAP(OOB);
#endif
+#if WASM_RT_USE_SEGUE && WASM_RT_SANITY_CHECKS
+#include <stdio.h>
+#define WASM_RT_CHECK_BASE(mem) \
+ if (((uintptr_t)((mem)->data)) != ((uintptr_t)WASM_RT_SEGUE_READ_BASE())) { \
+ puts("Segment register mismatch\n"); \
+ abort(); \
+ }
+#else
+#define WASM_RT_CHECK_BASE(mem)
+#endif
+
#if WASM_RT_MEMCHECK_GUARD_PAGES
-#define MEMCHECK(mem, a, t)
+#define MEMCHECK(mem, a, t) WASM_RT_CHECK_BASE(mem);
#else
-#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t))
+#define MEMCHECK(mem, a, t) \
+ WASM_RT_CHECK_BASE(mem); \
+ RANGE_CHECK(mem, a, sizeof(t))
#endif
#ifdef __GNUC__
@@ -170,20 +216,22 @@ static inline void load_data(void* dest, const void* src, size_t n) {
load_data(MEM_ADDR(&m, o, s), i, s); \
} while (0)
-#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
- static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
- MEMCHECK(mem, addr, t1); \
- t1 result; \
- wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \
- force_read(result); \
- return (t3)(t2)result; \
- }
-
-#define DEFINE_STORE(name, t1, t2) \
- static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
- MEMCHECK(mem, addr, t1); \
- t1 wrapped = (t1)value; \
- wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \
+#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
+ static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
+ MEMCHECK(mem, addr, t1); \
+ t1 result; \
+ wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \
+ sizeof(t1)); \
+ force_read(result); \
+ return (t3)(t2)result; \
+ }
+
+#define DEFINE_STORE(name, t1, t2) \
+ static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
+ MEMCHECK(mem, addr, t1); \
+ t1 wrapped = (t1)value; \
+ wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \
+ sizeof(t1)); \
}
DEFINE_LOAD(i32_load, u32, u32, u32, FORCE_READ_INT)
diff --git a/test/wasm2c/tail-calls.txt b/test/wasm2c/tail-calls.txt
index b2ee451b..dd97badf 100644
--- a/test/wasm2c/tail-calls.txt
+++ b/test/wasm2c/tail-calls.txt
@@ -111,6 +111,39 @@ void wasm_tailcall_w2c_test_tailcaller(void **instance_ptr, void *tail_call_stac
#define MEM_ADDR(mem, addr, n) &(mem)->data[addr]
#endif
+#ifndef WASM_RT_USE_SEGUE
+// Memory functions can use the segue optimization if allowed. The segue
+// optimization uses x86 segments to point to a linear memory. We use this
+// optimization when:
+//
+// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE
+// (2) on x86_64 without WABT_BIG_ENDIAN enabled
+// (3) the Wasm module uses a single unshared imported or exported memory
+// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces"
+// for accessing pointers, and supports memcpy on pointers with custom
+// "address namespaces". GCC does not support the memcpy requirement, so
+// this leaves only clang for now.
+// (5) The OS doesn't replace the segment register on context switch which
+// eliminates windows for now
+#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \
+ (defined(__x86_64__) || defined(_M_X64)) && IS_SINGLE_UNSHARED_MEMORY && \
+ __clang__ && __has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32)
+#define WASM_RT_USE_SEGUE 1
+#else
+#define WASM_RT_USE_SEGUE 0
+#endif
+#endif
+
+#if WASM_RT_USE_SEGUE
+// POSIX uses FS for TLS, GS is free
+#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64()
+#define WASM_RT_SEGUE_WRITE_BASE(base) \
+ __builtin_ia32_wrgsbase64((uintptr_t)base)
+#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr)
+#else
+#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n)
+#endif
+
#define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0)
#if WASM_RT_STACK_DEPTH_COUNT
@@ -158,10 +191,23 @@ static inline bool func_types_eq(const wasm_rt_func_type_t a,
TRAP(OOB);
#endif
+#if WASM_RT_USE_SEGUE && WASM_RT_SANITY_CHECKS
+#include <stdio.h>
+#define WASM_RT_CHECK_BASE(mem) \
+ if (((uintptr_t)((mem)->data)) != ((uintptr_t)WASM_RT_SEGUE_READ_BASE())) { \
+ puts("Segment register mismatch\n"); \
+ abort(); \
+ }
+#else
+#define WASM_RT_CHECK_BASE(mem)
+#endif
+
#if WASM_RT_MEMCHECK_GUARD_PAGES
-#define MEMCHECK(mem, a, t)
+#define MEMCHECK(mem, a, t) WASM_RT_CHECK_BASE(mem);
#else
-#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t))
+#define MEMCHECK(mem, a, t) \
+ WASM_RT_CHECK_BASE(mem); \
+ RANGE_CHECK(mem, a, sizeof(t))
#endif
#ifdef __GNUC__
@@ -200,20 +246,22 @@ static inline void load_data(void* dest, const void* src, size_t n) {
load_data(MEM_ADDR(&m, o, s), i, s); \
} while (0)
-#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
- static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
- MEMCHECK(mem, addr, t1); \
- t1 result; \
- wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \
- force_read(result); \
- return (t3)(t2)result; \
- }
-
-#define DEFINE_STORE(name, t1, t2) \
- static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
- MEMCHECK(mem, addr, t1); \
- t1 wrapped = (t1)value; \
- wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \
+#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
+ static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
+ MEMCHECK(mem, addr, t1); \
+ t1 result; \
+ wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \
+ sizeof(t1)); \
+ force_read(result); \
+ return (t3)(t2)result; \
+ }
+
+#define DEFINE_STORE(name, t1, t2) \
+ static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
+ MEMCHECK(mem, addr, t1); \
+ t1 wrapped = (t1)value; \
+ wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \
+ sizeof(t1)); \
}
DEFINE_LOAD(i32_load, u32, u32, u32, FORCE_READ_INT)
diff --git a/wasm2c/README.md b/wasm2c/README.md
index f928ae40..6d8b9999 100644
--- a/wasm2c/README.md
+++ b/wasm2c/README.md
@@ -141,6 +141,45 @@ fac(10) -> 3628800
You can take a look at the all of these files in
[wasm2c/examples/fac](/wasm2c/examples/fac).
+### Enabling extra sanity checks
+
+Wasm2c provides a macro `WASM_RT_SANITY_CHECKS` that if defined enables
+additional sanity checks in the produced Wasm2c code. Note that this may have a
+high performance overhead, and is thus only recommended for debug builds.
+
+### Enabling Segue (a Linux x86_64 target specific optimization)
+
+Wasm2c can use the "Segue" optimization if allowed. The segue optimization uses
+an x86 segment register to store the location of Wasm's linear memory, when
+compiling a Wasm module with clang, running on x86_64 Linux, and the macro
+`WASM_RT_ALLOW_SEGUE` is defined. Segue is not used if
+
+1. The Wasm module uses more than a single unshared imported or exported
+   memory
+2. The wasm2c code is compiled with GCC. Segue requires intrinsics for
+ (rd|wr)gsbase, "address namespaces" for accessing pointers, and support for
+ memcpy on pointers with custom "address namespaces". GCC does not support the
+ memcpy requirement.
+3. The code is compiled for Windows as Windows doesn't restore the segment
+ register on context switch.
+
+The wasm2c-generated code automatically sets the unused segment register (the
+`%gs` register on x86_64 Linux) on function calls into the wasm2c-generated
+module, and restores it after calls to external modules, etc. Any host function
+written in C would continue to work without changes as C code does not modify
+the unused segment register `%gs` (See
+[here](https://www.kernel.org/doc/html/next/x86/x86_64/fsgs.html) for details).
+However, any host functions written in assembly that clobber the free segment
+register must restore the value of this register prior to executing or returning
+control to wasm2c generated code.
+
+You can test the performance of the Segue optimization by running Dhrystone with
+and without Segue:
+
+```bash
+cd wasm2c/benchmarks/dhrystone && make
+```
+
## Looking at the generated header, `fac.h`
The generated header file looks something like this:
diff --git a/wasm2c/benchmarks/dhrystone/.gitignore b/wasm2c/benchmarks/dhrystone/.gitignore
new file mode 100644
index 00000000..7cc06514
--- /dev/null
+++ b/wasm2c/benchmarks/dhrystone/.gitignore
@@ -0,0 +1,5 @@
+dhrystone_native
+dhrystone
+dhrystone_segue
+dhrystone.h
+dhrystone.c
diff --git a/wasm2c/benchmarks/dhrystone/Makefile b/wasm2c/benchmarks/dhrystone/Makefile
new file mode 100644
index 00000000..b7af5d4b
--- /dev/null
+++ b/wasm2c/benchmarks/dhrystone/Makefile
@@ -0,0 +1,38 @@
+WABT_ROOT=../../..
+CC=clang
+CFLAGS=-I$(WABT_ROOT)/wasm2c -I $(WABT_ROOT)/third_party/uvwasi/include/ -O3
+CFLAGS_SEGUE=-DWASM_RT_ALLOW_SEGUE=1 -mfsgsbase
+LDFLAGS=-L$(WABT_ROOT)/build/_deps/libuv-build -L$(WABT_ROOT)/build/third_party/uvwasi
+LDLIBS=-luvwasi_a -luv_a -lm
+
+all: benchmark
+
+# Remove only generated artifacts. dhrystone.wasm is checked in (rebuilding it
+# requires wasi-sdk), so it is deliberately not deleted here.
+clean:
+	rm -rf dhrystone_native dhrystone dhrystone_segue dhrystone.c dhrystone.h
+
+dhrystone.wasm: src/dhry_1.c src/dhry_2.c
+ /opt/wasi-sdk/bin/clang -O3 $^ -o $@
+
+dhrystone.c: dhrystone.wasm $(WABT_ROOT)/bin/wasm2c
+ $(WABT_ROOT)/bin/wasm2c $< -o $@ --disable-simd
+
+dhrystone_native: src/dhry_1.c src/dhry_2.c
+ clang -O3 $^ -o $@
+
+dhrystone: main.c dhrystone.c $(WABT_ROOT)/wasm2c/wasm-rt-impl.c $(WABT_ROOT)/wasm2c/wasm-rt-mem-impl.c
+ $(CC) $(LDFLAGS) $(CFLAGS) $^ -o $@ $(LDLIBS)
+
+dhrystone_segue: main.c dhrystone.c $(WABT_ROOT)/wasm2c/wasm-rt-impl.c $(WABT_ROOT)/wasm2c/wasm-rt-mem-impl.c
+ $(CC) $(LDFLAGS) $(CFLAGS) $(CFLAGS_SEGUE) $^ -o $@ $(LDLIBS)
+
+benchmark: dhrystone_native dhrystone dhrystone_segue
+ @echo "Starting Dhrystone benchmark. (Smaller number is better)"
+ @sleep 2
+ @echo "Native"
+ @./dhrystone_native | grep "one run through Dhrystone"
+ @sleep 2
+ @echo "Wasm"
+ @./dhrystone | grep "one run through Dhrystone"
+ @sleep 2
+ @echo "Wasm+Segue"
+ @./dhrystone_segue | grep "one run through Dhrystone"
diff --git a/wasm2c/benchmarks/dhrystone/dhrystone.wasm b/wasm2c/benchmarks/dhrystone/dhrystone.wasm
new file mode 100755
index 00000000..b652757f
--- /dev/null
+++ b/wasm2c/benchmarks/dhrystone/dhrystone.wasm
Binary files differ
diff --git a/wasm2c/benchmarks/dhrystone/main.c b/wasm2c/benchmarks/dhrystone/main.c
new file mode 100644
index 00000000..5f7350e4
--- /dev/null
+++ b/wasm2c/benchmarks/dhrystone/main.c
@@ -0,0 +1,265 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "uvwasi.h"
+
+#include "dhrystone.h"
+
+// Context passed as the first argument to every w2c_wasi__snapshot__preview1_*
+// import implemented in this file.
+struct w2c_wasi__snapshot__preview1 {
+ // Linear memory the imports load from and store to.
+ wasm_rt_memory_t* w2c_memory;
+ // uvwasi state that the imports forward their calls to.
+ uvwasi_t* uvwasi;
+};
+
+#define WASI_SUCCESS 0
+#define WASI_BADF_ERROR 8
+
+typedef uint32_t u32;
+typedef uint64_t u64;
+
+#if WABT_BIG_ENDIAN
+#define MEM_ADDR(mem, addr, n) &(mem)->data[(mem)->size - (addr) - (n)]
+#else
+#define MEM_ADDR(mem, addr, n) &(mem)->data[addr]
+#endif
+
+#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n)
+
+#define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0)
+
+// Traps with WASM_RT_TRAP_OOB unless offset + len fits within mem->size.
+// Arguments are parenthesized and the body is wrapped in do { } while (0) so
+// the macro expands to exactly one statement, even as the body of an if/else.
+#define RANGE_CHECK(mem, offset, len)             \
+  do {                                            \
+    if ((offset) + (uint64_t)(len) > (mem)->size) \
+      TRAP(OOB);                                  \
+  } while (0)
+
+// Sets n bytes of mem starting at offset d to val, after trapping if that range
+// does not fit inside the memory.
+static inline void memory_fill(wasm_rt_memory_t* mem, u32 d, u32 val, u32 n) {
+  RANGE_CHECK(mem, d, n);
+  void* const dest = MEM_ADDR(mem, d, n);
+  memset(dest, val, n);
+}
+
+#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t))
+
+#ifdef __GNUC__
+#define FORCE_READ_INT(var) __asm__("" ::"r"(var));
+#else
+#define FORCE_READ_INT(var)
+#endif
+
+// Defines `name`, an inline load helper: it runs MEMCHECK on addr for a t1,
+// copies sizeof(t1) bytes out of the memory with wasm_rt_memcpy, applies
+// force_read to the loaded value, and returns it cast through t2 to t3.
+#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
+ static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
+ MEMCHECK(mem, addr, t1); \
+ t1 result; \
+ wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \
+ sizeof(t1)); \
+ force_read(result); \
+ return (t3)(t2)result; \
+ }
+
+// Defines `name`, an inline store helper: it runs MEMCHECK on addr for a t1,
+// casts value (a t2) to t1, and copies sizeof(t1) bytes into the memory with
+// wasm_rt_memcpy.
+#define DEFINE_STORE(name, t1, t2) \
+ static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
+ MEMCHECK(mem, addr, t1); \
+ t1 wrapped = (t1)value; \
+ wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \
+ sizeof(t1)); \
+ }
+
+DEFINE_LOAD(i8_load, u8, u8, u8, FORCE_READ_INT)
+DEFINE_LOAD(i16_load, u16, u16, u16, FORCE_READ_INT)
+DEFINE_LOAD(i32_load, u32, u32, u32, FORCE_READ_INT)
+DEFINE_LOAD(i64_load, u64, u64, u64, FORCE_READ_INT)
+DEFINE_STORE(i8_store, u8, u8)
+DEFINE_STORE(i16_store, u16, u16)
+DEFINE_STORE(i32_store, u32, u32)
+DEFINE_STORE(i64_store, u64, u64)
+
+// WASI args_get: writes nothing to guest memory and reports success.
+u32 w2c_wasi__snapshot__preview1_args_get(
+    struct w2c_wasi__snapshot__preview1* a,
+    u32 b,
+    u32 c) {
+  (void)a;
+  (void)b;
+  (void)c;
+  return WASI_SUCCESS;
+}
+// WASI args_sizes_get: stores 0 at both guest addresses (str_count and
+// buff_size) and reports success.
+u32 w2c_wasi__snapshot__preview1_args_sizes_get(
+    struct w2c_wasi__snapshot__preview1* a,
+    u32 str_count,
+    u32 buff_size) {
+  wasm_rt_memory_t* const memory = a->w2c_memory;
+  i32_store(memory, str_count, 0);
+  i32_store(memory, buff_size, 0);
+  return WASI_SUCCESS;
+}
+// WASI fd_prestat_get: always answers WASI_BADF_ERROR without touching guest
+// memory.
+u32 w2c_wasi__snapshot__preview1_fd_prestat_get(
+    struct w2c_wasi__snapshot__preview1* a,
+    u32 b,
+    u32 c) {
+  (void)a;
+  (void)b;
+  (void)c;
+  return WASI_BADF_ERROR;
+}
+
+// WASI fd_write: gathers up to 32 guest iovecs starting at iovs_offset, writes
+// them to fd through uvwasi, and stores the byte count at guest address
+// nwritten. Returns UVWASI_EINVAL for more than 32 iovecs, otherwise the
+// uvwasi result. Traps (OOB) if an iovec points outside linear memory.
+u32 w2c_wasi__snapshot__preview1_fd_write(
+    struct w2c_wasi__snapshot__preview1* a,
+    u32 fd,
+    u32 iovs_offset,
+    u32 iovs_len,
+    u32 nwritten) {
+  enum { kMaxIovs = 32 };
+  if (iovs_len > kMaxIovs)
+    return UVWASI_EINVAL;
+  // Fixed-size array: a VLA sized by iovs_len would have zero length, which is
+  // undefined behavior, when the guest passes iovs_len == 0.
+  uvwasi_ciovec_t iovs[kMaxIovs];
+
+  for (uvwasi_size_t i = 0; i < iovs_len; ++i) {
+    u32 wasi_iovs_i = iovs_offset + i * sizeof(uvwasi_size_t[2]);
+    u32 buf_loc = i32_load(a->w2c_memory, wasi_iovs_i);
+    u32 buf_len = i32_load(a->w2c_memory, wasi_iovs_i + sizeof(uvwasi_size_t));
+    // buf_loc/buf_len come from the guest; trap instead of handing uvwasi a
+    // host pointer that extends past the end of linear memory.
+    RANGE_CHECK(a->w2c_memory, buf_loc, buf_len);
+    iovs[i].buf = MEM_ADDR(a->w2c_memory, buf_loc, buf_len);
+    iovs[i].buf_len = buf_len;
+  }
+
+  // Zero-initialized so a defined value is stored even when the write fails.
+  uvwasi_size_t num_written = 0;
+  uvwasi_errno_t ret =
+      uvwasi_fd_write(a->uvwasi, fd, iovs, iovs_len, &num_written);
+  i32_store(a->w2c_memory, nwritten, num_written);
+  return ret;
+}
+
+// WASI fd_fdstat_get: queries uvwasi for fd and, on success, writes a 24-byte
+// record at guest address stat (filetype at +0, flags at +2, base rights at
+// +8, inheriting rights at +16). Guest memory is left untouched on failure.
+uint32_t w2c_wasi__snapshot__preview1_fd_fdstat_get(
+    struct w2c_wasi__snapshot__preview1* a,
+    u32 fd,
+    u32 stat) {
+  uvwasi_fdstat_t uvstat;
+  uvwasi_errno_t ret = uvwasi_fd_fdstat_get(a->uvwasi, fd, &uvstat);
+  if (ret != UVWASI_ESUCCESS)
+    return ret;
+
+  wasm_rt_memory_t* const memory = a->w2c_memory;
+  // Clear the whole record first so the bytes between fields are zero.
+  memory_fill(memory, stat, 0, 24);
+  i8_store(memory, stat, uvstat.fs_filetype);
+  i16_store(memory, stat + 2, uvstat.fs_flags);
+  i64_store(memory, stat + 8, uvstat.fs_rights_base);
+  i64_store(memory, stat + 16, uvstat.fs_rights_inheriting);
+  return ret;
+}
+
+// WASI clock_time_get: asks uvwasi for the time of clock clk_id at the given
+// precision and stores the 64-bit timestamp at guest address result. Returns
+// the uvwasi error code.
+u32 w2c_wasi__snapshot__preview1_clock_time_get(
+    struct w2c_wasi__snapshot__preview1* a,
+    u32 clk_id,
+    u64 precision,
+    u32 result) {
+  // Zero-initialized so a defined value is stored even if uvwasi fails without
+  // writing the timestamp.
+  uvwasi_timestamp_t t = 0;
+  uvwasi_errno_t ret = uvwasi_clock_time_get(a->uvwasi, clk_id, precision, &t);
+  i64_store(a->w2c_memory, result, t);
+  return ret;
+}
+
+// WASI clock_res_get: asks uvwasi for the resolution of clock clk_id and
+// stores the 64-bit value at guest address result. Returns the uvwasi error
+// code.
+u32 w2c_wasi__snapshot__preview1_clock_res_get(
+    struct w2c_wasi__snapshot__preview1* a,
+    u32 clk_id,
+    u32 result) {
+  // Zero-initialized so a defined value is stored even if uvwasi fails without
+  // writing the resolution.
+  uvwasi_timestamp_t t = 0;
+  uvwasi_errno_t ret = uvwasi_clock_res_get(a->uvwasi, clk_id, &t);
+  i64_store(a->w2c_memory, result, t);
+  return ret;
+}
+
+// Unimplemented WASI fd_seek: prints a diagnostic to stdout and aborts.
+u32 w2c_wasi__snapshot__preview1_fd_seek(struct w2c_wasi__snapshot__preview1* a,
+                                         u32 b,
+                                         u64 c,
+                                         u32 d,
+                                         u32 e) {
+  (void)a;
+  (void)b;
+  (void)c;
+  (void)d;
+  (void)e;
+  fputs("fd_seek not implemented\n", stdout);
+  abort();
+}
+// Unimplemented WASI fd_read: prints a diagnostic to stdout and aborts.
+u32 w2c_wasi__snapshot__preview1_fd_read(struct w2c_wasi__snapshot__preview1* a,
+                                         u32 b,
+                                         u32 c,
+                                         u32 d,
+                                         u32 e) {
+  (void)a;
+  (void)b;
+  (void)c;
+  (void)d;
+  (void)e;
+  fputs("fd_read not implemented\n", stdout);
+  abort();
+}
+// Unimplemented WASI fd_close: prints a diagnostic to stdout and aborts.
+u32 w2c_wasi__snapshot__preview1_fd_close(
+    struct w2c_wasi__snapshot__preview1* a,
+    u32 b) {
+  (void)a;
+  (void)b;
+  fputs("fd_close not implemented\n", stdout);
+  abort();
+}
+// Unimplemented WASI fd_fdstat_set_flags: prints a diagnostic to stdout and
+// aborts.
+u32 w2c_wasi__snapshot__preview1_fd_fdstat_set_flags(
+    struct w2c_wasi__snapshot__preview1* a,
+    u32 b,
+    u32 c) {
+  (void)a;
+  (void)b;
+  (void)c;
+  fputs("fd_fdstat_set_flags not implemented\n", stdout);
+  abort();
+}
+// Unimplemented WASI fd_prestat_dir_name: prints a diagnostic to stdout and
+// aborts.
+u32 w2c_wasi__snapshot__preview1_fd_prestat_dir_name(
+    struct w2c_wasi__snapshot__preview1* a,
+    u32 b,
+    u32 c,
+    u32 d) {
+  (void)a;
+  (void)b;
+  (void)c;
+  (void)d;
+  fputs("fd_prestat_dir_name not implemented\n", stdout);
+  abort();
+}
+// Unimplemented WASI path_open: prints a diagnostic to stdout and aborts.
+u32 w2c_wasi__snapshot__preview1_path_open(
+    struct w2c_wasi__snapshot__preview1* a,
+    u32 b,
+    u32 c,
+    u32 d,
+    u32 e,
+    u32 f,
+    u64 g,
+    u64 h,
+    u32 i,
+    u32 end) {
+  (void)a;
+  (void)b;
+  (void)c;
+  (void)d;
+  (void)e;
+  (void)f;
+  (void)g;
+  (void)h;
+  (void)i;
+  (void)end;
+  fputs("path_open not implemented\n", stdout);
+  abort();
+}
+// Unimplemented WASI proc_exit: prints a diagnostic to stdout and aborts.
+void w2c_wasi__snapshot__preview1_proc_exit(
+    struct w2c_wasi__snapshot__preview1* a,
+    u32 b) {
+  (void)a;
+  (void)b;
+  fputs("proc_exit not implemented\n", stdout);
+  abort();
+}
+
+// Host entry point: sets up uvwasi, instantiates the wasm2c-compiled Dhrystone
+// module against the WASI imports defined in this file, runs its _start
+// export, and tears everything down again.
+int main(int argc, char const* argv[]) {
+  w2c_dhrystone dhrystone;
+  struct w2c_wasi__snapshot__preview1 wasi;
+  uvwasi_t local_uvwasi_state;
+  uvwasi_options_t init_options;
+
+  // pass in standard descriptors
+  init_options.in = 0;
+  init_options.out = 1;
+  init_options.err = 2;
+  init_options.fd_table_size = 10;
+
+  // pass in args and environment
+  extern const char** environ;
+  init_options.argc = argc;
+  init_options.argv = argv;
+  init_options.envp = (const char**)environ;
+
+  // no sandboxing enforced, binary has access to everything user does
+  init_options.preopenc = 2;
+  init_options.preopens = calloc(2, sizeof(uvwasi_preopen_t));
+  if (init_options.preopens == NULL) {
+    printf("failed to allocate uvwasi preopens\n");
+    exit(1);
+  }
+
+  init_options.preopens[0].mapped_path = "/";
+  init_options.preopens[0].real_path = "/";
+  init_options.preopens[1].mapped_path = "./";
+  init_options.preopens[1].real_path = ".";
+
+  init_options.allocator = NULL;
+
+  wasm_rt_init();
+  uvwasi_errno_t ret = uvwasi_init(&local_uvwasi_state, &init_options);
+
+  if (ret != UVWASI_ESUCCESS) {
+    printf("uvwasi_init failed with error %d\n", ret);
+    exit(1);
+  }
+
+  // The imports reach the module's exported memory and the uvwasi state
+  // through this context struct.
+  wasi.w2c_memory = &dhrystone.w2c_memory;
+  wasi.uvwasi = &local_uvwasi_state;
+
+  wasm2c_dhrystone_instantiate(&dhrystone, &wasi);
+
+  w2c_dhrystone_0x5Fstart(&dhrystone);
+
+  wasm2c_dhrystone_free(&dhrystone);
+
+  uvwasi_destroy(&local_uvwasi_state);
+  // Released only after uvwasi is destroyed, so nothing can still refer to it.
+  free(init_options.preopens);
+  wasm_rt_free();
+
+  return 0;
+}
diff --git a/wasm2c/benchmarks/dhrystone/src/README.md b/wasm2c/benchmarks/dhrystone/src/README.md
new file mode 100644
index 00000000..30d270e9
--- /dev/null
+++ b/wasm2c/benchmarks/dhrystone/src/README.md
@@ -0,0 +1,23 @@
+The Dhrystone benchmark: a popular benchmark for CPU/compiler performance
+measurement. Description and sources available
+[here](https://www.netlib.org/benchmark/dhry-c).
+
+# Running the benchmark
+Use the command `make benchmark` to run the benchmark.
+
+This compares the performance of three builds of Dhrystone: (1) Native, (2)
+Wasm2c, and (3) Wasm2c + Segue optimization. The Segue optimization is enabled
+only on specific CPU+OS+Compiler combinations. If it is unsupported on your
+platform, builds (2) and (3) above will be identical.
+
+# Sample output
+
+```
+Starting Dhrystone benchmark. (Smaller number is better)
+Native
+Microseconds for one run through Dhrystone: 0.011133
+Wasm
+Microseconds for one run through Dhrystone: 0.013670
+Wasm+Segue
+Microseconds for one run through Dhrystone: 0.008666
+``` \ No newline at end of file
diff --git a/wasm2c/benchmarks/dhrystone/src/dhry.h b/wasm2c/benchmarks/dhrystone/src/dhry.h
new file mode 100644
index 00000000..be0f701e
--- /dev/null
+++ b/wasm2c/benchmarks/dhrystone/src/dhry.h
@@ -0,0 +1,306 @@
+/*
+ **************************************************************************
+ * DHRYSTONE 2.1 BENCHMARK PC VERSION
+ **************************************************************************
+ *
+ * "DHRYSTONE" Benchmark Program
+ * -----------------------------
+ *
+ * Version: C, Version 2.1
+ *
+ * File: dhry.h (part 1 of 3)
+ *
+ * Date: May 25, 1988
+ *
+ * Author: Reinhold P. Weicker
+ * Siemens AG, AUT E 51
+ * Postfach 3220
+ * 8520 Erlangen
+ * Germany (West)
+ * Phone: [+49]-9131-7-20330
+ * (8-17 Central European Time)
+ * Usenet: ..!mcsun!unido!estevax!weicker
+ *
+ * Original Version (in Ada) published in
+ * "Communications of the ACM" vol. 27., no. 10 (Oct. 1984),
+ * pp. 1013 - 1030, together with the statistics
+ * on which the distribution of statements etc. is based.
+ *
+ * In this C version, the following C library functions are used:
+ * - strcpy, strcmp (inside the measurement loop)
+ * - printf, scanf (outside the measurement loop)
+ * In addition, Berkeley UNIX system calls "times ()" or "time ()"
+ * are used for execution time measurement. For measurements
+ * on other systems, these calls have to be changed.
+ *
+ * Collection of Results:
+ * Reinhold Weicker (address see above) and
+ *
+ * Rick Richardson
+ * PC Research. Inc.
+ * 94 Apple Orchard Drive
+ * Tinton Falls, NJ 07724
+ * Phone: (201) 389-8963 (9-17 EST)
+ * Usenet: ...!uunet!pcrat!rick
+ *
+ * Please send results to Rick Richardson and/or Reinhold Weicker.
+ * Complete information should be given on hardware and software used.
+ * Hardware information includes: Machine type, CPU, type and size
+ * of caches; for microprocessors: clock frequency, memory speed
+ * (number of wait states).
+ * Software information includes: Compiler (and runtime library)
+ * manufacturer and version, compilation switches, OS version.
+ * The Operating System version may give an indication about the
+ * compiler; Dhrystone itself performs no OS calls in the measurement
+ * loop.
+ *
+ * The complete output generated by the program should be mailed
+ * such that at least some checks for correctness can be made.
+ *
+ **************************************************************************
+ *
+ * This version has changes made by Roy Longbottom to conform to a common
+ * format for a series of standard benchmarks for PCs:
+ *
+ * Running time greater than 5 seconds due to inaccuracy of the PC clock.
+ *
+ * Automatic adjustment of run time, no manually inserted parameters.
+ *
+ * Initial display of calibration times to confirm linearity.
+ *
+ * Display of results within one screen (or at a slow speed as the test
+ * progresses) so that it can be seen to have run successfully.
+ *
+ * Facilities to type in details of system used etc.
+ *
+ * All results and details appended to a results file.
+ *
+ *
+ * Roy Longbottom
+ * 101323.2241@compuserve.com
+ *
+ **************************************************************************
+ *
+ * For details of history, changes, other defines, benchmark construction
+ * statistics see official versions from ftp.nosc.mil/pub/aburto where
+ * the latest table of results (dhry.tbl) are available. See also
+ * netlib@ornl.gov
+ *
+ **************************************************************************
+ *
+ * Defines: The following "Defines" are possible:
+ * -DREG=register (default: Not defined)
+ * As an approximation to what an average C programmer
+ * might do, the "register" storage class is applied
+ * (if enabled by -DREG=register)
+ * - for local variables, if they are used (dynamically)
+ * five or more times
+ * - for parameters if they are used (dynamically)
+ * six or more times
+ * Note that an optimal "register" strategy is
+ * compiler-dependent, and that "register" declarations
+ * do not necessarily lead to faster execution.
+ * -DNOSTRUCTASSIGN (default: Not defined)
+ * Define if the C compiler does not support
+ * assignment of structures.
+ * -DNOENUMS (default: Not defined)
+ * Define if the C compiler does not support
+ * enumeration types.
+ ***************************************************************************
+ *
+ * Compilation model and measurement (IMPORTANT):
+ *
+ * This C version of Dhrystone consists of three files:
+ * - dhry.h (this file, containing global definitions and comments)
+ * - dhry_1.c (containing the code corresponding to Ada package Pack_1)
+ * - dhry_2.c (containing the code corresponding to Ada package Pack_2)
+ *
+ * The following "ground rules" apply for measurements:
+ * - Separate compilation
+ * - No procedure merging
+ * - Otherwise, compiler optimizations are allowed but should be indicated
+ * - Default results are those without register declarations
+ * See the companion paper "Rationale for Dhrystone Version 2" for a more
+ * detailed discussion of these ground rules.
+ *
+ * For 16-Bit processors (e.g. 80186, 80286), times for all compilation
+ * models ("small", "medium", "large" etc.) should be given if possible,
+ * together with a definition of these models for the compiler system used.
+ *
+ **************************************************************************
+ * Examples of Pentium Results
+ *
+ * Dhrystone Benchmark Version 2.1 (Language: C)
+ *
+ * Month run 4/1996
+ * PC model Escom
+ * CPU Pentium
+ * Clock MHz 100
+ * Cache 256K
+ * Options Neptune chipset
+ * OS/DOS Windows 95
+ * Compiler Watcom C/ C++ 10.5 Win386
+ * OptLevel -otexan -zp8 -fp5 -5r
+ * Run by Roy Longbottom
+ * From UK
+ * Mail 101323.2241@compuserve.com
+ *
+ * Final values (* implementation-dependent):
+ *
+ * Int_Glob: O.K. 5
+ * Bool_Glob: O.K. 1
+ * Ch_1_Glob: O.K. A
+ * Ch_2_Glob: O.K. B
+ * Arr_1_Glob[8]: O.K. 7
+ * Arr_2_Glob8/7: O.K. 1600010
+ * Ptr_Glob->
+ * Ptr_Comp: * 98008
+ * Discr: O.K. 0
+ * Enum_Comp: O.K. 2
+ * Int_Comp: O.K. 17
+ * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
+ * Next_Ptr_Glob->
+ * Ptr_Comp: * 98008 same as above
+ * Discr: O.K. 0
+ * Enum_Comp: O.K. 1
+ * Int_Comp: O.K. 18
+ * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
+ * Int_1_Loc: O.K. 5
+ * Int_2_Loc: O.K. 13
+ * Int_3_Loc: O.K. 7
+ * Enum_Loc: O.K. 1
+ * Str_1_Loc: O.K. DHRYSTONE PROGRAM, 1'ST STRING
+ * Str_2_Loc: O.K. DHRYSTONE PROGRAM, 2'ND STRING
+ *
+ * Register option Selected.
+ *
+ * Microseconds 1 loop: 4.53
+ * Dhrystones / second: 220690
+ * VAX MIPS rating: 125.61
+ *
+ *
+ * Dhrystone Benchmark Version 2.1 (Language: C)
+ *
+ * Month run 4/1996
+ * PC model Escom
+ * CPU Pentium
+ * Clock MHz 100
+ * Cache 256K
+ * Options Neptune chipset
+ * OS/DOS Windows 95
+ * Compiler Watcom C/ C++ 10.5 Win386
+ * OptLevel No optimisation
+ * Run by Roy Longbottom
+ * From UK
+ * Mail 101323.2241@compuserve.com
+ *
+ * Final values (* implementation-dependent):
+ *
+ * Int_Glob: O.K. 5
+ * Bool_Glob: O.K. 1
+ * Ch_1_Glob: O.K. A
+ * Ch_2_Glob: O.K. B
+ * Arr_1_Glob[8]: O.K. 7
+ * Arr_2_Glob8/7: O.K. 320010
+ * Ptr_Glob->
+ * Ptr_Comp: * 98004
+ * Discr: O.K. 0
+ * Enum_Comp: O.K. 2
+ * Int_Comp: O.K. 17
+ * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
+ * Next_Ptr_Glob->
+ * Ptr_Comp: * 98004 same as above
+ * Discr: O.K. 0
+ * Enum_Comp: O.K. 1
+ * Int_Comp: O.K. 18
+ * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
+ * Int_1_Loc: O.K. 5
+ * Int_2_Loc: O.K. 13
+ * Int_3_Loc: O.K. 7
+ * Enum_Loc: O.K. 1
+ * Str_1_Loc: O.K. DHRYSTONE PROGRAM, 1'ST STRING
+ * Str_2_Loc: O.K. DHRYSTONE PROGRAM, 2'ND STRING
+ *
+ * Register option Not selected.
+ *
+ * Microseconds 1 loop: 20.06
+ * Dhrystones / second: 49844
+ * VAX MIPS rating: 28.37
+ *
+ **************************************************************************
+ */
+
+/* Compiler and system dependent definitions: */
+
+#ifndef TIME
+#define TIMES
+#endif
+/* Use times(2) time function unless */
+/* explicitly defined otherwise */
+
+#ifdef TIMES
+/* #include <sys/types.h>
+ #include <sys/times.h> */
+/* for "times" */
+#endif
+
+#define Mic_secs_Per_Second 1000000.0
+/* Berkeley UNIX C returns process times in seconds/HZ */
+
+#ifdef NOSTRUCTASSIGN
+#define structassign(d, s) memcpy(&(d), &(s), sizeof(d))
+#else
+#define structassign(d, s) d = s
+#endif
+
+#ifdef NOENUM
+#define Ident_1 0
+#define Ident_2 1
+#define Ident_3 2
+#define Ident_4 3
+#define Ident_5 4
+typedef int Enumeration;
+#else
+typedef enum { Ident_1, Ident_2, Ident_3, Ident_4, Ident_5 } Enumeration;
+#endif
+/* for boolean and enumeration types in Ada, Pascal */
+
+/* General definitions: */
+
+#include <stdio.h>
+#include <string.h>
+
+/* for strcpy, strcmp */
+
+#define Null 0
+/* Value of a Null pointer */
+#define true 1
+#define false 0
+
+typedef int One_Thirty;
+typedef int One_Fifty;
+typedef char Capital_Letter;
+typedef int Boolean;
+typedef char Str_30[31];
+typedef int Arr_1_Dim[50];
+typedef int Arr_2_Dim[50][50];
+
+typedef struct record {
+ struct record *Ptr_Comp;
+ Enumeration Discr;
+ union {
+ struct {
+ Enumeration Enum_Comp;
+ int Int_Comp;
+ char Str_Comp[31];
+ } var_1;
+ struct {
+ Enumeration E_Comp_2;
+ char Str_2_Comp[31];
+ } var_2;
+ struct {
+ char Ch_1_Comp;
+ char Ch_2_Comp;
+ } var_3;
+ } variant;
+} Rec_Type, *Rec_Pointer;
diff --git a/wasm2c/benchmarks/dhrystone/src/dhry_1.c b/wasm2c/benchmarks/dhrystone/src/dhry_1.c
new file mode 100644
index 00000000..fb23dd3a
--- /dev/null
+++ b/wasm2c/benchmarks/dhrystone/src/dhry_1.c
@@ -0,0 +1,485 @@
+/*
+ *************************************************************************
+ *
+ * "DHRYSTONE" Benchmark Program
+ * -----------------------------
+ *
+ * Version: C, Version 2.1
+ *
+ * File: dhry_1.c (part 2 of 3)
+ *
+ * Date: May 25, 1988
+ *
+ * Author: Reinhold P. Weicker
+ *
+ *************************************************************************
+ */
+
+#include <time.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "dhry.h"
+
+/* Global Variables: */
+
+Rec_Pointer Ptr_Glob, Next_Ptr_Glob;
+int Int_Glob;
+Boolean Bool_Glob;
+char Ch_1_Glob, Ch_2_Glob;
+int Arr_1_Glob[50];
+int Arr_2_Glob[50][50];
+
+Enumeration
+Func_1(Capital_Letter Ch_1_Par_Val, Capital_Letter Ch_2_Par_Val);
+/*
+forward declaration necessary since Enumeration may not simply be int
+*/
+
+#ifndef ROPT
+#define REG
+/* REG becomes defined as empty */
+/* i.e. no register variables */
+#else
+#define REG register
+#endif
+
+void
+Proc_1(REG Rec_Pointer Ptr_Val_Par);
+void
+Proc_2(One_Fifty *Int_Par_Ref);
+void
+Proc_3(Rec_Pointer *Ptr_Ref_Par);
+void
+Proc_4();
+void
+Proc_5();
+void
+Proc_6(Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par);
+void
+Proc_7(One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val,
+ One_Fifty *Int_Par_Ref);
+void
+Proc_8(Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref, int Int_1_Par_Val,
+ int Int_2_Par_Val);
+
+Boolean
+Func_2(Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref);
+
+/* variables for time measurement: */
+
+#define Too_Small_Time 2
+/* Measurements should last at least 2 seconds */
+
+#define BILLION 1000000000L
+#define MILLION 1000000
+struct timespec Begin_Time, End_Time;
+double User_Time;
+
+double Microseconds, Dhrystones_Per_Second, Vax_Mips;
+
+/* end of variables for time measurement */
+
+int
+main(int argc, char *argv[])
+/* Runs the timed loop (at most 10 passes), then prints final values and rates */
+
+/* main program, corresponds to procedures */
+/* Main and Proc_0 in the Ada version */
+{
+ One_Fifty Int_1_Loc;
+ REG One_Fifty Int_2_Loc;
+ One_Fifty Int_3_Loc;
+ REG char Ch_Index;
+ Enumeration Enum_Loc;
+ Str_30 Str_1_Loc;
+ Str_30 Str_2_Loc;
+ REG int Run_Index;
+ REG int Number_Of_Runs;
+ int endit, count = 10; /* count: passes left; NOTE(review): endit is unused */
+ char general[9][80] = { " " }; /* NOTE(review): never referenced below */
+
+ /***********************************************************************
+ * Change for compiler and optimisation used *
+ ***********************************************************************/
+
+ Next_Ptr_Glob = (Rec_Pointer)malloc(sizeof(Rec_Type));
+ Ptr_Glob = (Rec_Pointer)malloc(sizeof(Rec_Type));
+
+ Ptr_Glob->Ptr_Comp = Next_Ptr_Glob;
+ Ptr_Glob->Discr = Ident_1;
+ Ptr_Glob->variant.var_1.Enum_Comp = Ident_3;
+ Ptr_Glob->variant.var_1.Int_Comp = 40;
+ strcpy(Ptr_Glob->variant.var_1.Str_Comp, "DHRYSTONE PROGRAM, SOME STRING");
+ strcpy(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING");
+
+ Arr_2_Glob[8][7] = 10;
+ /* Was missing in published program. Without this statement, */
+ /* Arr_2_Glob [8][7] would have an undefined value. */
+ /* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */
+ /* overflow may occur for this array element. */
+
+ printf("\n");
+ printf("Dhrystone Benchmark, Version 2.1 (Language: C or C++)\n");
+ printf("\n");
+
+ Number_Of_Runs = 5000; /* doubled at loop entry: first pass is 10000 runs */
+
+ do {
+
+ Number_Of_Runs = Number_Of_Runs * 2;
+ count = count - 1;
+ Arr_2_Glob[8][7] = 10;
+
+ /***************/
+ /* Start timer */
+ /***************/
+
+ clock_gettime(CLOCK_MONOTONIC, &Begin_Time);
+
+ for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index) {
+
+ Proc_5();
+ Proc_4();
+ /* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */
+ Int_1_Loc = 2;
+ Int_2_Loc = 3;
+ strcpy(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING");
+ Enum_Loc = Ident_2;
+ Bool_Glob = !Func_2(Str_1_Loc, Str_2_Loc);
+ /* Bool_Glob == 1 */
+ while (Int_1_Loc < Int_2_Loc) /* loop body executed once */
+ {
+ Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc;
+ /* Int_3_Loc == 7 */
+ Proc_7(Int_1_Loc, Int_2_Loc, &Int_3_Loc);
+ /* Int_3_Loc == 7 */
+ Int_1_Loc += 1;
+ } /* while */
+ /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
+ Proc_8(Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc);
+ /* Int_Glob == 5 */
+ Proc_1(Ptr_Glob);
+ for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index)
+ /* loop body executed twice */
+ {
+ if (Enum_Loc == Func_1(Ch_Index, 'C'))
+ /* then, not executed */
+ {
+ Proc_6(Ident_1, &Enum_Loc);
+ strcpy(Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING");
+ Int_2_Loc = Run_Index;
+ Int_Glob = Run_Index;
+ }
+ }
+ /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
+ Int_2_Loc = Int_2_Loc * Int_1_Loc;
+ Int_1_Loc = Int_2_Loc / Int_3_Loc;
+ Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc;
+ /* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */
+ Proc_2(&Int_1_Loc);
+ /* Int_1_Loc == 5 */
+
+ } /* loop "for Run_Index" */
+
+ /**************/
+ /* Stop timer */
+ /**************/
+
+ clock_gettime(CLOCK_MONOTONIC, &End_Time);
+
+ User_Time = (End_Time.tv_sec - Begin_Time.tv_sec) * MILLION
+ + (End_Time.tv_nsec - Begin_Time.tv_nsec) / 1000; /* microseconds */
+ User_Time = User_Time / MILLION; /* convert to seconds */
+
+ printf("%ld runs %lf seconds \n", (long)Number_Of_Runs, User_Time);
+ if (User_Time > 5.0) {
+ count = 0; /* pass was long enough: leave the do-while */
+ }
+ else {
+ if (User_Time < 0.1) {
+ Number_Of_Runs = Number_Of_Runs * 5; /* far too short: scale up faster */
+ }
+ }
+ } /* calibrate/run do while */
+ while (count > 0);
+
+ printf("\n");
+ printf("Final values (* implementation-dependent):\n");
+ printf("\n");
+ printf("Int_Glob: ");
+ if (Int_Glob == 5)
+ printf("O.K. ");
+ else
+ printf("WRONG ");
+ printf("%d ", Int_Glob);
+
+ printf("Bool_Glob: ");
+ if (Bool_Glob == 1)
+ printf("O.K. ");
+ else
+ printf("WRONG ");
+ printf("%d\n", Bool_Glob);
+
+ printf("Ch_1_Glob: ");
+ if (Ch_1_Glob == 'A')
+ printf("O.K. ");
+ else
+ printf("WRONG ");
+ printf("%c ", Ch_1_Glob);
+
+ printf("Ch_2_Glob: ");
+ if (Ch_2_Glob == 'B')
+ printf("O.K. ");
+ else
+ printf("WRONG ");
+ printf("%c\n", Ch_2_Glob);
+
+ printf("Arr_1_Glob[8]: ");
+ if (Arr_1_Glob[8] == 7)
+ printf("O.K. ");
+ else
+ printf("WRONG ");
+ printf("%d ", Arr_1_Glob[8]);
+
+ printf("Arr_2_Glob8/7: ");
+ if (Arr_2_Glob[8][7] == Number_Of_Runs + 10) /* NOTE(review): reports WRONG if the last pass applied the x5 scaling */
+ printf("O.K. ");
+ else
+ printf("WRONG ");
+ printf("%10d\n", Arr_2_Glob[8][7]);
+
+ printf("Ptr_Glob-> ");
+ printf(" Ptr_Comp: * %p\n", Ptr_Glob->Ptr_Comp); /* NOTE(review): %p expects void *, here and below */
+
+ printf(" Discr: ");
+ if (Ptr_Glob->Discr == 0)
+ printf("O.K. ");
+ else
+ printf("WRONG ");
+ printf("%d ", Ptr_Glob->Discr);
+
+ printf("Enum_Comp: ");
+ if (Ptr_Glob->variant.var_1.Enum_Comp == 2)
+ printf("O.K. ");
+ else
+ printf("WRONG ");
+ printf("%d\n", Ptr_Glob->variant.var_1.Enum_Comp);
+
+ printf(" Int_Comp: ");
+ if (Ptr_Glob->variant.var_1.Int_Comp == 17)
+ printf("O.K. ");
+ else
+ printf("WRONG ");
+ printf("%d ", Ptr_Glob->variant.var_1.Int_Comp);
+
+ printf("Str_Comp: ");
+ if (strcmp(Ptr_Glob->variant.var_1.Str_Comp,
+ "DHRYSTONE PROGRAM, SOME STRING")
+ == 0)
+ printf("O.K. ");
+ else
+ printf("WRONG ");
+ printf("%s\n", Ptr_Glob->variant.var_1.Str_Comp);
+
+ printf("Next_Ptr_Glob-> ");
+ printf(" Ptr_Comp: * %p", Next_Ptr_Glob->Ptr_Comp);
+ printf(" same as above\n");
+
+ printf(" Discr: ");
+ if (Next_Ptr_Glob->Discr == 0)
+ printf("O.K. ");
+ else
+ printf("WRONG ");
+ printf("%d ", Next_Ptr_Glob->Discr);
+
+ printf("Enum_Comp: ");
+ if (Next_Ptr_Glob->variant.var_1.Enum_Comp == 1)
+ printf("O.K. ");
+ else
+ printf("WRONG ");
+ printf("%d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp);
+
+ printf(" Int_Comp: ");
+ if (Next_Ptr_Glob->variant.var_1.Int_Comp == 18)
+ printf("O.K. ");
+ else
+ printf("WRONG ");
+ printf("%d ", Next_Ptr_Glob->variant.var_1.Int_Comp);
+
+ printf("Str_Comp: ");
+ if (strcmp(Next_Ptr_Glob->variant.var_1.Str_Comp,
+ "DHRYSTONE PROGRAM, SOME STRING")
+ == 0)
+ printf("O.K. ");
+ else
+ printf("WRONG ");
+ printf("%s\n", Next_Ptr_Glob->variant.var_1.Str_Comp);
+
+ printf("Int_1_Loc: ");
+ if (Int_1_Loc == 5)
+ printf("O.K. ");
+ else
+ printf("WRONG ");
+ printf("%d ", Int_1_Loc);
+
+ printf("Int_2_Loc: ");
+ if (Int_2_Loc == 13)
+ printf("O.K. ");
+ else
+ printf("WRONG ");
+ printf("%d\n", Int_2_Loc);
+
+ printf("Int_3_Loc: ");
+ if (Int_3_Loc == 7)
+ printf("O.K. ");
+ else
+ printf("WRONG ");
+ printf("%d ", Int_3_Loc);
+
+ printf("Enum_Loc: ");
+ if (Enum_Loc == 1)
+ printf("O.K. ");
+ else
+ printf("WRONG ");
+ printf("%d\n", Enum_Loc);
+
+ printf("Str_1_Loc: ");
+ if (strcmp(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING") == 0)
+ printf("O.K. ");
+ else
+ printf("WRONG ");
+ printf("%s\n", Str_1_Loc);
+
+ printf("Str_2_Loc: ");
+ if (strcmp(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING") == 0)
+ printf("O.K. ");
+ else
+ printf("WRONG ");
+ printf("%s\n", Str_2_Loc);
+
+ printf("\n");
+
+ if (User_Time < Too_Small_Time) {
+ printf("Measured time too small to obtain meaningful results\n");
+ printf("Please increase number of runs\n");
+ printf("\n");
+ }
+ else {
+ Microseconds = User_Time * Mic_secs_Per_Second / (double)Number_Of_Runs;
+ Dhrystones_Per_Second = (double)Number_Of_Runs / User_Time;
+ Vax_Mips = Dhrystones_Per_Second / 1757.0;
+
+ printf("Microseconds for one run through Dhrystone: ");
+ printf("%lf \n", Microseconds);
+ printf("Dhrystones per Second: ");
+ printf("%lf \n", Dhrystones_Per_Second);
+ printf("VAX MIPS rating = ");
+ printf("%lf \n", Vax_Mips);
+ printf("\n");
+ }
+
+ free(Next_Ptr_Glob);
+ free(Ptr_Glob);
+ return 0;
+}
+
+void
+Proc_1(REG Rec_Pointer Ptr_Val_Par)
+/******************/
+
+/* executed once per run; main passes Ptr_Glob as Ptr_Val_Par */
+{
+ REG Rec_Pointer Next_Record = Ptr_Val_Par->Ptr_Comp;
+ /* == Next_Ptr_Glob */
+ /* Local variable, initialized with Ptr_Val_Par->Ptr_Comp, */
+ /* corresponds to "rename" in Ada, "with" in Pascal */
+
+ structassign(*Ptr_Val_Par->Ptr_Comp, *Ptr_Glob); /* copy *Ptr_Glob into the linked record */
+ Ptr_Val_Par->variant.var_1.Int_Comp = 5;
+ Next_Record->variant.var_1.Int_Comp = Ptr_Val_Par->variant.var_1.Int_Comp;
+ Next_Record->Ptr_Comp = Ptr_Val_Par->Ptr_Comp;
+ Proc_3(&Next_Record->Ptr_Comp);
+ /* Ptr_Val_Par->Ptr_Comp->Ptr_Comp
+ == Ptr_Glob->Ptr_Comp */
+ if (Next_Record->Discr == Ident_1)
+ /* then, executed */
+ {
+ Next_Record->variant.var_1.Int_Comp = 6;
+ Proc_6(Ptr_Val_Par->variant.var_1.Enum_Comp,
+ &Next_Record->variant.var_1.Enum_Comp);
+ Next_Record->Ptr_Comp = Ptr_Glob->Ptr_Comp;
+ Proc_7(Next_Record->variant.var_1.Int_Comp, 10,
+ &Next_Record->variant.var_1.Int_Comp); /* Int_Comp becomes 6 + 2 + 10 == 18 */
+ }
+ else { /* not executed */
+ structassign(*Ptr_Val_Par, *Ptr_Val_Par->Ptr_Comp);
+ }
+} /* Proc_1 */
+
+void
+Proc_2(One_Fifty *Int_Par_Ref)
+/******************/
+/* executed once per run */
+/* *Int_Par_Ref == 1, becomes 5 (1 + 10 - 1 - Int_Glob, with Int_Glob == 5) */
+
+{
+ One_Fifty Int_Loc;
+ Enumeration Enum_Loc; /* NOTE(review): only assigned when Ch_1_Glob == 'A' */
+
+ Int_Loc = *Int_Par_Ref + 10;
+ do /* executed once */
+ if (Ch_1_Glob == 'A')
+ /* then, executed */
+ {
+ Int_Loc -= 1;
+ *Int_Par_Ref = Int_Loc - Int_Glob;
+ Enum_Loc = Ident_1;
+ } /* if */
+ while (Enum_Loc != Ident_1); /* false after the assignment above: loop ends */
+} /* Proc_2 */
+
+void
+Proc_3(Rec_Pointer *Ptr_Ref_Par)
+/******************/
+/* executed once per run */
+/* *Ptr_Ref_Par becomes Ptr_Glob->Ptr_Comp */
+
+{
+ if (Ptr_Glob != Null)
+ /* then, executed */
+ *Ptr_Ref_Par = Ptr_Glob->Ptr_Comp;
+ Proc_7(10, Int_Glob, &Ptr_Glob->variant.var_1.Int_Comp); /* not guarded by the if above */
+} /* Proc_3 */
+
+void
+Proc_4() /* without parameters */
+/*******/
+/* executed once per run; ORs (Ch_1_Glob == 'A') into Bool_Glob, sets Ch_2_Glob */
+{
+ Boolean Bool_Loc;
+
+ Bool_Loc = Ch_1_Glob == 'A';
+ Bool_Glob = Bool_Loc | Bool_Glob; /* bitwise OR of two int-valued flags */
+ Ch_2_Glob = 'B';
+} /* Proc_4 */
+
+void
+Proc_5() /* without parameters */
+/*******/
+/* executed once per run; resets Ch_1_Glob to 'A' and Bool_Glob to false */
+{
+ Ch_1_Glob = 'A';
+ Bool_Glob = false;
+} /* Proc_5 */
+
+/* Byte-wise copy used by structassign() when structure assignment */
+/* is unavailable; compiled only if NOSTRUCTASSIGN is defined */
+#ifdef NOSTRUCTASSIGN /* NOTE(review): dhry.h already pulls in <string.h>'s memcpy */
+memcpy(d, s, l) register char *d;
+register char *s;
+register int l;
+{
+ while (l--) /* copy l bytes from s to d, front to back */
+ *d++ = *s++;
+}
+#endif
diff --git a/wasm2c/benchmarks/dhrystone/src/dhry_2.c b/wasm2c/benchmarks/dhrystone/src/dhry_2.c
new file mode 100644
index 00000000..276785cb
--- /dev/null
+++ b/wasm2c/benchmarks/dhrystone/src/dhry_2.c
@@ -0,0 +1,187 @@
+/*
+ *************************************************************************
+ *
+ * "DHRYSTONE" Benchmark Program
+ * -----------------------------
+ *
+ * Version: C, Version 2.1
+ *
+ * File: dhry_2.c (part 3 of 3)
+ *
+ * Date: May 25, 1988
+ *
+ * Author: Reinhold P. Weicker
+ *
+ *************************************************************************
+ */
+
+#include "dhry.h"
+
+#ifndef ROPT
+#define REG
+/* REG becomes defined as empty, i.e. no register variables */
+/* (keyed on ROPT, not REG, so this file follows the same switch as dhry_1.c) */
+#else
+#define REG register
+#endif
+
+extern int Int_Glob;
+extern char Ch_1_Glob;
+
+Boolean
+Func_3(Enumeration Enum_Par_Val);
+
+void
+Proc_6(Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par)
+/* maps Enum_Val_Par to a value stored through Enum_Ref_Par */
+/* executed once per run */
+/* Enum_Val_Par == Ident_3, *Enum_Ref_Par becomes Ident_2 */
+
+{
+ *Enum_Ref_Par = Enum_Val_Par;
+ if (!Func_3(Enum_Val_Par))
+ /* then, not executed */
+ *Enum_Ref_Par = Ident_4;
+ switch (Enum_Val_Par) {
+ case Ident_1:
+ *Enum_Ref_Par = Ident_1;
+ break;
+ case Ident_2:
+ if (Int_Glob > 100)
+ /* then */
+ *Enum_Ref_Par = Ident_1;
+ else
+ *Enum_Ref_Par = Ident_4;
+ break;
+ case Ident_3: /* executed */
+ *Enum_Ref_Par = Ident_2;
+ break;
+ case Ident_4: /* keeps the value stored before the switch */
+ break;
+ case Ident_5:
+ *Enum_Ref_Par = Ident_3;
+ break;
+ } /* switch */
+} /* Proc_6 */
+
+void
+Proc_7(One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val, One_Fifty *Int_Par_Ref)
+/* stores Int_1_Par_Val + 2 + Int_2_Par_Val into *Int_Par_Ref */
+/* executed three times per run */
+/* first call: Int_1_Par_Val == 2, Int_2_Par_Val == 3, */
+/* *Int_Par_Ref becomes 7 */
+/* second call: Int_1_Par_Val == 10, Int_2_Par_Val == 5, */
+/* *Int_Par_Ref becomes 17 */
+/* third call: Int_1_Par_Val == 6, Int_2_Par_Val == 10, */
+/* *Int_Par_Ref becomes 18 */
+
+{
+ One_Fifty Int_Loc;
+
+ Int_Loc = Int_1_Par_Val + 2;
+ *Int_Par_Ref = Int_2_Par_Val + Int_Loc;
+} /* Proc_7 */
+
+void
+Proc_8(Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref, int Int_1_Par_Val,
+ int Int_2_Par_Val)
+/*********************************************************************/
+/* executed once per run */
+/* Int_1_Par_Val == 3 */
+/* Int_2_Par_Val == 7 */
+
+{
+ REG One_Fifty Int_Index;
+ REG One_Fifty Int_Loc;
+
+ Int_Loc = Int_1_Par_Val + 5; /* == 8 for the values above */
+ Arr_1_Par_Ref[Int_Loc] = Int_2_Par_Val;
+ Arr_1_Par_Ref[Int_Loc + 1] = Arr_1_Par_Ref[Int_Loc];
+ Arr_1_Par_Ref[Int_Loc + 30] = Int_Loc;
+ for (Int_Index = Int_Loc; Int_Index <= Int_Loc + 1; ++Int_Index)
+ Arr_2_Par_Ref[Int_Loc][Int_Index] = Int_Loc;
+ Arr_2_Par_Ref[Int_Loc][Int_Loc - 1] += 1; /* element [8][7]: one increment per run */
+ Arr_2_Par_Ref[Int_Loc + 20][Int_Loc] = Arr_1_Par_Ref[Int_Loc];
+ Int_Glob = 5;
+} /* Proc_8 */
+
+Enumeration
+Func_1(Capital_Letter Ch_1_Par_Val, Capital_Letter Ch_2_Par_Val)
+/* returns Ident_1 when the two characters differ, else Ident_2 */
+/* executed three times per run */
+/* first call: Ch_1_Par_Val == 'R', Ch_2_Par_Val == 'Y' */
+/* second call: Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C' */
+/* third call: Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C' */
+
+{
+ Capital_Letter Ch_1_Loc;
+ Capital_Letter Ch_2_Loc;
+
+ Ch_1_Loc = Ch_1_Par_Val;
+ Ch_2_Loc = Ch_1_Loc;
+ if (Ch_2_Loc != Ch_2_Par_Val)
+ /* then, executed */
+ return (Ident_1);
+ else /* not executed */
+ {
+ Ch_1_Glob = Ch_1_Loc; /* side effect on the global only in this branch */
+ return (Ident_2);
+ }
+} /* Func_1 */
+
+Boolean
+Func_2(Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref)
+/* returns true if Ch_Loc == 'R' or Str_1_Par_Ref compares greater, else false */
+/* executed once per run */
+/* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */
+/* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */
+
+{
+ REG One_Thirty Int_Loc;
+ Capital_Letter Ch_Loc; /* only assigned inside the loop below */
+
+ Int_Loc = 2;
+ while (Int_Loc <= 2) /* loop body executed once */
+ if (Func_1(Str_1_Par_Ref[Int_Loc], Str_2_Par_Ref[Int_Loc + 1])
+ == Ident_1)
+ /* then, executed */
+ {
+ Ch_Loc = 'A';
+ Int_Loc += 1;
+ } /* if, while */
+ if (Ch_Loc >= 'W' && Ch_Loc < 'Z')
+ /* then, not executed */
+ Int_Loc = 7;
+ if (Ch_Loc == 'R')
+ /* then, not executed */
+ return (true);
+ else /* executed */
+ {
+ if (strcmp(Str_1_Par_Ref, Str_2_Par_Ref) > 0)
+ /* then, not executed */
+ {
+ Int_Loc += 7;
+ Int_Glob = Int_Loc;
+ return (true);
+ }
+ else /* executed */
+ return (false);
+ } /* if Ch_Loc */
+} /* Func_2 */
+
+Boolean
+Func_3(Enumeration Enum_Par_Val)
+/* returns true only when Enum_Par_Val == Ident_3 */
+/* executed once per run */
+/* Enum_Par_Val == Ident_3 */
+
+{
+ Enumeration Enum_Loc;
+
+ Enum_Loc = Enum_Par_Val;
+ if (Enum_Loc == Ident_3)
+ /* then, executed */
+ return (true);
+ else /* not executed */
+ return (false);
+} /* Func_3 */
diff --git a/wasm2c/examples/fac/fac.c b/wasm2c/examples/fac/fac.c
index 1365def8..d9f821df 100644
--- a/wasm2c/examples/fac/fac.c
+++ b/wasm2c/examples/fac/fac.c
@@ -39,6 +39,48 @@
#define MEM_ADDR(mem, addr, n) &(mem)->data[addr]
#endif
+#ifndef WASM_RT_USE_SEGUE
+// Memory functions can use the segue optimization if allowed. The segue
+// optimization uses x86 segments to point to a linear memory. We use this
+// optimization when:
+//
+// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE
+// (2) on x86_64 without WABT_BIG_ENDIAN enabled
+// (3) the Wasm module uses a single unshared imported or exported memory
+// (4) the compiler supports: intrinsics for (rd|wr)(fs|gs)base, "address
+// namespaces" for accessing pointers, and supports memcpy on pointers with
+// custom "address namespaces". GCC does not support the memcpy requirement,
+// so this leaves only clang for now.
+#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \
+ (defined(__x86_64__) || defined(_M_X64)) && \
+ WASM_RT_MODULE_IS_SINGLE_UNSHARED_MEMORY && __clang__ && \
+ __has_builtin(__builtin_ia32_wrgsbase64)
+#define WASM_RT_USE_SEGUE 1
+#else
+#define WASM_RT_USE_SEGUE 0
+#endif
+#endif
+
+#if WASM_RT_USE_SEGUE
+// Different segments are free on different platforms
+// Windows uses GS for TLS, FS is free
+// Linux uses FS for TLS, GS is free
+#if defined(__WIN32)
+#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdfsbase64()
+#define WASM_RT_SEGUE_WRITE_BASE(base) \
+ __builtin_ia32_wrfsbase64((uintptr_t)base)
+#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_fs*)(uintptr_t)addr)
+#else
+// POSIX style OS
+#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64()
+#define WASM_RT_SEGUE_WRITE_BASE(base) \
+ __builtin_ia32_wrgsbase64((uintptr_t)base)
+#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr)
+#endif
+#else
+#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n)
+#endif
+
#define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0)
#if WASM_RT_STACK_DEPTH_COUNT
@@ -128,20 +170,22 @@ static inline void load_data(void* dest, const void* src, size_t n) {
load_data(MEM_ADDR(&m, o, s), i, s); \
} while (0)
-#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
- static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
- MEMCHECK(mem, addr, t1); \
- t1 result; \
- wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \
- force_read(result); \
- return (t3)(t2)result; \
+#define DEFINE_LOAD(name, t1, t2, t3, force_read) \
+ static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
+ MEMCHECK(mem, addr, t1); \
+ t1 result; \
+ wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \
+ sizeof(t1)); \
+ force_read(result); \
+ return (t3)(t2)result; \
}
-#define DEFINE_STORE(name, t1, t2) \
- static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
- MEMCHECK(mem, addr, t1); \
- t1 wrapped = (t1)value; \
- wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \
+#define DEFINE_STORE(name, t1, t2) \
+ static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
+ MEMCHECK(mem, addr, t1); \
+ t1 wrapped = (t1)value; \
+ wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \
+ sizeof(t1)); \
}
DEFINE_LOAD(i32_load, u32, u32, u32, FORCE_READ_INT)
@@ -656,7 +700,8 @@ FUNC_TYPE_T(w2c_fac_t0) = "\x07\x80\x96\x7a\x42\xf7\x3e\xe6\x70\x5c\x2f\xac\x83\
/* export: 'fac' */
u32 w2c_fac_fac(w2c_fac* instance, u32 var_p0) {
- return w2c_fac_fac_0(instance, var_p0);
+ u32 ret = w2c_fac_fac_0(instance, var_p0);
+ return ret;
}
void wasm2c_fac_instantiate(w2c_fac* instance) {
diff --git a/wasm2c/wasm-rt.h b/wasm2c/wasm-rt.h
index f06748eb..5c07ff44 100644
--- a/wasm2c/wasm-rt.h
+++ b/wasm2c/wasm-rt.h
@@ -88,6 +88,14 @@ extern "C" {
#endif
/**
+ * If enabled, perform additional sanity checks in the generated wasm2c code and
+ * wasm2c runtime. This is useful to enable on debug builds.
+ */
+#ifndef WASM_RT_SANITY_CHECKS
+#define WASM_RT_SANITY_CHECKS 0
+#endif
+
+/**
* Backward compatibility: Convert the previously exposed
* WASM_RT_MEMCHECK_SIGNAL_HANDLER macro to the ALLOCATION and CHECK macros that
* are now used.
@@ -197,6 +205,18 @@ extern "C" {
#endif
/**
+ * This macro, if defined to 1 (i.e., allows the "segue" optimization), allows
+ * Wasm2c to use segment registers to speed up access to the linear heap. Note
+ * that even if allowed in this way, the segment registers would only be used if
+ * Wasm2c output is compiled for a suitable architecture and OS and the produced
+ * C file is compiled by supported compilers. The exact restrictions are listed
+ * in detail in src/template/wasm2c.declarations.c
+ */
+#ifndef WASM_RT_ALLOW_SEGUE
+#define WASM_RT_ALLOW_SEGUE 0
+#endif
+
+/**
* This macro, if defined, allows the embedder to disable all stack exhaustion
* checks. This a non conformant configuration, i.e., this does not respect
* Wasm's specification, and may compromise security. Use with caution.