diff options
author | Shravan Narayan <shravanrn@gmail.com> | 2024-06-26 11:30:44 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-26 09:30:44 -0700 |
commit | 0e871afa4aaac9fe0b1f00cb42a59be666657a06 (patch) | |
tree | 22c449953033d0ea98200d9117c11419054a762e | |
parent | f820d171654de2dcb8cbf7078b4c98336c8e3c69 (diff) | |
download | wabt-0e871afa4aaac9fe0b1f00cb42a59be666657a06.tar.gz wabt-0e871afa4aaac9fe0b1f00cb42a59be666657a06.tar.bz2 wabt-0e871afa4aaac9fe0b1f00cb42a59be666657a06.zip |
wasm2c: Segue optimization for modules with a single unshared memory (#2395)
-rw-r--r-- | .github/workflows/build.yml | 3 | ||||
-rw-r--r-- | src/c-writer.cc | 64 | ||||
-rw-r--r-- | src/prebuilt/wasm2c_source_declarations.cc | 121 | ||||
-rw-r--r-- | src/template/wasm2c.declarations.c | 80 | ||||
-rw-r--r-- | test/wasm2c/add.txt | 79 | ||||
-rw-r--r-- | test/wasm2c/check-imports.txt | 84 | ||||
-rw-r--r-- | test/wasm2c/export-names.txt | 119 | ||||
-rw-r--r-- | test/wasm2c/hello.txt | 91 | ||||
-rw-r--r-- | test/wasm2c/minimal.txt | 80 | ||||
-rw-r--r-- | test/wasm2c/tail-calls.txt | 80 | ||||
-rw-r--r-- | wasm2c/README.md | 39 | ||||
-rw-r--r-- | wasm2c/benchmarks/dhrystone/.gitignore | 5 | ||||
-rw-r--r-- | wasm2c/benchmarks/dhrystone/Makefile | 38 | ||||
-rwxr-xr-x | wasm2c/benchmarks/dhrystone/dhrystone.wasm | bin | 0 -> 33999 bytes | |||
-rw-r--r-- | wasm2c/benchmarks/dhrystone/main.c | 265 | ||||
-rw-r--r-- | wasm2c/benchmarks/dhrystone/src/README.md | 23 | ||||
-rw-r--r-- | wasm2c/benchmarks/dhrystone/src/dhry.h | 306 | ||||
-rw-r--r-- | wasm2c/benchmarks/dhrystone/src/dhry_1.c | 485 | ||||
-rw-r--r-- | wasm2c/benchmarks/dhrystone/src/dhry_2.c | 187 | ||||
-rw-r--r-- | wasm2c/examples/fac/fac.c | 71 | ||||
-rw-r--r-- | wasm2c/wasm-rt.h | 20 |
21 files changed, 2105 insertions, 135 deletions
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 21604429..5932a2c4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -177,7 +177,8 @@ jobs: runs-on: ubuntu-latest env: USE_NINJA: "1" - WASM2C_CFLAGS: "-DWASM_RT_USE_MMAP=1 -DWASM_RT_SKIP_SIGNAL_RECOVERY=1 -DWASM_RT_NONCONFORMING_UNCHECKED_STACK_EXHAUSTION=1 -DWASM2C_TEST_EMBEDDER_SIGNAL_HANDLING" + WASM2C_CC: "clang" + WASM2C_CFLAGS: "-DWASM_RT_USE_MMAP=1 -DWASM_RT_SKIP_SIGNAL_RECOVERY=1 -DWASM_RT_NONCONFORMING_UNCHECKED_STACK_EXHAUSTION=1 -DWASM2C_TEST_EMBEDDER_SIGNAL_HANDLING -DWASM_RT_ALLOW_SEGUE=1 -mfsgsbase -DWASM_RT_SANITY_CHECKS=1 -Wno-pass-failed" steps: - uses: actions/setup-python@v1 with: diff --git a/src/c-writer.cc b/src/c-writer.cc index cd0ee1fd..1a091957 100644 --- a/src/c-writer.cc +++ b/src/c-writer.cc @@ -308,6 +308,7 @@ class CWriter { void Indent(int size = INDENT_SIZE); void Dedent(int size = INDENT_SIZE); + void NonIndented(std::function<void()> func); void WriteIndent(); void WriteData(const char* src, size_t size); void Writef(const char* format, ...); @@ -402,6 +403,9 @@ class CWriter { void WriteElemInitializerDecls(); void WriteElemInitializers(); void WriteElemTableInit(bool, const ElemSegment*, const Table*); + bool IsSingleUnsharedMemory(); + void InstallSegueBase(Memory* memory, bool save_old_value); + void RestoreSegueBase(); void WriteExports(CWriterPhase); void WriteTailCallExports(CWriterPhase); void WriteInitDecl(); @@ -1021,6 +1025,13 @@ void CWriter::Dedent(int size) { assert(indent_ >= 0); } +void CWriter::NonIndented(std::function<void()> func) { + int copy = indent_; + indent_ = 0; + func(); + indent_ = copy; +} + void CWriter::WriteIndent() { static char s_indent[] = " " @@ -1479,6 +1490,11 @@ std::string CWriter::GenerateHeaderGuard() const { void CWriter::WriteSourceTop() { Write(s_source_includes); Write(Newline(), "#include \"", header_name_, "\"", Newline()); + + if (IsSingleUnsharedMemory()) { + Write("#define 
IS_SINGLE_UNSHARED_MEMORY 1", Newline()); + } + Write(s_source_declarations, Newline()); if (module_->features_used.simd) { @@ -2425,6 +2441,28 @@ void CWriter::WriteElemTableInit(bool active_initialization, Write(");", Newline()); } +bool CWriter::IsSingleUnsharedMemory() { + return module_->memories.size() == 1 && + !module_->memories[0]->page_limits.is_shared; +} + +void CWriter::InstallSegueBase(Memory* memory, bool save_old_value) { + NonIndented([&] { Write("#if WASM_RT_USE_SEGUE", Newline()); }); + if (save_old_value) { + Write("uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE();", Newline()); + } + auto primary_memory = + ExternalInstanceRef(ModuleFieldType::Memory, memory->name); + Write("WASM_RT_SEGUE_WRITE_BASE(", primary_memory, ".data);", Newline()); + NonIndented([&] { Write("#endif", Newline()); }); +} + +void CWriter::RestoreSegueBase() { + NonIndented([&] { Write("#if WASM_RT_USE_SEGUE", Newline()); }); + Write("WASM_RT_SEGUE_WRITE_BASE(segue_saved_base);", Newline()); + NonIndented([&] { Write("#endif", Newline()); }); +} + void CWriter::WriteExports(CWriterPhase kind) { if (module_->exports.empty()) return; @@ -2500,8 +2538,14 @@ void CWriter::WriteExports(CWriterPhase kind) { switch (export_->kind) { case ExternalKind::Func: { Write(OpenBrace()); - if (func_->GetNumResults() > 0) { - Write("return "); + if (IsSingleUnsharedMemory()) { + InstallSegueBase(module_->memories[0], true /* save_old_value */); + } + auto num_results = func_->GetNumResults(); + if (num_results > 1) { + Write(func_->decl.sig.result_types, " ret = "); + } else if (num_results == 1) { + Write(func_->GetResultType(0), " ret = "); } Write(ExternalRef(ModuleFieldType::Func, internal_name), "("); @@ -2513,6 +2557,12 @@ void CWriter::WriteExports(CWriterPhase kind) { Write("instance"); } WriteParamSymbols(index_to_name); + if (IsSingleUnsharedMemory()) { + RestoreSegueBase(); + } + if (num_results > 0) { + Write("return ret;", Newline()); + } Write(CloseBrace(), Newline()); 
local_sym_map_.clear(); @@ -2611,6 +2661,9 @@ void CWriter::WriteInit() { } if (!module_->memories.empty()) { Write("init_memories(instance);", Newline()); + if (IsSingleUnsharedMemory()) { + InstallSegueBase(module_->memories[0], true /* save_old_value */); + } } if (!module_->tables.empty() && !module_->elem_segments.empty()) { Write("init_elem_instances(instance);", Newline()); @@ -2631,6 +2684,10 @@ void CWriter::WriteInit() { } Write(Newline()); } + + if (IsSingleUnsharedMemory()) { + RestoreSegueBase(); + } Write(CloseBrace(), Newline()); } @@ -3733,6 +3790,9 @@ void CWriter::Write(const ExprList& exprs) { Write(StackVar(0), " = ", func, "(", ExternalInstancePtr(ModuleFieldType::Memory, memory->name), ", ", StackVar(0), ");", Newline()); + if (IsSingleUnsharedMemory()) { + InstallSegueBase(module_->memories[0], false /* save_old_value */); + } break; } diff --git a/src/prebuilt/wasm2c_source_declarations.cc b/src/prebuilt/wasm2c_source_declarations.cc index ac0629e4..11aac524 100644 --- a/src/prebuilt/wasm2c_source_declarations.cc +++ b/src/prebuilt/wasm2c_source_declarations.cc @@ -40,6 +40,70 @@ R"w2c_template(#define MEM_ADDR(mem, addr, n) &(mem)->data[addr] R"w2c_template(#endif )w2c_template" R"w2c_template( +#ifndef WASM_RT_USE_SEGUE +)w2c_template" +R"w2c_template(// Memory functions can use the segue optimization if allowed. The segue +)w2c_template" +R"w2c_template(// optimization uses x86 segments to point to a linear memory. 
We use this +)w2c_template" +R"w2c_template(// optimization when: +)w2c_template" +R"w2c_template(// +)w2c_template" +R"w2c_template(// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE +)w2c_template" +R"w2c_template(// (2) on x86_64 without WABT_BIG_ENDIAN enabled +)w2c_template" +R"w2c_template(// (3) the Wasm module uses a single unshared imported or exported memory +)w2c_template" +R"w2c_template(// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces" +)w2c_template" +R"w2c_template(// for accessing pointers, and supports memcpy on pointers with custom +)w2c_template" +R"w2c_template(// "address namespaces". GCC does not support the memcpy requirement, so +)w2c_template" +R"w2c_template(// this leaves only clang for now. +)w2c_template" +R"w2c_template(// (5) The OS doesn't replace the segment register on context switch which +)w2c_template" +R"w2c_template(// eliminates windows for now +)w2c_template" +R"w2c_template(#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \ +)w2c_template" +R"w2c_template( (defined(__x86_64__) || defined(_M_X64)) && IS_SINGLE_UNSHARED_MEMORY && \ +)w2c_template" +R"w2c_template( __clang__ && __has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32) +)w2c_template" +R"w2c_template(#define WASM_RT_USE_SEGUE 1 +)w2c_template" +R"w2c_template(#else +)w2c_template" +R"w2c_template(#define WASM_RT_USE_SEGUE 0 +)w2c_template" +R"w2c_template(#endif +)w2c_template" +R"w2c_template(#endif +)w2c_template" +R"w2c_template( +#if WASM_RT_USE_SEGUE +)w2c_template" +R"w2c_template(// POSIX uses FS for TLS, GS is free +)w2c_template" +R"w2c_template(#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64() +)w2c_template" +R"w2c_template(#define WASM_RT_SEGUE_WRITE_BASE(base) \ +)w2c_template" +R"w2c_template( __builtin_ia32_wrgsbase64((uintptr_t)base) +)w2c_template" +R"w2c_template(#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr) +)w2c_template" +R"w2c_template(#else +)w2c_template" 
+R"w2c_template(#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n) +)w2c_template" +R"w2c_template(#endif +)w2c_template" +R"w2c_template( #define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0) )w2c_template" R"w2c_template( @@ -124,13 +188,38 @@ R"w2c_template( TRAP(OOB); R"w2c_template(#endif )w2c_template" R"w2c_template( +#if WASM_RT_USE_SEGUE && WASM_RT_SANITY_CHECKS +)w2c_template" +R"w2c_template(#include <stdio.h> +)w2c_template" +R"w2c_template(#define WASM_RT_CHECK_BASE(mem) \ +)w2c_template" +R"w2c_template( if (((uintptr_t)((mem)->data)) != ((uintptr_t)WASM_RT_SEGUE_READ_BASE())) { \ +)w2c_template" +R"w2c_template( puts("Segment register mismatch\n"); \ +)w2c_template" +R"w2c_template( abort(); \ +)w2c_template" +R"w2c_template( } +)w2c_template" +R"w2c_template(#else +)w2c_template" +R"w2c_template(#define WASM_RT_CHECK_BASE(mem) +)w2c_template" +R"w2c_template(#endif +)w2c_template" +R"w2c_template( #if WASM_RT_MEMCHECK_GUARD_PAGES )w2c_template" -R"w2c_template(#define MEMCHECK(mem, a, t) +R"w2c_template(#define MEMCHECK(mem, a, t) WASM_RT_CHECK_BASE(mem); )w2c_template" R"w2c_template(#else )w2c_template" -R"w2c_template(#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t)) +R"w2c_template(#define MEMCHECK(mem, a, t) \ +)w2c_template" +R"w2c_template( WASM_RT_CHECK_BASE(mem); \ +)w2c_template" +R"w2c_template( RANGE_CHECK(mem, a, sizeof(t)) )w2c_template" R"w2c_template(#endif )w2c_template" @@ -204,32 +293,36 @@ R"w2c_template( load_data(MEM_ADDR(&m, o, s), i, s); \ R"w2c_template( } while (0) )w2c_template" R"w2c_template( -#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ +#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ +)w2c_template" +R"w2c_template( static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ )w2c_template" -R"w2c_template( static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ +R"w2c_template( MEMCHECK(mem, addr, t1); \ )w2c_template" -R"w2c_template( MEMCHECK(mem, addr, t1); \ +R"w2c_template( t1 result; 
\ )w2c_template" -R"w2c_template( t1 result; \ +R"w2c_template( wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \ )w2c_template" -R"w2c_template( wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \ +R"w2c_template( sizeof(t1)); \ )w2c_template" -R"w2c_template( force_read(result); \ +R"w2c_template( force_read(result); \ )w2c_template" -R"w2c_template( return (t3)(t2)result; \ +R"w2c_template( return (t3)(t2)result; \ )w2c_template" R"w2c_template( } )w2c_template" R"w2c_template( -#define DEFINE_STORE(name, t1, t2) \ +#define DEFINE_STORE(name, t1, t2) \ +)w2c_template" +R"w2c_template( static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ )w2c_template" -R"w2c_template( static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ +R"w2c_template( MEMCHECK(mem, addr, t1); \ )w2c_template" -R"w2c_template( MEMCHECK(mem, addr, t1); \ +R"w2c_template( t1 wrapped = (t1)value; \ )w2c_template" -R"w2c_template( t1 wrapped = (t1)value; \ +R"w2c_template( wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \ )w2c_template" -R"w2c_template( wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \ +R"w2c_template( sizeof(t1)); \ )w2c_template" R"w2c_template( } )w2c_template" diff --git a/src/template/wasm2c.declarations.c b/src/template/wasm2c.declarations.c index 6399affe..5261a25b 100644 --- a/src/template/wasm2c.declarations.c +++ b/src/template/wasm2c.declarations.c @@ -20,6 +20,39 @@ #define MEM_ADDR(mem, addr, n) &(mem)->data[addr] #endif +#ifndef WASM_RT_USE_SEGUE +// Memory functions can use the segue optimization if allowed. The segue +// optimization uses x86 segments to point to a linear memory. 
We use this +// optimization when: +// +// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE +// (2) on x86_64 without WABT_BIG_ENDIAN enabled +// (3) the Wasm module uses a single unshared imported or exported memory +// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces" +// for accessing pointers, and supports memcpy on pointers with custom +// "address namespaces". GCC does not support the memcpy requirement, so +// this leaves only clang for now. +// (5) The OS doesn't replace the segment register on context switch which +// eliminates windows for now +#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \ + (defined(__x86_64__) || defined(_M_X64)) && IS_SINGLE_UNSHARED_MEMORY && \ + __clang__ && __has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32) +#define WASM_RT_USE_SEGUE 1 +#else +#define WASM_RT_USE_SEGUE 0 +#endif +#endif + +#if WASM_RT_USE_SEGUE +// POSIX uses FS for TLS, GS is free +#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64() +#define WASM_RT_SEGUE_WRITE_BASE(base) \ + __builtin_ia32_wrgsbase64((uintptr_t)base) +#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr) +#else +#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n) +#endif + #define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0) #if WASM_RT_STACK_DEPTH_COUNT @@ -67,10 +100,23 @@ static inline bool func_types_eq(const wasm_rt_func_type_t a, TRAP(OOB); #endif +#if WASM_RT_USE_SEGUE && WASM_RT_SANITY_CHECKS +#include <stdio.h> +#define WASM_RT_CHECK_BASE(mem) \ + if (((uintptr_t)((mem)->data)) != ((uintptr_t)WASM_RT_SEGUE_READ_BASE())) { \ + puts("Segment register mismatch\n"); \ + abort(); \ + } +#else +#define WASM_RT_CHECK_BASE(mem) +#endif + #if WASM_RT_MEMCHECK_GUARD_PAGES -#define MEMCHECK(mem, a, t) +#define MEMCHECK(mem, a, t) WASM_RT_CHECK_BASE(mem); #else -#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t)) +#define MEMCHECK(mem, a, t) \ + WASM_RT_CHECK_BASE(mem); \ + RANGE_CHECK(mem, a, sizeof(t)) #endif #ifdef 
__GNUC__ @@ -109,20 +155,22 @@ static inline void load_data(void* dest, const void* src, size_t n) { load_data(MEM_ADDR(&m, o, s), i, s); \ } while (0) -#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ - static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ - MEMCHECK(mem, addr, t1); \ - t1 result; \ - wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \ - force_read(result); \ - return (t3)(t2)result; \ - } - -#define DEFINE_STORE(name, t1, t2) \ - static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ - MEMCHECK(mem, addr, t1); \ - t1 wrapped = (t1)value; \ - wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \ +#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ + static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ + MEMCHECK(mem, addr, t1); \ + t1 result; \ + wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \ + sizeof(t1)); \ + force_read(result); \ + return (t3)(t2)result; \ + } + +#define DEFINE_STORE(name, t1, t2) \ + static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ + MEMCHECK(mem, addr, t1); \ + t1 wrapped = (t1)value; \ + wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \ + sizeof(t1)); \ } DEFINE_LOAD(i32_load, u32, u32, u32, FORCE_READ_INT) diff --git a/test/wasm2c/add.txt b/test/wasm2c/add.txt index d250c4c0..ffb27f59 100644 --- a/test/wasm2c/add.txt +++ b/test/wasm2c/add.txt @@ -87,6 +87,39 @@ u32 w2c_test_add(w2c_test*, u32, u32); #define MEM_ADDR(mem, addr, n) &(mem)->data[addr] #endif +#ifndef WASM_RT_USE_SEGUE +// Memory functions can use the segue optimization if allowed. The segue +// optimization uses x86 segments to point to a linear memory. 
We use this +// optimization when: +// +// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE +// (2) on x86_64 without WABT_BIG_ENDIAN enabled +// (3) the Wasm module uses a single unshared imported or exported memory +// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces" +// for accessing pointers, and supports memcpy on pointers with custom +// "address namespaces". GCC does not support the memcpy requirement, so +// this leaves only clang for now. +// (5) The OS doesn't replace the segment register on context switch which +// eliminates windows for now +#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \ + (defined(__x86_64__) || defined(_M_X64)) && IS_SINGLE_UNSHARED_MEMORY && \ + __clang__ && __has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32) +#define WASM_RT_USE_SEGUE 1 +#else +#define WASM_RT_USE_SEGUE 0 +#endif +#endif + +#if WASM_RT_USE_SEGUE +// POSIX uses FS for TLS, GS is free +#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64() +#define WASM_RT_SEGUE_WRITE_BASE(base) \ + __builtin_ia32_wrgsbase64((uintptr_t)base) +#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr) +#else +#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n) +#endif + #define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0) #if WASM_RT_STACK_DEPTH_COUNT @@ -134,10 +167,23 @@ static inline bool func_types_eq(const wasm_rt_func_type_t a, TRAP(OOB); #endif +#if WASM_RT_USE_SEGUE && WASM_RT_SANITY_CHECKS +#include <stdio.h> +#define WASM_RT_CHECK_BASE(mem) \ + if (((uintptr_t)((mem)->data)) != ((uintptr_t)WASM_RT_SEGUE_READ_BASE())) { \ + puts("Segment register mismatch\n"); \ + abort(); \ + } +#else +#define WASM_RT_CHECK_BASE(mem) +#endif + #if WASM_RT_MEMCHECK_GUARD_PAGES -#define MEMCHECK(mem, a, t) +#define MEMCHECK(mem, a, t) WASM_RT_CHECK_BASE(mem); #else -#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t)) +#define MEMCHECK(mem, a, t) \ + WASM_RT_CHECK_BASE(mem); \ + RANGE_CHECK(mem, a, sizeof(t)) #endif #ifdef 
__GNUC__ @@ -176,20 +222,22 @@ static inline void load_data(void* dest, const void* src, size_t n) { load_data(MEM_ADDR(&m, o, s), i, s); \ } while (0) -#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ - static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ - MEMCHECK(mem, addr, t1); \ - t1 result; \ - wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \ - force_read(result); \ - return (t3)(t2)result; \ +#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ + static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ + MEMCHECK(mem, addr, t1); \ + t1 result; \ + wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \ + sizeof(t1)); \ + force_read(result); \ + return (t3)(t2)result; \ } -#define DEFINE_STORE(name, t1, t2) \ - static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ - MEMCHECK(mem, addr, t1); \ - t1 wrapped = (t1)value; \ - wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \ +#define DEFINE_STORE(name, t1, t2) \ + static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ + MEMCHECK(mem, addr, t1); \ + t1 wrapped = (t1)value; \ + wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \ + sizeof(t1)); \ } DEFINE_LOAD(i32_load, u32, u32, u32, FORCE_READ_INT) @@ -704,7 +752,8 @@ FUNC_TYPE_T(w2c_test_t0) = "\x92\xfb\x6a\xdf\x49\x07\x0a\x83\xbe\x08\x02\x68\xcd /* export: 'add' */ u32 w2c_test_add(w2c_test* instance, u32 var_p0, u32 var_p1) { - return w2c_test_add_0(instance, var_p0, var_p1); + u32 ret = w2c_test_add_0(instance, var_p0, var_p1); + return ret; } void wasm2c_test_instantiate(w2c_test* instance) { diff --git a/test/wasm2c/check-imports.txt b/test/wasm2c/check-imports.txt index 3da1741e..caa5dc24 100644 --- a/test/wasm2c/check-imports.txt +++ b/test/wasm2c/check-imports.txt @@ -88,6 +88,7 @@ extern const u8 wasm2c_test_is64_env_0x5F_linear_memory; #endif #include "wasm.h" +#define IS_SINGLE_UNSHARED_MEMORY 1 // Computes a pointer to an object of the 
given size in a little-endian memory. // @@ -110,6 +111,39 @@ extern const u8 wasm2c_test_is64_env_0x5F_linear_memory; #define MEM_ADDR(mem, addr, n) &(mem)->data[addr] #endif +#ifndef WASM_RT_USE_SEGUE +// Memory functions can use the segue optimization if allowed. The segue +// optimization uses x86 segments to point to a linear memory. We use this +// optimization when: +// +// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE +// (2) on x86_64 without WABT_BIG_ENDIAN enabled +// (3) the Wasm module uses a single unshared imported or exported memory +// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces" +// for accessing pointers, and supports memcpy on pointers with custom +// "address namespaces". GCC does not support the memcpy requirement, so +// this leaves only clang for now. +// (5) The OS doesn't replace the segment register on context switch which +// eliminates windows for now +#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \ + (defined(__x86_64__) || defined(_M_X64)) && IS_SINGLE_UNSHARED_MEMORY && \ + __clang__ && __has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32) +#define WASM_RT_USE_SEGUE 1 +#else +#define WASM_RT_USE_SEGUE 0 +#endif +#endif + +#if WASM_RT_USE_SEGUE +// POSIX uses FS for TLS, GS is free +#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64() +#define WASM_RT_SEGUE_WRITE_BASE(base) \ + __builtin_ia32_wrgsbase64((uintptr_t)base) +#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr) +#else +#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n) +#endif + #define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0) #if WASM_RT_STACK_DEPTH_COUNT @@ -157,10 +191,23 @@ static inline bool func_types_eq(const wasm_rt_func_type_t a, TRAP(OOB); #endif +#if WASM_RT_USE_SEGUE && WASM_RT_SANITY_CHECKS +#include <stdio.h> +#define WASM_RT_CHECK_BASE(mem) \ + if (((uintptr_t)((mem)->data)) != ((uintptr_t)WASM_RT_SEGUE_READ_BASE())) { \ + puts("Segment register mismatch\n"); \ + abort(); \ + 
} +#else +#define WASM_RT_CHECK_BASE(mem) +#endif + #if WASM_RT_MEMCHECK_GUARD_PAGES -#define MEMCHECK(mem, a, t) +#define MEMCHECK(mem, a, t) WASM_RT_CHECK_BASE(mem); #else -#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t)) +#define MEMCHECK(mem, a, t) \ + WASM_RT_CHECK_BASE(mem); \ + RANGE_CHECK(mem, a, sizeof(t)) #endif #ifdef __GNUC__ @@ -199,20 +246,22 @@ static inline void load_data(void* dest, const void* src, size_t n) { load_data(MEM_ADDR(&m, o, s), i, s); \ } while (0) -#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ - static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ - MEMCHECK(mem, addr, t1); \ - t1 result; \ - wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \ - force_read(result); \ - return (t3)(t2)result; \ +#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ + static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ + MEMCHECK(mem, addr, t1); \ + t1 result; \ + wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \ + sizeof(t1)); \ + force_read(result); \ + return (t3)(t2)result; \ } -#define DEFINE_STORE(name, t1, t2) \ - static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ - MEMCHECK(mem, addr, t1); \ - t1 wrapped = (t1)value; \ - wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \ +#define DEFINE_STORE(name, t1, t2) \ + static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ + MEMCHECK(mem, addr, t1); \ + t1 wrapped = (t1)value; \ + wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \ + sizeof(t1)); \ } DEFINE_LOAD(i32_load, u32, u32, u32, FORCE_READ_INT) @@ -759,7 +808,14 @@ void wasm2c_test_instantiate(w2c_test* instance, struct w2c_env* w2c_env_instanc init_instance_import(instance, w2c_env_instance); init_tables(instance); init_memories(instance); +#if WASM_RT_USE_SEGUE + uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE(); + WASM_RT_SEGUE_WRITE_BASE((*instance->w2c_env_0x5F_linear_memory).data); +#endif 
init_elem_instances(instance); +#if WASM_RT_USE_SEGUE + WASM_RT_SEGUE_WRITE_BASE(segue_saved_base); +#endif } void wasm2c_test_free(w2c_test* instance) { diff --git a/test/wasm2c/export-names.txt b/test/wasm2c/export-names.txt index ed9772b9..7c4e6eda 100644 --- a/test/wasm2c/export-names.txt +++ b/test/wasm2c/export-names.txt @@ -88,6 +88,7 @@ void w2c_test_0xE20x9D0xA40xEF0xB80x8F(w2c_test*); #endif #include "wasm.h" +#define IS_SINGLE_UNSHARED_MEMORY 1 // Computes a pointer to an object of the given size in a little-endian memory. // @@ -110,6 +111,39 @@ void w2c_test_0xE20x9D0xA40xEF0xB80x8F(w2c_test*); #define MEM_ADDR(mem, addr, n) &(mem)->data[addr] #endif +#ifndef WASM_RT_USE_SEGUE +// Memory functions can use the segue optimization if allowed. The segue +// optimization uses x86 segments to point to a linear memory. We use this +// optimization when: +// +// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE +// (2) on x86_64 without WABT_BIG_ENDIAN enabled +// (3) the Wasm module uses a single unshared imported or exported memory +// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces" +// for accessing pointers, and supports memcpy on pointers with custom +// "address namespaces". GCC does not support the memcpy requirement, so +// this leaves only clang for now. 
+// (5) The OS doesn't replace the segment register on context switch which +// eliminates windows for now +#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \ + (defined(__x86_64__) || defined(_M_X64)) && IS_SINGLE_UNSHARED_MEMORY && \ + __clang__ && __has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32) +#define WASM_RT_USE_SEGUE 1 +#else +#define WASM_RT_USE_SEGUE 0 +#endif +#endif + +#if WASM_RT_USE_SEGUE +// POSIX uses FS for TLS, GS is free +#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64() +#define WASM_RT_SEGUE_WRITE_BASE(base) \ + __builtin_ia32_wrgsbase64((uintptr_t)base) +#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr) +#else +#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n) +#endif + #define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0) #if WASM_RT_STACK_DEPTH_COUNT @@ -157,10 +191,23 @@ static inline bool func_types_eq(const wasm_rt_func_type_t a, TRAP(OOB); #endif +#if WASM_RT_USE_SEGUE && WASM_RT_SANITY_CHECKS +#include <stdio.h> +#define WASM_RT_CHECK_BASE(mem) \ + if (((uintptr_t)((mem)->data)) != ((uintptr_t)WASM_RT_SEGUE_READ_BASE())) { \ + puts("Segment register mismatch\n"); \ + abort(); \ + } +#else +#define WASM_RT_CHECK_BASE(mem) +#endif + #if WASM_RT_MEMCHECK_GUARD_PAGES -#define MEMCHECK(mem, a, t) +#define MEMCHECK(mem, a, t) WASM_RT_CHECK_BASE(mem); #else -#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t)) +#define MEMCHECK(mem, a, t) \ + WASM_RT_CHECK_BASE(mem); \ + RANGE_CHECK(mem, a, sizeof(t)) #endif #ifdef __GNUC__ @@ -199,20 +246,22 @@ static inline void load_data(void* dest, const void* src, size_t n) { load_data(MEM_ADDR(&m, o, s), i, s); \ } while (0) -#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ - static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ - MEMCHECK(mem, addr, t1); \ - t1 result; \ - wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \ - force_read(result); \ - return (t3)(t2)result; \ +#define DEFINE_LOAD(name, t1, t2, t3, 
force_read) \ + static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ + MEMCHECK(mem, addr, t1); \ + t1 result; \ + wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \ + sizeof(t1)); \ + force_read(result); \ + return (t3)(t2)result; \ } -#define DEFINE_STORE(name, t1, t2) \ - static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ - MEMCHECK(mem, addr, t1); \ - t1 wrapped = (t1)value; \ - wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \ +#define DEFINE_STORE(name, t1, t2) \ + static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ + MEMCHECK(mem, addr, t1); \ + t1 wrapped = (t1)value; \ + wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \ + sizeof(t1)); \ } DEFINE_LOAD(i32_load, u32, u32, u32, FORCE_READ_INT) @@ -730,27 +779,62 @@ static void init_memories(w2c_test* instance) { /* export: '' */ void w2c_test_(w2c_test* instance) { +#if WASM_RT_USE_SEGUE + uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE(); + WASM_RT_SEGUE_WRITE_BASE((*instance->w2c_0x5Cmodule_import0x200x2A0x2F).data); +#endif w2c_test__0(instance); +#if WASM_RT_USE_SEGUE + WASM_RT_SEGUE_WRITE_BASE(segue_saved_base); +#endif } /* export: '*\2F' */ void w2c_test_0x2A0x2F(w2c_test* instance) { +#if WASM_RT_USE_SEGUE + uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE(); + WASM_RT_SEGUE_WRITE_BASE((*instance->w2c_0x5Cmodule_import0x200x2A0x2F).data); +#endif w2c_test__0(instance); +#if WASM_RT_USE_SEGUE + WASM_RT_SEGUE_WRITE_BASE(segue_saved_base); +#endif } /* export: '\3F\3F\2F' */ void w2c_test_0x3F0x3F0x2F(w2c_test* instance) { +#if WASM_RT_USE_SEGUE + uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE(); + WASM_RT_SEGUE_WRITE_BASE((*instance->w2c_0x5Cmodule_import0x200x2A0x2F).data); +#endif w2c_test__0(instance); +#if WASM_RT_USE_SEGUE + WASM_RT_SEGUE_WRITE_BASE(segue_saved_base); +#endif } /* export: '\0A' */ void w2c_test_0x0A(w2c_test* instance) { +#if WASM_RT_USE_SEGUE + uintptr_t 
segue_saved_base = WASM_RT_SEGUE_READ_BASE(); + WASM_RT_SEGUE_WRITE_BASE((*instance->w2c_0x5Cmodule_import0x200x2A0x2F).data); +#endif w2c_test__0(instance); +#if WASM_RT_USE_SEGUE + WASM_RT_SEGUE_WRITE_BASE(segue_saved_base); +#endif } /* export: '\E2\9D\A4\EF\B8\8F' */ void w2c_test_0xE20x9D0xA40xEF0xB80x8F(w2c_test* instance) { +#if WASM_RT_USE_SEGUE + uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE(); + WASM_RT_SEGUE_WRITE_BASE((*instance->w2c_0x5Cmodule_import0x200x2A0x2F).data); +#endif w2c_test__0(instance); +#if WASM_RT_USE_SEGUE + WASM_RT_SEGUE_WRITE_BASE(segue_saved_base); +#endif } static void init_instance_import(w2c_test* instance, struct w2c_0x5Cmodule* w2c_0x5Cmodule_instance) { @@ -765,6 +849,13 @@ void wasm2c_test_instantiate(w2c_test* instance, struct w2c_0x5Cmodule* w2c_0x5C assert(wasm_rt_is_initialized()); init_instance_import(instance, w2c_0x5Cmodule_instance); init_memories(instance); +#if WASM_RT_USE_SEGUE + uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE(); + WASM_RT_SEGUE_WRITE_BASE((*instance->w2c_0x5Cmodule_import0x200x2A0x2F).data); +#endif +#if WASM_RT_USE_SEGUE + WASM_RT_SEGUE_WRITE_BASE(segue_saved_base); +#endif } void wasm2c_test_free(w2c_test* instance) { diff --git a/test/wasm2c/hello.txt b/test/wasm2c/hello.txt index d56c7216..77608b6c 100644 --- a/test/wasm2c/hello.txt +++ b/test/wasm2c/hello.txt @@ -96,6 +96,7 @@ void w2c_test_0x5Fstart(w2c_test*); #endif #include "wasm.h" +#define IS_SINGLE_UNSHARED_MEMORY 1 // Computes a pointer to an object of the given size in a little-endian memory. // @@ -118,6 +119,39 @@ void w2c_test_0x5Fstart(w2c_test*); #define MEM_ADDR(mem, addr, n) &(mem)->data[addr] #endif +#ifndef WASM_RT_USE_SEGUE +// Memory functions can use the segue optimization if allowed. The segue +// optimization uses x86 segments to point to a linear memory. 
We use this +// optimization when: +// +// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE +// (2) on x86_64 without WABT_BIG_ENDIAN enabled +// (3) the Wasm module uses a single unshared imported or exported memory +// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces" +// for accessing pointers, and supports memcpy on pointers with custom +// "address namespaces". GCC does not support the memcpy requirement, so +// this leaves only clang for now. +// (5) The OS doesn't replace the segment register on context switch which +// eliminates windows for now +#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \ + (defined(__x86_64__) || defined(_M_X64)) && IS_SINGLE_UNSHARED_MEMORY && \ + __clang__ && __has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32) +#define WASM_RT_USE_SEGUE 1 +#else +#define WASM_RT_USE_SEGUE 0 +#endif +#endif + +#if WASM_RT_USE_SEGUE +// POSIX uses FS for TLS, GS is free +#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64() +#define WASM_RT_SEGUE_WRITE_BASE(base) \ + __builtin_ia32_wrgsbase64((uintptr_t)base) +#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr) +#else +#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n) +#endif + #define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0) #if WASM_RT_STACK_DEPTH_COUNT @@ -165,10 +199,23 @@ static inline bool func_types_eq(const wasm_rt_func_type_t a, TRAP(OOB); #endif +#if WASM_RT_USE_SEGUE && WASM_RT_SANITY_CHECKS +#include <stdio.h> +#define WASM_RT_CHECK_BASE(mem) \ + if (((uintptr_t)((mem)->data)) != ((uintptr_t)WASM_RT_SEGUE_READ_BASE())) { \ + puts("Segment register mismatch\n"); \ + abort(); \ + } +#else +#define WASM_RT_CHECK_BASE(mem) +#endif + #if WASM_RT_MEMCHECK_GUARD_PAGES -#define MEMCHECK(mem, a, t) +#define MEMCHECK(mem, a, t) WASM_RT_CHECK_BASE(mem); #else -#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t)) +#define MEMCHECK(mem, a, t) \ + WASM_RT_CHECK_BASE(mem); \ + RANGE_CHECK(mem, a, sizeof(t)) #endif #ifdef 
__GNUC__ @@ -207,20 +254,22 @@ static inline void load_data(void* dest, const void* src, size_t n) { load_data(MEM_ADDR(&m, o, s), i, s); \ } while (0) -#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ - static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ - MEMCHECK(mem, addr, t1); \ - t1 result; \ - wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \ - force_read(result); \ - return (t3)(t2)result; \ +#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ + static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ + MEMCHECK(mem, addr, t1); \ + t1 result; \ + wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \ + sizeof(t1)); \ + force_read(result); \ + return (t3)(t2)result; \ } -#define DEFINE_STORE(name, t1, t2) \ - static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ - MEMCHECK(mem, addr, t1); \ - t1 wrapped = (t1)value; \ - wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \ +#define DEFINE_STORE(name, t1, t2) \ + static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ + MEMCHECK(mem, addr, t1); \ + t1 wrapped = (t1)value; \ + wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \ + sizeof(t1)); \ } DEFINE_LOAD(i32_load, u32, u32, u32, FORCE_READ_INT) @@ -767,7 +816,14 @@ wasm_rt_memory_t* w2c_test_memory(w2c_test* instance) { /* export: '_start' */ void w2c_test_0x5Fstart(w2c_test* instance) { +#if WASM_RT_USE_SEGUE + uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE(); + WASM_RT_SEGUE_WRITE_BASE(instance->w2c_memory.data); +#endif w2c_test_0x5Fstart_0(instance); +#if WASM_RT_USE_SEGUE + WASM_RT_SEGUE_WRITE_BASE(segue_saved_base); +#endif } static void init_instance_import(w2c_test* instance, struct w2c_wasi__snapshot__preview1* w2c_wasi__snapshot__preview1_instance) { @@ -779,8 +835,15 @@ void wasm2c_test_instantiate(w2c_test* instance, struct w2c_wasi__snapshot__prev init_instance_import(instance, w2c_wasi__snapshot__preview1_instance); 
init_tables(instance); init_memories(instance); +#if WASM_RT_USE_SEGUE + uintptr_t segue_saved_base = WASM_RT_SEGUE_READ_BASE(); + WASM_RT_SEGUE_WRITE_BASE(instance->w2c_memory.data); +#endif init_elem_instances(instance); init_data_instances(instance); +#if WASM_RT_USE_SEGUE + WASM_RT_SEGUE_WRITE_BASE(segue_saved_base); +#endif } void wasm2c_test_free(w2c_test* instance) { diff --git a/test/wasm2c/minimal.txt b/test/wasm2c/minimal.txt index 9199efb8..e22e3662 100644 --- a/test/wasm2c/minimal.txt +++ b/test/wasm2c/minimal.txt @@ -81,6 +81,39 @@ wasm_rt_func_type_t wasm2c_test_get_func_type(uint32_t param_count, uint32_t res #define MEM_ADDR(mem, addr, n) &(mem)->data[addr] #endif +#ifndef WASM_RT_USE_SEGUE +// Memory functions can use the segue optimization if allowed. The segue +// optimization uses x86 segments to point to a linear memory. We use this +// optimization when: +// +// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE +// (2) on x86_64 without WABT_BIG_ENDIAN enabled +// (3) the Wasm module uses a single unshared imported or exported memory +// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces" +// for accessing pointers, and supports memcpy on pointers with custom +// "address namespaces". GCC does not support the memcpy requirement, so +// this leaves only clang for now. 
+// (5) The OS doesn't replace the segment register on context switch which +// eliminates windows for now +#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \ + (defined(__x86_64__) || defined(_M_X64)) && IS_SINGLE_UNSHARED_MEMORY && \ + __clang__ && __has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32) +#define WASM_RT_USE_SEGUE 1 +#else +#define WASM_RT_USE_SEGUE 0 +#endif +#endif + +#if WASM_RT_USE_SEGUE +// POSIX uses FS for TLS, GS is free +#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64() +#define WASM_RT_SEGUE_WRITE_BASE(base) \ + __builtin_ia32_wrgsbase64((uintptr_t)base) +#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr) +#else +#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n) +#endif + #define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0) #if WASM_RT_STACK_DEPTH_COUNT @@ -128,10 +161,23 @@ static inline bool func_types_eq(const wasm_rt_func_type_t a, TRAP(OOB); #endif +#if WASM_RT_USE_SEGUE && WASM_RT_SANITY_CHECKS +#include <stdio.h> +#define WASM_RT_CHECK_BASE(mem) \ + if (((uintptr_t)((mem)->data)) != ((uintptr_t)WASM_RT_SEGUE_READ_BASE())) { \ + puts("Segment register mismatch\n"); \ + abort(); \ + } +#else +#define WASM_RT_CHECK_BASE(mem) +#endif + #if WASM_RT_MEMCHECK_GUARD_PAGES -#define MEMCHECK(mem, a, t) +#define MEMCHECK(mem, a, t) WASM_RT_CHECK_BASE(mem); #else -#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t)) +#define MEMCHECK(mem, a, t) \ + WASM_RT_CHECK_BASE(mem); \ + RANGE_CHECK(mem, a, sizeof(t)) #endif #ifdef __GNUC__ @@ -170,20 +216,22 @@ static inline void load_data(void* dest, const void* src, size_t n) { load_data(MEM_ADDR(&m, o, s), i, s); \ } while (0) -#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ - static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ - MEMCHECK(mem, addr, t1); \ - t1 result; \ - wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \ - force_read(result); \ - return (t3)(t2)result; \ - } - -#define DEFINE_STORE(name, t1, t2) \ 
- static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ - MEMCHECK(mem, addr, t1); \ - t1 wrapped = (t1)value; \ - wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \ +#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ + static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ + MEMCHECK(mem, addr, t1); \ + t1 result; \ + wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \ + sizeof(t1)); \ + force_read(result); \ + return (t3)(t2)result; \ + } + +#define DEFINE_STORE(name, t1, t2) \ + static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ + MEMCHECK(mem, addr, t1); \ + t1 wrapped = (t1)value; \ + wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \ + sizeof(t1)); \ } DEFINE_LOAD(i32_load, u32, u32, u32, FORCE_READ_INT) diff --git a/test/wasm2c/tail-calls.txt b/test/wasm2c/tail-calls.txt index b2ee451b..dd97badf 100644 --- a/test/wasm2c/tail-calls.txt +++ b/test/wasm2c/tail-calls.txt @@ -111,6 +111,39 @@ void wasm_tailcall_w2c_test_tailcaller(void **instance_ptr, void *tail_call_stac #define MEM_ADDR(mem, addr, n) &(mem)->data[addr] #endif +#ifndef WASM_RT_USE_SEGUE +// Memory functions can use the segue optimization if allowed. The segue +// optimization uses x86 segments to point to a linear memory. We use this +// optimization when: +// +// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE +// (2) on x86_64 without WABT_BIG_ENDIAN enabled +// (3) the Wasm module uses a single unshared imported or exported memory +// (4) the compiler supports: intrinsics for (rd|wr)gsbase, "address namespaces" +// for accessing pointers, and supports memcpy on pointers with custom +// "address namespaces". GCC does not support the memcpy requirement, so +// this leaves only clang for now. 
+// (5) The OS doesn't replace the segment register on context switch which +// eliminates windows for now +#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \ + (defined(__x86_64__) || defined(_M_X64)) && IS_SINGLE_UNSHARED_MEMORY && \ + __clang__ && __has_builtin(__builtin_ia32_wrgsbase64) && !defined(_WIN32) +#define WASM_RT_USE_SEGUE 1 +#else +#define WASM_RT_USE_SEGUE 0 +#endif +#endif + +#if WASM_RT_USE_SEGUE +// POSIX uses FS for TLS, GS is free +#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64() +#define WASM_RT_SEGUE_WRITE_BASE(base) \ + __builtin_ia32_wrgsbase64((uintptr_t)base) +#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr) +#else +#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n) +#endif + #define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0) #if WASM_RT_STACK_DEPTH_COUNT @@ -158,10 +191,23 @@ static inline bool func_types_eq(const wasm_rt_func_type_t a, TRAP(OOB); #endif +#if WASM_RT_USE_SEGUE && WASM_RT_SANITY_CHECKS +#include <stdio.h> +#define WASM_RT_CHECK_BASE(mem) \ + if (((uintptr_t)((mem)->data)) != ((uintptr_t)WASM_RT_SEGUE_READ_BASE())) { \ + puts("Segment register mismatch\n"); \ + abort(); \ + } +#else +#define WASM_RT_CHECK_BASE(mem) +#endif + #if WASM_RT_MEMCHECK_GUARD_PAGES -#define MEMCHECK(mem, a, t) +#define MEMCHECK(mem, a, t) WASM_RT_CHECK_BASE(mem); #else -#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t)) +#define MEMCHECK(mem, a, t) \ + WASM_RT_CHECK_BASE(mem); \ + RANGE_CHECK(mem, a, sizeof(t)) #endif #ifdef __GNUC__ @@ -200,20 +246,22 @@ static inline void load_data(void* dest, const void* src, size_t n) { load_data(MEM_ADDR(&m, o, s), i, s); \ } while (0) -#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ - static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ - MEMCHECK(mem, addr, t1); \ - t1 result; \ - wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \ - force_read(result); \ - return (t3)(t2)result; \ - } - -#define DEFINE_STORE(name, t1, t2) \ 
- static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ - MEMCHECK(mem, addr, t1); \ - t1 wrapped = (t1)value; \ - wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \ +#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ + static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ + MEMCHECK(mem, addr, t1); \ + t1 result; \ + wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \ + sizeof(t1)); \ + force_read(result); \ + return (t3)(t2)result; \ + } + +#define DEFINE_STORE(name, t1, t2) \ + static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ + MEMCHECK(mem, addr, t1); \ + t1 wrapped = (t1)value; \ + wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \ + sizeof(t1)); \ } DEFINE_LOAD(i32_load, u32, u32, u32, FORCE_READ_INT) diff --git a/wasm2c/README.md b/wasm2c/README.md index f928ae40..6d8b9999 100644 --- a/wasm2c/README.md +++ b/wasm2c/README.md @@ -141,6 +141,45 @@ fac(10) -> 3628800 You can take a look at the all of these files in [wasm2c/examples/fac](/wasm2c/examples/fac). +### Enabling extra sanity checks + +Wasm2c provides a macro `WASM_RT_SANITY_CHECKS` that if defined enables +additional sanity checks in the produced Wasm2c code. Note that this may have a +high performance overhead, and is thus only recommended for debug builds. + +### Enabling Segue (a Linux x86_64 target specific optimization) + +Wasm2c can use the "Segue" optimization if allowed. The segue optimization uses +an x86 segment register to store the location of Wasm's linear memory, when +compiling a Wasm module with clang, running on x86_64 Linux, and the macro +`WASM_RT_ALLOW_SEGUE` is defined. Segue is not used if + +1. The Wasm module uses more than a single unshared imported or exported + memory +2. The wasm2c code is compiled with GCC. Segue requires intrinsics for + (rd|wr)gsbase, "address namespaces" for accessing pointers, and support for + memcpy on pointers with custom "address namespaces".
GCC does not support the + memcpy requirement. +3. The code is compiled for Windows as Windows doesn't restore the segment + register on context switch. + +The wasm2c generated code automatically sets the unused segment register (the +`%gs` register on x86_64 Linux) during the function calls into wasm2c generated +module, restores it after calls to external modules etc. Any host function +written in C would continue to work without changes as C code does not modify +the unused segment register `%gs` (See +[here](https://www.kernel.org/doc/html/next/x86/x86_64/fsgs.html) for details). +However, any host functions written in assembly that clobber the free segment +register must restore the value of this register prior to executing or returning +control to wasm2c generated code. + +You can test the performance of the Segue optimization by running Dhrystone with +and without Segue: + +```bash +cd wasm2c/benchmarks/dhrystone && make +``` + ## Looking at the generated header, `fac.h` The generated header file looks something like this: diff --git a/wasm2c/benchmarks/dhrystone/.gitignore b/wasm2c/benchmarks/dhrystone/.gitignore new file mode 100644 index 00000000..7cc06514 --- /dev/null +++ b/wasm2c/benchmarks/dhrystone/.gitignore @@ -0,0 +1,5 @@ +dhrystone_native +dhrystone +dhrystone_segue +dhrystone.h +dhrystone.c diff --git a/wasm2c/benchmarks/dhrystone/Makefile b/wasm2c/benchmarks/dhrystone/Makefile new file mode 100644 index 00000000..b7af5d4b --- /dev/null +++ b/wasm2c/benchmarks/dhrystone/Makefile @@ -0,0 +1,38 @@ +WABT_ROOT=../../..
+CC=clang +CFLAGS=-I$(WABT_ROOT)/wasm2c -I $(WABT_ROOT)/third_party/uvwasi/include/ -O3 +CFLAGS_SEGUE=-DWASM_RT_ALLOW_SEGUE=1 -mfsgsbase +LDFLAGS=-L$(WABT_ROOT)/build/_deps/libuv-build -L$(WABT_ROOT)/build/third_party/uvwasi +LDLIBS=-luvwasi_a -luv_a -lm + +all: benchmark + +clean: + rm -rf dhrystone dhrystone.wasm dhrystone.c dhrystone.h + +dhrystone.wasm: src/dhry_1.c src/dhry_2.c + /opt/wasi-sdk/bin/clang -O3 $^ -o $@ + +dhrystone.c: dhrystone.wasm $(WABT_ROOT)/bin/wasm2c + $(WABT_ROOT)/bin/wasm2c $< -o $@ --disable-simd + +dhrystone_native: src/dhry_1.c src/dhry_2.c + clang -O3 $^ -o $@ + +dhrystone: main.c dhrystone.c $(WABT_ROOT)/wasm2c/wasm-rt-impl.c $(WABT_ROOT)/wasm2c/wasm-rt-mem-impl.c + $(CC) $(LDFLAGS) $(CFLAGS) $^ -o $@ $(LDLIBS) + +dhrystone_segue: main.c dhrystone.c $(WABT_ROOT)/wasm2c/wasm-rt-impl.c $(WABT_ROOT)/wasm2c/wasm-rt-mem-impl.c + $(CC) $(LDFLAGS) $(CFLAGS) $(CFLAGS_SEGUE) $^ -o $@ $(LDLIBS) + +benchmark: dhrystone_native dhrystone dhrystone_segue + @echo "Starting Dhrystone benchmark. 
(Smaller number is better)" + @sleep 2 + @echo "Native" + @./dhrystone_native | grep "one run through Dhrystone" + @sleep 2 + @echo "Wasm" + @./dhrystone | grep "one run through Dhrystone" + @sleep 2 + @echo "Wasm+Segue" + @./dhrystone_segue | grep "one run through Dhrystone" diff --git a/wasm2c/benchmarks/dhrystone/dhrystone.wasm b/wasm2c/benchmarks/dhrystone/dhrystone.wasm Binary files differnew file mode 100755 index 00000000..b652757f --- /dev/null +++ b/wasm2c/benchmarks/dhrystone/dhrystone.wasm diff --git a/wasm2c/benchmarks/dhrystone/main.c b/wasm2c/benchmarks/dhrystone/main.c new file mode 100644 index 00000000..5f7350e4 --- /dev/null +++ b/wasm2c/benchmarks/dhrystone/main.c @@ -0,0 +1,265 @@ +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +#include "uvwasi.h" + +#include "dhrystone.h" + +struct w2c_wasi__snapshot__preview1 { + wasm_rt_memory_t* w2c_memory; + uvwasi_t* uvwasi; +}; + +#define WASI_SUCCESS 0 +#define WASI_BADF_ERROR 8 + +typedef uint32_t u32; +typedef uint64_t u64; + +#if WABT_BIG_ENDIAN +#define MEM_ADDR(mem, addr, n) &(mem)->data[(mem)->size - (addr) - (n)] +#else +#define MEM_ADDR(mem, addr, n) &(mem)->data[addr] +#endif + +#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n) + +#define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0) + +#define RANGE_CHECK(mem, offset, len) \ + if (offset + (uint64_t)len > mem->size) \ + TRAP(OOB); + +static inline void memory_fill(wasm_rt_memory_t* mem, u32 d, u32 val, u32 n) { + RANGE_CHECK(mem, d, n); + memset(MEM_ADDR(mem, d, n), val, n); +} + +#define MEMCHECK(mem, a, t) RANGE_CHECK(mem, a, sizeof(t)) + +#ifdef __GNUC__ +#define FORCE_READ_INT(var) __asm__("" ::"r"(var)); +#else +#define FORCE_READ_INT(var) +#endif + +#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ + static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ + MEMCHECK(mem, addr, t1); \ + t1 result; \ + wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \ + sizeof(t1)); \ + force_read(result); \ + 
return (t3)(t2)result; \ + } + +#define DEFINE_STORE(name, t1, t2) \ + static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ + MEMCHECK(mem, addr, t1); \ + t1 wrapped = (t1)value; \ + wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \ + sizeof(t1)); \ + } + +DEFINE_LOAD(i8_load, u8, u8, u8, FORCE_READ_INT) +DEFINE_LOAD(i16_load, u16, u16, u16, FORCE_READ_INT) +DEFINE_LOAD(i32_load, u32, u32, u32, FORCE_READ_INT) +DEFINE_LOAD(i64_load, u64, u64, u64, FORCE_READ_INT) +DEFINE_STORE(i8_store, u8, u8) +DEFINE_STORE(i16_store, u16, u16) +DEFINE_STORE(i32_store, u32, u32) +DEFINE_STORE(i64_store, u64, u64) + +u32 w2c_wasi__snapshot__preview1_args_get( + struct w2c_wasi__snapshot__preview1* a, + u32 b, + u32 c) { + return WASI_SUCCESS; +} +u32 w2c_wasi__snapshot__preview1_args_sizes_get( + struct w2c_wasi__snapshot__preview1* a, + u32 str_count, + u32 buff_size) { + i32_store(a->w2c_memory, str_count, 0); + i32_store(a->w2c_memory, buff_size, 0); + return WASI_SUCCESS; +} +u32 w2c_wasi__snapshot__preview1_fd_prestat_get( + struct w2c_wasi__snapshot__preview1* a, + u32 b, + u32 c) { + return WASI_BADF_ERROR; +} + +u32 w2c_wasi__snapshot__preview1_fd_write( + struct w2c_wasi__snapshot__preview1* a, + u32 fd, + u32 iovs_offset, + u32 iovs_len, + u32 nwritten) { + if (iovs_len > 32) + return UVWASI_EINVAL; + uvwasi_ciovec_t iovs[iovs_len]; + + for (uvwasi_size_t i = 0; i < iovs_len; ++i) { + u32 wasi_iovs_i = iovs_offset + i * sizeof(uvwasi_size_t[2]); + u32 buf_loc = i32_load(a->w2c_memory, wasi_iovs_i); + u32 buf_len = i32_load(a->w2c_memory, wasi_iovs_i + sizeof(uvwasi_size_t)); + iovs[i].buf = MEM_ADDR(a->w2c_memory, buf_loc, buf_len); + iovs[i].buf_len = buf_len; + } + + uvwasi_size_t num_written; + uvwasi_errno_t ret = + uvwasi_fd_write(a->uvwasi, fd, iovs, iovs_len, &num_written); + i32_store(a->w2c_memory, nwritten, num_written); + return ret; +} + +uint32_t w2c_wasi__snapshot__preview1_fd_fdstat_get( + struct 
w2c_wasi__snapshot__preview1* a, + u32 fd, + u32 stat) { + uvwasi_fdstat_t uvstat; + uvwasi_errno_t ret = uvwasi_fd_fdstat_get(a->uvwasi, fd, &uvstat); + if (ret == UVWASI_ESUCCESS) { + memory_fill(a->w2c_memory, stat, 0, 24); + i8_store(a->w2c_memory, stat, uvstat.fs_filetype); + i16_store(a->w2c_memory, stat + 2, uvstat.fs_flags); + i64_store(a->w2c_memory, stat + 8, uvstat.fs_rights_base); + i64_store(a->w2c_memory, stat + 16, uvstat.fs_rights_inheriting); + } + return ret; +} + +u32 w2c_wasi__snapshot__preview1_clock_time_get( + struct w2c_wasi__snapshot__preview1* a, + u32 clk_id, + u64 precision, + u32 result) { + uvwasi_timestamp_t t; + uvwasi_errno_t ret = uvwasi_clock_time_get(a->uvwasi, clk_id, precision, &t); + i64_store(a->w2c_memory, result, t); + return ret; +} + +u32 w2c_wasi__snapshot__preview1_clock_res_get( + struct w2c_wasi__snapshot__preview1* a, + u32 clk_id, + u32 result) { + uvwasi_timestamp_t t; + uvwasi_errno_t ret = uvwasi_clock_res_get(a->uvwasi, clk_id, &t); + i64_store(a->w2c_memory, result, t); + return ret; +} + +u32 w2c_wasi__snapshot__preview1_fd_seek(struct w2c_wasi__snapshot__preview1* a, + u32 b, + u64 c, + u32 d, + u32 e) { + printf("fd_seek not implemented\n"); + abort(); +} +u32 w2c_wasi__snapshot__preview1_fd_read(struct w2c_wasi__snapshot__preview1* a, + u32 b, + u32 c, + u32 d, + u32 e) { + printf("fd_read not implemented\n"); + abort(); +} +u32 w2c_wasi__snapshot__preview1_fd_close( + struct w2c_wasi__snapshot__preview1* a, + u32 b) { + printf("fd_close not implemented\n"); + abort(); +} +u32 w2c_wasi__snapshot__preview1_fd_fdstat_set_flags( + struct w2c_wasi__snapshot__preview1* a, + u32 b, + u32 c) { + printf("fd_fdstat_set_flags not implemented\n"); + abort(); +} +u32 w2c_wasi__snapshot__preview1_fd_prestat_dir_name( + struct w2c_wasi__snapshot__preview1* a, + u32 b, + u32 c, + u32 d) { + printf("fd_prestat_dir_name not implemented\n"); + abort(); +} +u32 w2c_wasi__snapshot__preview1_path_open( + struct 
w2c_wasi__snapshot__preview1* a, + u32 b, + u32 c, + u32 d, + u32 e, + u32 f, + u64 g, + u64 h, + u32 i, + u32 end) { + printf("path_open not implemented\n"); + abort(); +} +void w2c_wasi__snapshot__preview1_proc_exit( + struct w2c_wasi__snapshot__preview1* a, + u32 b) { + printf("proc_exit not implemented\n"); + abort(); +} + +int main(int argc, char const* argv[]) { + w2c_dhrystone dhrystone; + struct w2c_wasi__snapshot__preview1 wasi; + uvwasi_t local_uvwasi_state; + uvwasi_options_t init_options; + + // pass in standard descriptors + init_options.in = 0; + init_options.out = 1; + init_options.err = 2; + init_options.fd_table_size = 10; + + // pass in args and environement + extern const char** environ; + init_options.argc = argc; + init_options.argv = argv; + init_options.envp = (const char**)environ; + + // no sandboxing enforced, binary has access to everything user does + init_options.preopenc = 2; + init_options.preopens = calloc(2, sizeof(uvwasi_preopen_t)); + + init_options.preopens[0].mapped_path = "/"; + init_options.preopens[0].real_path = "/"; + init_options.preopens[1].mapped_path = "./"; + init_options.preopens[1].real_path = "."; + + init_options.allocator = NULL; + + wasm_rt_init(); + uvwasi_errno_t ret = uvwasi_init(&local_uvwasi_state, &init_options); + + if (ret != UVWASI_ESUCCESS) { + printf("uvwasi_init failed with error %d\n", ret); + exit(1); + } + + wasi.w2c_memory = &dhrystone.w2c_memory; + wasi.uvwasi = &local_uvwasi_state, + + wasm2c_dhrystone_instantiate(&dhrystone, &wasi); + + w2c_dhrystone_0x5Fstart(&dhrystone); + + wasm2c_dhrystone_free(&dhrystone); + + uvwasi_destroy(&local_uvwasi_state); + wasm_rt_free(); + + return 0; +} diff --git a/wasm2c/benchmarks/dhrystone/src/README.md b/wasm2c/benchmarks/dhrystone/src/README.md new file mode 100644 index 00000000..30d270e9 --- /dev/null +++ b/wasm2c/benchmarks/dhrystone/src/README.md @@ -0,0 +1,23 @@ +The Dhrystone benchmark: a popular benchmark for CPU/compiler performance +measurement. 
Description and sources available +[here](https://www.netlib.org/benchmark/dhry-c). + +# Running the benchmark +Use the command `make benchmark` to run the benchmark. + +This compares the performance of three builds of Dhrystone: (1) Native, (2) Wasm2c, and +(3) Wasm2c + Segue optimization. The Segue optimization is enabled only on +specific CPU+OS+Compiler combinations. If unsupported on your platform, builds +(2) and (3) above will be identical. + +# Sample output + +``` +Starting Dhrystone benchmark. (Smaller number is better) +Native +Microseconds for one run through Dhrystone: 0.011133 +Wasm +Microseconds for one run through Dhrystone: 0.013670 +Wasm+Segue +Microseconds for one run through Dhrystone: 0.008666 +```
\ No newline at end of file diff --git a/wasm2c/benchmarks/dhrystone/src/dhry.h b/wasm2c/benchmarks/dhrystone/src/dhry.h new file mode 100644 index 00000000..be0f701e --- /dev/null +++ b/wasm2c/benchmarks/dhrystone/src/dhry.h @@ -0,0 +1,306 @@ +/* + ************************************************************************** + * DHRYSTONE 2.1 BENCHMARK PC VERSION + ************************************************************************** + * + * "DHRYSTONE" Benchmark Program + * ----------------------------- + * + * Version: C, Version 2.1 + * + * File: dhry.h (part 1 of 3) + * + * Date: May 25, 1988 + * + * Author: Reinhold P. Weicker + * Siemens AG, AUT E 51 + * Postfach 3220 + * 8520 Erlangen + * Germany (West) + * Phone: [+49]-9131-7-20330 + * (8-17 Central European Time) + * Usenet: ..!mcsun!unido!estevax!weicker + * + * Original Version (in Ada) published in + * "Communications of the ACM" vol. 27., no. 10 (Oct. 1984), + * pp. 1013 - 1030, together with the statistics + * on which the distribution of statements etc. is based. + * + * In this C version, the following C library functions are used: + * - strcpy, strcmp (inside the measurement loop) + * - printf, scanf (outside the measurement loop) + * In addition, Berkeley UNIX system calls "times ()" or "time ()" + * are used for execution time measurement. For measurements + * on other systems, these calls have to be changed. + * + * Collection of Results: + * Reinhold Weicker (address see above) and + * + * Rick Richardson + * PC Research. Inc. + * 94 Apple Orchard Drive + * Tinton Falls, NJ 07724 + * Phone: (201) 389-8963 (9-17 EST) + * Usenet: ...!uunet!pcrat!rick + * + * Please send results to Rick Richardson and/or Reinhold Weicker. + * Complete information should be given on hardware and software used. + * Hardware information includes: Machine type, CPU, type and size + * of caches; for microprocessors: clock frequency, memory speed + * (number of wait states). 
+ * Software information includes: Compiler (and runtime library) + * manufacturer and version, compilation switches, OS version. + * The Operating System version may give an indication about the + * compiler; Dhrystone itself performs no OS calls in the measurement + * loop. + * + * The complete output generated by the program should be mailed + * such that at least some checks for correctness can be made. + * + ************************************************************************** + * + * This version has changes made by Roy Longbottom to conform to a common + * format for a series of standard benchmarks for PCs: + * + * Running time greater than 5 seconds due to inaccuracy of the PC clock. + * + * Automatic adjustment of run time, no manually inserted parameters. + * + * Initial display of calibration times to confirm linearity. + * + * Display of results within one screen (or at a slow speed as the test + * progresses) so that it can be seen to have run successfully. + * + * Facilities to type in details of system used etc. + * + * All results and details appended to a results file. + * + * + * Roy Longbottom + * 101323.2241@compuserve.com + * + ************************************************************************** + * + * For details of history, changes, other defines, benchmark construction + * statistics see official versions from ftp.nosc.mil/pub/aburto where + * the latest table of results (dhry.tbl) are available. 
See also + * netlib@ornl.gov + * + ************************************************************************** + * + * Defines: The following "Defines" are possible: + * -DREG=register (default: Not defined) + * As an approximation to what an average C programmer + * might do, the "register" storage class is applied + * (if enabled by -DREG=register) + * - for local variables, if they are used (dynamically) + * five or more times + * - for parameters if they are used (dynamically) + * six or more times + * Note that an optimal "register" strategy is + * compiler-dependent, and that "register" declarations + * do not necessarily lead to faster execution. + * -DNOSTRUCTASSIGN (default: Not defined) + * Define if the C compiler does not support + * assignment of structures. + * -DNOENUMS (default: Not defined) + * Define if the C compiler does not support + * enumeration types. + *************************************************************************** + * + * Compilation model and measurement (IMPORTANT): + * + * This C version of Dhrystone consists of three files: + * - dhry.h (this file, containing global definitions and comments) + * - dhry_1.c (containing the code corresponding to Ada package Pack_1) + * - dhry_2.c (containing the code corresponding to Ada package Pack_2) + * + * The following "ground rules" apply for measurements: + * - Separate compilation + * - No procedure merging + * - Otherwise, compiler optimizations are allowed but should be indicated + * - Default results are those without register declarations + * See the companion paper "Rationale for Dhrystone Version 2" for a more + * detailed discussion of these ground rules. + * + * For 16-Bit processors (e.g. 80186, 80286), times for all compilation + * models ("small", "medium", "large" etc.) should be given if possible, + * together with a definition of these models for the compiler system used. 
+ * + ************************************************************************** + * Examples of Pentium Results + * + * Dhrystone Benchmark Version 2.1 (Language: C) + * + * Month run 4/1996 + * PC model Escom + * CPU Pentium + * Clock MHz 100 + * Cache 256K + * Options Neptune chipset + * OS/DOS Windows 95 + * Compiler Watcom C/ C++ 10.5 Win386 + * OptLevel -otexan -zp8 -fp5 -5r + * Run by Roy Longbottom + * From UK + * Mail 101323.2241@compuserve.com + * + * Final values (* implementation-dependent): + * + * Int_Glob: O.K. 5 + * Bool_Glob: O.K. 1 + * Ch_1_Glob: O.K. A + * Ch_2_Glob: O.K. B + * Arr_1_Glob[8]: O.K. 7 + * Arr_2_Glob8/7: O.K. 1600010 + * Ptr_Glob-> + * Ptr_Comp: * 98008 + * Discr: O.K. 0 + * Enum_Comp: O.K. 2 + * Int_Comp: O.K. 17 + * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING + * Next_Ptr_Glob-> + * Ptr_Comp: * 98008 same as above + * Discr: O.K. 0 + * Enum_Comp: O.K. 1 + * Int_Comp: O.K. 18 + * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING + * Int_1_Loc: O.K. 5 + * Int_2_Loc: O.K. 13 + * Int_3_Loc: O.K. 7 + * Enum_Loc: O.K. 1 + * Str_1_Loc: O.K. DHRYSTONE PROGRAM, 1'ST STRING + * Str_2_Loc: O.K. DHRYSTONE PROGRAM, 2'ND STRING + * + * Register option Selected. + * + * Microseconds 1 loop: 4.53 + * Dhrystones / second: 220690 + * VAX MIPS rating: 125.61 + * + * + * Dhrystone Benchmark Version 2.1 (Language: C) + * + * Month run 4/1996 + * PC model Escom + * CPU Pentium + * Clock MHz 100 + * Cache 256K + * Options Neptune chipset + * OS/DOS Windows 95 + * Compiler Watcom C/ C++ 10.5 Win386 + * OptLevel No optimisation + * Run by Roy Longbottom + * From UK + * Mail 101323.2241@compuserve.com + * + * Final values (* implementation-dependent): + * + * Int_Glob: O.K. 5 + * Bool_Glob: O.K. 1 + * Ch_1_Glob: O.K. A + * Ch_2_Glob: O.K. B + * Arr_1_Glob[8]: O.K. 7 + * Arr_2_Glob8/7: O.K. 320010 + * Ptr_Glob-> + * Ptr_Comp: * 98004 + * Discr: O.K. 0 + * Enum_Comp: O.K. 2 + * Int_Comp: O.K. 17 + * Str_Comp: O.K. 
DHRYSTONE PROGRAM, SOME STRING + * Next_Ptr_Glob-> + * Ptr_Comp: * 98004 same as above + * Discr: O.K. 0 + * Enum_Comp: O.K. 1 + * Int_Comp: O.K. 18 + * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING + * Int_1_Loc: O.K. 5 + * Int_2_Loc: O.K. 13 + * Int_3_Loc: O.K. 7 + * Enum_Loc: O.K. 1 + * Str_1_Loc: O.K. DHRYSTONE PROGRAM, 1'ST STRING + * Str_2_Loc: O.K. DHRYSTONE PROGRAM, 2'ND STRING + * + * Register option Not selected. + * + * Microseconds 1 loop: 20.06 + * Dhrystones / second: 49844 + * VAX MIPS rating: 28.37 + * + ************************************************************************** + */ + +/* Compiler and system dependent definitions: */ + +#ifndef TIME +#define TIMES +#endif +/* Use times(2) time function unless */ +/* explicitly defined otherwise */ + +#ifdef TIMES +/* #include <sys/types.h> + #include <sys/times.h> */ +/* for "times" */ +#endif + +#define Mic_secs_Per_Second 1000000.0 +/* Berkeley UNIX C returns process times in seconds/HZ */ + +#ifdef NOSTRUCTASSIGN +#define structassign(d, s) memcpy(&(d), &(s), sizeof(d)) +#else +#define structassign(d, s) d = s +#endif + +#ifdef NOENUM +#define Ident_1 0 +#define Ident_2 1 +#define Ident_3 2 +#define Ident_4 3 +#define Ident_5 4 +typedef int Enumeration; +#else +typedef enum { Ident_1, Ident_2, Ident_3, Ident_4, Ident_5 } Enumeration; +#endif +/* for boolean and enumeration types in Ada, Pascal */ + +/* General definitions: */ + +#include <stdio.h> +#include <string.h> + +/* for strcpy, strcmp */ + +#define Null 0 +/* Value of a Null pointer */ +#define true 1 +#define false 0 + +typedef int One_Thirty; +typedef int One_Fifty; +typedef char Capital_Letter; +typedef int Boolean; +typedef char Str_30[31]; +typedef int Arr_1_Dim[50]; +typedef int Arr_2_Dim[50][50]; + +typedef struct record { + struct record *Ptr_Comp; + Enumeration Discr; + union { + struct { + Enumeration Enum_Comp; + int Int_Comp; + char Str_Comp[31]; + } var_1; + struct { + Enumeration E_Comp_2; + char Str_2_Comp[31]; + } var_2; + 
struct { + char Ch_1_Comp; + char Ch_2_Comp; + } var_3; + } variant; +} Rec_Type, *Rec_Pointer; diff --git a/wasm2c/benchmarks/dhrystone/src/dhry_1.c b/wasm2c/benchmarks/dhrystone/src/dhry_1.c new file mode 100644 index 00000000..fb23dd3a --- /dev/null +++ b/wasm2c/benchmarks/dhrystone/src/dhry_1.c @@ -0,0 +1,485 @@ +/* + ************************************************************************* + * + * "DHRYSTONE" Benchmark Program + * ----------------------------- + * + * Version: C, Version 2.1 + * + * File: dhry_1.c (part 2 of 3) + * + * Date: May 25, 1988 + * + * Author: Reinhold P. Weicker + * + ************************************************************************* + */ + +#include <time.h> +#include <stdlib.h> +#include <stdio.h> +#include "dhry.h" + +/* Global Variables: */ + +Rec_Pointer Ptr_Glob, Next_Ptr_Glob; +int Int_Glob; +Boolean Bool_Glob; +char Ch_1_Glob, Ch_2_Glob; +int Arr_1_Glob[50]; +int Arr_2_Glob[50][50]; + +Enumeration +Func_1(Capital_Letter Ch_1_Par_Val, Capital_Letter Ch_2_Par_Val); +/* +forward declaration necessary since Enumeration may not simply be int +*/ + +#ifndef ROPT +#define REG +/* REG becomes defined as empty */ +/* i.e. 
no register variables */ +#else +#define REG register +#endif + +void +Proc_1(REG Rec_Pointer Ptr_Val_Par); +void +Proc_2(One_Fifty *Int_Par_Ref); +void +Proc_3(Rec_Pointer *Ptr_Ref_Par); +void +Proc_4(); +void +Proc_5(); +void +Proc_6(Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par); +void +Proc_7(One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val, + One_Fifty *Int_Par_Ref); +void +Proc_8(Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref, int Int_1_Par_Val, + int Int_2_Par_Val); + +Boolean +Func_2(Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref); + +/* variables for time measurement: */ + +#define Too_Small_Time 2 +/* Measurements should last at least 2 seconds */ + +#define BILLION 1000000000L +#define MILLION 1000000 +struct timespec Begin_Time, End_Time; +double User_Time; + +double Microseconds, Dhrystones_Per_Second, Vax_Mips; + +/* end of variables for time measurement */ + +int +main(int argc, char *argv[]) +/*****/ + +/* main program, corresponds to procedures */ +/* Main and Proc_0 in the Ada version */ +{ + One_Fifty Int_1_Loc; + REG One_Fifty Int_2_Loc; + One_Fifty Int_3_Loc; + REG char Ch_Index; + Enumeration Enum_Loc; + Str_30 Str_1_Loc; + Str_30 Str_2_Loc; + REG int Run_Index; + REG int Number_Of_Runs; + int endit, count = 10; + char general[9][80] = { " " }; + + /*********************************************************************** + * Change for compiler and optimisation used * + ***********************************************************************/ + + Next_Ptr_Glob = (Rec_Pointer)malloc(sizeof(Rec_Type)); + Ptr_Glob = (Rec_Pointer)malloc(sizeof(Rec_Type)); + + Ptr_Glob->Ptr_Comp = Next_Ptr_Glob; + Ptr_Glob->Discr = Ident_1; + Ptr_Glob->variant.var_1.Enum_Comp = Ident_3; + Ptr_Glob->variant.var_1.Int_Comp = 40; + strcpy(Ptr_Glob->variant.var_1.Str_Comp, "DHRYSTONE PROGRAM, SOME STRING"); + strcpy(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING"); + + Arr_2_Glob[8][7] = 10; + /* Was missing in published program. 
Without this statement, */ + /* Arr_2_Glob [8][7] would have an undefined value. */ + /* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */ + /* overflow may occur for this array element. */ + + printf("\n"); + printf("Dhrystone Benchmark, Version 2.1 (Language: C or C++)\n"); + printf("\n"); + + Number_Of_Runs = 5000; + + do { + + Number_Of_Runs = Number_Of_Runs * 2; + count = count - 1; + Arr_2_Glob[8][7] = 10; + + /***************/ + /* Start timer */ + /***************/ + + clock_gettime(CLOCK_MONOTONIC, &Begin_Time); + + for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index) { + + Proc_5(); + Proc_4(); + /* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */ + Int_1_Loc = 2; + Int_2_Loc = 3; + strcpy(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING"); + Enum_Loc = Ident_2; + Bool_Glob = !Func_2(Str_1_Loc, Str_2_Loc); + /* Bool_Glob == 1 */ + while (Int_1_Loc < Int_2_Loc) /* loop body executed once */ + { + Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc; + /* Int_3_Loc == 7 */ + Proc_7(Int_1_Loc, Int_2_Loc, &Int_3_Loc); + /* Int_3_Loc == 7 */ + Int_1_Loc += 1; + } /* while */ + /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */ + Proc_8(Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc); + /* Int_Glob == 5 */ + Proc_1(Ptr_Glob); + for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index) + /* loop body executed twice */ + { + if (Enum_Loc == Func_1(Ch_Index, 'C')) + /* then, not executed */ + { + Proc_6(Ident_1, &Enum_Loc); + strcpy(Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING"); + Int_2_Loc = Run_Index; + Int_Glob = Run_Index; + } + } + /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */ + Int_2_Loc = Int_2_Loc * Int_1_Loc; + Int_1_Loc = Int_2_Loc / Int_3_Loc; + Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc; + /* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */ + Proc_2(&Int_1_Loc); + /* Int_1_Loc == 5 */ + + } /* loop "for Run_Index" */ + + /**************/ + /* Stop timer */ + /**************/ + + clock_gettime(CLOCK_MONOTONIC, &End_Time); + + 
User_Time = (End_Time.tv_sec - Begin_Time.tv_sec) * MILLION + + (End_Time.tv_nsec - Begin_Time.tv_nsec) / 1000; + User_Time = User_Time / MILLION; /* convert to seconds */ + + printf("%ld runs %lf seconds \n", (long)Number_Of_Runs, User_Time); + if (User_Time > 5.0) { + count = 0; + } + else { + if (User_Time < 0.1) { + Number_Of_Runs = Number_Of_Runs * 5; + } + } + } /* calibrate/run do while */ + while (count > 0); + + printf("\n"); + printf("Final values (* implementation-dependent):\n"); + printf("\n"); + printf("Int_Glob: "); + if (Int_Glob == 5) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Int_Glob); + + printf("Bool_Glob: "); + if (Bool_Glob == 1) + printf("O.K. "); + else + printf("WRONG "); + printf("%d\n", Bool_Glob); + + printf("Ch_1_Glob: "); + if (Ch_1_Glob == 'A') + printf("O.K. "); + else + printf("WRONG "); + printf("%c ", Ch_1_Glob); + + printf("Ch_2_Glob: "); + if (Ch_2_Glob == 'B') + printf("O.K. "); + else + printf("WRONG "); + printf("%c\n", Ch_2_Glob); + + printf("Arr_1_Glob[8]: "); + if (Arr_1_Glob[8] == 7) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Arr_1_Glob[8]); + + printf("Arr_2_Glob8/7: "); + if (Arr_2_Glob[8][7] == Number_Of_Runs + 10) + printf("O.K. "); + else + printf("WRONG "); + printf("%10d\n", Arr_2_Glob[8][7]); + + printf("Ptr_Glob-> "); + printf(" Ptr_Comp: * %p\n", Ptr_Glob->Ptr_Comp); + + printf(" Discr: "); + if (Ptr_Glob->Discr == 0) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Ptr_Glob->Discr); + + printf("Enum_Comp: "); + if (Ptr_Glob->variant.var_1.Enum_Comp == 2) + printf("O.K. "); + else + printf("WRONG "); + printf("%d\n", Ptr_Glob->variant.var_1.Enum_Comp); + + printf(" Int_Comp: "); + if (Ptr_Glob->variant.var_1.Int_Comp == 17) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Ptr_Glob->variant.var_1.Int_Comp); + + printf("Str_Comp: "); + if (strcmp(Ptr_Glob->variant.var_1.Str_Comp, + "DHRYSTONE PROGRAM, SOME STRING") + == 0) + printf("O.K. 
"); + else + printf("WRONG "); + printf("%s\n", Ptr_Glob->variant.var_1.Str_Comp); + + printf("Next_Ptr_Glob-> "); + printf(" Ptr_Comp: * %p", Next_Ptr_Glob->Ptr_Comp); + printf(" same as above\n"); + + printf(" Discr: "); + if (Next_Ptr_Glob->Discr == 0) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Next_Ptr_Glob->Discr); + + printf("Enum_Comp: "); + if (Next_Ptr_Glob->variant.var_1.Enum_Comp == 1) + printf("O.K. "); + else + printf("WRONG "); + printf("%d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp); + + printf(" Int_Comp: "); + if (Next_Ptr_Glob->variant.var_1.Int_Comp == 18) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Next_Ptr_Glob->variant.var_1.Int_Comp); + + printf("Str_Comp: "); + if (strcmp(Next_Ptr_Glob->variant.var_1.Str_Comp, + "DHRYSTONE PROGRAM, SOME STRING") + == 0) + printf("O.K. "); + else + printf("WRONG "); + printf("%s\n", Next_Ptr_Glob->variant.var_1.Str_Comp); + + printf("Int_1_Loc: "); + if (Int_1_Loc == 5) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Int_1_Loc); + + printf("Int_2_Loc: "); + if (Int_2_Loc == 13) + printf("O.K. "); + else + printf("WRONG "); + printf("%d\n", Int_2_Loc); + + printf("Int_3_Loc: "); + if (Int_3_Loc == 7) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Int_3_Loc); + + printf("Enum_Loc: "); + if (Enum_Loc == 1) + printf("O.K. "); + else + printf("WRONG "); + printf("%d\n", Enum_Loc); + + printf("Str_1_Loc: "); + if (strcmp(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING") == 0) + printf("O.K. "); + else + printf("WRONG "); + printf("%s\n", Str_1_Loc); + + printf("Str_2_Loc: "); + if (strcmp(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING") == 0) + printf("O.K. 
"); + else + printf("WRONG "); + printf("%s\n", Str_2_Loc); + + printf("\n"); + + if (User_Time < Too_Small_Time) { + printf("Measured time too small to obtain meaningful results\n"); + printf("Please increase number of runs\n"); + printf("\n"); + } + else { + Microseconds = User_Time * Mic_secs_Per_Second / (double)Number_Of_Runs; + Dhrystones_Per_Second = (double)Number_Of_Runs / User_Time; + Vax_Mips = Dhrystones_Per_Second / 1757.0; + + printf("Microseconds for one run through Dhrystone: "); + printf("%lf \n", Microseconds); + printf("Dhrystones per Second: "); + printf("%lf \n", Dhrystones_Per_Second); + printf("VAX MIPS rating = "); + printf("%lf \n", Vax_Mips); + printf("\n"); + } + + free(Next_Ptr_Glob); + free(Ptr_Glob); + return 0; +} + +void +Proc_1(REG Rec_Pointer Ptr_Val_Par) +/******************/ + +/* executed once */ +{ + REG Rec_Pointer Next_Record = Ptr_Val_Par->Ptr_Comp; + /* == Ptr_Glob_Next */ + /* Local variable, initialized with Ptr_Val_Par->Ptr_Comp, */ + /* corresponds to "rename" in Ada, "with" in Pascal */ + + structassign(*Ptr_Val_Par->Ptr_Comp, *Ptr_Glob); + Ptr_Val_Par->variant.var_1.Int_Comp = 5; + Next_Record->variant.var_1.Int_Comp = Ptr_Val_Par->variant.var_1.Int_Comp; + Next_Record->Ptr_Comp = Ptr_Val_Par->Ptr_Comp; + Proc_3(&Next_Record->Ptr_Comp); + /* Ptr_Val_Par->Ptr_Comp->Ptr_Comp + == Ptr_Glob->Ptr_Comp */ + if (Next_Record->Discr == Ident_1) + /* then, executed */ + { + Next_Record->variant.var_1.Int_Comp = 6; + Proc_6(Ptr_Val_Par->variant.var_1.Enum_Comp, + &Next_Record->variant.var_1.Enum_Comp); + Next_Record->Ptr_Comp = Ptr_Glob->Ptr_Comp; + Proc_7(Next_Record->variant.var_1.Int_Comp, 10, + &Next_Record->variant.var_1.Int_Comp); + } + else { /* not executed */ + structassign(*Ptr_Val_Par, *Ptr_Val_Par->Ptr_Comp); + } +} /* Proc_1 */ + +void +Proc_2(One_Fifty *Int_Par_Ref) +/******************/ +/* executed once */ +/* *Int_Par_Ref == 1, becomes 4 */ + +{ + One_Fifty Int_Loc; + Enumeration Enum_Loc; + + Int_Loc = 
*Int_Par_Ref + 10; + do /* executed once */ + if (Ch_1_Glob == 'A') + /* then, executed */ + { + Int_Loc -= 1; + *Int_Par_Ref = Int_Loc - Int_Glob; + Enum_Loc = Ident_1; + } /* if */ + while (Enum_Loc != Ident_1); /* true */ +} /* Proc_2 */ + +void +Proc_3(Rec_Pointer *Ptr_Ref_Par) +/******************/ +/* executed once */ +/* Ptr_Ref_Par becomes Ptr_Glob */ + +{ + if (Ptr_Glob != Null) + /* then, executed */ + *Ptr_Ref_Par = Ptr_Glob->Ptr_Comp; + Proc_7(10, Int_Glob, &Ptr_Glob->variant.var_1.Int_Comp); +} /* Proc_3 */ + +void +Proc_4() /* without parameters */ +/*******/ +/* executed once */ +{ + Boolean Bool_Loc; + + Bool_Loc = Ch_1_Glob == 'A'; + Bool_Glob = Bool_Loc | Bool_Glob; + Ch_2_Glob = 'B'; +} /* Proc_4 */ + +void +Proc_5() /* without parameters */ +/*******/ +/* executed once */ +{ + Ch_1_Glob = 'A'; + Bool_Glob = false; +} /* Proc_5 */ + +/* Procedure for the assignment of structures, */ +/* if the C compiler doesn't support this feature */ +#ifdef NOSTRUCTASSIGN +memcpy(d, s, l) register char *d; +register char *s; +register int l; +{ + while (l--) + *d++ = *s++; +} +#endif diff --git a/wasm2c/benchmarks/dhrystone/src/dhry_2.c b/wasm2c/benchmarks/dhrystone/src/dhry_2.c new file mode 100644 index 00000000..276785cb --- /dev/null +++ b/wasm2c/benchmarks/dhrystone/src/dhry_2.c @@ -0,0 +1,187 @@ +/* + ************************************************************************* + * + * "DHRYSTONE" Benchmark Program + * ----------------------------- + * + * Version: C, Version 2.1 + * + * File: dhry_2.c (part 3 of 3) + * + * Date: May 25, 1988 + * + * Author: Reinhold P. Weicker + * + ************************************************************************* + */ + +#include "dhry.h" + +#ifndef REG +#define REG +/* REG becomes defined as empty */ +/* i.e. 
no register variables */ +#else +#define REG register +#endif + +extern int Int_Glob; +extern char Ch_1_Glob; + +Boolean +Func_3(Enumeration Enum_Par_Val); + +void +Proc_6(Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par) +/*********************************/ +/* executed once */ +/* Enum_Val_Par == Ident_3, Enum_Ref_Par becomes Ident_2 */ + +{ + *Enum_Ref_Par = Enum_Val_Par; + if (!Func_3(Enum_Val_Par)) + /* then, not executed */ + *Enum_Ref_Par = Ident_4; + switch (Enum_Val_Par) { + case Ident_1: + *Enum_Ref_Par = Ident_1; + break; + case Ident_2: + if (Int_Glob > 100) + /* then */ + *Enum_Ref_Par = Ident_1; + else + *Enum_Ref_Par = Ident_4; + break; + case Ident_3: /* executed */ + *Enum_Ref_Par = Ident_2; + break; + case Ident_4: + break; + case Ident_5: + *Enum_Ref_Par = Ident_3; + break; + } /* switch */ +} /* Proc_6 */ + +void +Proc_7(One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val, One_Fifty *Int_Par_Ref) +/**********************************************/ +/* executed three times */ +/* first call: Int_1_Par_Val == 2, Int_2_Par_Val == 3, */ +/* Int_Par_Ref becomes 7 */ +/* second call: Int_1_Par_Val == 10, Int_2_Par_Val == 5, */ +/* Int_Par_Ref becomes 17 */ +/* third call: Int_1_Par_Val == 6, Int_2_Par_Val == 10, */ +/* Int_Par_Ref becomes 18 */ + +{ + One_Fifty Int_Loc; + + Int_Loc = Int_1_Par_Val + 2; + *Int_Par_Ref = Int_2_Par_Val + Int_Loc; +} /* Proc_7 */ + +void +Proc_8(Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref, int Int_1_Par_Val, + int Int_2_Par_Val) +/*********************************************************************/ +/* executed once */ +/* Int_Par_Val_1 == 3 */ +/* Int_Par_Val_2 == 7 */ + +{ + REG One_Fifty Int_Index; + REG One_Fifty Int_Loc; + + Int_Loc = Int_1_Par_Val + 5; + Arr_1_Par_Ref[Int_Loc] = Int_2_Par_Val; + Arr_1_Par_Ref[Int_Loc + 1] = Arr_1_Par_Ref[Int_Loc]; + Arr_1_Par_Ref[Int_Loc + 30] = Int_Loc; + for (Int_Index = Int_Loc; Int_Index <= Int_Loc + 1; ++Int_Index) + Arr_2_Par_Ref[Int_Loc][Int_Index] = Int_Loc; + 
Arr_2_Par_Ref[Int_Loc][Int_Loc - 1] += 1; + Arr_2_Par_Ref[Int_Loc + 20][Int_Loc] = Arr_1_Par_Ref[Int_Loc]; + Int_Glob = 5; +} /* Proc_8 */ + +Enumeration +Func_1(Capital_Letter Ch_1_Par_Val, Capital_Letter Ch_2_Par_Val) +/*************************************************/ +/* executed three times */ +/* first call: Ch_1_Par_Val == 'H', Ch_2_Par_Val == 'R' */ +/* second call: Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C' */ +/* third call: Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C' */ + +{ + Capital_Letter Ch_1_Loc; + Capital_Letter Ch_2_Loc; + + Ch_1_Loc = Ch_1_Par_Val; + Ch_2_Loc = Ch_1_Loc; + if (Ch_2_Loc != Ch_2_Par_Val) + /* then, executed */ + return (Ident_1); + else /* not executed */ + { + Ch_1_Glob = Ch_1_Loc; + return (Ident_2); + } +} /* Func_1 */ + +Boolean +Func_2(Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref) +/*************************************************/ +/* executed once */ +/* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */ +/* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */ + +{ + REG One_Thirty Int_Loc; + Capital_Letter Ch_Loc; + + Int_Loc = 2; + while (Int_Loc <= 2) /* loop body executed once */ + if (Func_1(Str_1_Par_Ref[Int_Loc], Str_2_Par_Ref[Int_Loc + 1]) + == Ident_1) + /* then, executed */ + { + Ch_Loc = 'A'; + Int_Loc += 1; + } /* if, while */ + if (Ch_Loc >= 'W' && Ch_Loc < 'Z') + /* then, not executed */ + Int_Loc = 7; + if (Ch_Loc == 'R') + /* then, not executed */ + return (true); + else /* executed */ + { + if (strcmp(Str_1_Par_Ref, Str_2_Par_Ref) > 0) + /* then, not executed */ + { + Int_Loc += 7; + Int_Glob = Int_Loc; + return (true); + } + else /* executed */ + return (false); + } /* if Ch_Loc */ +} /* Func_2 */ + +Boolean +Func_3(Enumeration Enum_Par_Val) +/***************************/ +/* executed once */ +/* Enum_Par_Val == Ident_3 */ + +{ + Enumeration Enum_Loc; + + Enum_Loc = Enum_Par_Val; + if (Enum_Loc == Ident_3) + /* then, executed */ + return (true); + else /* not executed */ + return (false); +} /* Func_3 */ diff 
--git a/wasm2c/examples/fac/fac.c b/wasm2c/examples/fac/fac.c index 1365def8..d9f821df 100644 --- a/wasm2c/examples/fac/fac.c +++ b/wasm2c/examples/fac/fac.c @@ -39,6 +39,48 @@ #define MEM_ADDR(mem, addr, n) &(mem)->data[addr] #endif +#ifndef WASM_RT_USE_SEGUE +// Memory functions can use the segue optimization if allowed. The segue +// optimization uses x86 segments to point to a linear memory. We use this +// optimization when: +// +// (1) Segue is allowed using WASM_RT_ALLOW_SEGUE +// (2) on x86_64 without WABT_BIG_ENDIAN enabled +// (3) the Wasm module uses a single unshared imported or exported memory +// (4) the compiler supports: intrinsics for (rd|wr)(fs|gs)base, "address +// namespaces" for accessing pointers, and supports memcpy on pointers with +// custom "address namespaces". GCC does not support the memcpy requirement, +// so this leaves only clang for now. +#if WASM_RT_ALLOW_SEGUE && !WABT_BIG_ENDIAN && \ + (defined(__x86_64__) || defined(_M_X64)) && \ + WASM_RT_MODULE_IS_SINGLE_UNSHARED_MEMORY && __clang__ && \ + __has_builtin(__builtin_ia32_wrgsbase64) +#define WASM_RT_USE_SEGUE 1 +#else +#define WASM_RT_USE_SEGUE 0 +#endif +#endif + +#if WASM_RT_USE_SEGUE +// Different segments are free on different platforms +// Windows uses GS for TLS, FS is free +// Linux uses FS for TLS, GS is free +#if defined(__WIN32) +#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdfsbase64() +#define WASM_RT_SEGUE_WRITE_BASE(base) \ + __builtin_ia32_wrfsbase64((uintptr_t)base) +#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_fs*)(uintptr_t)addr) +#else +// POSIX style OS +#define WASM_RT_SEGUE_READ_BASE() __builtin_ia32_rdgsbase64() +#define WASM_RT_SEGUE_WRITE_BASE(base) \ + __builtin_ia32_wrgsbase64((uintptr_t)base) +#define MEM_ADDR_MEMOP(mem, addr, n) ((uint8_t __seg_gs*)(uintptr_t)addr) +#endif +#else +#define MEM_ADDR_MEMOP(mem, addr, n) MEM_ADDR(mem, addr, n) +#endif + #define TRAP(x) (wasm_rt_trap(WASM_RT_TRAP_##x), 0) #if WASM_RT_STACK_DEPTH_COUNT @@ 
-128,20 +170,22 @@ static inline void load_data(void* dest, const void* src, size_t n) { load_data(MEM_ADDR(&m, o, s), i, s); \ } while (0) -#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ - static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ - MEMCHECK(mem, addr, t1); \ - t1 result; \ - wasm_rt_memcpy(&result, MEM_ADDR(mem, addr, sizeof(t1)), sizeof(t1)); \ - force_read(result); \ - return (t3)(t2)result; \ +#define DEFINE_LOAD(name, t1, t2, t3, force_read) \ + static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \ + MEMCHECK(mem, addr, t1); \ + t1 result; \ + wasm_rt_memcpy(&result, MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), \ + sizeof(t1)); \ + force_read(result); \ + return (t3)(t2)result; \ } -#define DEFINE_STORE(name, t1, t2) \ - static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ - MEMCHECK(mem, addr, t1); \ - t1 wrapped = (t1)value; \ - wasm_rt_memcpy(MEM_ADDR(mem, addr, sizeof(t1)), &wrapped, sizeof(t1)); \ +#define DEFINE_STORE(name, t1, t2) \ + static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \ + MEMCHECK(mem, addr, t1); \ + t1 wrapped = (t1)value; \ + wasm_rt_memcpy(MEM_ADDR_MEMOP(mem, addr, sizeof(t1)), &wrapped, \ + sizeof(t1)); \ } DEFINE_LOAD(i32_load, u32, u32, u32, FORCE_READ_INT) @@ -656,7 +700,8 @@ FUNC_TYPE_T(w2c_fac_t0) = "\x07\x80\x96\x7a\x42\xf7\x3e\xe6\x70\x5c\x2f\xac\x83\ /* export: 'fac' */ u32 w2c_fac_fac(w2c_fac* instance, u32 var_p0) { - return w2c_fac_fac_0(instance, var_p0); + u32 ret = w2c_fac_fac_0(instance, var_p0); + return ret; } void wasm2c_fac_instantiate(w2c_fac* instance) { diff --git a/wasm2c/wasm-rt.h b/wasm2c/wasm-rt.h index f06748eb..5c07ff44 100644 --- a/wasm2c/wasm-rt.h +++ b/wasm2c/wasm-rt.h @@ -88,6 +88,14 @@ extern "C" { #endif /** + * If enabled, perform additional sanity checks in the generated wasm2c code and + * wasm2c runtime. This is useful to enable on debug builds. 
+ */ +#ifndef WASM_RT_SANITY_CHECKS +#define WASM_RT_SANITY_CHECKS 0 +#endif + +/** * Backward compatibility: Convert the previously exposed * WASM_RT_MEMCHECK_SIGNAL_HANDLER macro to the ALLOCATION and CHECK macros that * are now used. @@ -197,6 +205,18 @@ extern "C" { #endif /** + * This macro, if defined to 1 (i.e., allows the "segue" optimization), allows + * Wasm2c to use segment registers to speed up access to the linear heap. Note + * that even if allowed in this way, the segment registers would only be used if + * Wasm2c output is compiled for a suitable architecture and OS and the produced + * C file is compiled by supported compilers. The exact restrictions are listed + * in detail in src/template/wasm2c.declarations.c + */ +#ifndef WASM_RT_ALLOW_SEGUE +#define WASM_RT_ALLOW_SEGUE 0 +#endif + +/** * This macro, if defined, allows the embedder to disable all stack exhaustion * checks. This is a non-conformant configuration, i.e., this does not respect * Wasm's specification, and may compromise security. Use with caution. |