Diffstat (limited to 'src')
-rw-r--r--  src/binary-reader-ir.cc                              |   1
-rw-r--r--  src/c-writer.cc                                      | 190
-rw-r--r--  src/prebuilt/wasm2c_atomicops_source_declarations.cc | 438
-rw-r--r--  src/template/wasm2c_atomicops.declarations.c         | 238
-rw-r--r--  src/tools/wasm2c.cc                                  |   3
5 files changed, 864 insertions, 6 deletions
diff --git a/src/binary-reader-ir.cc b/src/binary-reader-ir.cc
index 5736e302..2eb2342b 100644
--- a/src/binary-reader-ir.cc
+++ b/src/binary-reader-ir.cc
@@ -788,6 +788,7 @@ Result BinaryReaderIR::OnOpcode(Opcode opcode) {
return AppendExpr(std::move(metadata));
}
module_->features_used.simd |= (opcode.GetResultType() == Type::V128);
+ module_->features_used.threads |= (opcode.GetPrefix() == 0xfe);
return Result::Ok;
}
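(The 0xfe byte checked above is the opcode prefix used by the WebAssembly threads/atomics proposal, so decoding any atomic instruction marks the module as using the threads feature, mirroring how SIMD usage is tracked on the preceding line.)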
diff --git a/src/c-writer.cc b/src/c-writer.cc
index 2405c170..2e490dab 100644
--- a/src/c-writer.cc
+++ b/src/c-writer.cc
@@ -43,6 +43,7 @@ extern const char* s_header_bottom;
extern const char* s_source_includes;
extern const char* s_source_declarations;
extern const char* s_simd_source_declarations;
+extern const char* s_atomicops_source_declarations;
namespace wabt {
@@ -436,6 +437,11 @@ class CWriter {
void Write(const LoadZeroExpr&);
void Write(const Block&);
+ void Write(const AtomicLoadExpr& expr);
+ void Write(const AtomicStoreExpr& expr);
+ void Write(const AtomicRmwExpr& expr);
+ void Write(const AtomicRmwCmpxchgExpr& expr);
+
size_t BeginTry(const TryExpr& tryexpr);
void WriteTryCatch(const TryExpr& tryexpr);
void WriteTryDelegate(const TryExpr& tryexpr);
@@ -1424,6 +1430,10 @@ void CWriter::WriteSourceTop() {
}
Write(s_simd_source_declarations);
}
+
+ if (module_->features_used.threads) {
+ Write(s_atomicops_source_declarations);
+ }
}
void CWriter::WriteMultiCTop() {
@@ -3558,12 +3568,32 @@ void CWriter::Write(const ExprList& exprs) {
}
} break;
- case ExprType::AtomicLoad:
- case ExprType::AtomicRmw:
- case ExprType::AtomicRmwCmpxchg:
- case ExprType::AtomicStore:
+ case ExprType::AtomicLoad: {
+ Write(*cast<AtomicLoadExpr>(&expr));
+ break;
+ }
+
+ case ExprType::AtomicStore: {
+ Write(*cast<AtomicStoreExpr>(&expr));
+ break;
+ }
+
+ case ExprType::AtomicRmw: {
+ Write(*cast<AtomicRmwExpr>(&expr));
+ break;
+ }
+
+ case ExprType::AtomicRmwCmpxchg: {
+ Write(*cast<AtomicRmwCmpxchgExpr>(&expr));
+ break;
+ }
+
+ case ExprType::AtomicFence: {
+ Write("atomic_fence();", Newline());
+ break;
+ }
+
case ExprType::AtomicWait:
- case ExprType::AtomicFence:
case ExprType::AtomicNotify:
case ExprType::ReturnCall:
case ExprType::ReturnCallIndirect:
@@ -5170,6 +5200,156 @@ void CWriter::Write(const LoadZeroExpr& expr) {
PushType(result_type);
}
+void CWriter::Write(const AtomicLoadExpr& expr) {
+ const char* func = nullptr;
+ // clang-format off
+ switch (expr.opcode) {
+ case Opcode::I32AtomicLoad: func = "i32_atomic_load"; break;
+ case Opcode::I64AtomicLoad: func = "i64_atomic_load"; break;
+ case Opcode::I32AtomicLoad8U: func = "i32_atomic_load8_u"; break;
+ case Opcode::I64AtomicLoad8U: func = "i64_atomic_load8_u"; break;
+ case Opcode::I32AtomicLoad16U: func = "i32_atomic_load16_u"; break;
+ case Opcode::I64AtomicLoad16U: func = "i64_atomic_load16_u"; break;
+ case Opcode::I64AtomicLoad32U: func = "i64_atomic_load32_u"; break;
+
+ default:
+ WABT_UNREACHABLE;
+ }
+ // clang-format on
+
+ Memory* memory = module_->memories[module_->GetMemoryIndex(expr.memidx)];
+
+ Type result_type = expr.opcode.GetResultType();
+ Write(StackVar(0, result_type), " = ", func, "(",
+ ExternalInstancePtr(ModuleFieldType::Memory, memory->name), ", (u64)(",
+ StackVar(0), ")");
+ if (expr.offset != 0)
+ Write(" + ", expr.offset, "u");
+ Write(");", Newline());
+ DropTypes(1);
+ PushType(result_type);
+}
+
+void CWriter::Write(const AtomicStoreExpr& expr) {
+ const char* func = nullptr;
+ // clang-format off
+ switch (expr.opcode) {
+ case Opcode::I32AtomicStore: func = "i32_atomic_store"; break;
+ case Opcode::I64AtomicStore: func = "i64_atomic_store"; break;
+ case Opcode::I32AtomicStore8: func = "i32_atomic_store8"; break;
+ case Opcode::I64AtomicStore8: func = "i64_atomic_store8"; break;
+ case Opcode::I32AtomicStore16: func = "i32_atomic_store16"; break;
+ case Opcode::I64AtomicStore16: func = "i64_atomic_store16"; break;
+ case Opcode::I64AtomicStore32: func = "i64_atomic_store32"; break;
+
+ default:
+ WABT_UNREACHABLE;
+ }
+ // clang-format on
+
+ Memory* memory = module_->memories[module_->GetMemoryIndex(expr.memidx)];
+
+ Write(func, "(", ExternalInstancePtr(ModuleFieldType::Memory, memory->name),
+ ", (u64)(", StackVar(1), ")");
+ if (expr.offset != 0)
+ Write(" + ", expr.offset);
+ Write(", ", StackVar(0), ");", Newline());
+ DropTypes(2);
+}
+
+void CWriter::Write(const AtomicRmwExpr& expr) {
+ const char* func = nullptr;
+ // clang-format off
+ switch (expr.opcode) {
+ case Opcode::I32AtomicRmwAdd: func = "i32_atomic_rmw_add"; break;
+ case Opcode::I64AtomicRmwAdd: func = "i64_atomic_rmw_add"; break;
+ case Opcode::I32AtomicRmw8AddU: func = "i32_atomic_rmw8_add_u"; break;
+ case Opcode::I32AtomicRmw16AddU: func = "i32_atomic_rmw16_add_u"; break;
+ case Opcode::I64AtomicRmw8AddU: func = "i64_atomic_rmw8_add_u"; break;
+ case Opcode::I64AtomicRmw16AddU: func = "i64_atomic_rmw16_add_u"; break;
+ case Opcode::I64AtomicRmw32AddU: func = "i64_atomic_rmw32_add_u"; break;
+ case Opcode::I32AtomicRmwSub: func = "i32_atomic_rmw_sub"; break;
+ case Opcode::I64AtomicRmwSub: func = "i64_atomic_rmw_sub"; break;
+ case Opcode::I32AtomicRmw8SubU: func = "i32_atomic_rmw8_sub_u"; break;
+ case Opcode::I32AtomicRmw16SubU: func = "i32_atomic_rmw16_sub_u"; break;
+ case Opcode::I64AtomicRmw8SubU: func = "i64_atomic_rmw8_sub_u"; break;
+ case Opcode::I64AtomicRmw16SubU: func = "i64_atomic_rmw16_sub_u"; break;
+ case Opcode::I64AtomicRmw32SubU: func = "i64_atomic_rmw32_sub_u"; break;
+ case Opcode::I32AtomicRmwAnd: func = "i32_atomic_rmw_and"; break;
+ case Opcode::I64AtomicRmwAnd: func = "i64_atomic_rmw_and"; break;
+ case Opcode::I32AtomicRmw8AndU: func = "i32_atomic_rmw8_and_u"; break;
+ case Opcode::I32AtomicRmw16AndU: func = "i32_atomic_rmw16_and_u"; break;
+ case Opcode::I64AtomicRmw8AndU: func = "i64_atomic_rmw8_and_u"; break;
+ case Opcode::I64AtomicRmw16AndU: func = "i64_atomic_rmw16_and_u"; break;
+ case Opcode::I64AtomicRmw32AndU: func = "i64_atomic_rmw32_and_u"; break;
+ case Opcode::I32AtomicRmwOr: func = "i32_atomic_rmw_or"; break;
+ case Opcode::I64AtomicRmwOr: func = "i64_atomic_rmw_or"; break;
+ case Opcode::I32AtomicRmw8OrU: func = "i32_atomic_rmw8_or_u"; break;
+ case Opcode::I32AtomicRmw16OrU: func = "i32_atomic_rmw16_or_u"; break;
+ case Opcode::I64AtomicRmw8OrU: func = "i64_atomic_rmw8_or_u"; break;
+ case Opcode::I64AtomicRmw16OrU: func = "i64_atomic_rmw16_or_u"; break;
+ case Opcode::I64AtomicRmw32OrU: func = "i64_atomic_rmw32_or_u"; break;
+ case Opcode::I32AtomicRmwXor: func = "i32_atomic_rmw_xor"; break;
+ case Opcode::I64AtomicRmwXor: func = "i64_atomic_rmw_xor"; break;
+ case Opcode::I32AtomicRmw8XorU: func = "i32_atomic_rmw8_xor_u"; break;
+ case Opcode::I32AtomicRmw16XorU: func = "i32_atomic_rmw16_xor_u"; break;
+ case Opcode::I64AtomicRmw8XorU: func = "i64_atomic_rmw8_xor_u"; break;
+ case Opcode::I64AtomicRmw16XorU: func = "i64_atomic_rmw16_xor_u"; break;
+ case Opcode::I64AtomicRmw32XorU: func = "i64_atomic_rmw32_xor_u"; break;
+ case Opcode::I32AtomicRmwXchg: func = "i32_atomic_rmw_xchg"; break;
+ case Opcode::I64AtomicRmwXchg: func = "i64_atomic_rmw_xchg"; break;
+ case Opcode::I32AtomicRmw8XchgU: func = "i32_atomic_rmw8_xchg_u"; break;
+ case Opcode::I32AtomicRmw16XchgU: func = "i32_atomic_rmw16_xchg_u"; break;
+ case Opcode::I64AtomicRmw8XchgU: func = "i64_atomic_rmw8_xchg_u"; break;
+ case Opcode::I64AtomicRmw16XchgU: func = "i64_atomic_rmw16_xchg_u"; break;
+ case Opcode::I64AtomicRmw32XchgU: func = "i64_atomic_rmw32_xchg_u"; break;
+ default:
+ WABT_UNREACHABLE;
+ }
+ // clang-format on
+
+ Memory* memory = module_->memories[module_->GetMemoryIndex(expr.memidx)];
+ Type result_type = expr.opcode.GetResultType();
+
+ Write(StackVar(1, result_type), " = ", func, "(",
+ ExternalInstancePtr(ModuleFieldType::Memory, memory->name), ", (u64)(",
+ StackVar(1), ")");
+ if (expr.offset != 0)
+ Write(" + ", expr.offset);
+ Write(", ", StackVar(0), ");", Newline());
+ DropTypes(2);
+ PushType(result_type);
+}
+
+void CWriter::Write(const AtomicRmwCmpxchgExpr& expr) {
+ const char* func = nullptr;
+ // clang-format off
+ switch(expr.opcode) {
+ case Opcode::I32AtomicRmwCmpxchg: func = "i32_atomic_rmw_cmpxchg"; break;
+ case Opcode::I64AtomicRmwCmpxchg: func = "i64_atomic_rmw_cmpxchg"; break;
+ case Opcode::I32AtomicRmw8CmpxchgU: func = "i32_atomic_rmw8_cmpxchg_u"; break;
+ case Opcode::I32AtomicRmw16CmpxchgU: func = "i32_atomic_rmw16_cmpxchg_u"; break;
+ case Opcode::I64AtomicRmw8CmpxchgU: func = "i64_atomic_rmw8_cmpxchg_u"; break;
+ case Opcode::I64AtomicRmw16CmpxchgU: func = "i64_atomic_rmw16_cmpxchg_u"; break;
+ case Opcode::I64AtomicRmw32CmpxchgU: func = "i64_atomic_rmw32_cmpxchg_u"; break;
+ default:
+ WABT_UNREACHABLE;
+ }
+ // clang-format on
+
+ Memory* memory = module_->memories[module_->GetMemoryIndex(expr.memidx)];
+ Type result_type = expr.opcode.GetResultType();
+
+ Write(StackVar(2, result_type), " = ", func, "(",
+ ExternalInstancePtr(ModuleFieldType::Memory, memory->name), ", (u64)(",
+ StackVar(2), ")");
+ if (expr.offset != 0)
+ Write(" + ", expr.offset);
+ Write(", ", StackVar(1), ", ", StackVar(0), ");", Newline());
+ DropTypes(3);
+ PushType(result_type);
+}
+
void CWriter::ReserveExportNames() {
for (const Export* export_ : module_->exports) {
ReserveExportName(export_->name);
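For reference, the generated C for these expressions follows the same shape as the existing load/store writers: the helper takes the memory instance, the effective address (index plus static offset), and any operands. A minimal sketch of what the AtomicRmw writer above would emit for an i32.atomic.rmw.add with offset 4 (the instance field and stack-variable names are illustrative, not exact wasm2c output):

  var_i1 = i32_atomic_rmw_add(&instance->w2c_mem, (u64)(var_i1) + 4, var_i0);

The old value returned by the helper replaces the address operand on the value stack, matching the DropTypes(2)/PushType(result_type) bookkeeping.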
diff --git a/src/prebuilt/wasm2c_atomicops_source_declarations.cc b/src/prebuilt/wasm2c_atomicops_source_declarations.cc
new file mode 100644
index 00000000..9a223b70
--- /dev/null
+++ b/src/prebuilt/wasm2c_atomicops_source_declarations.cc
@@ -0,0 +1,438 @@
+const char* s_atomicops_source_declarations = R"w2c_template(#if defined(_MSC_VER)
+)w2c_template"
+R"w2c_template(
+#include <intrin.h>
+)w2c_template"
+R"w2c_template(
+// Use MSVC intrinsics
+)w2c_template"
+R"w2c_template(
+// For loads and stores, it's not clear if we can rely on register width loads
+)w2c_template"
+R"w2c_template(// and stores to be atomic as reported here
+)w2c_template"
+R"w2c_template(// https://learn.microsoft.com/en-us/windows/win32/sync/interlocked-variable-access?redirectedfrom=MSDN
+)w2c_template"
+R"w2c_template(// or if we have to reuse other instrinsics
+)w2c_template"
+R"w2c_template(// https://stackoverflow.com/questions/42660091/atomic-load-in-c-with-msvc
+)w2c_template"
+R"w2c_template(// We reuse other intrinsics to be cautious
+)w2c_template"
+R"w2c_template(#define atomic_load_u8(a, v) _InterlockedOr8(a, 0)
+)w2c_template"
+R"w2c_template(#define atomic_load_u16(a, v) _InterlockedOr16(a, 0)
+)w2c_template"
+R"w2c_template(#define atomic_load_u32(a, v) _InterlockedOr(a, 0)
+)w2c_template"
+R"w2c_template(#define atomic_load_u64(a, v) _InterlockedOr64(a, 0)
+)w2c_template"
+R"w2c_template(
+#define atomic_store_u8(a, v) _InterlockedExchange8(a, v)
+)w2c_template"
+R"w2c_template(#define atomic_store_u16(a, v) _InterlockedExchange16(a, v)
+)w2c_template"
+R"w2c_template(#define atomic_store_u32(a, v) _InterlockedExchange(a, v)
+)w2c_template"
+R"w2c_template(#define atomic_store_u64(a, v) _InterlockedExchange64(a, v)
+)w2c_template"
+R"w2c_template(
+#define atomic_add_u8(a, v) _InterlockedExchangeAdd8(a, v)
+)w2c_template"
+R"w2c_template(#define atomic_add_u16(a, v) _InterlockedExchangeAdd16(a, v)
+)w2c_template"
+R"w2c_template(#define atomic_add_u32(a, v) _InterlockedExchangeAdd(a, v)
+)w2c_template"
+R"w2c_template(#define atomic_add_u64(a, v) _InterlockedExchangeAdd64(a, v)
+)w2c_template"
+R"w2c_template(
+#define atomic_sub_u8(a, v) _InterlockedExchangeAdd8(a, -(v))
+)w2c_template"
+R"w2c_template(#define atomic_sub_u16(a, v) _InterlockedExchangeAdd16(a, -(v))
+)w2c_template"
+R"w2c_template(#define atomic_sub_u32(a, v) _InterlockedExchangeAdd(a, -(v))
+)w2c_template"
+R"w2c_template(#define atomic_sub_u64(a, v) _InterlockedExchangeAdd64(a, -(v))
+)w2c_template"
+R"w2c_template(
+#define atomic_and_u8(a, v) _InterlockedAnd8(a, v)
+)w2c_template"
+R"w2c_template(#define atomic_and_u16(a, v) _InterlockedAnd16(a, v)
+)w2c_template"
+R"w2c_template(#define atomic_and_u32(a, v) _InterlockedAnd(a, v)
+)w2c_template"
+R"w2c_template(#define atomic_and_u64(a, v) _InterlockedAnd64(a, v)
+)w2c_template"
+R"w2c_template(
+#define atomic_or_u8(a, v) _InterlockedOr8(a, v)
+)w2c_template"
+R"w2c_template(#define atomic_or_u16(a, v) _InterlockedOr16(a, v)
+)w2c_template"
+R"w2c_template(#define atomic_or_u32(a, v) _InterlockedOr(a, v)
+)w2c_template"
+R"w2c_template(#define atomic_or_u64(a, v) _InterlockedOr64(a, v)
+)w2c_template"
+R"w2c_template(
+#define atomic_xor_u8(a, v) _InterlockedXor8(a, v)
+)w2c_template"
+R"w2c_template(#define atomic_xor_u16(a, v) _InterlockedXor16(a, v)
+)w2c_template"
+R"w2c_template(#define atomic_xor_u32(a, v) _InterlockedXor(a, v)
+)w2c_template"
+R"w2c_template(#define atomic_xor_u64(a, v) _InterlockedXor64(a, v)
+)w2c_template"
+R"w2c_template(
+#define atomic_exchange_u8(a, v) _InterlockedExchange8(a, v)
+)w2c_template"
+R"w2c_template(#define atomic_exchange_u16(a, v) _InterlockedExchange16(a, v)
+)w2c_template"
+R"w2c_template(#define atomic_exchange_u32(a, v) _InterlockedExchange(a, v)
+)w2c_template"
+R"w2c_template(#define atomic_exchange_u64(a, v) _InterlockedExchange64(a, v)
+)w2c_template"
+R"w2c_template(
+// clang-format off
+)w2c_template"
+R"w2c_template(#define atomic_compare_exchange_u8(a, expected_ptr, desired) _InterlockedCompareExchange8(a, desired, *(expected_ptr))
+)w2c_template"
+R"w2c_template(#define atomic_compare_exchange_u16(a, expected_ptr, desired) _InterlockedCompareExchange16(a, desired, *(expected_ptr))
+)w2c_template"
+R"w2c_template(#define atomic_compare_exchange_u32(a, expected_ptr, desired) _InterlockedCompareExchange(a, desired, *(expected_ptr))
+)w2c_template"
+R"w2c_template(#define atomic_compare_exchange_u64(a, expected_ptr, desired) _InterlockedCompareExchange64(a, desired, *(expected_ptr))
+)w2c_template"
+R"w2c_template(// clang-format on
+)w2c_template"
+R"w2c_template(
+#define atomic_fence() _ReadWriteBarrier()
+)w2c_template"
+R"w2c_template(
+#else
+)w2c_template"
+R"w2c_template(
+// Use gcc/clang/icc intrinsics
+)w2c_template"
+R"w2c_template(#define atomic_load_u8(a) __atomic_load_n((u8*)(a), __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_load_u16(a) __atomic_load_n((u16*)(a), __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_load_u32(a) __atomic_load_n((u32*)(a), __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_load_u64(a) __atomic_load_n((u64*)(a), __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(
+#define atomic_store_u8(a, v) __atomic_store_n((u8*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_store_u16(a, v) __atomic_store_n((u16*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_store_u32(a, v) __atomic_store_n((u32*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_store_u64(a, v) __atomic_store_n((u64*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(
+#define atomic_add_u8(a, v) __atomic_fetch_add((u8*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_add_u16(a, v) __atomic_fetch_add((u16*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_add_u32(a, v) __atomic_fetch_add((u32*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_add_u64(a, v) __atomic_fetch_add((u64*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(
+#define atomic_sub_u8(a, v) __atomic_fetch_sub((u8*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_sub_u16(a, v) __atomic_fetch_sub((u16*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_sub_u32(a, v) __atomic_fetch_sub((u32*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_sub_u64(a, v) __atomic_fetch_sub((u64*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(
+#define atomic_and_u8(a, v) __atomic_fetch_and((u8*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_and_u16(a, v) __atomic_fetch_and((u16*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_and_u32(a, v) __atomic_fetch_and((u32*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_and_u64(a, v) __atomic_fetch_and((u64*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(
+#define atomic_or_u8(a, v) __atomic_fetch_or((u8*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_or_u16(a, v) __atomic_fetch_or((u16*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_or_u32(a, v) __atomic_fetch_or((u32*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_or_u64(a, v) __atomic_fetch_or((u64*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(
+#define atomic_xor_u8(a, v) __atomic_fetch_xor((u8*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_xor_u16(a, v) __atomic_fetch_xor((u16*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_xor_u32(a, v) __atomic_fetch_xor((u32*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_xor_u64(a, v) __atomic_fetch_xor((u64*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(
+// clang-format off
+)w2c_template"
+R"w2c_template(#define atomic_exchange_u8(a, v) __atomic_exchange_n((u8*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_exchange_u16(a, v) __atomic_exchange_n((u16*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_exchange_u32(a, v) __atomic_exchange_n((u32*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(#define atomic_exchange_u64(a, v) __atomic_exchange_n((u64*)(a), v, __ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(// clang-format on
+)w2c_template"
+R"w2c_template(
+#define __atomic_compare_exchange_helper(a, expected_ptr, desired) \
+)w2c_template"
+R"w2c_template( (__atomic_compare_exchange_n(a, expected_ptr, desired, 0 /* is_weak */, \
+)w2c_template"
+R"w2c_template( __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST), \
+)w2c_template"
+R"w2c_template( *(expected_ptr))
+)w2c_template"
+R"w2c_template(
+// clang-format off
+)w2c_template"
+R"w2c_template(#define atomic_compare_exchange_u8(a, expected_ptr, desired) __atomic_compare_exchange_helper((u8*)(a), expected_ptr, desired)
+)w2c_template"
+R"w2c_template(#define atomic_compare_exchange_u16(a, expected_ptr, desired) __atomic_compare_exchange_helper((u16*)(a), expected_ptr, desired)
+)w2c_template"
+R"w2c_template(#define atomic_compare_exchange_u32(a, expected_ptr, desired) __atomic_compare_exchange_helper((u32*)(a), expected_ptr, desired)
+)w2c_template"
+R"w2c_template(#define atomic_compare_exchange_u64(a, expected_ptr, desired) __atomic_compare_exchange_helper((u64*)(a), expected_ptr, desired)
+)w2c_template"
+R"w2c_template(// clang-format on
+)w2c_template"
+R"w2c_template(
+#define atomic_fence() __atomic_thread_fence(__ATOMIC_SEQ_CST)
+)w2c_template"
+R"w2c_template(
+#endif
+)w2c_template"
+R"w2c_template(
+#define ATOMIC_ALIGNMENT_CHECK(addr, t1) \
+)w2c_template"
+R"w2c_template( if (UNLIKELY(addr % sizeof(t1))) { \
+)w2c_template"
+R"w2c_template( TRAP(UNALIGNED); \
+)w2c_template"
+R"w2c_template( }
+)w2c_template"
+R"w2c_template(
+#define DEFINE_ATOMIC_LOAD(name, t1, t2, t3) \
+)w2c_template"
+R"w2c_template( static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
+)w2c_template"
+R"w2c_template( MEMCHECK(mem, addr, t1); \
+)w2c_template"
+R"w2c_template( ATOMIC_ALIGNMENT_CHECK(addr, t1); \
+)w2c_template"
+R"w2c_template( t1 result; \
+)w2c_template"
+R"w2c_template( wasm_rt_memcpy(&result, &mem->data[addr], sizeof(t1)); \
+)w2c_template"
+R"w2c_template( result = atomic_load_##t1(&mem->data[addr]); \
+)w2c_template"
+R"w2c_template( wasm_asm("" ::"r"(result)); \
+)w2c_template"
+R"w2c_template( return (t3)(t2)result; \
+)w2c_template"
+R"w2c_template( }
+)w2c_template"
+R"w2c_template(
+DEFINE_ATOMIC_LOAD(i32_atomic_load, u32, u32, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_LOAD(i64_atomic_load, u64, u64, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_LOAD(i32_atomic_load8_u, u8, u32, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_LOAD(i64_atomic_load8_u, u8, u64, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_LOAD(i32_atomic_load16_u, u16, u32, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_LOAD(i64_atomic_load16_u, u16, u64, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_LOAD(i64_atomic_load32_u, u32, u64, u64)
+)w2c_template"
+R"w2c_template(
+#define DEFINE_ATOMIC_STORE(name, t1, t2) \
+)w2c_template"
+R"w2c_template( static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
+)w2c_template"
+R"w2c_template( MEMCHECK(mem, addr, t1); \
+)w2c_template"
+R"w2c_template( ATOMIC_ALIGNMENT_CHECK(addr, t1); \
+)w2c_template"
+R"w2c_template( t1 wrapped = (t1)value; \
+)w2c_template"
+R"w2c_template( atomic_store_##t1(&mem->data[addr], wrapped); \
+)w2c_template"
+R"w2c_template( }
+)w2c_template"
+R"w2c_template(
+DEFINE_ATOMIC_STORE(i32_atomic_store, u32, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_STORE(i64_atomic_store, u64, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_STORE(i32_atomic_store8, u8, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_STORE(i32_atomic_store16, u16, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_STORE(i64_atomic_store8, u8, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_STORE(i64_atomic_store16, u16, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_STORE(i64_atomic_store32, u32, u64)
+)w2c_template"
+R"w2c_template(
+#define DEFINE_ATOMIC_RMW(name, op, t1, t2) \
+)w2c_template"
+R"w2c_template( static inline t2 name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
+)w2c_template"
+R"w2c_template( MEMCHECK(mem, addr, t1); \
+)w2c_template"
+R"w2c_template( ATOMIC_ALIGNMENT_CHECK(addr, t1); \
+)w2c_template"
+R"w2c_template( t1 wrapped = (t1)value; \
+)w2c_template"
+R"w2c_template( t1 ret = atomic_##op##_##t1(&mem->data[addr], wrapped); \
+)w2c_template"
+R"w2c_template( return (t2)ret; \
+)w2c_template"
+R"w2c_template( }
+)w2c_template"
+R"w2c_template(
+DEFINE_ATOMIC_RMW(i32_atomic_rmw8_add_u, add, u8, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i32_atomic_rmw16_add_u, add, u16, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i32_atomic_rmw_add, add, u32, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw8_add_u, add, u8, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw16_add_u, add, u16, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw32_add_u, add, u32, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw_add, add, u64, u64)
+)w2c_template"
+R"w2c_template(
+DEFINE_ATOMIC_RMW(i32_atomic_rmw8_sub_u, sub, u8, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i32_atomic_rmw16_sub_u, sub, u16, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i32_atomic_rmw_sub, sub, u32, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw8_sub_u, sub, u8, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw16_sub_u, sub, u16, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw32_sub_u, sub, u32, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw_sub, sub, u64, u64)
+)w2c_template"
+R"w2c_template(
+DEFINE_ATOMIC_RMW(i32_atomic_rmw8_and_u, and, u8, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i32_atomic_rmw16_and_u, and, u16, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i32_atomic_rmw_and, and, u32, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw8_and_u, and, u8, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw16_and_u, and, u16, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw32_and_u, and, u32, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw_and, and, u64, u64)
+)w2c_template"
+R"w2c_template(
+DEFINE_ATOMIC_RMW(i32_atomic_rmw8_or_u, or, u8, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i32_atomic_rmw16_or_u, or, u16, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i32_atomic_rmw_or, or, u32, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw8_or_u, or, u8, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw16_or_u, or, u16, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw32_or_u, or, u32, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw_or, or, u64, u64)
+)w2c_template"
+R"w2c_template(
+DEFINE_ATOMIC_RMW(i32_atomic_rmw8_xor_u, xor, u8, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i32_atomic_rmw16_xor_u, xor, u16, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i32_atomic_rmw_xor, xor, u32, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw8_xor_u, xor, u8, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw16_xor_u, xor, u16, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw32_xor_u, xor, u32, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw_xor, xor, u64, u64)
+)w2c_template"
+R"w2c_template(
+DEFINE_ATOMIC_RMW(i32_atomic_rmw8_xchg_u, exchange, u8, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i32_atomic_rmw16_xchg_u, exchange, u16, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i32_atomic_rmw_xchg, exchange, u32, u32)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw8_xchg_u, exchange, u8, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw16_xchg_u, exchange, u16, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw32_xchg_u, exchange, u32, u64)
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_RMW(i64_atomic_rmw_xchg, exchange, u64, u64)
+)w2c_template"
+R"w2c_template(
+#define DEFINE_ATOMIC_CMP_XCHG(name, t1, t2) \
+)w2c_template"
+R"w2c_template( static inline t1 name(wasm_rt_memory_t* mem, u64 addr, t1 expected, \
+)w2c_template"
+R"w2c_template( t1 replacement) { \
+)w2c_template"
+R"w2c_template( MEMCHECK(mem, addr, t2); \
+)w2c_template"
+R"w2c_template( ATOMIC_ALIGNMENT_CHECK(addr, t2); \
+)w2c_template"
+R"w2c_template( t2 expected_wrapped = (t2)expected; \
+)w2c_template"
+R"w2c_template( t2 replacement_wrapped = (t2)replacement; \
+)w2c_template"
+R"w2c_template( t2 old = atomic_compare_exchange_##t2(&mem->data[addr], &expected_wrapped, \
+)w2c_template"
+R"w2c_template( replacement_wrapped); \
+)w2c_template"
+R"w2c_template( return (t1)old; \
+)w2c_template"
+R"w2c_template( }
+)w2c_template"
+R"w2c_template(
+DEFINE_ATOMIC_CMP_XCHG(i32_atomic_rmw8_cmpxchg_u, u32, u8);
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_CMP_XCHG(i32_atomic_rmw16_cmpxchg_u, u32, u16);
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_CMP_XCHG(i32_atomic_rmw_cmpxchg, u32, u32);
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_CMP_XCHG(i64_atomic_rmw8_cmpxchg_u, u64, u8);
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_CMP_XCHG(i64_atomic_rmw16_cmpxchg_u, u64, u16);
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_CMP_XCHG(i64_atomic_rmw32_cmpxchg_u, u64, u32);
+)w2c_template"
+R"w2c_template(DEFINE_ATOMIC_CMP_XCHG(i64_atomic_rmw_cmpxchg, u64, u64);
+)w2c_template"
+;
diff --git a/src/template/wasm2c_atomicops.declarations.c b/src/template/wasm2c_atomicops.declarations.c
new file mode 100644
index 00000000..546f8a4d
--- /dev/null
+++ b/src/template/wasm2c_atomicops.declarations.c
@@ -0,0 +1,238 @@
+#if defined(_MSC_VER)
+
+#include <intrin.h>
+
+// Use MSVC intrinsics
+
+// For loads and stores, it's not clear if we can rely on register width loads
+// and stores to be atomic as reported here
+// https://learn.microsoft.com/en-us/windows/win32/sync/interlocked-variable-access?redirectedfrom=MSDN
+// or if we have to reuse other intrinsics
+// https://stackoverflow.com/questions/42660091/atomic-load-in-c-with-msvc
+// We reuse other intrinsics to be cautious
+#define atomic_load_u8(a) _InterlockedOr8(a, 0)
+#define atomic_load_u16(a) _InterlockedOr16(a, 0)
+#define atomic_load_u32(a) _InterlockedOr(a, 0)
+#define atomic_load_u64(a) _InterlockedOr64(a, 0)
+
+#define atomic_store_u8(a, v) _InterlockedExchange8(a, v)
+#define atomic_store_u16(a, v) _InterlockedExchange16(a, v)
+#define atomic_store_u32(a, v) _InterlockedExchange(a, v)
+#define atomic_store_u64(a, v) _InterlockedExchange64(a, v)
+
+#define atomic_add_u8(a, v) _InterlockedExchangeAdd8(a, v)
+#define atomic_add_u16(a, v) _InterlockedExchangeAdd16(a, v)
+#define atomic_add_u32(a, v) _InterlockedExchangeAdd(a, v)
+#define atomic_add_u64(a, v) _InterlockedExchangeAdd64(a, v)
+
+#define atomic_sub_u8(a, v) _InterlockedExchangeAdd8(a, -(v))
+#define atomic_sub_u16(a, v) _InterlockedExchangeAdd16(a, -(v))
+#define atomic_sub_u32(a, v) _InterlockedExchangeAdd(a, -(v))
+#define atomic_sub_u64(a, v) _InterlockedExchangeAdd64(a, -(v))
+
+#define atomic_and_u8(a, v) _InterlockedAnd8(a, v)
+#define atomic_and_u16(a, v) _InterlockedAnd16(a, v)
+#define atomic_and_u32(a, v) _InterlockedAnd(a, v)
+#define atomic_and_u64(a, v) _InterlockedAnd64(a, v)
+
+#define atomic_or_u8(a, v) _InterlockedOr8(a, v)
+#define atomic_or_u16(a, v) _InterlockedOr16(a, v)
+#define atomic_or_u32(a, v) _InterlockedOr(a, v)
+#define atomic_or_u64(a, v) _InterlockedOr64(a, v)
+
+#define atomic_xor_u8(a, v) _InterlockedXor8(a, v)
+#define atomic_xor_u16(a, v) _InterlockedXor16(a, v)
+#define atomic_xor_u32(a, v) _InterlockedXor(a, v)
+#define atomic_xor_u64(a, v) _InterlockedXor64(a, v)
+
+#define atomic_exchange_u8(a, v) _InterlockedExchange8(a, v)
+#define atomic_exchange_u16(a, v) _InterlockedExchange16(a, v)
+#define atomic_exchange_u32(a, v) _InterlockedExchange(a, v)
+#define atomic_exchange_u64(a, v) _InterlockedExchange64(a, v)
+
+// clang-format off
+#define atomic_compare_exchange_u8(a, expected_ptr, desired) _InterlockedCompareExchange8(a, desired, *(expected_ptr))
+#define atomic_compare_exchange_u16(a, expected_ptr, desired) _InterlockedCompareExchange16(a, desired, *(expected_ptr))
+#define atomic_compare_exchange_u32(a, expected_ptr, desired) _InterlockedCompareExchange(a, desired, *(expected_ptr))
+#define atomic_compare_exchange_u64(a, expected_ptr, desired) _InterlockedCompareExchange64(a, desired, *(expected_ptr))
+// clang-format on
+
+#define atomic_fence() _ReadWriteBarrier()
+
+#else
+
+// Use gcc/clang/icc intrinsics
+#define atomic_load_u8(a) __atomic_load_n((u8*)(a), __ATOMIC_SEQ_CST)
+#define atomic_load_u16(a) __atomic_load_n((u16*)(a), __ATOMIC_SEQ_CST)
+#define atomic_load_u32(a) __atomic_load_n((u32*)(a), __ATOMIC_SEQ_CST)
+#define atomic_load_u64(a) __atomic_load_n((u64*)(a), __ATOMIC_SEQ_CST)
+
+#define atomic_store_u8(a, v) __atomic_store_n((u8*)(a), v, __ATOMIC_SEQ_CST)
+#define atomic_store_u16(a, v) __atomic_store_n((u16*)(a), v, __ATOMIC_SEQ_CST)
+#define atomic_store_u32(a, v) __atomic_store_n((u32*)(a), v, __ATOMIC_SEQ_CST)
+#define atomic_store_u64(a, v) __atomic_store_n((u64*)(a), v, __ATOMIC_SEQ_CST)
+
+#define atomic_add_u8(a, v) __atomic_fetch_add((u8*)(a), v, __ATOMIC_SEQ_CST)
+#define atomic_add_u16(a, v) __atomic_fetch_add((u16*)(a), v, __ATOMIC_SEQ_CST)
+#define atomic_add_u32(a, v) __atomic_fetch_add((u32*)(a), v, __ATOMIC_SEQ_CST)
+#define atomic_add_u64(a, v) __atomic_fetch_add((u64*)(a), v, __ATOMIC_SEQ_CST)
+
+#define atomic_sub_u8(a, v) __atomic_fetch_sub((u8*)(a), v, __ATOMIC_SEQ_CST)
+#define atomic_sub_u16(a, v) __atomic_fetch_sub((u16*)(a), v, __ATOMIC_SEQ_CST)
+#define atomic_sub_u32(a, v) __atomic_fetch_sub((u32*)(a), v, __ATOMIC_SEQ_CST)
+#define atomic_sub_u64(a, v) __atomic_fetch_sub((u64*)(a), v, __ATOMIC_SEQ_CST)
+
+#define atomic_and_u8(a, v) __atomic_fetch_and((u8*)(a), v, __ATOMIC_SEQ_CST)
+#define atomic_and_u16(a, v) __atomic_fetch_and((u16*)(a), v, __ATOMIC_SEQ_CST)
+#define atomic_and_u32(a, v) __atomic_fetch_and((u32*)(a), v, __ATOMIC_SEQ_CST)
+#define atomic_and_u64(a, v) __atomic_fetch_and((u64*)(a), v, __ATOMIC_SEQ_CST)
+
+#define atomic_or_u8(a, v) __atomic_fetch_or((u8*)(a), v, __ATOMIC_SEQ_CST)
+#define atomic_or_u16(a, v) __atomic_fetch_or((u16*)(a), v, __ATOMIC_SEQ_CST)
+#define atomic_or_u32(a, v) __atomic_fetch_or((u32*)(a), v, __ATOMIC_SEQ_CST)
+#define atomic_or_u64(a, v) __atomic_fetch_or((u64*)(a), v, __ATOMIC_SEQ_CST)
+
+#define atomic_xor_u8(a, v) __atomic_fetch_xor((u8*)(a), v, __ATOMIC_SEQ_CST)
+#define atomic_xor_u16(a, v) __atomic_fetch_xor((u16*)(a), v, __ATOMIC_SEQ_CST)
+#define atomic_xor_u32(a, v) __atomic_fetch_xor((u32*)(a), v, __ATOMIC_SEQ_CST)
+#define atomic_xor_u64(a, v) __atomic_fetch_xor((u64*)(a), v, __ATOMIC_SEQ_CST)
+
+// clang-format off
+#define atomic_exchange_u8(a, v) __atomic_exchange_n((u8*)(a), v, __ATOMIC_SEQ_CST)
+#define atomic_exchange_u16(a, v) __atomic_exchange_n((u16*)(a), v, __ATOMIC_SEQ_CST)
+#define atomic_exchange_u32(a, v) __atomic_exchange_n((u32*)(a), v, __ATOMIC_SEQ_CST)
+#define atomic_exchange_u64(a, v) __atomic_exchange_n((u64*)(a), v, __ATOMIC_SEQ_CST)
+// clang-format on
+
+#define __atomic_compare_exchange_helper(a, expected_ptr, desired) \
+ (__atomic_compare_exchange_n(a, expected_ptr, desired, 0 /* is_weak */, \
+ __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST), \
+ *(expected_ptr))
+
+// clang-format off
+#define atomic_compare_exchange_u8(a, expected_ptr, desired) __atomic_compare_exchange_helper((u8*)(a), expected_ptr, desired)
+#define atomic_compare_exchange_u16(a, expected_ptr, desired) __atomic_compare_exchange_helper((u16*)(a), expected_ptr, desired)
+#define atomic_compare_exchange_u32(a, expected_ptr, desired) __atomic_compare_exchange_helper((u32*)(a), expected_ptr, desired)
+#define atomic_compare_exchange_u64(a, expected_ptr, desired) __atomic_compare_exchange_helper((u64*)(a), expected_ptr, desired)
+// clang-format on
+
+#define atomic_fence() __atomic_thread_fence(__ATOMIC_SEQ_CST)
+
+#endif
+
+#define ATOMIC_ALIGNMENT_CHECK(addr, t1) \
+ if (UNLIKELY(addr % sizeof(t1))) { \
+ TRAP(UNALIGNED); \
+ }
+
+#define DEFINE_ATOMIC_LOAD(name, t1, t2, t3) \
+ static inline t3 name(wasm_rt_memory_t* mem, u64 addr) { \
+ MEMCHECK(mem, addr, t1); \
+ ATOMIC_ALIGNMENT_CHECK(addr, t1); \
+ t1 result; \
+ wasm_rt_memcpy(&result, &mem->data[addr], sizeof(t1)); \
+ result = atomic_load_##t1(&mem->data[addr]); \
+ wasm_asm("" ::"r"(result)); \
+ return (t3)(t2)result; \
+ }
+
+DEFINE_ATOMIC_LOAD(i32_atomic_load, u32, u32, u32)
+DEFINE_ATOMIC_LOAD(i64_atomic_load, u64, u64, u64)
+DEFINE_ATOMIC_LOAD(i32_atomic_load8_u, u8, u32, u32)
+DEFINE_ATOMIC_LOAD(i64_atomic_load8_u, u8, u64, u64)
+DEFINE_ATOMIC_LOAD(i32_atomic_load16_u, u16, u32, u32)
+DEFINE_ATOMIC_LOAD(i64_atomic_load16_u, u16, u64, u64)
+DEFINE_ATOMIC_LOAD(i64_atomic_load32_u, u32, u64, u64)
+
+#define DEFINE_ATOMIC_STORE(name, t1, t2) \
+ static inline void name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
+ MEMCHECK(mem, addr, t1); \
+ ATOMIC_ALIGNMENT_CHECK(addr, t1); \
+ t1 wrapped = (t1)value; \
+ atomic_store_##t1(&mem->data[addr], wrapped); \
+ }
+
+DEFINE_ATOMIC_STORE(i32_atomic_store, u32, u32)
+DEFINE_ATOMIC_STORE(i64_atomic_store, u64, u64)
+DEFINE_ATOMIC_STORE(i32_atomic_store8, u8, u32)
+DEFINE_ATOMIC_STORE(i32_atomic_store16, u16, u32)
+DEFINE_ATOMIC_STORE(i64_atomic_store8, u8, u64)
+DEFINE_ATOMIC_STORE(i64_atomic_store16, u16, u64)
+DEFINE_ATOMIC_STORE(i64_atomic_store32, u32, u64)
+
+#define DEFINE_ATOMIC_RMW(name, op, t1, t2) \
+ static inline t2 name(wasm_rt_memory_t* mem, u64 addr, t2 value) { \
+ MEMCHECK(mem, addr, t1); \
+ ATOMIC_ALIGNMENT_CHECK(addr, t1); \
+ t1 wrapped = (t1)value; \
+ t1 ret = atomic_##op##_##t1(&mem->data[addr], wrapped); \
+ return (t2)ret; \
+ }
+
+DEFINE_ATOMIC_RMW(i32_atomic_rmw8_add_u, add, u8, u32)
+DEFINE_ATOMIC_RMW(i32_atomic_rmw16_add_u, add, u16, u32)
+DEFINE_ATOMIC_RMW(i32_atomic_rmw_add, add, u32, u32)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw8_add_u, add, u8, u64)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw16_add_u, add, u16, u64)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw32_add_u, add, u32, u64)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw_add, add, u64, u64)
+
+DEFINE_ATOMIC_RMW(i32_atomic_rmw8_sub_u, sub, u8, u32)
+DEFINE_ATOMIC_RMW(i32_atomic_rmw16_sub_u, sub, u16, u32)
+DEFINE_ATOMIC_RMW(i32_atomic_rmw_sub, sub, u32, u32)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw8_sub_u, sub, u8, u64)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw16_sub_u, sub, u16, u64)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw32_sub_u, sub, u32, u64)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw_sub, sub, u64, u64)
+
+DEFINE_ATOMIC_RMW(i32_atomic_rmw8_and_u, and, u8, u32)
+DEFINE_ATOMIC_RMW(i32_atomic_rmw16_and_u, and, u16, u32)
+DEFINE_ATOMIC_RMW(i32_atomic_rmw_and, and, u32, u32)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw8_and_u, and, u8, u64)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw16_and_u, and, u16, u64)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw32_and_u, and, u32, u64)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw_and, and, u64, u64)
+
+DEFINE_ATOMIC_RMW(i32_atomic_rmw8_or_u, or, u8, u32)
+DEFINE_ATOMIC_RMW(i32_atomic_rmw16_or_u, or, u16, u32)
+DEFINE_ATOMIC_RMW(i32_atomic_rmw_or, or, u32, u32)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw8_or_u, or, u8, u64)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw16_or_u, or, u16, u64)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw32_or_u, or, u32, u64)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw_or, or, u64, u64)
+
+DEFINE_ATOMIC_RMW(i32_atomic_rmw8_xor_u, xor, u8, u32)
+DEFINE_ATOMIC_RMW(i32_atomic_rmw16_xor_u, xor, u16, u32)
+DEFINE_ATOMIC_RMW(i32_atomic_rmw_xor, xor, u32, u32)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw8_xor_u, xor, u8, u64)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw16_xor_u, xor, u16, u64)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw32_xor_u, xor, u32, u64)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw_xor, xor, u64, u64)
+
+DEFINE_ATOMIC_RMW(i32_atomic_rmw8_xchg_u, exchange, u8, u32)
+DEFINE_ATOMIC_RMW(i32_atomic_rmw16_xchg_u, exchange, u16, u32)
+DEFINE_ATOMIC_RMW(i32_atomic_rmw_xchg, exchange, u32, u32)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw8_xchg_u, exchange, u8, u64)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw16_xchg_u, exchange, u16, u64)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw32_xchg_u, exchange, u32, u64)
+DEFINE_ATOMIC_RMW(i64_atomic_rmw_xchg, exchange, u64, u64)
+
+#define DEFINE_ATOMIC_CMP_XCHG(name, t1, t2) \
+ static inline t1 name(wasm_rt_memory_t* mem, u64 addr, t1 expected, \
+ t1 replacement) { \
+ MEMCHECK(mem, addr, t2); \
+ ATOMIC_ALIGNMENT_CHECK(addr, t2); \
+ t2 expected_wrapped = (t2)expected; \
+ t2 replacement_wrapped = (t2)replacement; \
+ t2 old = atomic_compare_exchange_##t2(&mem->data[addr], &expected_wrapped, \
+ replacement_wrapped); \
+ return (t1)old; \
+ }
+
+DEFINE_ATOMIC_CMP_XCHG(i32_atomic_rmw8_cmpxchg_u, u32, u8);
+DEFINE_ATOMIC_CMP_XCHG(i32_atomic_rmw16_cmpxchg_u, u32, u16);
+DEFINE_ATOMIC_CMP_XCHG(i32_atomic_rmw_cmpxchg, u32, u32);
+DEFINE_ATOMIC_CMP_XCHG(i64_atomic_rmw8_cmpxchg_u, u64, u8);
+DEFINE_ATOMIC_CMP_XCHG(i64_atomic_rmw16_cmpxchg_u, u64, u16);
+DEFINE_ATOMIC_CMP_XCHG(i64_atomic_rmw32_cmpxchg_u, u64, u32);
+DEFINE_ATOMIC_CMP_XCHG(i64_atomic_rmw_cmpxchg, u64, u64);
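To make the macro layering concrete, here is roughly what DEFINE_ATOMIC_RMW(i32_atomic_rmw_add, add, u32, u32) expands to on the gcc/clang path above (whitespace adjusted; MEMCHECK is the bounds-check macro already defined in the main wasm2c source declarations):

  static inline u32 i32_atomic_rmw_add(wasm_rt_memory_t* mem, u64 addr, u32 value) {
    MEMCHECK(mem, addr, u32);
    ATOMIC_ALIGNMENT_CHECK(addr, u32);
    u32 wrapped = (u32)value;
    u32 ret = __atomic_fetch_add((u32*)(&mem->data[addr]), wrapped, __ATOMIC_SEQ_CST);
    return (u32)ret;
  }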
diff --git a/src/tools/wasm2c.cc b/src/tools/wasm2c.cc
index b1c1db8c..10d9210e 100644
--- a/src/tools/wasm2c.cc
+++ b/src/tools/wasm2c.cc
@@ -60,7 +60,8 @@ examples:
static const std::string supported_features[] = {
"multi-memory", "multi-value", "sign-extension", "saturating-float-to-int",
- "exceptions", "memory64", "extended-const", "simd"};
+ "exceptions", "memory64", "extended-const", "simd",
+ "threads"};
static bool IsFeatureSupported(const std::string& feature) {
return std::find(std::begin(supported_features), std::end(supported_features),
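With "threads" added to the supported feature list, converting a module that uses shared memory and atomics looks something like the following (the flag follows wasm2c's existing --enable-<feature> convention; file names are illustrative):

  wasm2c --enable-threads threaded_module.wasm -o threaded_module.c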