6 files changed, 246 insertions, 159 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5c5656baa..c7f4858c6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -65,6 +65,7 @@ ENDIF()
 # Sources.
 
 SET(support_SOURCES
+  src/support/bits.cpp
   src/support/colors.cpp
   src/support/command-line.cpp
   src/support/file.cpp
diff --git a/src/binaryen-shell.cpp b/src/binaryen-shell.cpp
index f969eeb9e..9bc672dd7 100644
--- a/src/binaryen-shell.cpp
+++ b/src/binaryen-shell.cpp
@@ -53,16 +53,68 @@ struct ExitException {
 //
 
 struct ShellExternalInterface : ModuleInstance::ExternalInterface {
-  char *memory;
+  // Linear memory of the simulated WebAssembly instance, addressed by byte.
+  //
+  // WebAssembly permits loads and stores at any byte offset, whereas C++ makes
+  // dereferencing a misaligned or differently-typed pointer undefined. Every
+  // typed access therefore goes through std::memcpy, which is well defined for
+  // any alignment and which compilers lower to plain loads/stores where legal.
+  //
+  // The allocated memory tries to have the same alignment as the memory being
+  // simulated.
+  class Memory {
+    // Raw bytes; char storage may be read and written through memcpy freely.
+    std::vector<char> memory;
+    // Copying is disabled.
+    Memory(const Memory&) = delete;
+    Memory& operator=(const Memory&) = delete;
+
+   public:
+    Memory() {}
+
+    // Sets the backing store to max(newSize, 4096) bytes. Bytes added by growth
+    // are zero; after a shrink, bytes at or above newSize are zeroed too, so a
+    // later grow never exposes stale data.
+    void resize(size_t newSize) {
+      // Allocate at least this size to get proper alignment: the allocator will
+      // usually start allocating page-sized chunks which are properly aligned.
+      //
+      // The code is optimistic this will work until WG21's p0035r0 happens.
+      const size_t minSize = 1 << 12;
+      size_t oldSize = memory.size();
+      memory.resize(std::max(minSize, newSize));
+      if (newSize < oldSize && newSize < minSize) {
+        std::memset(&memory[newSize], 0, minSize - newSize);
+      }
+    }
+
+    // Stores the object representation of value at byte offset address.
+    // The caller must ensure address + sizeof(T) lies within the memory.
+    template <typename T>
+    void set(size_t address, T value) {
+      std::memcpy(&memory[address], &value, sizeof(T));
+    }
+
+    // Loads a T from the sizeof(T) bytes at byte offset address.
+    // The caller must ensure address + sizeof(T) lies within the memory.
+    template <typename T>
+    T get(size_t address) const {
+      T loaded;
+      std::memcpy(&loaded, &memory[address], sizeof(T));
+      return loaded;
+    }
+  } memory;
 
-  ShellExternalInterface() : memory(nullptr) {}
+  ShellExternalInterface() : memory() {}  // starts empty; init() sizes it
 
   void init(Module& wasm) override {
-    memory = (char*)calloc(wasm.memory.initial, 1);
+    memory.resize(wasm.memory.initial);  // bytes added by resize are zero
     // apply memory segments
     for (auto segment : wasm.memory.segments) {
       assert(segment.offset + segment.size <= wasm.memory.initial);
-      memcpy(memory + segment.offset, segment.data, segment.size);
+      for (size_t i = 0; i != segment.size; ++i) {  // one data element at a time
+        memory.set(segment.offset + i, segment.data[i]);
+      }
     }
   }
 
@@ -85,25 +137,25 @@ struct ShellExternalInterface : ModuleInstance::ExternalInterface {
     switch (load->type) {
       case i32: {
         switch (load->bytes) {
-          case 1: return Literal(load->signed_ ? (int32_t)*((int8_t*)(memory+addr))  : (int32_t)*((uint8_t*)(memory+addr)));
-          case 2: return Literal(load->signed_ ? (int32_t)*((int16_t*)(memory+addr)) : (int32_t)*((uint16_t*)(memory+addr)));
-          case 4: return Literal(load->signed_ ? (int32_t)*((int32_t*)(memory+addr)) : (int32_t)*((uint32_t*)(memory+addr)));
+          case 1: return load->signed_ ? Literal((int32_t)memory.get<int8_t>(addr)) : Literal((int32_t)memory.get<uint8_t>(addr));
+          case 2: return load->signed_ ? Literal((int32_t)memory.get<int16_t>(addr)) : Literal((int32_t)memory.get<uint16_t>(addr));
+          case 4: return load->signed_ ? Literal((int32_t)memory.get<int32_t>(addr)) : Literal((int32_t)memory.get<uint32_t>(addr));
           default: abort();
         }
         break;
       }
       case i64: {
         switch (load->bytes) {
-          case 1: return Literal(load->signed_ ? (int64_t)*((int8_t*)(memory+addr))  : (int64_t)*((uint8_t*)(memory+addr)));
-          case 2: return Literal(load->signed_ ? (int64_t)*((int16_t*)(memory+addr)) : (int64_t)*((uint16_t*)(memory+addr)));
-          case 4: return Literal(load->signed_ ? (int64_t)*((int32_t*)(memory+addr)) : (int64_t)*((uint32_t*)(memory+addr)));
-          case 8: return Literal(load->signed_ ? (int64_t)*((int64_t*)(memory+addr)) : (int64_t)*((uint64_t*)(memory+addr)));
+          case 1: return load->signed_ ? Literal((int64_t)memory.get<int8_t>(addr)) : Literal((int64_t)memory.get<uint8_t>(addr));
+          case 2: return load->signed_ ? Literal((int64_t)memory.get<int16_t>(addr)) : Literal((int64_t)memory.get<uint16_t>(addr));
+          case 4: return load->signed_ ? Literal((int64_t)memory.get<int32_t>(addr)) : Literal((int64_t)memory.get<uint32_t>(addr));
+          case 8: return load->signed_ ? Literal((int64_t)memory.get<int64_t>(addr)) : Literal((int64_t)memory.get<uint64_t>(addr));
           default: abort();
         }
         break;
       }
-      case f32: return Literal(*((float*)(memory+addr)));
-      case f64: return Literal(*((double*)(memory+addr)));
+      case f32: return Literal(memory.get<float>(addr));
+      case f64: return Literal(memory.get<double>(addr));
       default: abort();
     }
   }
@@ -113,35 +165,32 @@ struct ShellExternalInterface : ModuleInstance::ExternalInterface {
     switch (store->type) {
       case i32: {
         switch (store->bytes) {
-          case 1: *((int8_t*)(memory+addr)) = value.geti32(); break;
-          case 2: *((int16_t*)(memory+addr)) = value.geti32(); break;
-          case 4: *((int32_t*)(memory+addr)) = value.geti32(); break;
+          case 1: memory.set<int8_t>(addr, value.geti32()); break;
+          case 2: memory.set<int16_t>(addr, value.geti32()); break;
+          case 4: memory.set<int32_t>(addr, value.geti32()); break;
           default: abort();
         }
         break;
       }
       case i64: {
         switch (store->bytes) {
-          case 1: *((int8_t*)(memory+addr)) = value.geti64(); break;
-          case 2: *((int16_t*)(memory+addr)) = value.geti64(); break;
-          case 4: *((int32_t*)(memory+addr)) = value.geti64(); break;
-          case 8: *((int64_t*)(memory+addr)) = value.geti64(); break;
+          case 1: memory.set<int8_t>(addr, value.geti64()); break;
+          case 2: memory.set<int16_t>(addr, value.geti64()); break;
+          case 4: memory.set<int32_t>(addr, value.geti64()); break;
+          case 8: memory.set<int64_t>(addr, value.geti64()); break;
           default: abort();
         }
         break;
       }
       // write floats carefully, ensuring all bits reach memory
-      case f32: *((int32_t*)(memory+addr)) = value.reinterpreti32(); break;
-      case f64: *((int64_t*)(memory+addr)) = value.reinterpreti64(); break;
+      case f32: memory.set<int32_t>(addr, value.reinterpreti32()); break;
+      case f64: memory.set<int64_t>(addr, value.reinterpreti64()); break;
       default: abort();
     }
   }
 
-  void growMemory(size_t oldSize, size_t newSize) override {
-    memory = (char*)realloc(memory, newSize);
-    if (newSize > oldSize) {
-      memset(memory + oldSize, 0, newSize - oldSize);
-    }
+  void growMemory(size_t /*oldSize*/, size_t newSize) override {
+    memory.resize(newSize);  // Memory::resize zero-fills any bytes it adds
   }
 
   jmp_buf trapState;
diff --git a/src/bits.h b/src/bits.h
deleted file mode 100644
index 98ae5c4b7..000000000
--- a/src/bits.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright 2015 WebAssembly Community Group participants
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef wasm_bits_h
-#define wasm_bits_h
-
-#include <type_traits>
-
-/*
- * Portable bit functions.
- *
- * Not all platforms offer fast intrinsics for these functions, and some
- * compilers require checking CPUID at runtime before using the intrinsic.
- *
- * We instead use portable and reasonably-fast implementations, while
- * avoiding implementations with large lookup tables.
- */
-
-namespace wasm {
-
-// Only the specialized templates should be instantiated, getting
-// a linker error with these functions means an unsupported type was used.
-
-template<typename T> inline int PopCount(T /* v */);
-template<typename T> inline T BitReverse(T /* v */);
-template<typename T> inline int CountTrailingZeroes(T /* v */);
-template<typename T> inline int CountLeadingZeroes(T /* v */);
-
-// Convenience signed -> unsigned. It usually doesn't make much sense to use bit
-// functions on signed types.
-template <typename T> inline int PopCount(T v) {
-  return PopCount(typename std::make_unsigned<T>::type(v));
-}
-template <typename T> inline int CountTrailingZeroes(T v) {
-  return CountTrailingZeroes(typename std::make_unsigned<T>::type(v));
-}
-template <typename T> inline int CountLeadingZeroes(T v) {
-  return CountLeadingZeroes(typename std::make_unsigned<T>::type(v));
-}
-
-// Implementations for the above templates.
-
-template<> inline int PopCount<uint8_t>(uint8_t v) {
-  // Small table lookup.
-  static const uint8_t tbl[32] = {
-    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
-    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5
-  };
-  return tbl[v & 0xf] + tbl[v >> 4];
-}
-template<> inline int PopCount<uint16_t>(uint16_t v) {
-  return PopCount<uint8_t>(v & 0xff) + PopCount<uint8_t>(v >> 8);
-}
-template<> inline int PopCount<uint32_t>(uint32_t v) {
-  // See Stanford bithacks, counting bits set in parallel, "best method":
-  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
-  v = v - ((v >> 1) & 0x55555555);
-  v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
-  return (((v + (v >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
-}
-template<> inline int PopCount<uint64_t>(uint64_t v) {
-  return PopCount<uint32_t>((uint32_t)v) + PopCount<uint32_t>(v >> 32);
-}
-
-template<> inline uint32_t BitReverse<uint32_t>(uint32_t v) {
-  // See Hacker's Delight, first edition, figure 7-1.
-  v = ((v & 0x55555555) << 1) | ((v >> 1) & 0x55555555);
-  v = ((v & 0x33333333) << 2) | ((v >> 2) & 0x33333333);
-  v = ((v & 0x0F0F0F0F) << 4) | ((v >> 4) & 0x0F0F0F0F);
-  v = (v << 24) | ((v & 0xFF00) << 8) |
-      ((v >> 8) & 0xFF00) | (v >> 24);
-  return v;
-}
-
-template<> inline int CountTrailingZeroes<uint32_t>(uint32_t v) {
-  // See Stanford bithacks, count the consecutive zero bits (trailing) on the
-  // right with multiply and lookup:
-  // http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightMultLookup
-  static const uint8_t tbl[32] = {
-    0,   1, 28,  2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17,  4, 8,
-    31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18,  6, 11,  5, 10, 9
-  };
-  return v ?
-      (int)tbl[((uint32_t)((v & -(int32_t)v) * 0x077CB531U)) >> 27] :
-      32;
-}
-
-template<> inline int CountTrailingZeroes<uint64_t>(uint64_t v) {
-  return (uint32_t)v ? CountTrailingZeroes<uint32_t>(v)
-                     : 32 + CountTrailingZeroes<uint32_t>(v >> 32);
-}
-
-template<> inline int CountLeadingZeroes<uint32_t>(uint32_t v) {
-  // See Stanford bithacks, find the log base 2 of an N-bit integer in
-  // O(lg(N)) operations with multiply and lookup:
-  // http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
-  static const uint8_t tbl[32] = {
-    31, 22, 30, 21, 18, 10, 29,  2, 20, 17, 15, 13, 9,  6, 28, 1,
-    23, 19, 11,  3, 16, 14,  7, 24, 12,  4,  8, 25, 5, 26, 27, 0
-  };
-  v = v | (v >>  1);
-  v = v | (v >>  2);
-  v = v | (v >>  4);
-  v = v | (v >>  8);
-  v = v | (v >> 16);
-  return v ?
-      (int)tbl[((uint32_t)(v * 0x07C4ACDDU)) >> 27] :
-      32;
-}
-
-template<> inline int CountLeadingZeroes<uint64_t>(uint64_t v) {
-  return v >> 32 ? CountLeadingZeroes<uint32_t>(v >> 32)
-                 : 32 + CountLeadingZeroes<uint32_t>(v);
-}
-
-} // namespace wasm
-
-#endif // wasm_bits_h
diff --git a/src/support/bits.cpp b/src/support/bits.cpp
new file mode 100644
index 000000000..7bf00e595
--- /dev/null
+++ b/src/support/bits.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2015 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define wasm_support_bits_definitions
+#include "support/bits.h"
+
+template<>
+int wasm::PopCount<uint8_t>(uint8_t v) {
+  // Table lookup per nibble; only the first 16 entries are ever indexed.
+  static const uint8_t tbl[32] = {
+    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5
+  };
+  return tbl[v & 0xf] + tbl[v >> 4];
+}
+
+template<>
+int wasm::PopCount<uint16_t>(uint16_t v) {  // bits set in low byte + high byte
+  return PopCount((uint8_t)(v & 0xff)) + PopCount((uint8_t)(v >> 8));
+}
+
+template<>
+int wasm::PopCount<uint32_t>(uint32_t v) {
+  // Counts the set bits of v in parallel; Stanford bithacks, "best method":
+  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+  v = v - ((v >> 1) & 0x55555555);
+  v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
+  return (((v + (v >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
+}
+
+template<>
+int wasm::PopCount<uint64_t>(uint64_t v) {  // bits set in low word + high word
+  return PopCount((uint32_t)v) + PopCount((uint32_t)(v >> 32));
+}
+
+template<>
+uint32_t wasm::BitReverse<uint32_t>(uint32_t v) {
+  // Swap bits, pairs, nibbles, then bytes: Hacker's Delight, 1st ed., fig. 7-1.
+  v = ((v & 0x55555555) << 1) | ((v >> 1) & 0x55555555);
+  v = ((v & 0x33333333) << 2) | ((v >> 2) & 0x33333333);
+  v = ((v & 0x0F0F0F0F) << 4) | ((v >> 4) & 0x0F0F0F0F);
+  v = (v << 24) | ((v & 0xFF00) << 8) | ((v >> 8) & 0xFF00) | (v >> 24);
+  return v;
+}
+
+template<>
+int wasm::CountTrailingZeroes<uint32_t>(uint32_t v) {
+  // Trailing zero count of v (32 for v == 0) via the Stanford bithacks multiply
+  // and lookup. Isolate the low bit unsigned: -(int32_t)0x80000000 overflows.
+  // http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightMultLookup
+  static const uint8_t tbl[32] = {
+    0,   1, 28,  2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17,  4, 8,
+    31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18,  6, 11,  5, 10, 9
+  };
+  return v ? (int)tbl[((uint32_t)((v & (0u - v)) * 0x077CB531U)) >> 27] : 32;
+}
+
+template<>
+int wasm::CountTrailingZeroes<uint64_t>(uint64_t v) {  // yields 64 for v == 0
+  return (uint32_t)v ? CountTrailingZeroes((uint32_t)v)
+                     : 32 + CountTrailingZeroes((uint32_t)(v >> 32));
+}
+
+template<>
+int wasm::CountLeadingZeroes<uint32_t>(uint32_t v) {
+  // Smear the top set bit downwards, then look up the leading-zero count with
+  // the Stanford bithacks O(lg(N)) integer log2 multiply (32 when v == 0):
+  // http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
+  static const uint8_t tbl[32] = {
+    31, 22, 30, 21, 18, 10, 29,  2, 20, 17, 15, 13, 9,  6, 28, 1,
+    23, 19, 11,  3, 16, 14,  7, 24, 12,  4,  8, 25, 5, 26, 27, 0
+  };
+  v = v | (v >> 1);
+  v = v | (v >> 2);
+  v = v | (v >> 4);
+  v = v | (v >> 8);
+  v = v | (v >> 16);
+  return v ? (int)tbl[((uint32_t)(v * 0x07C4ACDDU)) >> 27] : 32;
+}
+
+template<>
+int wasm::CountLeadingZeroes<uint64_t>(uint64_t v) {  // yields 64 for v == 0
+  return v >> 32 ? CountLeadingZeroes((uint32_t)(v >> 32))
+                 : 32 + CountLeadingZeroes((uint32_t)v);
+}
diff --git a/src/support/bits.h b/src/support/bits.h
new file mode 100644
index 000000000..3049a2cf1
--- /dev/null
+++ b/src/support/bits.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2015 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef wasm_support_bits_h
+#define wasm_support_bits_h
+
+#include <cstdint>
+#include <type_traits>
+
+/*
+ * Portable bit functions.
+ *
+ * Not all platforms offer fast intrinsics for these functions, and some
+ * compilers require checking CPUID at runtime before using the intrinsic.
+ *
+ * We instead use portable and reasonably-fast implementations, while
+ * avoiding implementations with large lookup tables.
+ */
+
+namespace wasm {
+
+template<typename T> int PopCount(T);
+template<typename T> uint32_t BitReverse(T);
+template<typename T> int CountTrailingZeroes(T);
+template<typename T> int CountLeadingZeroes(T);
+
+#ifndef wasm_support_bits_definitions
+// The specializations are defined in support/bits.cpp, which sets this macro.
+extern template int PopCount(uint8_t);
+extern template int PopCount(uint16_t);
+extern template int PopCount(uint32_t);
+extern template int PopCount(uint64_t);
+extern template uint32_t BitReverse(uint32_t);
+extern template int CountTrailingZeroes(uint32_t);
+extern template int CountTrailingZeroes(uint64_t);
+extern template int CountLeadingZeroes(uint32_t);
+extern template int CountLeadingZeroes(uint64_t);
+#endif
+
+// Convenience signed -> unsigned: bit functions rarely make sense on signed
+// types. NOTE: an unsigned T with no specialization calls itself endlessly.
+template <typename T>
+inline int PopCount(T v) {
+  return PopCount(typename std::make_unsigned<T>::type(v));
+}
+template <typename T>
+inline int CountTrailingZeroes(T v) {
+  return CountTrailingZeroes(typename std::make_unsigned<T>::type(v));
+}
+template <typename T>
+inline int CountLeadingZeroes(T v) {
+  return CountLeadingZeroes(typename std::make_unsigned<T>::type(v));
+}
+
+}  // namespace wasm
+
+#endif  // wasm_support_bits_h
diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h
index 97e9117f7..693135962 100644
--- a/src/wasm-interpreter.h
+++ b/src/wasm-interpreter.h
@@ -25,7 +25,7 @@
 
 #include <limits.h>
 
-#include "bits.h"
+#include "support/bits.h"
 #include "wasm.h"
 
 namespace wasm {