summaryrefslogtreecommitdiff
path: root/src/support/bits.cpp
diff options
context:
space:
mode:
authorMax Graey <maxgraey@gmail.com>2020-06-18 04:13:59 +0300
committerGitHub <noreply@github.com>2020-06-17 18:13:59 -0700
commitf6eb790eec107064798b9e54552f1ef5966f6359 (patch)
tree25a51d7cb2d0722dc2ab015d10abc59078916477 /src/support/bits.cpp
parent251a68b603080c86cd417db4f02801510376b279 (diff)
downloadbinaryen-f6eb790eec107064798b9e54552f1ef5966f6359.tar.gz
binaryen-f6eb790eec107064798b9e54552f1ef5966f6359.tar.bz2
binaryen-f6eb790eec107064798b9e54552f1ef5966f6359.zip
Optimize bit count polyfills (#2914)
Diffstat (limited to 'src/support/bits.cpp')
-rw-r--r--src/support/bits.cpp87
1 files changed, 66 insertions, 21 deletions
diff --git a/src/support/bits.cpp b/src/support/bits.cpp
index 2489a0249..f57a6d0e8 100644
--- a/src/support/bits.cpp
+++ b/src/support/bits.cpp
@@ -19,6 +19,12 @@
#include "../compiler-support.h"
#include "support/utilities.h"
+#ifdef _MSC_VER
+#include <intrin.h>
+#define __builtin_popcount __popcnt
+#define __builtin_popcountll __popcnt64
+#endif
+
namespace wasm {
template<> int PopCount<uint8_t>(uint8_t v) {
@@ -30,19 +36,31 @@ template<> int PopCount<uint8_t>(uint8_t v) {
}
template<> int PopCount<uint16_t>(uint16_t v) {
- return PopCount((uint8_t)(v & 0xff)) + PopCount((uint8_t)(v >> 8));
+#if __has_builtin(__builtin_popcount) || defined(__GNUC__) || defined(_MSC_VER)
+ return (int)__builtin_popcount(v);
+#else
+ return PopCount((uint8_t)(v & 0xFF)) + PopCount((uint8_t)(v >> 8));
+#endif
}
template<> int PopCount<uint32_t>(uint32_t v) {
+#if __has_builtin(__builtin_popcount) || defined(__GNUC__) || defined(_MSC_VER)
+ return (int)__builtin_popcount(v);
+#else
// See Stanford bithacks, counting bits set in parallel, "best method":
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
v = v - ((v >> 1) & 0x55555555);
v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
return (((v + (v >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
+#endif
}
template<> int PopCount<uint64_t>(uint64_t v) {
+#if __has_builtin(__builtin_popcount) || defined(__GNUC__) || defined(_MSC_VER)
+ return (int)__builtin_popcountll(v);
+#else
return PopCount((uint32_t)v) + PopCount((uint32_t)(v >> 32));
+#endif
}
template<> uint32_t BitReverse<uint32_t>(uint32_t v) {
@@ -55,21 +73,53 @@ template<> uint32_t BitReverse<uint32_t>(uint32_t v) {
}
template<> int CountTrailingZeroes<uint32_t>(uint32_t v) {
+ if (v == 0) {
+ return 32;
+ }
+#if __has_builtin(__builtin_ctz) || defined(__GNUC__)
+ return __builtin_ctz(v);
+#elif defined(_MSC_VER)
+ unsigned long count;
+ _BitScanForward(&count, v);
+ return (int)count;
+#else
// See Stanford bithacks, count the consecutive zero bits (trailing) on the
// right with multiply and lookup:
// http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightMultLookup
static const uint8_t tbl[32] = {0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20,
15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19,
16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
- return v ? (int)tbl[((uint32_t)((v & -v) * 0x077CB531U)) >> 27] : 32;
+ return (int)tbl[((uint32_t)((v & -v) * 0x077CB531U)) >> 27];
+#endif
}
template<> int CountTrailingZeroes<uint64_t>(uint64_t v) {
+ if (v == 0) {
+ return 64;
+ }
+#if __has_builtin(__builtin_ctzll) || defined(__GNUC__)
+ return __builtin_ctzll(v);
+#elif defined(_MSC_VER)
+ unsigned long count;
+ _BitScanForward64(&count, v);
+ return (int)count;
+#else
return (uint32_t)v ? CountTrailingZeroes((uint32_t)v)
: 32 + CountTrailingZeroes((uint32_t)(v >> 32));
+#endif
}
template<> int CountLeadingZeroes<uint32_t>(uint32_t v) {
+ if (v == 0) {
+ return 32;
+ }
+#if __has_builtin(__builtin_clz) || defined(__GNUC__)
+ return __builtin_clz(v);
+#elif defined(_MSC_VER)
+ unsigned long count;
+ _BitScanReverse(&count, v);
+ return (int)count;
+#else
// See Stanford bithacks, find the log base 2 of an N-bit integer in
// O(lg(N)) operations with multiply and lookup:
// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
@@ -81,12 +131,24 @@ template<> int CountLeadingZeroes<uint32_t>(uint32_t v) {
v = v | (v >> 4);
v = v | (v >> 8);
v = v | (v >> 16);
- return v ? (int)tbl[((uint32_t)(v * 0x07C4ACDDU)) >> 27] : 32;
+ return (int)tbl[((uint32_t)(v * 0x07C4ACDDU)) >> 27];
+#endif
}
template<> int CountLeadingZeroes<uint64_t>(uint64_t v) {
+ if (v == 0) {
+ return 64;
+ }
+#if __has_builtin(__builtin_clzll) || defined(__GNUC__)
+ return __builtin_clzll(v);
+#elif defined(_MSC_VER)
+ unsigned long count;
+ _BitScanReverse64(&count, v);
+ return (int)count;
+#else
return v >> 32 ? CountLeadingZeroes((uint32_t)(v >> 32))
: 32 + CountLeadingZeroes((uint32_t)v);
+#endif
}
uint32_t Log2(uint32_t v) {
@@ -108,23 +170,6 @@ uint32_t Log2(uint32_t v) {
}
}
-uint32_t Pow2(uint32_t v) {
- switch (v) {
- case 0:
- return 1;
- case 1:
- return 2;
- case 2:
- return 4;
- case 3:
- return 8;
- case 4:
- return 16;
- case 5:
- return 32;
- default:
- return 1 << v;
- }
-}
+uint32_t Pow2(uint32_t v) { return 1 << v; }
} // namespace wasm