1 #ifndef INTERNAL_BITS_H
2 #define INTERNAL_BITS_H
34 #if MSC_VERSION_SINCE(1310)
38 #if defined(HAVE_X86INTRIN_H) && ! defined(MJIT_HEADER)
40 # include <x86intrin.h>
41 #elif MSC_VERSION_SINCE(1310)
45 #if defined(_MSC_VER) && defined(__AVX__)
46 # pragma intrinsic(__popcnt)
47 # pragma intrinsic(__popcnt64)
50 #if defined(_MSC_VER) && defined(__AVX2__)
51 # pragma intrinsic(__lzcnt)
52 # pragma intrinsic(__lzcnt64)
55 #if MSC_VERSION_SINCE(1310)
56 # pragma intrinsic(_rotl)
57 # pragma intrinsic(_rotr)
59 # pragma intrinsic(_rotl64)
60 # pragma intrinsic(_rotr64)
64 #if MSC_VERSION_SINCE(1400)
65 # pragma intrinsic(_BitScanForward)
66 # pragma intrinsic(_BitScanReverse)
68 # pragma intrinsic(_BitScanForward64)
69 # pragma intrinsic(_BitScanReverse64)
74 #include "internal/static_assert.h"
/* A SIGNED_VALUE with exactly one bit set, at bit index
 * (SIZEOF_LONG * CHAR_BIT - 1) / 2. */
#define HALF_LONG_MSB \
    ((SIGNED_VALUE)1 << ((SIZEOF_LONG * CHAR_BIT - 1) / 2))
/* Non-zero when subtracting one from a zero of type T gives a negative
 * value.  The subtraction happens after the usual integer promotions. */
#define SIGNED_INTEGER_TYPE_P(T) ((((T)0) - 1) < 0)
/* SIGNED_INTEGER_MIN(T): picks a minimum value for type T by comparing
 * sizeof(T) with the sizes of int8_t, int16_t, int32_t and int64_t, and
 * casting the matching INTn_MIN constant to T.
 * The end of the macro (the alternative used when no size matches) is not
 * part of this excerpt. */
84 #define SIGNED_INTEGER_MIN(T) \
85 ((sizeof(T) == sizeof(int8_t)) ? ((T)INT8_MIN) : \
86 ((sizeof(T) == sizeof(int16_t)) ? ((T)INT16_MIN) : \
87 ((sizeof(T) == sizeof(int32_t)) ? ((T)INT32_MIN) : \
88 ((sizeof(T) == sizeof(int64_t)) ? ((T)INT64_MIN) : \
/* Maximum of signed type T, obtained by flipping every bit of
 * SIGNED_INTEGER_MIN(T) (XOR with an all-ones value of type T). */
#define SIGNED_INTEGER_MAX(T) \
    ((T)(((T)~(T)0) ^ SIGNED_INTEGER_MIN(T)))
/* All-ones value of type T: complement a zero of type T, then convert the
 * (possibly promoted) result back to T. */
#define UNSIGNED_INTEGER_MAX(T) ((T)(~((T)0)))
/* MUL_OVERFLOW_P(a, b): non-zero when multiplying a by b overflows, built on
 * whichever compiler builtin __has_builtin reports.
 *  - __builtin_mul_overflow_p only tests; its third argument supplies the
 *    result type, here the type of the expression a * b.
 *  - __builtin_mul_overflow stores the product into a scratch variable; the
 *    statement expression yields the builtin's overflow flag.
 * NOTE(review): the scratch variable is typed __typeof__(a), not
 * __typeof__(a * b), so the two variants may disagree when a and b have
 * different types -- confirm callers always pass same-typed operands.
 * No definition is visible here for compilers that offer neither builtin. */
95 #if __has_builtin(__builtin_mul_overflow_p)
96 # define MUL_OVERFLOW_P(a, b) \
97 __builtin_mul_overflow_p((a), (b), (__typeof__(a * b))0)
98 #elif __has_builtin(__builtin_mul_overflow)
99 # define MUL_OVERFLOW_P(a, b) \
100 __extension__ ({ __typeof__(a) c; __builtin_mul_overflow((a), (b), &c); })
/* MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, min, max): overflow test for a * b
 * against the range [min, max] that avoids performing the multiplication.
 * Visible cases: a == -1 is answered by comparing b with -(max); the other
 * visible arms divide max or min by a and compare the quotient with b,
 * choosing the limit according to the sign of b.
 * NOTE(review): some lines of the conditional chain are missing from this
 * excerpt, so the conditions selecting between the last two arms (and any
 * case handled before a == -1) cannot be read here. */
103 #define MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, min, max) ( \
105 (a) == -1 ? (b) < -(max) : \
107 ((b) > 0 ? (max) / (a) < (b) : (min) / (a) > (b)) : \
108 ((b) > 0 ? (min) / (a) < (b) : (max) / (a) > (b)))
/* MUL_OVERFLOW_FIXNUM_P(a, b): overflow test for a * b against the fixnum
 * range.
 *  - With __builtin_mul_overflow_p, the result type is supplied by a signed
 *    bit-field one bit narrower than long (sizeof(long) * CHAR_BIT - 1).
 *  - The other definition delegates to MUL_OVERFLOW_SIGNED_INTEGER_P with
 *    FIXNUM_MIN and FIXNUM_MAX as the limits.
 * The lines that open/close the first definition's expression and the
 * directive separating the two definitions are not part of this excerpt. */
110 #if __has_builtin(__builtin_mul_overflow_p)
113 # define MUL_OVERFLOW_FIXNUM_P(a, b) \
115 struct { long fixnum : sizeof(long) * CHAR_BIT - 1; } c = { 0 }; \
116 __builtin_mul_overflow_p((a), (b), c.fixnum); \
119 # define MUL_OVERFLOW_FIXNUM_P(a, b) \
120 MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, FIXNUM_MIN, FIXNUM_MAX)
/* Type-specific multiplication overflow predicates for long long, long and
 * int.  When MUL_OVERFLOW_P is defined all three simply forward to it; the
 * second group of definitions uses MUL_OVERFLOW_SIGNED_INTEGER_P with the
 * <limits.h> bounds of the named type.
 * (The directive separating the two groups is not part of this excerpt.) */
123 #ifdef MUL_OVERFLOW_P
124 # define MUL_OVERFLOW_LONG_LONG_P(a, b) MUL_OVERFLOW_P(a, b)
125 # define MUL_OVERFLOW_LONG_P(a, b) MUL_OVERFLOW_P(a, b)
126 # define MUL_OVERFLOW_INT_P(a, b) MUL_OVERFLOW_P(a, b)
128 # define MUL_OVERFLOW_LONG_LONG_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, LLONG_MIN, LLONG_MAX)
129 # define MUL_OVERFLOW_LONG_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, LONG_MIN, LONG_MAX)
130 # define MUL_OVERFLOW_INT_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, INT_MIN, INT_MAX)
/* bit_length(x): the selected width (32, 64, or 128) minus the leading-zero
 * count returned by the matching nlz_* helper.  The width is chosen from
 * sizeof(x); the 128-bit arm exists only when HAVE_UINT128_T is defined.
 * x is cast to the unsigned type of the selected width before counting.
 * (One line of each definition and the directives between/after them are
 * not part of this excerpt.) */
133 #ifdef HAVE_UINT128_T
134 # define bit_length(x) \
136 (sizeof(x) <= sizeof(int32_t) ? 32 - nlz_int32((uint32_t)(x)) : \
137 sizeof(x) <= sizeof(int64_t) ? 64 - nlz_int64((uint64_t)(x)) : \
138 128 - nlz_int128((uint128_t)(x)))
140 # define bit_length(x) \
142 (sizeof(x) <= sizeof(int32_t) ? 32 - nlz_int32((uint32_t)(x)) : \
143 64 - nlz_int64((uint64_t)(x)))
/* Short aliases: swap16/swap32/swap64 expand to the ruby_swap* names.
 * NOTE(review): the indentation after '#' suggests each alias sits inside a
 * conditional whose directives are not part of this excerpt. */
147 # define swap16 ruby_swap16
151 # define swap32 ruby_swap32
155 # define swap64 ruby_swap64
/* Forward declarations: 16-bit byte swap and the leading-zero-count family. */
static inline uint16_t ruby_swap16(uint16_t);
static inline unsigned int nlz_int(unsigned int x);
static inline unsigned int nlz_long(unsigned long x);
static inline unsigned int nlz_long_long(unsigned long long x);
static inline unsigned int nlz_intptr(uintptr_t x);
static inline unsigned int nlz_int32(uint32_t x);
static inline unsigned int nlz_int64(uint64_t x);
/* The 128-bit leading-zero count is declared only when HAVE_UINT128_T is
 * defined (the closing directive is not part of this excerpt). */
167 #ifdef HAVE_UINT128_T
168 static inline unsigned nlz_int128(uint128_t x);
170 static inline unsigned rb_popcount32(
uint32_t x);
171 static inline unsigned rb_popcount64(
uint64_t x);
172 static inline unsigned rb_popcount_intptr(
uintptr_t x);
173 static inline int ntz_int32(
uint32_t x);
174 static inline int ntz_int64(
uint64_t x);
175 static inline int ntz_intptr(
uintptr_t x);
176 static inline VALUE RUBY_BIT_ROTL(
VALUE,
int);
177 static inline VALUE RUBY_BIT_ROTR(
VALUE,
int);
/* Swaps the two bytes of a 16-bit value.  Uses __builtin_bswap16 when
 * __has_builtin reports it, _byteswap_ushort under MSC_VERSION_SINCE(1310),
 * and otherwise a manual shift-and-or (the result is truncated back to
 * uint16_t by the return type).
 * (Braces and the closing preprocessor directives are not part of this
 * excerpt.) */
179 static inline uint16_t
180 ruby_swap16(uint16_t x)
182 #if __has_builtin(__builtin_bswap16)
183 return __builtin_bswap16(x);
185 #elif MSC_VERSION_SINCE(1310)
186 return _byteswap_ushort(x);
189 return (x << 8) | (x >> 8);
/* Body of a 32-bit byte swap (the function header is not part of this
 * excerpt; presumably ruby_swap32 -- confirm).  Uses __builtin_bswap32 or
 * _byteswap_ulong when available; the manual path first exchanges the two
 * 16-bit halves, then the bytes within each half. */
197 #if __has_builtin(__builtin_bswap32)
198 return __builtin_bswap32(x);
200 #elif MSC_VERSION_SINCE(1310)
201 return _byteswap_ulong(x);
204 x = ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16);
205 x = ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >> 8);
/* Body of a 64-bit byte swap (the function header is not part of this
 * excerpt; presumably ruby_swap64 -- confirm).  Uses __builtin_bswap64 or
 * _byteswap_uint64 when available; the manual path exchanges 32-bit halves,
 * then 16-bit quarters, then the bytes within each quarter. */
214 #if __has_builtin(__builtin_bswap64)
215 return __builtin_bswap64(x);
217 #elif MSC_VERSION_SINCE(1310)
218 return _byteswap_uint64(x);
221 x = ((x & 0x00000000FFFFFFFF
ULL) << 32) | ((x & 0xFFFFFFFF00000000
ULL) >> 32);
222 x = ((x & 0x0000FFFF0000FFFF
ULL) << 16) | ((x & 0xFFFF0000FFFF0000
ULL) >> 16);
223 x = ((x & 0x00FF00FF00FF00FF
ULL) << 8) | ((x & 0xFF00FF00FF00FF00
ULL) >> 8);
/* Leading-zero count of a 32-bit value (the line naming the function is not
 * part of this excerpt; presumably nlz_int32 -- confirm).
 * Strategies, in preprocessor order: __lzcnt, _lzcnt_u32, _BitScanReverse
 * (31 minus the reported bit index, or 32 when the scan returns zero),
 * __builtin_clz (guarded so that x == 0 yields 32), and a manual search.
 * The manual search tests the upper part of x at shifts 16, 8, 4, 2, 1; each
 * time it is non-zero the shift is subtracted from n and x is replaced by
 * that upper part.  The declarations of r, y and n are on lines missing
 * from this excerpt. */
229 static inline unsigned int
232 #if defined(_MSC_VER) && defined(__AVX2__)
237 return (
unsigned int)__lzcnt(x);
239 #elif defined(__x86_64__) && defined(__LZCNT__) && ! defined(MJIT_HEADER)
240 return (
unsigned int)_lzcnt_u32(x);
242 #elif MSC_VERSION_SINCE(1400)
244 return _BitScanReverse(&r, x) ? (31 - (int)r) : 32;
246 #elif __has_builtin(__builtin_clz)
248 return x ? (
unsigned int)__builtin_clz(x) : 32;
253 y = x >> 16;
if (
y) {n -= 16; x =
y;}
254 y = x >> 8;
if (
y) {n -= 8; x =
y;}
255 y = x >> 4;
if (
y) {n -= 4; x =
y;}
256 y = x >> 2;
if (
y) {n -= 2; x =
y;}
257 y = x >> 1;
if (
y) {
return n - 2;}
258 return (
unsigned int)(n - x);
/* Leading-zero count of a 64-bit value (the line naming the function is not
 * part of this excerpt; presumably nlz_int64 -- confirm).
 * Strategies, in preprocessor order: __lzcnt64, _lzcnt_u64,
 * _BitScanReverse64 (63 minus the reported bit index, or 64 when the scan
 * returns zero), __builtin_clzl / __builtin_clzll chosen by which of long
 * and long long is 64 bits wide, and a manual search.
 * The manual search tests the upper part of x at shifts 32, 16, 8, 4, 2, 1;
 * each time it is non-zero the shift is subtracted from n and x is replaced
 * by that upper part.  The first arm of the builtin branch and the
 * declarations of r, y and n are on lines missing from this excerpt. */
262 static inline unsigned int
265 #if defined(_MSC_VER) && defined(__AVX2__)
266 return (
unsigned int)__lzcnt64(x);
268 #elif defined(__x86_64__) && defined(__LZCNT__) && ! defined(MJIT_HEADER)
269 return (
unsigned int)_lzcnt_u64(x);
271 #elif defined(_WIN64) && MSC_VERSION_SINCE(1400)
273 return _BitScanReverse64(&r, x) ? (63u - (
unsigned int)r) : 64;
275 #elif __has_builtin(__builtin_clzl)
279 else if (
sizeof(
long) *
CHAR_BIT == 64) {
280 return (
unsigned int)__builtin_clzl((
unsigned long)x);
282 else if (
sizeof(
long long) *
CHAR_BIT == 64) {
283 return (
unsigned int)__builtin_clzll((
unsigned long long)x);
293 y = x >> 32;
if (
y) {n -= 32; x =
y;}
294 y = x >> 16;
if (
y) {n -= 16; x =
y;}
295 y = x >> 8;
if (
y) {n -= 8; x =
y;}
296 y = x >> 4;
if (
y) {n -= 4; x =
y;}
297 y = x >> 2;
if (
y) {n -= 2; x =
y;}
298 y = x >> 1;
if (
y) {
return n - 2;}
299 return (
unsigned int)(n - x);
/* Leading-zero count of a 128-bit value, compiled only when HAVE_UINT128_T
 * is defined.  The two visible results reuse nlz_int64: one adds 64 to the
 * count for x (truncated to 64 bits by the call), the other returns the
 * count for y.
 * NOTE(review): the definition of y and the conditions selecting each
 * return are on lines missing from this excerpt; presumably y is the upper
 * 64 bits of x -- confirm. */
304 #ifdef HAVE_UINT128_T
305 static inline unsigned int
306 nlz_int128(uint128_t x)
314 return (
unsigned int)nlz_int64(x) + 64;
317 return (
unsigned int)nlz_int64(
y);
/* Leading-zero count for unsigned int.  Dispatches on the bit width of
 * unsigned int (32 or 64); the statements inside each branch are on lines
 * missing from this excerpt. */
322 static inline unsigned int
323 nlz_int(
unsigned int x)
325 if (
sizeof(
unsigned int) *
CHAR_BIT == 32) {
328 else if (
sizeof(
unsigned int) *
CHAR_BIT == 64) {
/* Leading-zero count for unsigned long.  Dispatches on the bit width of
 * unsigned long (32 or 64); the statements inside each branch are on lines
 * missing from this excerpt. */
336 static inline unsigned int
337 nlz_long(
unsigned long x)
339 if (
sizeof(
unsigned long) *
CHAR_BIT == 32) {
342 else if (
sizeof(
unsigned long) *
CHAR_BIT == 64) {
/* Leading-zero count for unsigned long long.  Dispatches on the bit width
 * of unsigned long long: 64, or -- only when HAVE_UINT128_T is defined --
 * 128, in which case the work is forwarded to nlz_int128.  The statement of
 * the 64-bit branch is on a line missing from this excerpt. */
350 static inline unsigned int
351 nlz_long_long(
unsigned long long x)
353 if (
sizeof(
unsigned long long) *
CHAR_BIT == 64) {
356 #ifdef HAVE_UINT128_T
357 else if (
sizeof(
unsigned long long) *
CHAR_BIT == 128) {
358 return nlz_int128((uint128_t)x);
/* Leading-zero count for a uintptr_t-sized value (the line naming the
 * function is not part of this excerpt; presumably nlz_intptr -- confirm).
 * Forwards to nlz_int, nlz_long or nlz_long_long, whichever operates on a
 * type with the same size as uintptr_t; the checks run in that order. */
366 static inline unsigned int
369 if (
sizeof(
uintptr_t) ==
sizeof(
unsigned int)) {
370 return nlz_int((
unsigned int)x);
372 if (
sizeof(
uintptr_t) ==
sizeof(
unsigned long)) {
373 return nlz_long((
unsigned long)x);
375 if (
sizeof(
uintptr_t) ==
sizeof(
unsigned long long)) {
376 return nlz_long_long((
unsigned long long)x);
/* Population count of a 32-bit value (the line naming the function is not
 * part of this excerpt; presumably rb_popcount32 -- confirm).
 * Uses __popcnt or __builtin_popcount when available.  The manual path adds
 * neighbouring bit-fields pairwise, doubling the field width each step
 * (1, 2, 4, 8, 16 bits), so the final value of x is the total count. */
383 static inline unsigned int
386 #if defined(_MSC_VER) && defined(__AVX__)
390 return (
unsigned int)__popcnt(x);
392 #elif __has_builtin(__builtin_popcount)
394 return (
unsigned int)__builtin_popcount(x);
397 x = (x & 0x55555555) + (x >> 1 & 0x55555555);
398 x = (x & 0x33333333) + (x >> 2 & 0x33333333);
399 x = (x & 0x0f0f0f0f) + (x >> 4 & 0x0f0f0f0f);
400 x = (x & 0x001f001f) + (x >> 8 & 0x001f001f);
401 x = (x & 0x0000003f) + (x >>16 & 0x0000003f);
402 return (
unsigned int)x;
/* Population count of a 64-bit value (the line naming the function is not
 * part of this excerpt; presumably rb_popcount64 -- confirm).
 * Uses __popcnt64, or __builtin_popcountl / __builtin_popcountll chosen by
 * which of long and long long is 64 bits wide.  The manual path adds
 * neighbouring bit-fields pairwise, doubling the field width each step
 * (1, 2, 4, 8, 16, 32 bits), so the final value of x is the total count.
 * NOTE(review): the builtin branch is gated on __builtin_popcount although
 * it calls the l/ll variants -- confirm that is intended. */
407 static inline unsigned int
410 #if defined(_MSC_VER) && defined(__AVX__)
411 return (
unsigned int)__popcnt64(x);
413 #elif __has_builtin(__builtin_popcount)
414 if (
sizeof(
long) *
CHAR_BIT == 64) {
415 return (
unsigned int)__builtin_popcountl((
unsigned long)x);
417 else if (
sizeof(
long long) *
CHAR_BIT == 64) {
418 return (
unsigned int)__builtin_popcountll((
unsigned long long)x);
426 x = (x & 0x5555555555555555) + (x >> 1 & 0x5555555555555555);
427 x = (x & 0x3333333333333333) + (x >> 2 & 0x3333333333333333);
/* A 0x07 mask per byte is enough here: after the previous step every 4-bit
 * field holds a count of at most 4, which fits in three bits. */
428 x = (x & 0x0707070707070707) + (x >> 4 & 0x0707070707070707);
429 x = (x & 0x001f001f001f001f) + (x >> 8 & 0x001f001f001f001f);
430 x = (x & 0x0000003f0000003f) + (x >>16 & 0x0000003f0000003f);
431 x = (x & 0x000000000000007f) + (x >>32 & 0x000000000000007f);
432 return (
unsigned int)x;
437 static inline unsigned int
/* Body of a 32-bit trailing-zero count (the function header is not part of
 * this excerpt; presumably ntz_int32 -- confirm).
 * Strategies, in preprocessor order: _tzcnt_u32, _BitScanForward (the
 * reported bit index, or 32 when the scan returns zero), __builtin_ctz
 * (guarded so that x == 0 yields 32), and a portable form.
 * Portable form: (~x) & (x-1) has ones exactly in the positions below the
 * lowest set bit of x, so counting them gives the number of trailing zeros.
 * NOTE(review): two paths cast to unsigned while the ntz_int32 prototype in
 * this file returns int -- harmless for values 0..32, but confirm. */
454 #if defined(__x86_64__) && defined(__BMI__) && ! defined(MJIT_HEADER)
455 return (
unsigned)_tzcnt_u32(x);
457 #elif MSC_VERSION_SINCE(1400)
461 return _BitScanForward(&r, x) ? (int)r : 32;
463 #elif __has_builtin(__builtin_ctz)
465 return x ? (unsigned)__builtin_ctz(x) : 32;
468 return rb_popcount32((~x) & (x-1));
/* Body of a 64-bit trailing-zero count (the function header is not part of
 * this excerpt; presumably ntz_int64 -- confirm).
 * Strategies, in preprocessor order: _tzcnt_u64, _BitScanForward64 (the
 * reported bit index, or 64 when the scan returns zero), __builtin_ctzl /
 * __builtin_ctzll chosen by which of long and long long is 64 bits wide,
 * and a portable form.
 * Portable form: (~x) & (x-1) has ones exactly in the positions below the
 * lowest set bit of x, so counting them gives the number of trailing zeros.
 * The first arm of the builtin branch is on lines missing from this
 * excerpt. */
476 #if defined(__x86_64__) && defined(__BMI__) && ! defined(MJIT_HEADER)
477 return (
unsigned)_tzcnt_u64(x);
479 #elif defined(_WIN64) && MSC_VERSION_SINCE(1400)
481 return _BitScanForward64(&r, x) ? (int)r : 64;
483 #elif __has_builtin(__builtin_ctzl)
487 else if (
sizeof(
long) *
CHAR_BIT == 64) {
488 return (
unsigned)__builtin_ctzl((
unsigned long)x);
490 else if (
sizeof(
long long) *
CHAR_BIT == 64) {
491 return (
unsigned)__builtin_ctzll((
unsigned long long)x);
499 return rb_popcount64((~x) & (x-1));
/* Rotates v left by n bits.  Picks __builtin_rotateleft32/64 or _rotl64
 * according to the bit width of VALUE (SIZEOF_VALUE * CHAR_BIT); the final
 * visible statement is a portable rotate that masks both shift counts with
 * m so neither shift reaches the full width.
 * NOTE(review): the return-type line, the bodies of the 32-bit MSVC and
 * _lrotl branches, and the declaration of m are on lines missing from this
 * excerpt; m is presumably the bit width of VALUE minus one -- confirm. */
519 RUBY_BIT_ROTL(
VALUE v,
int n)
521 #if __has_builtin(__builtin_rotateleft32) && (SIZEOF_VALUE * CHAR_BIT == 32)
522 return __builtin_rotateleft32(v, n);
524 #elif __has_builtin(__builtin_rotateleft64) && (SIZEOF_VALUE * CHAR_BIT == 64)
525 return __builtin_rotateleft64(v, n);
527 #elif MSC_VERSION_SINCE(1310) && (SIZEOF_VALUE * CHAR_BIT == 32)
530 #elif MSC_VERSION_SINCE(1310) && (SIZEOF_VALUE * CHAR_BIT == 64)
531 return _rotl64(v, n);
533 #elif defined(_lrotl) && (SIZEOF_VALUE == SIZEOF_LONG)
538 return (v << (n & m)) | (v >> (-n & m));
/* Rotates v right by n bits.  Picks __builtin_rotateright32/64 or _rotr64
 * according to the bit width of VALUE (SIZEOF_VALUE * CHAR_BIT); the final
 * visible statement is a portable rotate that masks both shift counts with
 * m so neither shift reaches the full width.
 * NOTE(review): the return-type line, the bodies of the 32-bit MSVC and
 * _lrotr branches, and the declaration of m are on lines missing from this
 * excerpt; m is presumably the bit width of VALUE minus one -- confirm. */
543 RUBY_BIT_ROTR(
VALUE v,
int n)
545 #if __has_builtin(__builtin_rotateright32) && (SIZEOF_VALUE * CHAR_BIT == 32)
546 return __builtin_rotateright32(v, n);
548 #elif __has_builtin(__builtin_rotateright64) && (SIZEOF_VALUE * CHAR_BIT == 64)
549 return __builtin_rotateright64(v, n);
551 #elif MSC_VERSION_SINCE(1310) && (SIZEOF_VALUE * CHAR_BIT == 32)
554 #elif MSC_VERSION_SINCE(1310) && (SIZEOF_VALUE * CHAR_BIT == 64)
555 return _rotr64(v, n);
557 #elif defined(_lrotr) && (SIZEOF_VALUE == SIZEOF_LONG)
562 return (v << (-n & m)) | (v >> (n & m));