|
|
@@ -0,0 +1,602 @@
|
|
|
+#ifndef BITOPS_HH
|
|
|
+#define BITOPS_HH
|
|
|
+
|
|
|
+#if defined(__cpp_constexpr) && __cpp_constexpr >= 201304
|
|
|
+#define constexpr14 constexpr
|
|
|
+#else
|
|
|
+#define constexpr14
|
|
|
+#endif
|
|
|
+
|
|
|
+#include <cstdint>
|
|
|
+#include <cstddef>
|
|
|
+#include <climits>
|
|
|
+#include <limits>
|
|
|
+#include <type_traits>
|
|
|
+#include <algorithm>
|
|
|
+
|
|
|
+namespace std {
|
|
|
+
|
|
|
+//This implementation makes the following platform assumptions:
|
|
|
+//* signed right shift is an arithmetic shift
|
|
|
+//* CHAR_BIT == 8
|
|
|
+//* Native integer types are exactly 8, 16, 32, and 64 bits wide. No support is added for larger types.
|
|
|
//* Signed numbers are implemented using 2's complement
|
|
|
+//
|
|
|
+//The implementation is not designed to be efficient. The purpose is only to prove that each proposed function is implementable.
|
|
|
+//A real implementation may use techniques such as SFINAE, static_assert, overloading, and/or = delete to limit the set of overloads.
|
|
|
+//These have been omitted here to improve readability.
|
|
|
+
|
|
|
+////////////////////////////////////
|
|
|
+//Explicit shifts
|
|
|
+////////////////////////////////////
|
|
|
+
|
|
|
//Logical left shift: moves the bits of x towards the most significant end by s
//positions and fills the vacated low bits with zeros.
//The shift is carried out on the unsigned counterpart of Integral, so the sign
//of x plays no role. Undefined if s < 0 or s >= sizeof(x) * CHAR_BIT.
//Just about every processor in existence has this.
//Included for symmetry.
template <typename Integral>
  constexpr Integral shll(Integral x, int s) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(static_cast<Unsigned>(x) << s);
  }
|
|
|
+
|
|
|
//Logical right shift: moves the bits of x towards the least significant end by
//s positions and fills the vacated high bits with zeros, even when Integral is
//signed (the value is reinterpreted as unsigned before shifting).
//Undefined if s < 0 or s >= sizeof(x) * CHAR_BIT.
//Just about every processor in existence has this.
//Included for symmetry; it also lets a signed number be shifted logically
//without a cast to unsigned at the call site.
template <typename Integral>
  constexpr Integral shlr(Integral x, int s) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(static_cast<Unsigned>(x) >> s);
  }
|
|
|
+
|
|
|
//Arithmetic left shift. An arithmetic left shift produces the same bit pattern
//as a logical left shift, so the computation is the one shll() performs: shift
//the unsigned counterpart of x and convert the result back to Integral.
//Undefined if s < 0 or s >= sizeof(x) * CHAR_BIT.
//Just about every processor in existence has this.
//Included for symmetry.
template <typename Integral>
  constexpr Integral shal(Integral x, int s) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(static_cast<Unsigned>(x) << s);
  }
|
|
|
+
|
|
|
//Arithmetic right shift: shifts x right by s positions, replicating the most
//significant bit into the vacated positions. x is first reinterpreted as the
//signed counterpart of Integral, so an unsigned argument with its top bit set
//is sign-extended as well.
//Undefined if s < 0 or s >= sizeof(x) * CHAR_BIT.
//Just about every processor in existence has this, but right-shifting a
//negative signed value is implementation defined in the language, so there is
//no standards compliant alternative to shar().
template <typename Integral>
  constexpr Integral shar(Integral x, int s) noexcept {
    //Relies on the platform's signed >> being arithmetic. A platform where it
    //is not will need to implement this another way.
    using Signed = typename std::make_signed<Integral>::type;
    return static_cast<Integral>(static_cast<Signed>(x) >> s);
  }
|
|
|
+
|
|
|
//Circular left shift (rotate): bits shifted out at the most significant end
//re-enter at the least significant end.
//The rotation is performed on the unsigned counterpart of Integral and both
//shift counts are reduced modulo the bit width, so s == 0 returns x unchanged
//instead of shifting by the full width (which is undefined behavior), and
//negative signed values are never left-shifted directly.
//s < 0 is outside the contract. The masking assumes the bit width is a power of 2.
//Just about every processor in existence has this, including the PDP-11 (1969),
//and yet C or C++ never included a way to get at this instruction.
template <typename Integral>
  constexpr Integral rotl(Integral x, int s) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(
        (static_cast<Unsigned>(x) << (s & (static_cast<int>(sizeof(Integral) * CHAR_BIT) - 1))) |
        (static_cast<Unsigned>(x) >> (-s & (static_cast<int>(sizeof(Integral) * CHAR_BIT) - 1))));
  }
|
|
|
+
|
|
|
//Circular right shift (rotate): bits shifted out at the least significant end
//re-enter at the most significant end.
//The rotation is performed on the unsigned counterpart of Integral and both
//shift counts are reduced modulo the bit width, so s == 0 returns x unchanged
//instead of shifting by the full width (which is undefined behavior), and
//negative signed values are never left-shifted directly.
//s < 0 is outside the contract. The masking assumes the bit width is a power of 2.
//Just about every processor in existence has this, including the PDP-11 (1969),
//and yet C or C++ never included a way to get at this instruction.
template <typename Integral>
  constexpr Integral rotr(Integral x, int s) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(
        (static_cast<Unsigned>(x) >> (s & (static_cast<int>(sizeof(Integral) * CHAR_BIT) - 1))) |
        (static_cast<Unsigned>(x) << (-s & (static_cast<int>(sizeof(Integral) * CHAR_BIT) - 1))));
  }
|
|
|
+
|
|
|
+////////////////////////////////////
|
|
|
+//Zero and One Counting algorithms
|
|
|
+////////////////////////////////////
|
|
|
+
|
|
|
//Returns the number of trailing zeros in x, or sizeof(x) * CHAR_BIT if x is 0
//i386 bsf, cmov
//x86_64 w/ BMI1: tzcnt
//Alpha: cttz
//MIPS: CLZ
//gcc: x == 0 ? sizeof(x) * CHAR_BIT :__builtin_ctz(x)
//Applications: SSE2 strlen, Howard Hinnant's gcd example
//
//Implementation: binary search on the unsigned counterpart of x. At each step
//the low `shift` bits are tested; if they are all zero they are counted and
//shifted out. Halving `shift` from half the width down to 1 yields the exact
//count for any power-of-2 width.
template <typename Integral>
  constexpr14 int cntt0(Integral x) noexcept {
    typedef typename std::make_unsigned<Integral>::type Unsigned;
    constexpr int nbits = int(sizeof(Integral) * CHAR_BIT);
    Unsigned rest = Unsigned(x);
    if(rest == 0) { return nbits; }
    int count = 0;
    for(int shift = nbits / 2; shift > 0; shift /= 2) {
      const Unsigned low_mask = Unsigned((Unsigned(1) << shift) - 1);
      if((rest & low_mask) == 0) {
        count += shift;
        rest = Unsigned(rest >> shift);
      }
    }
    return count;
  }
|
|
|
+
|
|
|
//Returns the number of leading zeroes in x, or sizeof(x) * CHAR_BIT if x is 0
//i386 bsr, cmov
//x86_64 w/ SSE4: lzcnt
//ARMv5: CLZ
//IA64: clz
//Alpha: CTLZ
//PowerPC: cntlz[dw]
//gcc: x == 0 ? sizeof(x) * CHAR_BIT :__builtin_clz(x)
//
//Implementation: binary search on the unsigned counterpart of x. At each step
//the top `shift` bits are tested; if they are all zero they are counted and
//the value is moved up by `shift`. Every shift distance is derived from the
//width of Integral, so the result is correct for 8, 16, 32 and 64 bit types.
template <typename Integral>
  constexpr14 int cntl0(Integral x) noexcept {
    typedef typename std::make_unsigned<Integral>::type Unsigned;
    constexpr int nbits = int(sizeof(Integral) * CHAR_BIT);
    Unsigned rest = Unsigned(x);
    if(rest == 0) { return nbits; }
    int count = 0;
    for(int shift = nbits / 2; shift > 0; shift /= 2) {
      if((rest >> (nbits - shift)) == 0) {
        count += shift;
        //The conversion discards bits pushed past the top of narrow types.
        rest = Unsigned(rest << shift);
      }
    }
    return count;
  }
|
|
|
+
|
|
|
//Returns the number of leading 1 bits in x.
//ARMv8: CLS
//Blackfin: SIGNBITS
//C6X: NORM
//Picochip: SBC
//MIPS: CTO
//gcc: __builtin_clrsb(x)
//
//Computed as the number of leading zeros of the complement of x.
template <typename Integral>
  constexpr14 int cntl1(Integral x) noexcept {
    //~x is promoted to int for types narrower than int. Converting back to
    //Integral keeps the count relative to the width of x rather than of int.
    return cntl0(static_cast<Integral>(~x));
  }
|
|
|
+
|
|
|
//Returns the number of trailing 1 bits in x.
//Computed as the number of trailing zeros of the complement of x.
template <typename Integral>
  constexpr14 int cntt1(Integral x) noexcept {
    const auto inverted = ~x;
    return cntt0(inverted);
  }
|
|
|
+
|
|
|
//Returns the number of 1 bits in x.
//x86_64 SSE4: popcnt
//IA64: popcnt
//Alpha: CTPOP
//PowerPC: popcntb
//SparcV9: POPC
//Blackfin: ONES
//gcc: __builtin_popcount(x)
//
//Implementation: adjacent fields of 1, 2, 4, ... bits are summed pairwise, each
//round doubling the field width, until one field covers the whole word.
template <typename Integral>
  constexpr14 int popcount(Integral x) noexcept {
    typedef typename std::make_unsigned<Integral>::type Unsigned;
    //field_masks[i] keeps the low half of every field that is 2^(i+1) bits wide.
    const unsigned long long field_masks[6] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL };
    const int nbits = int(sizeof(x) * CHAR_BIT);
    Unsigned sums = Unsigned(x);
    int round = 0;
    for(int width = 1; width < nbits && round < 6; width *= 2, ++round) {
      const Unsigned keep = Unsigned(field_masks[round]);
      sums = Unsigned((sums & keep) + ((sums >> width) & keep));
    }
    return int(sums);
  }
|
|
|
+
|
|
|
//Returns the number of 1 bits in x mod 2, i.e. 1 if x has an odd number of set
//bits and 0 otherwise.
//gcc: __builtin_parity(x)
//
//Implementation: the word is XOR-folded onto itself at distances 1, 2, 4, ...
//After the last fold bit 0 holds the XOR of all bits of x. The remaining bits
//hold partial results and are masked off before returning.
template <typename Integral>
  constexpr14 int parity(Integral x) noexcept {
    typedef typename std::make_unsigned<Integral>::type Unsigned;
    const int nbits = int(sizeof(x) * CHAR_BIT);
    Unsigned folded = Unsigned(x);
    for(int dist = 1; dist < nbits; dist *= 2) {
      folded = Unsigned(folded ^ (folded >> dist));
    }
    return int(folded & 1);
  }
|
|
|
+
|
|
|
+////////////////////////////////////
|
|
|
+//Rightmost bit manipulation
|
|
|
+////////////////////////////////////
|
|
|
+
|
|
|
//Reset least significant 1 bit
//Resets the least significant 1 bit of x. Returns 0 if x is 0.
//x86_64 BMI1: BLSR
//
//The arithmetic is done on the unsigned counterpart of Integral so that x - 1
//cannot overflow when x is the most negative signed value.
template <typename Integral>
  constexpr Integral rstls1b(Integral x) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(static_cast<Unsigned>(x) & (static_cast<Unsigned>(x) - 1));
  }
|
|
|
+
|
|
|
//Set the least significant 0 bit
//Returns x unchanged if x has no 0 bit.
//x86_64 AMD TBM: BLCS
//
//The arithmetic is done on the unsigned counterpart of Integral so that x + 1
//cannot overflow when x is the largest signed value.
template <typename Integral>
  constexpr Integral setls0b(Integral x) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(static_cast<Unsigned>(x) | (static_cast<Unsigned>(x) + 1));
  }
|
|
|
+
|
|
|
//Isolate least significant 1 bit
//Isolates the least significant 1 bit of x and returns it. Returns 0 if x is 0.
//x86_64 BMI1: BLSI
//x86_64 AMD TBM: BLSIC, NOT
//
//The negation is done as 0 - x on the unsigned counterpart of Integral so that
//it cannot overflow when x is the most negative signed value.
template <typename Integral>
  constexpr Integral isols1b(Integral x) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(static_cast<Unsigned>(x) & (Unsigned(0) - static_cast<Unsigned>(x)));
  }
|
|
|
+
|
|
|
//Set the least significant zero bit to 1 and all of the rest to 0.
//Returns 0 if x has no 0 bit.
//
//The arithmetic is done on the unsigned counterpart of Integral so that x + 1
//cannot overflow when x is the largest signed value.
template <typename Integral>
  constexpr Integral isols0b(Integral x) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(~static_cast<Unsigned>(x) & (static_cast<Unsigned>(x) + 1));
  }
|
|
|
+
|
|
|
//Reset the trailing 1's in x
//x86_64 AMD TBM: BLCFILL
//
//The arithmetic is done on the unsigned counterpart of Integral so that x + 1
//cannot overflow when x is the largest signed value.
template <typename Integral>
  constexpr Integral rstt1(Integral x) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(static_cast<Unsigned>(x) & (static_cast<Unsigned>(x) + 1));
  }
|
|
|
+
|
|
|
//Set all of the trailing 0's in x
//If x is 0 every bit is a trailing 0, so the result has all bits set.
//x86_64 AMD TBM: BLSFILL
//
//The arithmetic is done on the unsigned counterpart of Integral so that x - 1
//cannot overflow when x is the most negative signed value.
template <typename Integral>
  constexpr Integral sett0(Integral x) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(static_cast<Unsigned>(x) | (static_cast<Unsigned>(x) - 1));
  }
|
|
|
+
|
|
|
//Returns a mask with all of the trailing 0's set.
//Only the positions of the trailing 0 bits of x are 1 in the result; if x is 0
//the result has all bits set.
//
//The arithmetic is done on the unsigned counterpart of Integral so that x - 1
//cannot overflow when x is the most negative signed value.
template <typename Integral>
  constexpr Integral maskt0(Integral x) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(~static_cast<Unsigned>(x) & (static_cast<Unsigned>(x) - 1));
  }
|
|
|
+
|
|
|
//Returns a mask with all of the trailing 1's set.
//Only the positions of the trailing 1 bits of x are 1 in the result.
//
//The arithmetic is done on the unsigned counterpart of Integral so that x + 1
//cannot overflow when x is the largest signed value.
template <typename Integral>
  constexpr Integral maskt1(Integral x) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(~(~static_cast<Unsigned>(x) | (static_cast<Unsigned>(x) + 1)));
  }
|
|
|
+
|
|
|
//Returns a mask with all of the trailing 0's and the least significant 1 bit set.
//If x is 0 the result has all bits set.
//x86_64 BMI1: BLSMSK
//x86_64 AMD TBM: TZMSK
//
//The arithmetic is done on the unsigned counterpart of Integral so that x - 1
//cannot overflow when x is the most negative signed value.
template <typename Integral>
  constexpr Integral maskt0ls1b(Integral x) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>((static_cast<Unsigned>(x) - 1) ^ static_cast<Unsigned>(x));
  }
|
|
|
+
|
|
|
//Returns a mask with all of the trailing 1's and the least significant 0 bit set.
//If x has no 0 bit the result has all bits set.
//
//The arithmetic is done on the unsigned counterpart of Integral so that x + 1
//cannot overflow when x is the largest signed value.
template <typename Integral>
  constexpr Integral maskt1ls0b(Integral x) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(static_cast<Unsigned>(x) ^ (static_cast<Unsigned>(x) + 1));
  }
|
|
|
+
|
|
|
+////////////////////////////////////
|
|
|
+//Bit and Byte reversal algorithms
|
|
|
+////////////////////////////////////
|
|
|
+
|
|
|
//Reverse each group of blocks of bits in x.
//
//subword_bits == 1: reverses the bits of x
//ARMv7: RBIT
//EPIPHANY: BITR
//subword_bits == 2: reverses each pair of bits in x
//subword_bits == 4: reverses the nibbles in x
//AVR: SWAP
//subword_bits == 8: reverses the bytes in x (assuming CHAR_BIT == 8). This is the traditional byte swap.
//i386: bswap
//ARMv5: REV
//PDP-11: SWAB
//gcc: __builtin_bswap[16|32|64](x)
//(blocks_per_group == 2) ARMv6: REV16
//(blocks_per_group == 4) ARMv8: REV32
//subword_bits == 16,32,etc.. reverses the words in x.
//(subword_bits == 16) MC68020: SWAP
//
//What the code computes: the word is divided into group_subwords equal groups
//(group size in bits = word size / group_subwords) and, inside every group,
//the order of the subword_bits-wide blocks is reversed. This is done with swap
//stages at distances 1, 2, 4, ...; the stage at distance d runs when bit d of
//(group size - subword_bits) is set.
//NOTE(review): the REV16/REV32 lines above talk about "blocks per group", while
//the code treats group_subwords as the number of groups in the word. The two
//readings only coincide in some cases; confirm which one is intended.
//group_subwords == 0 divides by zero and is outside the contract.
template <typename Integral>
  constexpr14 auto reverse_bits(Integral x,
                                int subword_bits = 1,
                                int group_subwords = 1)
  noexcept -> typename std::enable_if<std::is_unsigned<Integral>::value, Integral>::type {
    const int word_bits = int(sizeof(Integral) * CHAR_BIT);
    const int stage_bits = word_bits / group_subwords - subword_bits;
    //low_halves[i] selects the lower block of every pair swapped at distance 2^i.
    //The upper block of the pair is selected by the complement.
    const unsigned long long low_halves[6] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL };
    int stage = 0;
    for(int dist = 1; dist < word_bits && stage < 6; dist *= 2, ++stage) {
      if((stage_bits & dist) == 0) { continue; }
      const Integral lower = Integral(low_halves[stage]);
      const Integral upper = Integral(~lower);
      x = Integral(Integral((x & lower) << dist) | Integral((x & upper) >> dist));
    }
    return x;
  }
|
|
|
+
|
|
|
//Signed overload: converts x to its unsigned counterpart, forwards to the
//unsigned reverse_bits() with the same subword_bits / group_subwords, and
//converts the result back. Working on the unsigned value avoids sign extension
//issues in the right shifts.
template <typename Integral>
  constexpr14 auto reverse_bits(Integral x,
                                int subword_bits = 1,
                                int group_subwords = 1)
  noexcept -> typename std::enable_if<std::is_signed<Integral>::value, Integral>::type {
    typedef typename std::make_unsigned<Integral>::type Unsigned;
    return static_cast<Integral>(
        reverse_bits(static_cast<Unsigned>(x), subword_bits, group_subwords));
  }
|
|
|
+
|
|
|
+
|
|
|
//Byte reversal, simple wrapper around reverse_bits.
//Reverses the order of the bytes_per_block-byte blocks inside every group of
//blocks_per_group blocks. With the defaults (1-byte blocks, one group spanning
//the whole word) this is the traditional byte swap.
//
//reverse_bits() divides the word size by its third argument to obtain the
//group size, i.e. it expects the number of groups in the word. The group size
//requested here (bytes_per_block * blocks_per_group bytes) is therefore
//translated into a group count before forwarding; passing blocks_per_group
//through unchanged makes the default call return x unmodified.
//A group size that is not positive or exceeds sizeof(Integral) is treated as
//one group covering the whole word.
template <typename Integral>
  constexpr14 Integral reverse_bytes(Integral x,
                                     int bytes_per_block=1,
                                     int blocks_per_group = sizeof(Integral)) noexcept {
    const int word_bytes = int(sizeof(Integral));
    const int group_bytes = bytes_per_block * blocks_per_group;
    const int group_count =
        (group_bytes > 0 && group_bytes <= word_bytes) ? word_bytes / group_bytes : 1;
    return reverse_bits(x, CHAR_BIT * bytes_per_block, group_count);
  }
|
|
|
+
|
|
|
+////////////////////////////////////
|
|
|
+//Single bit manipulation
|
|
|
+////////////////////////////////////
|
|
|
+
|
|
|
//Sets bit b in x, undefined behavior if b < 0 or b >= sizeof(x) * CHAR_BIT
//The single-bit mask is built in the unsigned counterpart of Integral, so
//selecting the sign bit of a signed type never left-shifts a signed 1 into the
//sign position.
template <typename Integral>
  constexpr Integral setbit(Integral x, int b) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(static_cast<Unsigned>(x) | (Unsigned(1) << b));
  }
|
|
|
+
|
|
|
//Resets bit b in x, undefined behavior if b < 0 or b >= sizeof(x) * CHAR_BIT
//The single-bit mask is built in the unsigned counterpart of Integral, so
//selecting the sign bit of a signed type never left-shifts a signed 1 into the
//sign position.
template <typename Integral>
  constexpr Integral rstbit(Integral x, int b) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(static_cast<Unsigned>(x) & ~(Unsigned(1) << b));
  }
|
|
|
+
|
|
|
//Flips bit b in x, undefined behavior if b < 0 or b >= sizeof(x) * CHAR_BIT
//The single-bit mask is built in the unsigned counterpart of Integral, so
//selecting the sign bit of a signed type never left-shifts a signed 1 into the
//sign position.
template <typename Integral>
  constexpr Integral flipbit(Integral x, int b) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(static_cast<Unsigned>(x) ^ (Unsigned(1) << b));
  }
|
|
|
+
|
|
|
//Return whether or not bit b is set in x, undefined behavior if b < 0 or b >= sizeof(x) * CHAR_BIT
//The bit is moved down to position 0 of the unsigned counterpart of x and
//tested there, so probing the sign bit of a signed type never left-shifts a
//signed 1 into the sign position.
template <typename Integral>
  constexpr bool testbit(Integral x, int b) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return ((static_cast<Unsigned>(x) >> b) & 1u) != 0;
  }
|
|
|
+
|
|
|
+////////////////////////////////////
|
|
|
+//Range of bits manipulation
|
|
|
+////////////////////////////////////
|
|
|
+
|
|
|
//Resets all bits >= position b, nop if b >= sizeof(x) * CHAR_BIT
//b <= 0 resets every bit.
//x86_64 w/ BMI2: BZHI
//
//The mask of bits below b is built in the unsigned counterpart of Integral.
//The out-of-range values of b are handled explicitly so that the documented
//nop case never shifts by the full width of the type (undefined behavior).
template <typename Integral>
  constexpr Integral rstbitsge(Integral x, int b) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(
        static_cast<Unsigned>(x) &
        (b <= 0 ? Unsigned(0)
         : b >= static_cast<int>(sizeof(Integral) * CHAR_BIT) ? static_cast<Unsigned>(~Unsigned(0))
         : static_cast<Unsigned>((Unsigned(1) << b) - 1)));
  }
|
|
|
+
|
|
|
//Resets all bits <= position b.
//b < 0 is a nop; b >= sizeof(x) * CHAR_BIT - 1 resets every bit.
//
//The mask of bits up to and including b is built in the unsigned counterpart
//of Integral. The boundary values of b are handled explicitly so that
//b == width - 1 never shifts by the full width of the type (undefined behavior).
template <typename Integral>
  constexpr Integral rstbitsle(Integral x, int b) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(
        static_cast<Unsigned>(x) &
        static_cast<Unsigned>(~(
            b < 0 ? Unsigned(0)
            : b >= static_cast<int>(sizeof(Integral) * CHAR_BIT) - 1 ? static_cast<Unsigned>(~Unsigned(0))
            : static_cast<Unsigned>((Unsigned(1) << (b + 1)) - 1))));
  }
|
|
|
+
|
|
|
//Set all bits >= position b, nop if b >= sizeof(x) * CHAR_BIT
//b <= 0 sets every bit.
//
//The mask of bits below b is built in the unsigned counterpart of Integral and
//then complemented. The out-of-range values of b are handled explicitly so
//that the documented nop case never shifts by the full width of the type
//(undefined behavior).
template <typename Integral>
  constexpr Integral setbitsge(Integral x, int b) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(
        static_cast<Unsigned>(x) |
        static_cast<Unsigned>(~(
            b <= 0 ? Unsigned(0)
            : b >= static_cast<int>(sizeof(Integral) * CHAR_BIT) ? static_cast<Unsigned>(~Unsigned(0))
            : static_cast<Unsigned>((Unsigned(1) << b) - 1))));
  }
|
|
|
+
|
|
|
//Sets all bits <= position b.
//b < 0 is a nop; b >= sizeof(x) * CHAR_BIT - 1 sets every bit.
//
//The mask of bits up to and including b is built in the unsigned counterpart
//of Integral. The boundary values of b are handled explicitly so that
//b == width - 1 never shifts by the full width of the type (undefined behavior).
template <typename Integral>
  constexpr Integral setbitsle(Integral x, int b) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(
        static_cast<Unsigned>(x) |
        (b < 0 ? Unsigned(0)
         : b >= static_cast<int>(sizeof(Integral) * CHAR_BIT) - 1 ? static_cast<Unsigned>(~Unsigned(0))
         : static_cast<Unsigned>((Unsigned(1) << (b + 1)) - 1)));
  }
|
|
|
+
|
|
|
//Flip all bits >= position b, nop if b >= sizeof(x) * CHAR_BIT
//b <= 0 flips every bit.
//
//The mask of bits below b is built in the unsigned counterpart of Integral and
//then complemented. The out-of-range values of b are handled explicitly so
//that the documented nop case never shifts by the full width of the type
//(undefined behavior).
template <typename Integral>
  constexpr Integral flipbitsge(Integral x, int b) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(
        static_cast<Unsigned>(x) ^
        static_cast<Unsigned>(~(
            b <= 0 ? Unsigned(0)
            : b >= static_cast<int>(sizeof(Integral) * CHAR_BIT) ? static_cast<Unsigned>(~Unsigned(0))
            : static_cast<Unsigned>((Unsigned(1) << b) - 1))));
  }
|
|
|
+
|
|
|
//Flip all bits <= position b.
//b < 0 is a nop; b >= sizeof(x) * CHAR_BIT - 1 flips every bit.
//
//The mask of bits up to and including b is built in the unsigned counterpart
//of Integral. The boundary values of b are handled explicitly so that
//b == width - 1 never shifts by the full width of the type (undefined behavior).
template <typename Integral>
  constexpr Integral flipbitsle(Integral x, int b) noexcept {
    using Unsigned = typename std::make_unsigned<Integral>::type;
    return static_cast<Integral>(
        static_cast<Unsigned>(x) ^
        (b < 0 ? Unsigned(0)
         : b >= static_cast<int>(sizeof(Integral) * CHAR_BIT) - 1 ? static_cast<Unsigned>(~Unsigned(0))
         : static_cast<Unsigned>((Unsigned(1) << (b + 1)) - 1)));
  }
|
|
|
+
|
|
|
+////////////////////////////////////
|
|
|
+//Power of 2 manipulation
|
|
|
+////////////////////////////////////
|
|
|
+
|
|
|
//Returns true if x is a power of 2, i.e. x is positive and has exactly one bit set.
//Application: Integral template arguments, add optimizations for power of 2
template <typename Integral>
  constexpr bool ispow2(Integral x) noexcept {
    //The positivity test comes first on purpose: x - 1 is only evaluated for
    //x > 0, where it cannot overflow.
    //Equivalent formulation: popcount(x) == 1
    return !(x <= 0) && ((x - 1) & x) == 0;
  }
|
|
|
+
|
|
|
//Round up to the next power of 2
//Returns x itself when x is already a power of 2. For x == 0, and for values
//whose next power of 2 does not fit in an unsigned Integral, the computation
//wraps around to 0.
//Application: Growable containers whose size must be a power of 2
//Application: Extending a 2d image size to a power of 2 for 3d graphics libraries (OpenGL/DirectX)
//
//Implementation: x - 1 has its highest set bit smeared into every lower
//position (OR-ing with itself shifted right by 1, 2, 4, ...); adding 1 to the
//resulting run of ones gives the power of 2.
template <typename Integral>
constexpr14 Integral ceilp2(Integral x) noexcept {
  typedef typename std::make_unsigned<Integral>::type Unsigned;
  const int nbits = int(sizeof(x) * CHAR_BIT);
  Unsigned smear = Unsigned(Unsigned(x) - 1);
  for(int dist = 1; dist < nbits; dist *= 2) {
    smear = Unsigned(smear | (smear >> dist));
  }
  return Integral(Unsigned(smear + 1));
}
|
|
|
+
|
|
|
//Round down to the previous power of 2
//Returns x itself when x is already a power of 2, and 0 when x is 0.
//Application: See ceilp2
//
//Implementation: the highest set bit of x is smeared into every lower position
//(OR-ing with itself shifted right by 1, 2, 4, ...); subtracting that run of
//ones shifted right by one leaves only the highest bit.
template <typename Integral>
constexpr14 Integral floorp2(Integral x) noexcept {
  typedef typename std::make_unsigned<Integral>::type Unsigned;
  const int nbits = int(sizeof(x) * CHAR_BIT);
  Unsigned smear = Unsigned(x);
  for(int dist = 1; dist < nbits; dist *= 2) {
    smear = Unsigned(smear | (smear >> dist));
  }
  return Integral(Unsigned(smear - (smear >> 1)));
}
|
|
|
+
|
|
|
+////////////////////////////////////
|
|
|
+//Saturated Arithmetic
|
|
|
+////////////////////////////////////
|
|
|
+
|
|
|
//Perform saturated addition on l and r.
//The sum is computed in the type of l + r (after the usual arithmetic
//conversions). If the exact sum would exceed that type's maximum the maximum
//is returned, and if it would fall below the minimum the minimum is returned.
//ARMv7 DSP extensions: QADD
template <typename IntegralL, typename IntegralR>
  constexpr auto satadd(IntegralL l, IntegralR r) noexcept -> decltype(l + r) {
    typedef decltype(l + r) LR;
    //Each range check is only evaluated for the sign of r that makes
    //"limit - r" safe, so the checks themselves cannot overflow.
    return (LR(r) > 0 && LR(l) > std::numeric_limits<LR>::max() - LR(r)) ? std::numeric_limits<LR>::max()
         : (LR(r) < 0 && LR(l) < std::numeric_limits<LR>::min() - LR(r)) ? std::numeric_limits<LR>::min()
         : LR(l) + LR(r);
  }
|
|
|
+
|
|
|
//Perform saturated subtraction on l and r.
//The difference is computed in the type of l - r (after the usual arithmetic
//conversions). If the exact difference would fall below that type's minimum
//the minimum is returned, and if it would exceed the maximum the maximum is
//returned.
//ARMv7 DSP extensions: QSUB
template <typename IntegralL, typename IntegralR>
  constexpr auto satsub(IntegralL l, IntegralR r) noexcept -> decltype(l - r) {
    typedef decltype(l - r) LR;
    //Each range check is only evaluated for the sign of r that makes
    //"limit + r" safe, so the checks themselves cannot overflow.
    return (LR(r) > 0 && LR(l) < std::numeric_limits<LR>::min() + LR(r)) ? std::numeric_limits<LR>::min()
         : (LR(r) < 0 && LR(l) > std::numeric_limits<LR>::max() + LR(r)) ? std::numeric_limits<LR>::max()
         : LR(l) - LR(r);
  }
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+////////////////////////////////////
|
|
|
+//Pointer and size alignment helpers
|
|
|
+////////////////////////////////////
|
|
|
+
|
|
|
//Returns true if t is aligned to a, i.e. none of the bits of t below a are set.
//The test is a mask with a - 1, which only matches "t is a multiple of a" when
//a is a power of 2.
template <typename Integral>
constexpr bool is_aligned(Integral t, size_t a) noexcept {
  return !(t & (a - 1));
}
|
|
|
//Pointer overload: returns true if the address held in t is aligned to a.
//a must be a power of 2 (the test masks the address with a - 1).
//Declared inline because this is a non-template function defined in a header;
//without it every translation unit including the header would emit its own
//definition and the program would fail to link.
inline bool is_aligned(void* t, size_t a) noexcept {
  return (reinterpret_cast<uintptr_t>(t) & (a - 1)) == 0;
}
|
|
|
+
|
|
|
//Returns the smallest number n when n >= val && is_aligned(n, align). align must be a power of 2!
//Implementation: val is bumped by a - 1 and the bits below a are then cleared.
//The computation is carried out in the common type of val and size_t and
//converted back to Integral on return.
template <typename Integral>
constexpr Integral align_up(Integral val, size_t a) noexcept {
  return (val + (a - 1)) & ~(a - 1);
}
|
|
|
//Pointer overload: returns the lowest address >= val that is aligned to a.
//a must be a power of 2.
//Declared inline because this is a non-template function defined in a header;
//without it every translation unit including the header would emit its own
//definition and the program would fail to link.
inline void* align_up(void* val, size_t a) noexcept {
  return reinterpret_cast<void*>(
      static_cast<uintptr_t>((reinterpret_cast<uintptr_t>(val) + (a - 1)) & ~(a - 1)));
}
|
|
|
+
|
|
|
//Returns the largest number n when n <= val && is_aligned(n, align). align must be a power of 2!
//Implementation: the bits of val below a are cleared. The computation is
//carried out in the common type of val and size_t and converted back to
//Integral on return.
template <typename Integral>
constexpr Integral align_down(Integral val, size_t a) noexcept {
  return val & ~(a - 1);
}
|
|
|
//Pointer overload: returns the highest address <= val that is aligned to a.
//a must be a power of 2.
//Declared inline because this is a non-template function defined in a header;
//without it every translation unit including the header would emit its own
//definition and the program would fail to link.
inline void* align_down(void* val, size_t a) noexcept {
  return reinterpret_cast<void*>(
      static_cast<uintptr_t>(reinterpret_cast<uintptr_t>(val) & ~(a - 1)));
}
|
|
|
+
|
|
|
+///////////////////////////////////
|
|
|
+//Bit Shuffling
|
|
|
+///////////////////////////////////
|
|
|
+
|
|
|
//Outer Perfect Shuffle
//Interleaves the bits of the upper and lower halves of x. The most significant
//bit of x stays in place, followed by the top bit of the lower half, then the
//second bit of the upper half, and so on (abcd ABCD -> aAbB cCdD).
//
//Implementation: a sequence of block exchanges at halving distances. Each
//stage swaps the two middle blocks of every group of four blocks, using the
//XOR-swap form t = (x ^ (x >> d)) & mask; x ^= t ^ (t << d).
template <typename Integral>
constexpr14 Integral outer_pshuffle(Integral x) noexcept {
  typedef typename std::make_unsigned<Integral>::type Unsigned;
  const int nbits = int(sizeof(x) * CHAR_BIT);
  //swap_masks[i] marks the lower of the two blocks exchanged at distance 2^i.
  const unsigned long long swap_masks[5] = {
    0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
    0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL };
  Unsigned bits = Unsigned(x);
  //Largest distance first. A stage at distance d needs groups of 4 * d bits,
  //so stages that do not fit into the word are skipped.
  for(int stage = 4; stage >= 0; --stage) {
    const int dist = 1 << stage;
    if(dist * 4 > nbits) { continue; }
    const Unsigned t = Unsigned((bits ^ (bits >> dist)) & Unsigned(swap_masks[stage]));
    bits = Unsigned(bits ^ t ^ Unsigned(t << dist));
  }
  return Integral(bits);
}
|
|
|
+
|
|
|
//Outer Perfect Unshuffle
//Applies the block exchanges of outer_pshuffle() in the opposite order
//(smallest distance first): the bits at odd positions are gathered into the
//upper half of the result and the bits at even positions into the lower half
//(aAbB cCdD -> abcd ABCD).
template <typename Integral>
constexpr14 Integral outer_punshuffle(Integral x) noexcept {
  typedef typename std::make_unsigned<Integral>::type Unsigned;
  const int nbits = int(sizeof(x) * CHAR_BIT);
  //swap_masks[i] marks the lower of the two blocks exchanged at distance 2^i.
  const unsigned long long swap_masks[5] = {
    0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
    0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL };
  Unsigned bits = Unsigned(x);
  //A stage at distance d needs groups of 4 * d bits, so the loop stops at the
  //first stage that does not fit into the word.
  for(int stage = 0; stage < 5; ++stage) {
    const int dist = 1 << stage;
    if(dist * 4 > nbits) { break; }
    const Unsigned t = Unsigned((bits ^ (bits >> dist)) & Unsigned(swap_masks[stage]));
    bits = Unsigned(bits ^ t ^ Unsigned(t << dist));
  }
  return Integral(bits);
}
|
|
|
+
|
|
|
//Inner Perfect Shuffle
//Like outer_pshuffle(), but the interleaving starts with the lower half
//(abcd ABCD -> AaBb CcDd). It is obtained by exchanging the two halves of x and
//then applying the outer shuffle.
//
//The halves are exchanged directly with shifts on the unsigned counterpart of
//x. The previous call reverse_bits(x, bits/2, 2) left x untouched, because
//reverse_bits() divides the word size by its third argument, giving a group
//the size of one half with nothing to swap inside it.
template <typename Integral>
constexpr14 Integral inner_pshuffle(Integral x) noexcept {
  typedef typename std::make_unsigned<Integral>::type Unsigned;
  const int half = int(sizeof(x) * CHAR_BIT / 2);
  const Unsigned bits = Unsigned(x);
  const Unsigned swapped = Unsigned(Unsigned(bits << half) | Unsigned(bits >> half));
  return outer_pshuffle(Integral(swapped));
}
|
|
|
+
|
|
|
//Inner Perfect Unshuffle
//Inverse of inner_pshuffle(): applies the outer unshuffle and then exchanges
//the two halves of the result (AaBb CcDd -> abcd ABCD).
//
//The halves are exchanged directly with shifts on the unsigned counterpart of
//the value. The previous call reverse_bits(..., bits/2, 2) left it untouched,
//because reverse_bits() divides the word size by its third argument, giving a
//group the size of one half with nothing to swap inside it.
template <typename Integral>
constexpr14 Integral inner_punshuffle(Integral x) noexcept {
  typedef typename std::make_unsigned<Integral>::type Unsigned;
  const int half = int(sizeof(x) * CHAR_BIT / 2);
  const Unsigned bits = Unsigned(outer_punshuffle(x));
  return Integral(Unsigned(Unsigned(bits << half) | Unsigned(bits >> half)));
}
|
|
|
+
|
|
|
+///////////////////////////////////
|
|
|
+//Bits Deposit and Extract
|
|
|
+///////////////////////////////////
|
|
|
+
|
|
|
//Parallel Bits Deposit
//x    HGFEDCBA
//mask 01100100
//res  0CB00A00
//x86_64 BMI2: PDEP
//
//The low bits of x are scattered, in order, to the positions of the 1 bits of
//mask; all other result bits are 0.
//The loop runs on the unsigned counterparts of x and mask so that doubling the
//source-bit selector and isolating the lowest mask bit cannot overflow for
//signed types.
template <typename Integral>
constexpr14 Integral deposit_bits(Integral x, Integral mask) noexcept {
  typedef typename std::make_unsigned<Integral>::type Unsigned;
  const Unsigned src = Unsigned(x);
  Unsigned pending = Unsigned(mask);
  Unsigned res = 0;
  //One iteration per 1 bit of the mask: `bit` walks the source bits upwards
  //while `pending` loses its lowest 1 bit each round.
  for(Unsigned bit = 1; pending != 0; bit = Unsigned(bit << 1)) {
    const Unsigned target = Unsigned(pending & (Unsigned(0) - pending));
    if(src & bit) {
      res = Unsigned(res | target);
    }
    pending = Unsigned(pending & (pending - 1));
  }
  return Integral(res);
}
|
|
|
+
|
|
|
//Parallel Bits Extract
//x    HGFEDCBA
//mask 01100100
//res  00000GFC
//x86_64 BMI2: PEXT
//
//The bits of x at the positions of the 1 bits of mask are gathered, in order,
//into the low bits of the result; all other result bits are 0.
//The loop runs on the unsigned counterparts of x and mask so that doubling the
//destination-bit selector and isolating the lowest mask bit cannot overflow
//for signed types.
template <typename Integral>
constexpr14 Integral extract_bits(Integral x, Integral mask) noexcept {
  typedef typename std::make_unsigned<Integral>::type Unsigned;
  const Unsigned src = Unsigned(x);
  Unsigned pending = Unsigned(mask);
  Unsigned res = 0;
  //One iteration per 1 bit of the mask: `bit` walks the destination bits
  //upwards while `pending` loses its lowest 1 bit each round.
  for(Unsigned bit = 1; pending != 0; bit = Unsigned(bit << 1)) {
    const Unsigned source = Unsigned(pending & (Unsigned(0) - pending));
    if(src & source) {
      res = Unsigned(res | bit);
    }
    pending = Unsigned(pending & (pending - 1));
  }
  return Integral(res);
}
|
|
|
+
|
|
|
+} //namespace std
|
|
|
+
|
|
|
+#endif
|