/// /// @file popcnt.hpp /// @brief Functions to count the number of 1 bits inside /// an array or a 64-bit word. /// /// Copyright (C) 2021 Kim Walisch, /// /// This file is distributed under the BSD License. See the COPYING /// file in the top level directory. /// #ifndef POPCNT_HPP #define POPCNT_HPP #include #if defined(__has_include) #define HAS_INCLUDE(header) __has_include(header) #else // If the __has_include() macro does not exist // we assume that the header file exists. #define HAS_INCLUDE(header) 1 #endif #if !defined(__has_builtin) #define __has_builtin(x) 0 #endif // GCC & Clang #if defined(__GNUC__) || \ __has_builtin(__builtin_popcountl) namespace { inline uint64_t popcnt64(uint64_t x) { #if __cplusplus >= 201703L if constexpr(sizeof(int) >= sizeof(uint64_t)) return (uint64_t) __builtin_popcount(x); else if constexpr(sizeof(long) >= sizeof(uint64_t)) return (uint64_t) __builtin_popcountl(x); else if constexpr(sizeof(long long) >= sizeof(uint64_t)) return (uint64_t) __builtin_popcountll(x); #else return (uint64_t) __builtin_popcountll(x); #endif } } // namespace #elif defined(_MSC_VER) && \ !defined(DISABLE_POPCNT) && \ defined(_M_X64) && \ HAS_INCLUDE() #include namespace { inline uint64_t popcnt64(uint64_t x) { return __popcnt64(x); } } // namespace #elif defined(_MSC_VER) && \ !defined(DISABLE_POPCNT) && \ defined(_M_IX86) && \ HAS_INCLUDE() #include namespace { inline uint64_t popcnt64(uint64_t x) { return __popcnt((uint32_t) x) + __popcnt((uint32_t)(x >> 32)); } } // namespace #elif __cplusplus >= 202002L #include namespace { /// Ideally we would like to std::popcount instead of the /// intrinsics above. However std::popcount is from C++20 and /// not yet widely supported. Also the assembly generated by /// std::popcount is not as good as __builtin_popcountll on /// x64 with the Clang compiler. This will likely be fixed in /// a few years. /// inline uint64_t popcnt64(uint64_t x) { return std::popcount(x); } } // namespace #elif defined(DISABLE_POPCNT) namespace { /// This uses fewer arithmetic operations than any other known /// implementation on machines with fast multiplication. /// It uses 12 arithmetic operations, one of which is a multiply. /// https://en.wikipedia.org/wiki/Hamming_weight#Efficient_implementation /// inline uint64_t popcnt64(uint64_t x) { uint64_t m1 = 0x5555555555555555ull; uint64_t m2 = 0x3333333333333333ull; uint64_t m4 = 0x0F0F0F0F0F0F0F0Full; uint64_t h01 = 0x0101010101010101ull; x -= (x >> 1) & m1; x = (x & m2) + ((x >> 2) & m2); x = (x + (x >> 4)) & m4; return (x * h01) >> 56; } } // namespace #endif #endif // POPCNT_HPP