// Copyright 2019 Google LLC // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include #undef HWY_TARGET_INCLUDE #define HWY_TARGET_INCLUDE "tests/count_test.cc" #include "hwy/foreach_target.h" // IWYU pragma: keep #include "hwy/highway.h" #include "hwy/tests/test_util-inl.h" HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { struct TestPopulationCount { template HWY_NOINLINE void operator()(T /*unused*/, D d) { RandomState rng; size_t N = Lanes(d); auto data = AllocateAligned(N); auto popcnt = AllocateAligned(N); HWY_ASSERT(data && popcnt); for (size_t i = 0; i < AdjustedReps(1 << 18) / N; i++) { for (size_t j = 0; j < N; j++) { data[j] = static_cast(rng()); popcnt[j] = static_cast(PopCount(data[j])); } HWY_ASSERT_VEC_EQ(d, popcnt.get(), PopulationCount(Load(d, data.get()))); } } }; HWY_NOINLINE void TestAllPopulationCount() { ForUnsignedTypes(ForPartialVectors()); } template static HWY_INLINE T LeadingZeroCountOfValue(T val) { const uint32_t u32_val = static_cast(val); return static_cast(u32_val ? Num0BitsAboveMS1Bit_Nonzero32(u32_val) : 32); } template static HWY_INLINE T LeadingZeroCountOfValue(T val) { const uint64_t u64_val = static_cast(val); return static_cast(u64_val ? Num0BitsAboveMS1Bit_Nonzero64(u64_val) : 64); } template static HWY_INLINE T LeadingZeroCountOfValue(T val) { using TU = MakeUnsigned; constexpr uint32_t kNumOfExtraLeadingZeros{32 - (sizeof(T) * 8)}; return static_cast( LeadingZeroCountOfValue(static_cast(static_cast(val))) - kNumOfExtraLeadingZeros); } struct TestLeadingZeroCount { template HWY_ATTR_NO_MSAN HWY_NOINLINE void operator()(T /*unused*/, D d) { RandomState rng; using TU = MakeUnsigned; const RebindToUnsigned du; size_t N = Lanes(d); auto data = AllocateAligned(N); auto lzcnt = AllocateAligned(N); constexpr T kNumOfBitsInT = static_cast(sizeof(T) * 8); for (size_t j = 0; j < N; j++) { lzcnt[j] = kNumOfBitsInT; } HWY_ASSERT_VEC_EQ(d, lzcnt.get(), LeadingZeroCount(Zero(d))); for (size_t j = 0; j < N; j++) { lzcnt[j] = static_cast(kNumOfBitsInT - 1); } HWY_ASSERT_VEC_EQ(d, lzcnt.get(), LeadingZeroCount(Set(d, static_cast(1)))); for (size_t j = 0; j < N; j++) { lzcnt[j] = static_cast(kNumOfBitsInT - 2); } HWY_ASSERT_VEC_EQ(d, lzcnt.get(), LeadingZeroCount(Set(d, static_cast(2)))); for (size_t j = 0; j < N; j++) { lzcnt[j] = static_cast(0); } HWY_ASSERT_VEC_EQ( d, lzcnt.get(), LeadingZeroCount(BitCast(d, Set(du, TU{1} << (kNumOfBitsInT - 1))))); for (size_t j = 0; j < N; j++) { lzcnt[j] = static_cast(1); } HWY_ASSERT_VEC_EQ( d, lzcnt.get(), LeadingZeroCount(Set(d, static_cast(1) << (kNumOfBitsInT - 2)))); for (size_t j = 0; j < N; j++) { lzcnt[j] = static_cast(kNumOfBitsInT - 5); } HWY_ASSERT_VEC_EQ(d, lzcnt.get(), LeadingZeroCount(Set(d, static_cast(0x1D)))); for (size_t i = 0; i < AdjustedReps(1000); i++) { for (size_t j = 0; j < N; j++) { data[j] = static_cast(rng()); lzcnt[j] = LeadingZeroCountOfValue(data[j]); } HWY_ASSERT_VEC_EQ(d, lzcnt.get(), LeadingZeroCount(Load(d, data.get()))); } } }; HWY_NOINLINE void TestAllLeadingZeroCount() { ForIntegerTypes(ForPartialVectors()); } template static HWY_INLINE T TrailingZeroCountOfValue(T val) { using TU = MakeUnsigned; constexpr size_t kNumOfBitsInT = sizeof(T) * 8; const uint32_t u32_val = static_cast(static_cast(val)); return static_cast(u32_val ? Num0BitsBelowLS1Bit_Nonzero32(u32_val) : kNumOfBitsInT); } template static HWY_INLINE T TrailingZeroCountOfValue(T val) { const uint64_t u64_val = static_cast(val); return static_cast(u64_val ? Num0BitsBelowLS1Bit_Nonzero64(u64_val) : 64); } struct TestTrailingZeroCount { template HWY_ATTR_NO_MSAN HWY_NOINLINE void operator()(T /*unused*/, D d) { RandomState rng; using TU = MakeUnsigned; const RebindToUnsigned du; size_t N = Lanes(d); auto data = AllocateAligned(N); auto tzcnt = AllocateAligned(N); constexpr T kNumOfBitsInT = static_cast(sizeof(T) * 8); for (size_t j = 0; j < N; j++) { tzcnt[j] = kNumOfBitsInT; } HWY_ASSERT_VEC_EQ(d, tzcnt.get(), TrailingZeroCount(Zero(d))); for (size_t j = 0; j < N; j++) { tzcnt[j] = static_cast(0); } HWY_ASSERT_VEC_EQ(d, tzcnt.get(), TrailingZeroCount(Set(d, static_cast(1)))); for (size_t j = 0; j < N; j++) { tzcnt[j] = static_cast(1); } HWY_ASSERT_VEC_EQ(d, tzcnt.get(), TrailingZeroCount(Set(d, static_cast(2)))); for (size_t j = 0; j < N; j++) { tzcnt[j] = static_cast(kNumOfBitsInT - 1); } HWY_ASSERT_VEC_EQ( d, tzcnt.get(), TrailingZeroCount(BitCast(d, Set(du, TU{1} << (kNumOfBitsInT - 1))))); for (size_t j = 0; j < N; j++) { tzcnt[j] = static_cast(kNumOfBitsInT - 2); } HWY_ASSERT_VEC_EQ( d, tzcnt.get(), TrailingZeroCount(Set(d, static_cast(1) << (kNumOfBitsInT - 2)))); for (size_t j = 0; j < N; j++) { tzcnt[j] = static_cast(3); } HWY_ASSERT_VEC_EQ(d, tzcnt.get(), TrailingZeroCount(Set(d, static_cast(0x68)))); for (size_t i = 0; i < AdjustedReps(1000); i++) { for (size_t j = 0; j < N; j++) { data[j] = static_cast(rng()); tzcnt[j] = TrailingZeroCountOfValue(data[j]); } HWY_ASSERT_VEC_EQ(d, tzcnt.get(), TrailingZeroCount(Load(d, data.get()))); } } }; HWY_NOINLINE void TestAllTrailingZeroCount() { ForIntegerTypes(ForPartialVectors()); } class TestHighestSetBitIndex { private: template static HWY_INLINE V NormalizedHighestSetBitIndex(V v) { const DFromV d; const RebindToSigned di; const auto hsb_idx = BitCast(di, HighestSetBitIndex(v)); return BitCast(d, Or(BroadcastSignBit(hsb_idx), hsb_idx)); } public: template HWY_ATTR_NO_MSAN HWY_NOINLINE void operator()(T /*unused*/, D d) { RandomState rng; using TU = MakeUnsigned; const RebindToUnsigned du; size_t N = Lanes(d); auto data = AllocateAligned(N); auto hsb_index = AllocateAligned(N); constexpr T kNumOfBitsInT = static_cast(sizeof(T) * 8); constexpr T kMsbIdx = static_cast(kNumOfBitsInT - 1); for (size_t j = 0; j < N; j++) { hsb_index[j] = static_cast(-1); } HWY_ASSERT_VEC_EQ(d, hsb_index.get(), NormalizedHighestSetBitIndex(Zero(d))); for (size_t j = 0; j < N; j++) { hsb_index[j] = static_cast(0); } HWY_ASSERT_VEC_EQ(d, hsb_index.get(), NormalizedHighestSetBitIndex(Set(d, static_cast(1)))); for (size_t j = 0; j < N; j++) { hsb_index[j] = static_cast(1); } HWY_ASSERT_VEC_EQ(d, hsb_index.get(), NormalizedHighestSetBitIndex(Set(d, static_cast(3)))); for (size_t j = 0; j < N; j++) { hsb_index[j] = static_cast(kNumOfBitsInT - 1); } HWY_ASSERT_VEC_EQ(d, hsb_index.get(), NormalizedHighestSetBitIndex( BitCast(d, Set(du, TU{1} << (kNumOfBitsInT - 1))))); for (size_t j = 0; j < N; j++) { hsb_index[j] = static_cast(kNumOfBitsInT - 2); } HWY_ASSERT_VEC_EQ(d, hsb_index.get(), NormalizedHighestSetBitIndex( Set(d, static_cast(1) << (kNumOfBitsInT - 2)))); for (size_t j = 0; j < N; j++) { hsb_index[j] = static_cast(5); } HWY_ASSERT_VEC_EQ( d, hsb_index.get(), NormalizedHighestSetBitIndex(Set(d, static_cast(0x2B)))); for (size_t i = 0; i < AdjustedReps(1000); i++) { for (size_t j = 0; j < N; j++) { data[j] = static_cast(rng()); hsb_index[j] = static_cast(kMsbIdx - LeadingZeroCountOfValue(data[j])); } HWY_ASSERT_VEC_EQ(d, hsb_index.get(), NormalizedHighestSetBitIndex(Load(d, data.get()))); } } }; HWY_NOINLINE void TestAllHighestSetBitIndex() { ForIntegerTypes(ForPartialVectors()); } // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE namespace hwy { HWY_BEFORE_TEST(HwyCountTest); HWY_EXPORT_AND_TEST_P(HwyCountTest, TestAllPopulationCount); HWY_EXPORT_AND_TEST_P(HwyCountTest, TestAllLeadingZeroCount); HWY_EXPORT_AND_TEST_P(HwyCountTest, TestAllTrailingZeroCount); HWY_EXPORT_AND_TEST_P(HwyCountTest, TestAllHighestSetBitIndex); } // namespace hwy #endif