// Copyright 2019 Google LLC // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include // memset #undef HWY_TARGET_INCLUDE #define HWY_TARGET_INCLUDE "tests/slide_up_down_test.cc" #include "hwy/foreach_target.h" // IWYU pragma: keep #include "hwy/highway.h" #include "hwy/tests/test_util-inl.h" HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { class TestSlideUpLanes { private: template static HWY_INLINE void DoTestSlideUpLanes(D d, TFromD* HWY_RESTRICT expected, const size_t N, const size_t slide_amt) { for (size_t i = 0; i < N; i++) { expected[i] = ConvertScalarTo>( (i >= slide_amt) ? (i - slide_amt + 1) : 0); } const auto v = Iota(d, 1); HWY_ASSERT_VEC_EQ(d, expected, SlideUpLanes(d, v, slide_amt)); if (slide_amt == 1) { HWY_ASSERT_VEC_EQ(d, expected, Slide1Up(d, v)); } } #if !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 && \ HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256 template static HWY_NOINLINE void DoTestSlideUpLanesWithConstAmt_0_7( D d, TFromD* HWY_RESTRICT expected, const size_t N) { DoTestSlideUpLanes(d, expected, N, 0); if (N <= 1) return; DoTestSlideUpLanes(d, expected, N, 1); if (N <= 2) return; DoTestSlideUpLanes(d, expected, N, 2); DoTestSlideUpLanes(d, expected, N, 3); if (N <= 4) return; DoTestSlideUpLanes(d, expected, N, 4); DoTestSlideUpLanes(d, expected, N, 5); DoTestSlideUpLanes(d, expected, N, 6); DoTestSlideUpLanes(d, expected, N, 7); } template static HWY_INLINE void DoTestSlideUpLanesWithConstAmt_8_15( D /*d*/, TFromD* HWY_RESTRICT /*expected*/, const size_t /*N*/) {} template static HWY_NOINLINE void DoTestSlideUpLanesWithConstAmt_8_15( D d, TFromD* HWY_RESTRICT expected, const size_t N) { if (N <= 8) return; DoTestSlideUpLanes(d, expected, N, 8); DoTestSlideUpLanes(d, expected, N, 9); DoTestSlideUpLanes(d, expected, N, 10); DoTestSlideUpLanes(d, expected, N, 11); DoTestSlideUpLanes(d, expected, N, 12); DoTestSlideUpLanes(d, expected, N, 13); DoTestSlideUpLanes(d, expected, N, 14); DoTestSlideUpLanes(d, expected, N, 15); } #if HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_WASM_EMU256 template static HWY_INLINE void DoTestSlideUpLanesWithConstAmt_16_31( D /*d*/, TFromD* HWY_RESTRICT /*expected*/, const size_t /*N*/) {} template static HWY_NOINLINE void DoTestSlideUpLanesWithConstAmt_16_31( D d, TFromD* HWY_RESTRICT expected, const size_t N) { if (N <= 16) return; DoTestSlideUpLanes(d, expected, N, 16); DoTestSlideUpLanes(d, expected, N, 17); DoTestSlideUpLanes(d, expected, N, 18); DoTestSlideUpLanes(d, expected, N, 19); DoTestSlideUpLanes(d, expected, N, 20); DoTestSlideUpLanes(d, expected, N, 21); DoTestSlideUpLanes(d, expected, N, 22); DoTestSlideUpLanes(d, expected, N, 23); DoTestSlideUpLanes(d, expected, N, 24); DoTestSlideUpLanes(d, expected, N, 25); DoTestSlideUpLanes(d, expected, N, 26); DoTestSlideUpLanes(d, expected, N, 27); DoTestSlideUpLanes(d, expected, N, 28); DoTestSlideUpLanes(d, expected, N, 29); DoTestSlideUpLanes(d, expected, N, 30); DoTestSlideUpLanes(d, expected, N, 31); } #if HWY_TARGET <= HWY_AVX3 template static HWY_INLINE void DoTestSlideUpLanesWithConstAmt_32_63( D /*d*/, TFromD* HWY_RESTRICT /*expected*/, const size_t /*N*/) {} template static HWY_NOINLINE void DoTestSlideUpLanesWithConstAmt_32_63( D d, TFromD* HWY_RESTRICT expected, const size_t N) { if (N <= 32) return; DoTestSlideUpLanes(d, expected, N, 32); DoTestSlideUpLanes(d, expected, N, 33); DoTestSlideUpLanes(d, expected, N, 34); DoTestSlideUpLanes(d, expected, N, 35); DoTestSlideUpLanes(d, expected, N, 36); DoTestSlideUpLanes(d, expected, N, 37); DoTestSlideUpLanes(d, expected, N, 38); DoTestSlideUpLanes(d, expected, N, 39); DoTestSlideUpLanes(d, expected, N, 40); DoTestSlideUpLanes(d, expected, N, 41); DoTestSlideUpLanes(d, expected, N, 42); DoTestSlideUpLanes(d, expected, N, 43); DoTestSlideUpLanes(d, expected, N, 44); DoTestSlideUpLanes(d, expected, N, 45); DoTestSlideUpLanes(d, expected, N, 46); DoTestSlideUpLanes(d, expected, N, 47); DoTestSlideUpLanes(d, expected, N, 48); DoTestSlideUpLanes(d, expected, N, 49); DoTestSlideUpLanes(d, expected, N, 50); DoTestSlideUpLanes(d, expected, N, 51); DoTestSlideUpLanes(d, expected, N, 52); DoTestSlideUpLanes(d, expected, N, 53); DoTestSlideUpLanes(d, expected, N, 54); DoTestSlideUpLanes(d, expected, N, 55); DoTestSlideUpLanes(d, expected, N, 56); DoTestSlideUpLanes(d, expected, N, 57); DoTestSlideUpLanes(d, expected, N, 58); DoTestSlideUpLanes(d, expected, N, 59); DoTestSlideUpLanes(d, expected, N, 60); DoTestSlideUpLanes(d, expected, N, 61); DoTestSlideUpLanes(d, expected, N, 62); DoTestSlideUpLanes(d, expected, N, 63); } #endif // HWY_TARGET <= HWY_AVX3 #endif // HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_WASM_EMU256 #endif // !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 && // HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256 public: template HWY_NOINLINE void operator()(T /*unused*/, D d) { const size_t N = Lanes(d); auto expected = AllocateAligned(N); HWY_ASSERT(expected); for (size_t i = 0; i < N; i++) { size_t slide_amt = i; #if !HWY_COMPILER_MSVC PreventElision(slide_amt); #endif DoTestSlideUpLanes(d, expected.get(), N, slide_amt); } #if !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 && \ HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256 DoTestSlideUpLanesWithConstAmt_0_7(d, expected.get(), N); DoTestSlideUpLanesWithConstAmt_8_15(d, expected.get(), N); #if HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_WASM_EMU256 DoTestSlideUpLanesWithConstAmt_16_31(d, expected.get(), N); #if HWY_TARGET <= HWY_AVX3 DoTestSlideUpLanesWithConstAmt_32_63(d, expected.get(), N); #endif // HWY_TARGET <= HWY_AVX3 #endif // HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_WASM_EMU256 #endif // !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 && // HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256 } }; HWY_NOINLINE void TestAllSlideUpLanes() { ForAllTypes(ForPartialVectors()); } #if !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 && \ HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256 // DoTestSlideDownLanes needs to be inlined on targets where // DoTestSlideDownLanesWithConstAmt_0_7, DoTestSlideDownLanesWithConstAmt_8_15, // DoTestSlideDownLanesWithConstAmt_16_31, and // DoTestSlideDownLanesWithConstAmt_32_63 are called since the implementation // of SlideDownLanes(d, v, N) for the SSE2/SSSE3/SSE4/AVX2/AVX3/NEON/WASM // targets has an optimized path for the case where __builtin_constant_p(N) is // true (or in other words, when N is known to be a constant) when compiled with // GCC or Clang and optimizations are enabled. // If DoTestSlideDownLanes is not inlined on the // SSE2/SSSE3/SSE4/AVX2/AVX3/NEON/WASM targets, // DoTestSlideDownLanesWithConstAmt_0_7, DoTestSlideDownLanesWithConstAmt_8_15, // DoTestSlideDownLanesWithConstAmt_16_31, and // DoTestSlideDownLanesWithConstAmt_32_63 will fail to throughly test the // implementations of SlideDownLanes(d, v, N) in optimized builds compiled with // GCC or Clang for the case where N is known to be a constant. #define HWY_SLIDE_DOWN_TEST_INLINE HWY_INLINE #else // DoTestSlideDownLanes should not be inlined on RVV targets to work around RVV // miscompilation. #define HWY_SLIDE_DOWN_TEST_INLINE HWY_NOINLINE #endif class TestSlideDownLanes { private: // HWY_SLIDE_DOWN_TEST_INLINE is required here to work around RVV // miscompilation. template static HWY_SLIDE_DOWN_TEST_INLINE void DoTestSlideDownLanes( D d, TFromD* HWY_RESTRICT expected, const size_t N, const size_t slide_amt) { for (size_t i = 0; i < N; i++) { const size_t src_idx = slide_amt + i; expected[i] = ConvertScalarTo>((src_idx < N) ? src_idx : 0); } const Vec v = Iota(d, 0); HWY_ASSERT_VEC_EQ(d, expected, SlideDownLanes(d, v, slide_amt)); if (slide_amt == 1) { HWY_ASSERT_VEC_EQ(d, expected, Slide1Down(d, v)); } } #if !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 && \ HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256 template static HWY_NOINLINE void DoTestSlideDownLanesWithConstAmt_0_7( D d, TFromD* HWY_RESTRICT expected, const size_t N) { DoTestSlideDownLanes(d, expected, N, 0); if (N <= 1) return; DoTestSlideDownLanes(d, expected, N, 1); if (N <= 2) return; DoTestSlideDownLanes(d, expected, N, 2); DoTestSlideDownLanes(d, expected, N, 3); if (N <= 4) return; DoTestSlideDownLanes(d, expected, N, 4); DoTestSlideDownLanes(d, expected, N, 5); DoTestSlideDownLanes(d, expected, N, 6); DoTestSlideDownLanes(d, expected, N, 7); } template static HWY_INLINE void DoTestSlideDownLanesWithConstAmt_8_15( D /*d*/, TFromD* HWY_RESTRICT /*expected*/, const size_t /*N*/) {} template static HWY_NOINLINE void DoTestSlideDownLanesWithConstAmt_8_15( D d, TFromD* HWY_RESTRICT expected, const size_t N) { if (N <= 8) return; DoTestSlideDownLanes(d, expected, N, 8); DoTestSlideDownLanes(d, expected, N, 9); DoTestSlideDownLanes(d, expected, N, 10); DoTestSlideDownLanes(d, expected, N, 11); DoTestSlideDownLanes(d, expected, N, 12); DoTestSlideDownLanes(d, expected, N, 13); DoTestSlideDownLanes(d, expected, N, 14); DoTestSlideDownLanes(d, expected, N, 15); } #if HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_WASM_EMU256 template static HWY_INLINE void DoTestSlideDownLanesWithConstAmt_16_31( D /*d*/, TFromD* HWY_RESTRICT /*expected*/, const size_t /*N*/) {} template static HWY_NOINLINE void DoTestSlideDownLanesWithConstAmt_16_31( D d, TFromD* HWY_RESTRICT expected, const size_t N) { if (N <= 16) return; DoTestSlideDownLanes(d, expected, N, 16); DoTestSlideDownLanes(d, expected, N, 17); DoTestSlideDownLanes(d, expected, N, 18); DoTestSlideDownLanes(d, expected, N, 19); DoTestSlideDownLanes(d, expected, N, 20); DoTestSlideDownLanes(d, expected, N, 21); DoTestSlideDownLanes(d, expected, N, 22); DoTestSlideDownLanes(d, expected, N, 23); DoTestSlideDownLanes(d, expected, N, 24); DoTestSlideDownLanes(d, expected, N, 25); DoTestSlideDownLanes(d, expected, N, 26); DoTestSlideDownLanes(d, expected, N, 27); DoTestSlideDownLanes(d, expected, N, 28); DoTestSlideDownLanes(d, expected, N, 29); DoTestSlideDownLanes(d, expected, N, 30); DoTestSlideDownLanes(d, expected, N, 31); } #if HWY_TARGET <= HWY_AVX3 template static HWY_INLINE void DoTestSlideDownLanesWithConstAmt_32_63( D /*d*/, TFromD* HWY_RESTRICT /*expected*/, const size_t /*N*/) {} template static HWY_NOINLINE void DoTestSlideDownLanesWithConstAmt_32_63( D d, TFromD* HWY_RESTRICT expected, const size_t N) { if (N <= 32) return; DoTestSlideDownLanes(d, expected, N, 32); DoTestSlideDownLanes(d, expected, N, 33); DoTestSlideDownLanes(d, expected, N, 34); DoTestSlideDownLanes(d, expected, N, 35); DoTestSlideDownLanes(d, expected, N, 36); DoTestSlideDownLanes(d, expected, N, 37); DoTestSlideDownLanes(d, expected, N, 38); DoTestSlideDownLanes(d, expected, N, 39); DoTestSlideDownLanes(d, expected, N, 40); DoTestSlideDownLanes(d, expected, N, 41); DoTestSlideDownLanes(d, expected, N, 42); DoTestSlideDownLanes(d, expected, N, 43); DoTestSlideDownLanes(d, expected, N, 44); DoTestSlideDownLanes(d, expected, N, 45); DoTestSlideDownLanes(d, expected, N, 46); DoTestSlideDownLanes(d, expected, N, 47); DoTestSlideDownLanes(d, expected, N, 48); DoTestSlideDownLanes(d, expected, N, 49); DoTestSlideDownLanes(d, expected, N, 50); DoTestSlideDownLanes(d, expected, N, 51); DoTestSlideDownLanes(d, expected, N, 52); DoTestSlideDownLanes(d, expected, N, 53); DoTestSlideDownLanes(d, expected, N, 54); DoTestSlideDownLanes(d, expected, N, 55); DoTestSlideDownLanes(d, expected, N, 56); DoTestSlideDownLanes(d, expected, N, 57); DoTestSlideDownLanes(d, expected, N, 58); DoTestSlideDownLanes(d, expected, N, 59); DoTestSlideDownLanes(d, expected, N, 60); DoTestSlideDownLanes(d, expected, N, 61); DoTestSlideDownLanes(d, expected, N, 62); DoTestSlideDownLanes(d, expected, N, 63); } #endif // HWY_TARGET <= HWY_AVX3 #endif // HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_WASM_EMU256 #endif // !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 && // HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256 public: template HWY_NOINLINE void operator()(T /*unused*/, D d) { const size_t N = Lanes(d); auto expected = AllocateAligned(N); HWY_ASSERT(expected); for (size_t i = 0; i < N; i++) { size_t slide_amt = i; #if !HWY_COMPILER_MSVC PreventElision(slide_amt); #endif DoTestSlideDownLanes(d, expected.get(), N, slide_amt); } #if !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 && \ HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256 DoTestSlideDownLanesWithConstAmt_0_7(d, expected.get(), N); DoTestSlideDownLanesWithConstAmt_8_15(d, expected.get(), N); #if HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_WASM_EMU256 DoTestSlideDownLanesWithConstAmt_16_31(d, expected.get(), N); #if HWY_TARGET <= HWY_AVX3 DoTestSlideDownLanesWithConstAmt_32_63(d, expected.get(), N); #endif // HWY_TARGET <= HWY_AVX3 #endif // HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_WASM_EMU256 #endif // !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 && // HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256 } }; #undef HWY_SLIDE_DOWN_TEST_INLINE HWY_NOINLINE void TestAllSlideDownLanes() { ForAllTypes(ForPartialVectors()); } struct TestSlide1 { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto iota0 = Iota(d, 0); const auto iota1 = Iota(d, 1); const auto expected_slide_down_result = IfThenElseZero(FirstN(d, Lanes(d) - 1), iota1); HWY_ASSERT_VEC_EQ(d, iota0, Slide1Up(d, iota1)); HWY_ASSERT_VEC_EQ(d, expected_slide_down_result, Slide1Down(d, iota0)); } }; HWY_NOINLINE void TestAllSlide1() { ForAllTypes(ForPartialVectors()); } class TestSlideBlocks { private: template static HWY_INLINE void DoTestSlideByKBlocks(D d) { using T = TFromD; constexpr size_t kLanesPerBlock = 16 / sizeof(T); constexpr size_t kLanesToSlide = static_cast(kBlocks) * kLanesPerBlock; const auto iota_0 = Iota(d, 0); const auto iota_k = Iota(d, kLanesToSlide); const auto first_k_lanes_mask = FirstN(d, kLanesToSlide); const auto expected_slide_up_result = IfThenZeroElse(first_k_lanes_mask, iota_0); HWY_ASSERT_VEC_EQ(d, expected_slide_up_result, SlideUpBlocks(d, iota_k)); const RebindToUnsigned du; using TU = TFromD; const auto slide_down_result_mask = BitCast( d, Reverse(du, IfThenZeroElse(RebindMask(du, first_k_lanes_mask), Set(du, hwy::LimitsMax())))); const auto expected_slide_down_result = And(slide_down_result_mask, iota_k); HWY_ASSERT_VEC_EQ(d, expected_slide_down_result, SlideDownBlocks(d, iota_0)); } #if HWY_MAX_BYTES >= 32 template static HWY_INLINE void DoTestSlideBy1Block(D /*d*/, size_t /*N*/) {} template static HWY_INLINE void DoTestSlideBy1Block(D d, size_t N) { if (N < (32 / sizeof(TFromD))) return; DoTestSlideByKBlocks<1>(d); } #if HWY_MAX_BYTES >= 64 template static HWY_INLINE void DoTestSlideBy2And3Blocks(D /*d*/, size_t /*N*/) {} template static HWY_INLINE void DoTestSlideBy2And3Blocks(D d, size_t N) { if (N < (64 / sizeof(TFromD))) return; DoTestSlideByKBlocks<2>(d); DoTestSlideByKBlocks<3>(d); } #endif // HWY_MAX_BYTES >= 64 #endif // HWY_MAX_BYTES >= 32 public: template HWY_NOINLINE void operator()(T /*unused*/, D d) { DoTestSlideByKBlocks<0>(d); #if HWY_MAX_BYTES >= 32 const size_t N = Lanes(d); DoTestSlideBy1Block(d, N); #if HWY_MAX_BYTES >= 64 DoTestSlideBy2And3Blocks(d, N); #endif // HWY_MAX_BYTES >= 64 #endif // HWY_MAX_BYTES >= 32 } }; HWY_NOINLINE void TestAllSlideBlocks() { ForAllTypes(ForPartialVectors()); } // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE namespace hwy { HWY_BEFORE_TEST(HwySlideUpDownTest); HWY_EXPORT_AND_TEST_P(HwySlideUpDownTest, TestAllSlideUpLanes); HWY_EXPORT_AND_TEST_P(HwySlideUpDownTest, TestAllSlideDownLanes); HWY_EXPORT_AND_TEST_P(HwySlideUpDownTest, TestAllSlide1); HWY_EXPORT_AND_TEST_P(HwySlideUpDownTest, TestAllSlideBlocks); } // namespace hwy #endif