// Copyright 2019 Google LLC // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include "hwy/base.h" #undef HWY_TARGET_INCLUDE #define HWY_TARGET_INCLUDE "tests/swizzle_test.cc" #include "hwy/foreach_target.h" // IWYU pragma: keep #include "hwy/highway.h" #include "hwy/tests/test_util-inl.h" HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { struct TestGetLane { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto v = Iota(d, 1); HWY_ASSERT_EQ(T(1), GetLane(v)); } }; HWY_NOINLINE void TestAllGetLane() { ForAllTypes(ForPartialVectors()); } struct TestExtractLane { #if !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 && \ HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256 template , 1)> static HWY_INLINE void DoTestExtractLaneWithConstAmt_0_7(D /*d*/, Vec v) { HWY_ASSERT_EQ(ConvertScalarTo>(1), ExtractLane(v, 0)); } template , 2)> static HWY_INLINE void DoTestExtractLaneWithConstAmt_0_7(D /*d*/, Vec v) { HWY_ASSERT_EQ(ConvertScalarTo>(1), ExtractLane(v, 0)); HWY_ASSERT_EQ(ConvertScalarTo>(2), ExtractLane(v, 1)); } template , 4)> static HWY_INLINE void DoTestExtractLaneWithConstAmt_0_7(D /*d*/, Vec v) { HWY_ASSERT_EQ(ConvertScalarTo>(1), ExtractLane(v, 0)); HWY_ASSERT_EQ(ConvertScalarTo>(2), ExtractLane(v, 1)); HWY_ASSERT_EQ(ConvertScalarTo>(3), ExtractLane(v, 2)); HWY_ASSERT_EQ(ConvertScalarTo>(4), ExtractLane(v, 3)); } template , 4)> static HWY_INLINE void DoTestExtractLaneWithConstAmt_0_7(D /*d*/, Vec v) { HWY_ASSERT_EQ(ConvertScalarTo>(1), ExtractLane(v, 0)); HWY_ASSERT_EQ(ConvertScalarTo>(2), ExtractLane(v, 1)); HWY_ASSERT_EQ(ConvertScalarTo>(3), ExtractLane(v, 2)); HWY_ASSERT_EQ(ConvertScalarTo>(4), ExtractLane(v, 3)); HWY_ASSERT_EQ(ConvertScalarTo>(5), ExtractLane(v, 4)); HWY_ASSERT_EQ(ConvertScalarTo>(6), ExtractLane(v, 5)); HWY_ASSERT_EQ(ConvertScalarTo>(7), ExtractLane(v, 6)); HWY_ASSERT_EQ(ConvertScalarTo>(8), ExtractLane(v, 7)); } template , 8)> static HWY_INLINE void DoTestExtractLaneWithConstAmt_8_15(D /*d*/, Vec /*v*/) {} template , 8)> static HWY_INLINE void DoTestExtractLaneWithConstAmt_8_15(D /*d*/, Vec v) { HWY_ASSERT_EQ(ConvertScalarTo>(9), ExtractLane(v, 8)); HWY_ASSERT_EQ(ConvertScalarTo>(10), ExtractLane(v, 9)); HWY_ASSERT_EQ(ConvertScalarTo>(11), ExtractLane(v, 10)); HWY_ASSERT_EQ(ConvertScalarTo>(12), ExtractLane(v, 11)); HWY_ASSERT_EQ(ConvertScalarTo>(13), ExtractLane(v, 12)); HWY_ASSERT_EQ(ConvertScalarTo>(14), ExtractLane(v, 13)); HWY_ASSERT_EQ(ConvertScalarTo>(15), ExtractLane(v, 14)); HWY_ASSERT_EQ(ConvertScalarTo>(16), ExtractLane(v, 15)); } #endif // !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 && // HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256 template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto v = Iota(d, 1); #if !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 && \ HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256 DoTestExtractLaneWithConstAmt_0_7(d, v); DoTestExtractLaneWithConstAmt_8_15(d, v); #endif // !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 && // HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256 for (size_t i = 0; i < Lanes(d); ++i) { const T actual = ExtractLane(v, i); HWY_ASSERT_EQ(ConvertScalarTo(i + 1), actual); } } }; HWY_NOINLINE void TestAllExtractLane() { ForAllTypes(ForPartialVectors()); } struct TestInsertLane { #if !HWY_HAVE_SCALABLE template , 1)> static HWY_INLINE void DoTestInsertLaneWithConstAmt_0_7( D d, TFromD* HWY_RESTRICT lanes) { using T = TFromD; lanes[0] = ConvertScalarTo(1); Vec v = InsertLane(Zero(d), 0, ConvertScalarTo(1)); HWY_ASSERT_VEC_EQ(d, lanes, v); } template , 2)> static HWY_INLINE void DoTestInsertLaneWithConstAmt_0_7( D d, TFromD* HWY_RESTRICT lanes) { using T = TFromD; lanes[0] = ConvertScalarTo(1); Vec v = InsertLane(Zero(d), 0, ConvertScalarTo(1)); HWY_ASSERT_VEC_EQ(d, lanes, v); lanes[1] = ConvertScalarTo(2); v = InsertLane(v, 1, ConvertScalarTo(2)); HWY_ASSERT_VEC_EQ(d, lanes, v); } template , 4)> static HWY_INLINE void DoTestInsertLaneWithConstAmt_0_7( D d, TFromD* HWY_RESTRICT lanes) { using T = TFromD; lanes[0] = ConvertScalarTo(1); Vec v = InsertLane(Zero(d), 0, ConvertScalarTo(1)); HWY_ASSERT_VEC_EQ(d, lanes, v); lanes[1] = ConvertScalarTo(2); v = InsertLane(v, 1, ConvertScalarTo(2)); HWY_ASSERT_VEC_EQ(d, lanes, v); lanes[2] = ConvertScalarTo(3); v = InsertLane(v, 2, ConvertScalarTo(3)); HWY_ASSERT_VEC_EQ(d, lanes, v); lanes[3] = ConvertScalarTo(4); v = InsertLane(v, 3, ConvertScalarTo(4)); HWY_ASSERT_VEC_EQ(d, lanes, v); } template , 4)> static HWY_INLINE void DoTestInsertLaneWithConstAmt_0_7( D d, TFromD* HWY_RESTRICT lanes) { using T = TFromD; lanes[0] = ConvertScalarTo(1); Vec v = InsertLane(Zero(d), 0, ConvertScalarTo(1)); HWY_ASSERT_VEC_EQ(d, lanes, v); lanes[1] = ConvertScalarTo(2); v = InsertLane(v, 1, ConvertScalarTo(2)); HWY_ASSERT_VEC_EQ(d, lanes, v); lanes[2] = ConvertScalarTo(3); v = InsertLane(v, 2, ConvertScalarTo(3)); HWY_ASSERT_VEC_EQ(d, lanes, v); lanes[3] = ConvertScalarTo(4); v = InsertLane(v, 3, ConvertScalarTo(4)); HWY_ASSERT_VEC_EQ(d, lanes, v); lanes[4] = ConvertScalarTo(5); v = InsertLane(v, 4, ConvertScalarTo(5)); HWY_ASSERT_VEC_EQ(d, lanes, v); lanes[5] = ConvertScalarTo(6); v = InsertLane(v, 5, ConvertScalarTo(6)); HWY_ASSERT_VEC_EQ(d, lanes, v); lanes[6] = ConvertScalarTo(7); v = InsertLane(v, 6, ConvertScalarTo(7)); HWY_ASSERT_VEC_EQ(d, lanes, v); lanes[7] = ConvertScalarTo(8); v = InsertLane(v, 7, ConvertScalarTo(8)); HWY_ASSERT_VEC_EQ(d, lanes, v); } template , 8)> static HWY_INLINE void DoTestInsertLaneWithConstAmt_8_15( D, TFromD* HWY_RESTRICT) {} template , 8)> static HWY_INLINE void DoTestInsertLaneWithConstAmt_8_15( D d, TFromD* HWY_RESTRICT lanes) { using T = TFromD; Vec v = Load(d, lanes); lanes[8] = ConvertScalarTo(9); v = InsertLane(v, 8, ConvertScalarTo(9)); HWY_ASSERT_VEC_EQ(d, lanes, v); lanes[9] = ConvertScalarTo(10); v = InsertLane(v, 9, ConvertScalarTo(10)); HWY_ASSERT_VEC_EQ(d, lanes, v); lanes[10] = ConvertScalarTo(11); v = InsertLane(v, 10, ConvertScalarTo(11)); HWY_ASSERT_VEC_EQ(d, lanes, v); lanes[11] = ConvertScalarTo(12); v = InsertLane(v, 11, ConvertScalarTo(12)); HWY_ASSERT_VEC_EQ(d, lanes, v); lanes[12] = ConvertScalarTo(13); v = InsertLane(v, 12, ConvertScalarTo(13)); HWY_ASSERT_VEC_EQ(d, lanes, v); lanes[13] = ConvertScalarTo(14); v = InsertLane(v, 13, ConvertScalarTo(14)); HWY_ASSERT_VEC_EQ(d, lanes, v); lanes[14] = ConvertScalarTo(15); v = InsertLane(v, 14, ConvertScalarTo(15)); HWY_ASSERT_VEC_EQ(d, lanes, v); lanes[15] = ConvertScalarTo(16); v = InsertLane(v, 15, ConvertScalarTo(16)); HWY_ASSERT_VEC_EQ(d, lanes, v); } template static HWY_INLINE void DoTestInsertLaneWithConstAmt( D d, TFromD* HWY_RESTRICT lanes) { DoTestInsertLaneWithConstAmt_0_7(d, lanes); DoTestInsertLaneWithConstAmt_8_15(d, lanes); Store(Zero(d), d, lanes); } template static HWY_INLINE void DoTestInsertLaneWithConstAmt(D, TFromD* HWY_RESTRICT) { } #endif // !HWY_HAVE_SCALABLE template HWY_NOINLINE void operator()(T /*unused*/, D d) { using V = Vec; const V v = IotaForSpecial(d, 1); const size_t N = Lanes(d); auto lanes = AllocateAligned(N); HWY_ASSERT(lanes); Store(Zero(d), d, lanes.get()); #if !HWY_HAVE_SCALABLE DoTestInsertLaneWithConstAmt(d, lanes.get()); #endif // TODO(janwas): file compiler bug report #if HWY_COMPILER_CLANG && (HWY_COMPILER_CLANG < 1800) && HWY_ARCH_ARM if (IsSpecialFloat()) return; #endif V v2 = Zero(d); for (size_t i = 0; i < N; ++i) { lanes[i] = ConvertScalarTo(i + 1); v2 = InsertLane(v2, i, ConvertScalarTo(i + 1)); HWY_ASSERT_VEC_EQ(d, lanes.get(), v2); } HWY_ASSERT_VEC_EQ(d, v, v2); for (size_t i = 0; i < N; ++i) { lanes[i] = ConvertScalarTo(0); const V v3 = Load(d, lanes.get()); const V actual = InsertLane(v3, i, ConvertScalarTo(i + 1)); HWY_ASSERT_VEC_EQ(d, v, actual); lanes[i] = ConvertScalarTo(i + 1); // restore lane i } } }; HWY_NOINLINE void TestAllInsertLane() { ForAllTypesAndSpecial(ForPartialVectors()); } struct TestDupEven { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const size_t N = Lanes(d); auto expected = AllocateAligned(N); HWY_ASSERT(expected); for (size_t i = 0; i < N; ++i) { expected[i] = ConvertScalarTo((i & ~size_t{1}) + 1); } HWY_ASSERT_VEC_EQ(d, expected.get(), DupEven(Iota(d, 1))); } }; HWY_NOINLINE void TestAllDupEven() { ForAllTypes(ForShrinkableVectors()); } struct TestDupOdd { template HWY_NOINLINE void operator()(T /*unused*/, D d) { #if HWY_TARGET != HWY_SCALAR const size_t N = Lanes(d); auto expected = AllocateAligned(N); HWY_ASSERT(expected); for (size_t i = 0; i < N; ++i) { expected[i] = ConvertScalarTo((i & ~size_t{1}) + 2); } HWY_ASSERT_VEC_EQ(d, expected.get(), DupOdd(Iota(d, 1))); #else (void)d; #endif } }; HWY_NOINLINE void TestAllDupOdd() { ForAllTypes(ForShrinkableVectors()); } struct TestOddEven { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const size_t N = Lanes(d); const auto even = Iota(d, 1); const auto odd = Iota(d, 1 + N); auto expected = AllocateAligned(N); HWY_ASSERT(expected); for (size_t i = 0; i < N; ++i) { expected[i] = ConvertScalarTo(1 + i + ((i & 1) ? N : 0)); } HWY_ASSERT_VEC_EQ(d, expected.get(), OddEven(odd, even)); } }; HWY_NOINLINE void TestAllOddEven() { ForAllTypes(ForShrinkableVectors()); } class TestBroadcastLane { private: template (kLane))> static HWY_INLINE void DoTestBroadcastLane(D d, const size_t N) { using T = TFromD; // kLane < HWY_MAX_LANES_D(D) is true if (kLane >= N) return; const Vec expected = Set(d, ConvertScalarTo(kLane + 1)); const BlockDFromD d_block; static_assert(d_block.MaxLanes() <= d.MaxLanes(), "d_block.MaxLanes() <= d.MaxLanes() must be true"); constexpr size_t kLanesPer16ByteBlk = 16 / sizeof(T); constexpr int kBlockIdx = kLane / static_cast(kLanesPer16ByteBlk); constexpr int kLaneInBlkIdx = kLane & static_cast(kLanesPer16ByteBlk - 1); const Vec v = Iota(d, 1); const Vec actual = BroadcastLane(v); const Vec actual_block = ExtractBlock(Broadcast(v)); HWY_ASSERT_VEC_EQ(d, expected, actual); HWY_ASSERT_VEC_EQ(d_block, ResizeBitCast(d_block, expected), actual_block); } template (kLane))> static HWY_INLINE void DoTestBroadcastLane(D /*d*/, const size_t /*N*/) { // If kLane >= HWY_MAX_LANES_D(D) is true, do nothing } public: template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto N = Lanes(d); DoTestBroadcastLane<0>(d, N); DoTestBroadcastLane<1>(d, N); DoTestBroadcastLane<2>(d, N); DoTestBroadcastLane<3>(d, N); DoTestBroadcastLane<6>(d, N); DoTestBroadcastLane<14>(d, N); DoTestBroadcastLane<29>(d, N); DoTestBroadcastLane<53>(d, N); DoTestBroadcastLane<115>(d, N); DoTestBroadcastLane<251>(d, N); DoTestBroadcastLane<257>(d, N); } }; HWY_NOINLINE void TestAllBroadcastLane() { ForAllTypes(ForPartialFixedOrFullScalableVectors()); } // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE namespace hwy { HWY_BEFORE_TEST(HwySwizzleTest); HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllGetLane); HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllExtractLane); HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllInsertLane); HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllDupEven); HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllDupOdd); HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllOddEven); HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllBroadcastLane); } // namespace hwy #endif