// Copyright 2019 Google LLC // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #undef HWY_TARGET_INCLUDE #define HWY_TARGET_INCLUDE "tests/shuffle4_test.cc" #include "hwy/foreach_target.h" // IWYU pragma: keep #include "hwy/highway.h" #include "hwy/tests/test_util-inl.h" HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { class TestPer4LaneBlockShuffle { private: template static HWY_INLINE VFromD InterleaveMaskVectors(D /*d*/, VFromD a, VFromD /*b*/) { return a; } #if HWY_TARGET != HWY_SCALAR template static HWY_INLINE VFromD InterleaveMaskVectors(D d, VFromD a, VFromD b) { return InterleaveLower(d, a, b); } #endif template static HWY_INLINE Mask Per4LaneBlockShufValidMask(D d, const size_t N, const size_t idx1, const size_t idx0) { if (N < 4) { const RebindToSigned di; using TI = TFromD; const auto lane_0_valid = Set(di, static_cast(-static_cast(idx0 < N))); if (N > 1) { const auto lane_1_valid = Set(di, static_cast(-static_cast(idx1 < N))); return RebindMask(d, MaskFromVec(InterleaveMaskVectors(di, lane_0_valid, lane_1_valid))); } return RebindMask(d, MaskFromVec(lane_0_valid)); } return FirstN(d, N); } // TODO(b/287462770): inline to work around incorrect SVE codegen template static HWY_INLINE void DoCheckPer4LaneBlkShufResult( D d, const size_t N, VFromD actual, const TFromD* HWY_RESTRICT src_lanes, TFromD* HWY_RESTRICT expected, size_t idx3, size_t idx2, size_t idx1, size_t idx0) { for (size_t i = 0; i < N; i += 4) { expected[i] = src_lanes[i + idx0]; expected[i + 1] = src_lanes[i + idx1]; expected[i + 2] = src_lanes[i + idx2]; expected[i + 3] = src_lanes[i + idx3]; } if (N < 4) { if (idx0 >= N) expected[0] = TFromD{0}; if (idx1 >= N) expected[1] = TFromD{0}; } const auto valid_lanes_mask = Per4LaneBlockShufValidMask(d, N, idx1, idx0); HWY_ASSERT_VEC_EQ(d, expected, IfThenElseZero(valid_lanes_mask, actual)); } #if HWY_TARGET != HWY_SCALAR template static HWY_NOINLINE void TestTblLookupPer4LaneBlkShuf( D d, const size_t N, const TFromD* HWY_RESTRICT src_lanes, TFromD* HWY_RESTRICT expected) { const auto v = Load(d, src_lanes); for (size_t idx3210 = 0; idx3210 <= 0xFF; idx3210++) { const size_t idx3 = (idx3210 >> 6) & 3; const size_t idx2 = (idx3210 >> 4) & 3; const size_t idx1 = (idx3210 >> 2) & 3; const size_t idx0 = idx3210 & 3; const auto actual = detail::TblLookupPer4LaneBlkShuf(v, idx3210); DoCheckPer4LaneBlkShufResult(d, N, actual, src_lanes, expected, idx3, idx2, idx1, idx0); } } #endif template static HWY_INLINE void DoTestPer4LaneBlkShuffle( D d, const size_t N, const VFromD v, const TFromD* HWY_RESTRICT src_lanes, TFromD* HWY_RESTRICT expected) { const auto actual = Per4LaneBlockShuffle(v); DoCheckPer4LaneBlkShufResult(d, N, actual, src_lanes, expected, kIdx3, kIdx2, kIdx1, kIdx0); } template static HWY_NOINLINE void DoTestPer4LaneBlkShuffles( D d, const size_t N, const VecArg> v, TFromD* HWY_RESTRICT src_lanes, TFromD* HWY_RESTRICT expected) { Store(v, d, src_lanes); #if HWY_TARGET != HWY_SCALAR TestTblLookupPer4LaneBlkShuf(d, N, src_lanes, expected); #endif DoTestPer4LaneBlkShuffle<0, 1, 2, 3>(d, N, v, src_lanes, expected); #if !HWY_COMPILER_MSVC // speed up MSVC builds DoTestPer4LaneBlkShuffle<0, 1, 3, 2>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<0, 2, 3, 1>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<0, 3, 0, 2>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<1, 0, 1, 0>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<1, 0, 3, 1>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<1, 0, 3, 2>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<1, 2, 0, 3>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<1, 2, 1, 3>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<1, 1, 0, 0>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<2, 0, 1, 3>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<2, 0, 2, 0>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<2, 1, 2, 0>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<2, 2, 0, 0>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<2, 3, 0, 1>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<2, 3, 3, 0>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<3, 0, 2, 1>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<3, 1, 0, 3>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<3, 1, 3, 1>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<3, 2, 1, 0>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<3, 2, 3, 2>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<3, 3, 0, 1>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<3, 3, 1, 1>(d, N, v, src_lanes, expected); DoTestPer4LaneBlkShuffle<3, 3, 2, 2>(d, N, v, src_lanes, expected); #endif } template static HWY_INLINE Vec GenerateTestVect(hwy::NonFloatTag /*tag*/, D d) { const RebindToUnsigned du; using TU = TFromD; constexpr TU kIotaStart = static_cast(0x0706050403020101u & LimitsMax()); return BitCast(d, Iota(du, kIotaStart)); } template static HWY_INLINE Vec GenerateTestVect(hwy::FloatTag /*tag*/, D d) { const RebindToUnsigned du; using T = TFromD; using TU = TFromD; constexpr size_t kNumOfBitsInT = sizeof(T) * 8; constexpr TU kIntBitsMask = (kNumOfBitsInT > 16) ? static_cast(static_cast(~TU{0}) >> 16) : TU{0}; const auto flt_iota = Set(d, 1); if (kIntBitsMask == 0) return flt_iota; const auto int_iota = And(GenerateTestVect(hwy::NonFloatTag(), du), Set(du, kIntBitsMask)); return Or(flt_iota, BitCast(d, int_iota)); } public: template HWY_NOINLINE void operator()(T /*unused*/, D d) { const size_t N = Lanes(d); const size_t alloc_len = static_cast((N + 3) & (~size_t{3})); HWY_ASSERT(alloc_len >= 4); auto expected = AllocateAligned(alloc_len); auto src_lanes = AllocateAligned(alloc_len); HWY_ASSERT(expected && src_lanes); const T k0 = ConvertScalarTo(0); expected[alloc_len - 4] = k0; expected[alloc_len - 3] = k0; expected[alloc_len - 2] = k0; expected[alloc_len - 1] = k0; src_lanes[alloc_len - 4] = k0; src_lanes[alloc_len - 3] = k0; src_lanes[alloc_len - 2] = k0; src_lanes[alloc_len - 1] = k0; const auto v = GenerateTestVect(hwy::IsFloatTag(), d); DoTestPer4LaneBlkShuffles(d, N, v, src_lanes.get(), expected.get()); const RebindToUnsigned du; using TU = TFromD; const auto msb_mask = BitCast(d, Set(du, static_cast(TU{1} << (sizeof(TU) * 8 - 1)))); DoTestPer4LaneBlkShuffles(d, N, Xor(v, msb_mask), src_lanes.get(), expected.get()); } }; HWY_NOINLINE void TestAllPer4LaneBlockShuffle() { ForAllTypes(ForPartialFixedOrFullScalableVectors()); } // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE namespace hwy { HWY_BEFORE_TEST(HwyShuffle4Test); HWY_EXPORT_AND_TEST_P(HwyShuffle4Test, TestAllPer4LaneBlockShuffle); } // namespace hwy #endif