// Copyright 2019 Google LLC // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #undef HWY_TARGET_INCLUDE #define HWY_TARGET_INCLUDE "tests/table_test.cc" #include "hwy/foreach_target.h" // IWYU pragma: keep #include "hwy/highway.h" #include "hwy/tests/test_util-inl.h" HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { struct TestTableLookupLanes { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const RebindToSigned di; using TI = TFromD; #if HWY_TARGET != HWY_SCALAR const size_t N = Lanes(d); auto idx = AllocateAligned(N); auto expected = AllocateAligned(N); HWY_ASSERT(idx && expected); ZeroBytes(idx.get(), N * sizeof(TI)); const auto v = Iota(d, 1); if (N <= 8) { // Test all permutations for (size_t i0 = 0; i0 < N; ++i0) { idx[0] = static_cast(i0); for (size_t i1 = 0; i1 < N; ++i1) { if (N >= 2) idx[1] = static_cast(i1); for (size_t i2 = 0; i2 < N; ++i2) { if (N >= 4) idx[2] = static_cast(i2); for (size_t i3 = 0; i3 < N; ++i3) { if (N >= 4) idx[3] = static_cast(i3); for (size_t i = 0; i < N; ++i) { expected[i] = ConvertScalarTo(idx[i] + 1); // == v[idx[i]] } const auto opaque1 = IndicesFromVec(d, Load(di, idx.get())); const auto actual1 = TableLookupLanes(v, opaque1); HWY_ASSERT_VEC_EQ(d, expected.get(), actual1); const auto opaque2 = SetTableIndices(d, idx.get()); const auto actual2 = TableLookupLanes(v, opaque2); HWY_ASSERT_VEC_EQ(d, expected.get(), actual2); } } } } } else { // Too many permutations to test exhaustively; choose one with repeated // and cross-block indices and ensure indices do not exceed #lanes. // For larger vectors, upper lanes will be zero. HWY_ALIGN TI idx_source[16] = {1, 3, 2, 2, 8, 1, 7, 6, 15, 14, 14, 15, 4, 9, 8, 5}; for (size_t i = 0; i < N; ++i) { idx[i] = (i < 16) ? idx_source[i] : 0; // Avoid undefined results / asan error for scalar by capping indices. if (idx[i] >= static_cast(N)) { idx[i] = static_cast(N - 1); } expected[i] = ConvertScalarTo(idx[i] + 1); // == v[idx[i]] } const auto opaque1 = IndicesFromVec(d, Load(di, idx.get())); const auto actual1 = TableLookupLanes(v, opaque1); HWY_ASSERT_VEC_EQ(d, expected.get(), actual1); const auto opaque2 = SetTableIndices(d, idx.get()); const auto actual2 = TableLookupLanes(v, opaque2); HWY_ASSERT_VEC_EQ(d, expected.get(), actual2); } #else const TI index = 0; const auto v = Set(d, 1); const auto opaque1 = SetTableIndices(d, &index); HWY_ASSERT_VEC_EQ(d, v, TableLookupLanes(v, opaque1)); const auto opaque2 = IndicesFromVec(d, Zero(di)); HWY_ASSERT_VEC_EQ(d, v, TableLookupLanes(v, opaque2)); #endif } }; HWY_NOINLINE void TestAllTableLookupLanes() { ForAllTypes(ForPartialVectors()); } struct TestTwoTablesLookupLanes { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const RebindToUnsigned du; using TU = TFromD; const size_t N = Lanes(d); const size_t twiceN = N * 2; auto idx = AllocateAligned(twiceN); auto expected = AllocateAligned(twiceN); HWY_ASSERT(idx && expected); ZeroBytes(idx.get(), twiceN * sizeof(TU)); const auto a = Iota(d, 1); const auto b = Add(a, Set(d, ConvertScalarTo(N))); if (twiceN <= 8) { // Test all permutations for (size_t i0 = 0; i0 < twiceN; ++i0) { idx[0] = static_cast(i0); for (size_t i1 = 0; i1 < twiceN; ++i1) { if (twiceN >= 2) idx[1] = static_cast(i1); for (size_t i2 = 0; i2 < twiceN; ++i2) { if (twiceN >= 4) idx[2] = static_cast(i2); for (size_t i3 = 0; i3 < twiceN; ++i3) { if (twiceN >= 4) idx[3] = static_cast(i3); for (size_t i = 0; i < twiceN; ++i) { expected[i] = ConvertScalarTo(idx[i] + 1); // == v[idx[i]] } const auto opaque1_a = IndicesFromVec(d, Load(du, idx.get())); const auto opaque1_b = IndicesFromVec(d, Load(du, idx.get() + N)); const auto actual1_a = TwoTablesLookupLanes(d, a, b, opaque1_a); const auto actual1_b = TwoTablesLookupLanes(d, a, b, opaque1_b); HWY_ASSERT_VEC_EQ(d, expected.get(), actual1_a); HWY_ASSERT_VEC_EQ(d, expected.get() + N, actual1_b); const auto opaque2_a = SetTableIndices(d, idx.get()); const auto opaque2_b = SetTableIndices(d, idx.get() + N); const auto actual2_a = TwoTablesLookupLanes(d, a, b, opaque2_a); const auto actual2_b = TwoTablesLookupLanes(d, a, b, opaque2_b); HWY_ASSERT_VEC_EQ(d, expected.get(), actual2_a); HWY_ASSERT_VEC_EQ(d, expected.get() + N, actual2_b); } } } } } else { constexpr size_t kLanesPerBlock = 16 / sizeof(T); constexpr size_t kMaxBlockIdx = static_cast(LimitsMax()) >> 1; static_assert(kMaxBlockIdx > 0, "kMaxBlockIdx > 0 must be true"); const size_t num_of_blocks_per_vect = HWY_MAX(N / kLanesPerBlock, 1); const size_t num_of_blocks_to_check = HWY_MIN(num_of_blocks_per_vect * 2, kMaxBlockIdx); for (size_t i = 0; i < num_of_blocks_to_check; i++) { // Too many permutations to test exhaustively; choose one with repeated // and cross-block indices and ensure indices do not exceed #lanes. // For larger vectors, upper lanes will be zero. HWY_ALIGN TU idx_source[16] = {1, 3, 2, 2, 8, 1, 7, 6, 15, 14, 14, 15, 4, 9, 8, 5}; for (size_t j = 0; j < twiceN; ++j) { idx[j] = static_cast((i * kLanesPerBlock + idx_source[j & 15] + (j & static_cast(-16))) & (twiceN - 1)); expected[j] = ConvertScalarTo(idx[j] + 1); // == v[idx[j]] } const auto opaque1_a = IndicesFromVec(d, Load(du, idx.get())); const auto opaque1_b = IndicesFromVec(d, Load(du, idx.get() + N)); const auto actual1_a = TwoTablesLookupLanes(d, a, b, opaque1_a); const auto actual1_b = TwoTablesLookupLanes(d, a, b, opaque1_b); HWY_ASSERT_VEC_EQ(d, expected.get(), actual1_a); HWY_ASSERT_VEC_EQ(d, expected.get() + N, actual1_b); const auto opaque2_a = SetTableIndices(d, idx.get()); const auto opaque2_b = SetTableIndices(d, idx.get() + N); const auto actual2_a = TwoTablesLookupLanes(d, a, b, opaque2_a); const auto actual2_b = TwoTablesLookupLanes(d, a, b, opaque2_b); HWY_ASSERT_VEC_EQ(d, expected.get(), actual2_a); HWY_ASSERT_VEC_EQ(d, expected.get() + N, actual2_b); } } } }; HWY_NOINLINE void TestAllTwoTablesLookupLanes() { ForAllTypes(ForPartialVectors()); } // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE namespace hwy { HWY_BEFORE_TEST(HwyTableTest); HWY_EXPORT_AND_TEST_P(HwyTableTest, TestAllTableLookupLanes); HWY_EXPORT_AND_TEST_P(HwyTableTest, TestAllTwoTablesLookupLanes); } // namespace hwy #endif