// Copyright 2019 Google LLC // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #undef HWY_TARGET_INCLUDE #define HWY_TARGET_INCLUDE "tests/minmax_test.cc" #include "hwy/foreach_target.h" // IWYU pragma: keep #include "hwy/highway.h" #include "hwy/tests/test_util-inl.h" HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { struct TestUnsignedMinMax { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto v0 = Zero(d); // Leave headroom such that v1 < v2 even after wraparound. const auto mod = And(Iota(d, 0), Set(d, LimitsMax() >> 1)); const auto v1 = Add(mod, Set(d, static_cast(1))); const auto v2 = Add(mod, Set(d, static_cast(2))); HWY_ASSERT_VEC_EQ(d, v1, Min(v1, v2)); HWY_ASSERT_VEC_EQ(d, v2, Max(v1, v2)); HWY_ASSERT_VEC_EQ(d, v0, Min(v1, v0)); HWY_ASSERT_VEC_EQ(d, v1, Max(v1, v0)); const auto vmin = Set(d, LimitsMin()); const auto vmax = Set(d, LimitsMax()); HWY_ASSERT_VEC_EQ(d, vmin, Min(vmin, vmax)); HWY_ASSERT_VEC_EQ(d, vmin, Min(vmax, vmin)); HWY_ASSERT_VEC_EQ(d, vmax, Max(vmin, vmax)); HWY_ASSERT_VEC_EQ(d, vmax, Max(vmax, vmin)); } }; struct TestSignedMinMax { template HWY_NOINLINE void operator()(T /*unused*/, D d) { // Leave headroom such that v1 < v2 even after wraparound. const auto mod = And(Iota(d, 0), Set(d, ConvertScalarTo(LimitsMax() >> 1))); const auto v1 = Add(mod, Set(d, ConvertScalarTo(1))); const auto v2 = Add(mod, Set(d, ConvertScalarTo(2))); const auto v_neg = Sub(Zero(d), v1); HWY_ASSERT_VEC_EQ(d, v1, Min(v1, v2)); HWY_ASSERT_VEC_EQ(d, v2, Max(v1, v2)); HWY_ASSERT_VEC_EQ(d, v_neg, Min(v1, v_neg)); HWY_ASSERT_VEC_EQ(d, v1, Max(v1, v_neg)); const auto v0 = Zero(d); const auto vmin = Set(d, LimitsMin()); const auto vmax = Set(d, LimitsMax()); HWY_ASSERT_VEC_EQ(d, vmin, Min(v0, vmin)); HWY_ASSERT_VEC_EQ(d, vmin, Min(vmin, v0)); HWY_ASSERT_VEC_EQ(d, v0, Max(v0, vmin)); HWY_ASSERT_VEC_EQ(d, v0, Max(vmin, v0)); HWY_ASSERT_VEC_EQ(d, vmin, Min(vmin, vmax)); HWY_ASSERT_VEC_EQ(d, vmin, Min(vmax, vmin)); HWY_ASSERT_VEC_EQ(d, vmax, Max(vmin, vmax)); HWY_ASSERT_VEC_EQ(d, vmax, Max(vmax, vmin)); } }; struct TestFloatMinMax { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto v1 = Iota(d, 1); const auto v2 = Iota(d, 2); const auto v_neg = Iota(d, -ConvertScalarTo(Lanes(d))); HWY_ASSERT_VEC_EQ(d, v1, Min(v1, v2)); HWY_ASSERT_VEC_EQ(d, v2, Max(v1, v2)); HWY_ASSERT_VEC_EQ(d, v_neg, Min(v1, v_neg)); HWY_ASSERT_VEC_EQ(d, v1, Max(v1, v_neg)); const auto v0 = Zero(d); const auto vmin = Set(d, ConvertScalarTo(-1E30)); const auto vmax = Set(d, ConvertScalarTo(1E30)); HWY_ASSERT_VEC_EQ(d, vmin, Min(v0, vmin)); HWY_ASSERT_VEC_EQ(d, vmin, Min(vmin, v0)); HWY_ASSERT_VEC_EQ(d, v0, Max(v0, vmin)); HWY_ASSERT_VEC_EQ(d, v0, Max(vmin, v0)); HWY_ASSERT_VEC_EQ(d, vmin, Min(vmin, vmax)); HWY_ASSERT_VEC_EQ(d, vmin, Min(vmax, vmin)); HWY_ASSERT_VEC_EQ(d, vmax, Max(vmin, vmax)); HWY_ASSERT_VEC_EQ(d, vmax, Max(vmax, vmin)); } }; HWY_NOINLINE void TestAllMinMax() { ForUnsignedTypes(ForPartialVectors()); ForSignedTypes(ForPartialVectors()); ForFloatTypes(ForPartialVectors()); } template static HWY_NOINLINE Vec Make128(D d, uint64_t hi, uint64_t lo) { alignas(16) uint64_t in[2]; in[0] = lo; in[1] = hi; return LoadDup128(d, in); } struct TestMinMax128 { template HWY_NOINLINE void operator()(T /*unused*/, D d) { using V = Vec; const size_t N = Lanes(d); auto a_lanes = AllocateAligned(N); auto b_lanes = AllocateAligned(N); auto min_lanes = AllocateAligned(N); auto max_lanes = AllocateAligned(N); RandomState rng; const V v00 = Zero(d); const V v01 = Make128(d, 0, 1); const V v10 = Make128(d, 1, 0); const V v11 = Add(v01, v10); // Same arg HWY_ASSERT_VEC_EQ(d, v00, Min128(d, v00, v00)); HWY_ASSERT_VEC_EQ(d, v01, Min128(d, v01, v01)); HWY_ASSERT_VEC_EQ(d, v10, Min128(d, v10, v10)); HWY_ASSERT_VEC_EQ(d, v11, Min128(d, v11, v11)); HWY_ASSERT_VEC_EQ(d, v00, Max128(d, v00, v00)); HWY_ASSERT_VEC_EQ(d, v01, Max128(d, v01, v01)); HWY_ASSERT_VEC_EQ(d, v10, Max128(d, v10, v10)); HWY_ASSERT_VEC_EQ(d, v11, Max128(d, v11, v11)); // First arg less HWY_ASSERT_VEC_EQ(d, v00, Min128(d, v00, v01)); HWY_ASSERT_VEC_EQ(d, v01, Min128(d, v01, v10)); HWY_ASSERT_VEC_EQ(d, v10, Min128(d, v10, v11)); HWY_ASSERT_VEC_EQ(d, v01, Max128(d, v00, v01)); HWY_ASSERT_VEC_EQ(d, v10, Max128(d, v01, v10)); HWY_ASSERT_VEC_EQ(d, v11, Max128(d, v10, v11)); // Second arg less HWY_ASSERT_VEC_EQ(d, v00, Min128(d, v01, v00)); HWY_ASSERT_VEC_EQ(d, v01, Min128(d, v10, v01)); HWY_ASSERT_VEC_EQ(d, v10, Min128(d, v11, v10)); HWY_ASSERT_VEC_EQ(d, v01, Max128(d, v01, v00)); HWY_ASSERT_VEC_EQ(d, v10, Max128(d, v10, v01)); HWY_ASSERT_VEC_EQ(d, v11, Max128(d, v11, v10)); // Also check 128-bit blocks are independent for (size_t rep = 0; rep < AdjustedReps(1000); ++rep) { for (size_t i = 0; i < N; ++i) { a_lanes[i] = Random64(&rng); b_lanes[i] = Random64(&rng); } const V a = Load(d, a_lanes.get()); const V b = Load(d, b_lanes.get()); for (size_t i = 0; i < N; i += 2) { const bool lt = a_lanes[i + 1] == b_lanes[i + 1] ? (a_lanes[i] < b_lanes[i]) : (a_lanes[i + 1] < b_lanes[i + 1]); min_lanes[i + 0] = lt ? a_lanes[i + 0] : b_lanes[i + 0]; min_lanes[i + 1] = lt ? a_lanes[i + 1] : b_lanes[i + 1]; max_lanes[i + 0] = lt ? b_lanes[i + 0] : a_lanes[i + 0]; max_lanes[i + 1] = lt ? b_lanes[i + 1] : a_lanes[i + 1]; } HWY_ASSERT_VEC_EQ(d, min_lanes.get(), Min128(d, a, b)); HWY_ASSERT_VEC_EQ(d, max_lanes.get(), Max128(d, a, b)); } } }; HWY_NOINLINE void TestAllMinMax128() { ForGEVectors<128, TestMinMax128>()(uint64_t()); } struct TestMinMax128Upper { template HWY_NOINLINE void operator()(T /*unused*/, D d) { using V = Vec; const size_t N = Lanes(d); auto a_lanes = AllocateAligned(N); auto b_lanes = AllocateAligned(N); auto min_lanes = AllocateAligned(N); auto max_lanes = AllocateAligned(N); RandomState rng; const V v00 = Zero(d); const V v01 = Make128(d, 0, 1); const V v10 = Make128(d, 1, 0); const V v11 = Add(v01, v10); // Same arg HWY_ASSERT_VEC_EQ(d, v00, Min128Upper(d, v00, v00)); HWY_ASSERT_VEC_EQ(d, v01, Min128Upper(d, v01, v01)); HWY_ASSERT_VEC_EQ(d, v10, Min128Upper(d, v10, v10)); HWY_ASSERT_VEC_EQ(d, v11, Min128Upper(d, v11, v11)); HWY_ASSERT_VEC_EQ(d, v00, Max128Upper(d, v00, v00)); HWY_ASSERT_VEC_EQ(d, v01, Max128Upper(d, v01, v01)); HWY_ASSERT_VEC_EQ(d, v10, Max128Upper(d, v10, v10)); HWY_ASSERT_VEC_EQ(d, v11, Max128Upper(d, v11, v11)); // Equivalent but not equal (chooses second arg) HWY_ASSERT_VEC_EQ(d, v01, Min128Upper(d, v00, v01)); HWY_ASSERT_VEC_EQ(d, v11, Min128Upper(d, v10, v11)); HWY_ASSERT_VEC_EQ(d, v00, Min128Upper(d, v01, v00)); HWY_ASSERT_VEC_EQ(d, v10, Min128Upper(d, v11, v10)); HWY_ASSERT_VEC_EQ(d, v00, Max128Upper(d, v01, v00)); HWY_ASSERT_VEC_EQ(d, v10, Max128Upper(d, v11, v10)); HWY_ASSERT_VEC_EQ(d, v01, Max128Upper(d, v00, v01)); HWY_ASSERT_VEC_EQ(d, v11, Max128Upper(d, v10, v11)); // First arg less HWY_ASSERT_VEC_EQ(d, v01, Min128Upper(d, v01, v10)); HWY_ASSERT_VEC_EQ(d, v10, Max128Upper(d, v01, v10)); // Second arg less HWY_ASSERT_VEC_EQ(d, v01, Min128Upper(d, v10, v01)); HWY_ASSERT_VEC_EQ(d, v10, Max128Upper(d, v10, v01)); // Also check 128-bit blocks are independent for (size_t rep = 0; rep < AdjustedReps(1000); ++rep) { for (size_t i = 0; i < N; ++i) { a_lanes[i] = Random64(&rng); b_lanes[i] = Random64(&rng); } const V a = Load(d, a_lanes.get()); const V b = Load(d, b_lanes.get()); for (size_t i = 0; i < N; i += 2) { const bool lt = a_lanes[i + 1] < b_lanes[i + 1]; min_lanes[i + 0] = lt ? a_lanes[i + 0] : b_lanes[i + 0]; min_lanes[i + 1] = lt ? a_lanes[i + 1] : b_lanes[i + 1]; max_lanes[i + 0] = lt ? b_lanes[i + 0] : a_lanes[i + 0]; max_lanes[i + 1] = lt ? b_lanes[i + 1] : a_lanes[i + 1]; } HWY_ASSERT_VEC_EQ(d, min_lanes.get(), Min128Upper(d, a, b)); HWY_ASSERT_VEC_EQ(d, max_lanes.get(), Max128Upper(d, a, b)); } } }; HWY_NOINLINE void TestAllMinMax128Upper() { ForGEVectors<128, TestMinMax128Upper>()(uint64_t()); } // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE namespace hwy { HWY_BEFORE_TEST(HwyMinMaxTest); HWY_EXPORT_AND_TEST_P(HwyMinMaxTest, TestAllMinMax); HWY_EXPORT_AND_TEST_P(HwyMinMaxTest, TestAllMinMax128); HWY_EXPORT_AND_TEST_P(HwyMinMaxTest, TestAllMinMax128Upper); } // namespace hwy #endif