// Copyright 2023 Google LLC // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "hwy/base.h" #undef HWY_TARGET_INCLUDE #define HWY_TARGET_INCLUDE "tests/masked_arithmetic_test.cc" #include "hwy/foreach_target.h" // IWYU pragma: keep #include "hwy/highway.h" #include "hwy/nanobenchmark.h" #include "hwy/tests/test_util-inl.h" HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { struct TestUnsignedMinMax { template HWY_NOINLINE void operator()(T /*unused*/, D d) { RandomState rng; const Vec v2 = Iota(d, hwy::Unpredictable1() + 1); const Vec v3 = Iota(d, hwy::Unpredictable1() + 2); const Vec v4 = Iota(d, hwy::Unpredictable1() + 3); const Vec k0 = Zero(d); const Vec vm = Set(d, LimitsMax()); using TI = MakeSigned; // For mask > 0 comparison const Rebind di; using VI = Vec; const size_t N = Lanes(d); auto bool_lanes = AllocateAligned(N); auto expected_min = AllocateAligned(N); auto expected_max = AllocateAligned(N); HWY_ASSERT(bool_lanes && expected_min && expected_max); // Ensure unsigned 0 < max. HWY_ASSERT_VEC_EQ(d, k0, MaskedMinOr(v2, MaskTrue(d), k0, vm)); HWY_ASSERT_VEC_EQ(d, k0, MaskedMinOr(v2, MaskTrue(d), vm, k0)); HWY_ASSERT_VEC_EQ(d, vm, MaskedMaxOr(v2, MaskTrue(d), k0, vm)); HWY_ASSERT_VEC_EQ(d, vm, MaskedMaxOr(v2, MaskTrue(d), vm, k0)); // Each lane should have a chance of having mask=true. for (size_t rep = 0; rep < AdjustedReps(200); ++rep) { for (size_t i = 0; i < N; ++i) { bool_lanes[i] = (Random32(&rng) & 1024) ? TI(1) : TI(0); const T t2 = static_cast(AddWithWraparound(static_cast(i), 2)); const T t3 = static_cast(AddWithWraparound(static_cast(i), 3)); const T t4 = static_cast(AddWithWraparound(static_cast(i), 4)); if (bool_lanes[i]) { expected_min[i] = HWY_MIN(t3, t4); expected_max[i] = HWY_MAX(t3, t4); } else { expected_min[i] = expected_max[i] = t2; } } const VI mask_i = Load(di, bool_lanes.get()); const Mask mask = RebindMask(d, Gt(mask_i, Zero(di))); Print(di, "mi", mask_i); Print(d, "v2", v3); Print(d, "v3", v4); HWY_ASSERT_VEC_EQ(d, expected_min.get(), MaskedMinOr(v2, mask, v3, v4)); HWY_ASSERT_VEC_EQ(d, expected_min.get(), MaskedMinOr(v2, mask, v4, v3)); HWY_ASSERT_VEC_EQ(d, expected_max.get(), MaskedMaxOr(v2, mask, v3, v4)); HWY_ASSERT_VEC_EQ(d, expected_max.get(), MaskedMaxOr(v2, mask, v4, v3)); } } }; HWY_NOINLINE void TestAllUnsignedMinMax() { ForUnsignedTypes(ForPartialVectors()); } struct TestSignedMinMax { template HWY_NOINLINE void operator()(T /*unused*/, D d) { RandomState rng; const Vec v2 = Iota(d, hwy::Unpredictable1() + 1); const Vec v3 = Iota(d, hwy::Unpredictable1() + 2); const Vec v4 = Iota(d, hwy::Unpredictable1() + 3); const Vec k0 = Zero(d); const Vec vm = Set(d, LowestValue()); using TI = MakeSigned; // For mask > 0 comparison const Rebind di; using VI = Vec; const size_t N = Lanes(d); auto bool_lanes = AllocateAligned(N); auto expected_min = AllocateAligned(N); auto expected_max = AllocateAligned(N); HWY_ASSERT(bool_lanes && expected_min && expected_max); // Ensure signed min < 0. HWY_ASSERT_VEC_EQ(d, vm, MaskedMinOr(v2, MaskTrue(d), k0, vm)); HWY_ASSERT_VEC_EQ(d, vm, MaskedMinOr(v2, MaskTrue(d), vm, k0)); HWY_ASSERT_VEC_EQ(d, k0, MaskedMaxOr(v2, MaskTrue(d), k0, vm)); HWY_ASSERT_VEC_EQ(d, k0, MaskedMaxOr(v2, MaskTrue(d), vm, k0)); // Each lane should have a chance of having mask=true. for (size_t rep = 0; rep < AdjustedReps(200); ++rep) { for (size_t i = 0; i < N; ++i) { bool_lanes[i] = (Random32(&rng) & 1024) ? TI(1) : TI(0); const T t2 = AddWithWraparound(ConvertScalarTo(i), 2); const T t3 = AddWithWraparound(ConvertScalarTo(i), 3); const T t4 = AddWithWraparound(ConvertScalarTo(i), 4); if (bool_lanes[i]) { expected_min[i] = HWY_MIN(t3, t4); expected_max[i] = HWY_MAX(t3, t4); } else { expected_min[i] = expected_max[i] = t2; } } const VI mask_i = Load(di, bool_lanes.get()); const Mask mask = RebindMask(d, Gt(mask_i, Zero(di))); HWY_ASSERT_VEC_EQ(d, expected_min.get(), MaskedMinOr(v2, mask, v3, v4)); HWY_ASSERT_VEC_EQ(d, expected_min.get(), MaskedMinOr(v2, mask, v4, v3)); HWY_ASSERT_VEC_EQ(d, expected_max.get(), MaskedMaxOr(v2, mask, v3, v4)); HWY_ASSERT_VEC_EQ(d, expected_max.get(), MaskedMaxOr(v2, mask, v4, v3)); } } }; HWY_NOINLINE void TestAllSignedMinMax() { ForSignedTypes(ForPartialVectors()); ForFloatTypes(ForPartialVectors()); } struct TestAddSubMul { template HWY_NOINLINE void operator()(T /*unused*/, D d) { RandomState rng; const Vec v2 = Iota(d, hwy::Unpredictable1() + 1); const Vec v3 = Iota(d, hwy::Unpredictable1() + 2); const Vec v4 = Iota(d, hwy::Unpredictable1() + 3); // So that we can subtract two iotas without resulting in a constant. const Vec tv4 = Add(v4, v4); // For range-limited (so mul does not overflow), non-constant inputs. // We cannot just And() because T might be floating-point. alignas(16) static const T mod_lanes[16] = { ConvertScalarTo(0), ConvertScalarTo(1), ConvertScalarTo(2), ConvertScalarTo(hwy::Unpredictable1() + 2)}; const Vec in_mul = LoadDup128(d, mod_lanes); using TI = MakeSigned; // For mask > 0 comparison const Rebind di; using VI = Vec; const size_t N = Lanes(d); auto bool_lanes = AllocateAligned(N); auto expected_add = AllocateAligned(N); auto expected_sub = AllocateAligned(N); auto expected_mul = AllocateAligned(N); HWY_ASSERT(bool_lanes && expected_add && expected_sub && expected_mul); // Each lane should have a chance of having mask=true. for (size_t rep = 0; rep < AdjustedReps(200); ++rep) { for (size_t i = 0; i < N; ++i) { bool_lanes[i] = (Random32(&rng) & 1024) ? TI(1) : TI(0); if (bool_lanes[i]) { expected_add[i] = ConvertScalarTo(2 * i + 7); expected_sub[i] = ConvertScalarTo(i + 5); const size_t mod_i = i & ((16 / sizeof(T)) - 1); expected_mul[i] = ConvertScalarTo(mod_lanes[mod_i] * mod_lanes[mod_i]); } else { expected_add[i] = ConvertScalarTo(i + 2); expected_sub[i] = ConvertScalarTo(i + 2); expected_mul[i] = ConvertScalarTo(i + 2); } } const VI mask_i = Load(di, bool_lanes.get()); const Mask mask = RebindMask(d, Gt(mask_i, Zero(di))); HWY_ASSERT_VEC_EQ(d, expected_add.get(), MaskedAddOr(v2, mask, v3, v4)); HWY_ASSERT_VEC_EQ(d, expected_sub.get(), MaskedSubOr(v2, mask, tv4, v3)); HWY_ASSERT_VEC_EQ(d, expected_mul.get(), MaskedMulOr(v2, mask, in_mul, in_mul)); } } }; HWY_NOINLINE void TestAllAddSubMul() { ForAllTypes(ForPartialVectors()); } struct TestUnsignedSatAddSub { template HWY_NOINLINE void operator()(T /*unused*/, D d) { RandomState rng; using TI = MakeSigned; // For mask > 0 comparison const Rebind di; using VI = Vec; const size_t N = Lanes(d); auto bool_lanes = AllocateAligned(N); HWY_ASSERT(bool_lanes); const Vec v2 = Iota(d, hwy::Unpredictable1() + 1); const Vec v0 = Zero(d); const Vec vi = Iota(d, 1); const Vec vm = Set(d, LimitsMax()); // Each lane should have a chance of having mask=true. for (size_t rep = 0; rep < AdjustedReps(200); ++rep) { for (size_t i = 0; i < N; ++i) { bool_lanes[i] = (Random32(&rng) & 1024) ? TI(1) : TI(0); } const VI mask_i = Load(di, bool_lanes.get()); const Mask mask = RebindMask(d, Gt(mask_i, Zero(di))); const Vec disabled_lane_val = Iota(d, 2); Vec expected_add = IfThenElse(mask, Set(d, static_cast(0)), disabled_lane_val); HWY_ASSERT_VEC_EQ(d, expected_add, MaskedSatAddOr(v2, mask, v0, v0)); expected_add = IfThenElse(mask, vi, disabled_lane_val); HWY_ASSERT_VEC_EQ(d, expected_add, MaskedSatAddOr(v2, mask, v0, vi)); expected_add = IfThenElse(mask, Set(d, static_cast(LimitsMax())), disabled_lane_val); HWY_ASSERT_VEC_EQ(d, expected_add, MaskedSatAddOr(v2, mask, v0, vm)); HWY_ASSERT_VEC_EQ(d, expected_add, MaskedSatAddOr(v2, mask, vi, vm)); HWY_ASSERT_VEC_EQ(d, expected_add, MaskedSatAddOr(v2, mask, vm, vm)); Vec expected_sub = IfThenElse(mask, Set(d, static_cast(0)), disabled_lane_val); HWY_ASSERT_VEC_EQ(d, expected_sub, MaskedSatSubOr(v2, mask, v0, v0)); HWY_ASSERT_VEC_EQ(d, expected_sub, MaskedSatSubOr(v2, mask, v0, vi)); HWY_ASSERT_VEC_EQ(d, expected_sub, MaskedSatSubOr(v2, mask, vi, vi)); HWY_ASSERT_VEC_EQ(d, expected_sub, MaskedSatSubOr(v2, mask, vi, vm)); expected_sub = IfThenElse(mask, Sub(vm, vi), disabled_lane_val); HWY_ASSERT_VEC_EQ(d, expected_sub, MaskedSatSubOr(v2, mask, vm, vi)); } } }; struct TestSignedSatAddSub { template HWY_NOINLINE void operator()(T /*unused*/, D d) { RandomState rng; using TI = MakeSigned; // For mask > 0 comparison const Rebind di; using VI = Vec; const size_t N = Lanes(d); auto bool_lanes = AllocateAligned(N); HWY_ASSERT(bool_lanes); const Vec v2 = Iota(d, hwy::Unpredictable1() + 1); const Vec v0 = Zero(d); const Vec vpm = Set(d, LimitsMax()); const Vec vi = PositiveIota(d); const Vec vn = Sub(v0, vi); const Vec vnm = Set(d, LimitsMin()); // Each lane should have a chance of having mask=true. for (size_t rep = 0; rep < AdjustedReps(200); ++rep) { for (size_t i = 0; i < N; ++i) { bool_lanes[i] = (Random32(&rng) & 1024) ? TI(1) : TI(0); } const VI mask_i = Load(di, bool_lanes.get()); const Mask mask = RebindMask(d, Gt(mask_i, Zero(di))); const Vec disabled_lane_val = Iota(d, 2); Vec expected_add = IfThenElse(mask, v0, disabled_lane_val); HWY_ASSERT_VEC_EQ(d, expected_add, MaskedSatAddOr(v2, mask, v0, v0)); expected_add = IfThenElse(mask, vi, disabled_lane_val); HWY_ASSERT_VEC_EQ(d, expected_add, MaskedSatAddOr(v2, mask, v0, vi)); expected_add = IfThenElse(mask, vpm, disabled_lane_val); HWY_ASSERT_VEC_EQ(d, expected_add, MaskedSatAddOr(v2, mask, v0, vpm)); HWY_ASSERT_VEC_EQ(d, expected_add, MaskedSatAddOr(v2, mask, vi, vpm)); HWY_ASSERT_VEC_EQ(d, expected_add, MaskedSatAddOr(v2, mask, vpm, vpm)); Vec expected_sub = IfThenElse(mask, v0, disabled_lane_val); HWY_ASSERT_VEC_EQ(d, expected_sub, MaskedSatSubOr(v2, mask, v0, v0)); expected_sub = IfThenElse(mask, Sub(v0, vi), disabled_lane_val); HWY_ASSERT_VEC_EQ(d, expected_sub, MaskedSatSubOr(v2, mask, v0, vi)); expected_sub = IfThenElse(mask, vn, disabled_lane_val); HWY_ASSERT_VEC_EQ(d, expected_sub, MaskedSatSubOr(v2, mask, vn, v0)); expected_sub = IfThenElse(mask, vnm, disabled_lane_val); HWY_ASSERT_VEC_EQ(d, expected_sub, MaskedSatSubOr(v2, mask, vnm, vi)); HWY_ASSERT_VEC_EQ(d, expected_sub, MaskedSatSubOr(v2, mask, vnm, vpm)); } } }; HWY_NOINLINE void TestAllSatAddSub() { ForU816(ForPartialVectors()); ForI816(ForPartialVectors()); } struct TestDiv { template HWY_NOINLINE void operator()(T /*unused*/, D d) { RandomState rng; using TI = MakeSigned; // For mask > 0 comparison const Rebind di; using VI = Vec; // Wrap after 7 so that even float16_t can represent 1 << iota1. const VI iota1 = And(Iota(di, hwy::Unpredictable1()), Set(di, 7)); const Vec pows = ConvertTo(d, Shl(Set(di, 1), iota1)); const Vec no = ConvertTo(d, iota1); const size_t N = Lanes(d); auto bool_lanes = AllocateAligned(N); auto expected = AllocateAligned(N); HWY_ASSERT(bool_lanes && expected); // Each lane should have a chance of having mask=true. for (size_t rep = 0; rep < AdjustedReps(200); ++rep) { ZeroBytes(expected.get(), N * sizeof(T)); for (size_t i = 0; i < N; ++i) { bool_lanes[i] = (Random32(&rng) & 1024) ? TI(1) : TI(0); const size_t iota1 = (i + 1) & 7; expected[i] = ConvertScalarTo(iota1); if (bool_lanes[i]) { expected[i] = ConvertScalarTo(static_cast(1 << iota1) / 2.0); } } const VI mask_i = Load(di, bool_lanes.get()); const Mask mask = RebindMask(d, Gt(mask_i, Zero(di))); const Vec div = Set(d, ConvertScalarTo(2)); HWY_ASSERT_VEC_EQ(d, expected.get(), MaskedDivOr(no, mask, pows, div)); } } }; HWY_NOINLINE void TestAllDiv() { ForFloatTypes(ForPartialVectors()); } struct TestIntegerDivMod { template static HWY_INLINE void DoSignedDivModTests( D d, const TFromD* HWY_RESTRICT expected_quot, const TFromD* HWY_RESTRICT expected_mod, const TFromD* HWY_RESTRICT neg_expected_quot, const TFromD* HWY_RESTRICT neg_expected_mod, Mask mask, Vec va, Vec vb) { using T = TFromD; const auto v1 = Set(d, static_cast(1)); const auto vneg1 = Set(d, static_cast(-1)); const auto neg_a = Neg(va); const auto neg_b = Neg(vb); HWY_ASSERT_VEC_EQ(d, neg_expected_quot, MaskedDivOr(vneg1, mask, neg_a, vb)); HWY_ASSERT_VEC_EQ(d, neg_expected_quot, MaskedDivOr(vneg1, mask, va, neg_b)); HWY_ASSERT_VEC_EQ(d, expected_quot, MaskedDivOr(v1, mask, neg_a, neg_b)); HWY_ASSERT_VEC_EQ(d, neg_expected_mod, MaskedModOr(neg_b, mask, neg_a, vb)); HWY_ASSERT_VEC_EQ(d, expected_mod, MaskedModOr(vb, mask, va, neg_b)); HWY_ASSERT_VEC_EQ(d, neg_expected_mod, MaskedModOr(neg_b, mask, neg_a, neg_b)); } template static HWY_INLINE void DoSignedDivModTests( D /*d*/, const TFromD* HWY_RESTRICT /*expected_quot*/, const TFromD* HWY_RESTRICT /*expected_mod*/, const TFromD* HWY_RESTRICT /*neg_expected_quot*/, const TFromD* HWY_RESTRICT /*neg_expected_mod*/, Mask /*mask*/, Vec /*va*/, Vec /*vb*/) {} template HWY_NOINLINE void operator()(T /*unused*/, D d) { RandomState rng; const auto v1 = Set(d, static_cast(1)); const auto vmax = Set(d, LimitsMax()); const auto vb = Max(And(Iota(d, static_cast(hwy::Unpredictable1() + 1)), Set(d, static_cast(LimitsMax() >> 1))), Set(d, static_cast(2))); const auto va = Max(And(Sub(vmax, Iota(d, static_cast(hwy::Unpredictable1() - 1))), vmax), Add(vb, vb)); using TI = MakeSigned; // For mask > 0 comparison using TU = MakeUnsigned; const Rebind di; using VI = Vec; const size_t N = Lanes(d); #if HWY_TARGET <= HWY_AVX3 && HWY_IS_MSAN // Workaround for MSAN bug on AVX3 if (sizeof(T) <= 2 && N >= 16) { return; } #endif auto bool_lanes = AllocateAligned(N); auto expected_quot = AllocateAligned(N); auto expected_mod = AllocateAligned(N); auto neg_expected_quot = AllocateAligned(N); auto neg_expected_mod = AllocateAligned(N); HWY_ASSERT(bool_lanes && expected_quot && expected_mod && neg_expected_quot && neg_expected_mod); // Each lane should have a chance of having mask=true. for (size_t rep = 0; rep < AdjustedReps(200); ++rep) { for (size_t i = 0; i < N; ++i) { const auto a0 = static_cast((static_cast(LimitsMax()) - i) & LimitsMax()); const auto b0 = static_cast((i + 2u) & static_cast(LimitsMax() >> 1)); const auto b = static_cast(HWY_MAX(b0, 2)); const auto a = static_cast(HWY_MAX(a0, b + b)); bool_lanes[i] = (Random32(&rng) & 1024) ? TI(1) : TI(0); if (bool_lanes[i]) { expected_quot[i] = static_cast(a / b); expected_mod[i] = static_cast(a % b); } else { expected_quot[i] = static_cast(1); expected_mod[i] = b; } neg_expected_quot[i] = static_cast(static_cast(0) - expected_quot[i]); neg_expected_mod[i] = static_cast(static_cast(0) - expected_mod[i]); } const VI mask_i = Load(di, bool_lanes.get()); const Mask mask = RebindMask(d, Gt(mask_i, Zero(di))); HWY_ASSERT_VEC_EQ(d, expected_quot.get(), MaskedDivOr(v1, mask, va, vb)); HWY_ASSERT_VEC_EQ(d, expected_mod.get(), MaskedModOr(vb, mask, va, vb)); DoSignedDivModTests(d, expected_quot.get(), expected_mod.get(), neg_expected_quot.get(), neg_expected_mod.get(), mask, va, vb); } } }; HWY_NOINLINE void TestAllIntegerDivMod() { ForIntegerTypes(ForPartialVectors()); } struct TestFloatExceptions { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const Vec v4 = Iota(d, hwy::Unpredictable1() + 3); const Mask m0 = MaskFalse(d); // No overflow const Vec inf = Inf(d); HWY_ASSERT_VEC_EQ(d, v4, MaskedAddOr(v4, m0, inf, inf)); HWY_ASSERT_VEC_EQ(d, v4, MaskedSubOr(v4, m0, Neg(inf), Neg(inf))); // No underflow const Vec eps = Set(d, Epsilon()); const Vec half = Set(d, static_cast(0.5f)); HWY_ASSERT_VEC_EQ(d, v4, MaskedMulOr(v4, m0, eps, half)); // Division by zero const Vec v0 = Set(d, ConvertScalarTo(0)); HWY_ASSERT_VEC_EQ(d, v4, MaskedDivOr(v4, m0, v4, v0)); } }; HWY_NOINLINE void TestAllFloatExceptions() { ForFloatTypes(ForPartialVectors()); } // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE namespace hwy { HWY_BEFORE_TEST(HwyMaskedArithmeticTest); HWY_EXPORT_AND_TEST_P(HwyMaskedArithmeticTest, TestAllUnsignedMinMax); HWY_EXPORT_AND_TEST_P(HwyMaskedArithmeticTest, TestAllSignedMinMax); HWY_EXPORT_AND_TEST_P(HwyMaskedArithmeticTest, TestAllAddSubMul); HWY_EXPORT_AND_TEST_P(HwyMaskedArithmeticTest, TestAllSatAddSub); HWY_EXPORT_AND_TEST_P(HwyMaskedArithmeticTest, TestAllDiv); HWY_EXPORT_AND_TEST_P(HwyMaskedArithmeticTest, TestAllIntegerDivMod); HWY_EXPORT_AND_TEST_P(HwyMaskedArithmeticTest, TestAllFloatExceptions); } // namespace hwy #endif