// Copyright 2019 Google LLC // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #undef HWY_TARGET_INCLUDE #define HWY_TARGET_INCLUDE "tests/compare_test.cc" #include "hwy/foreach_target.h" // IWYU pragma: keep #include "hwy/highway.h" #include "hwy/tests/test_util-inl.h" HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { // All types. struct TestEquality { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto v2 = Iota(d, 2); const auto v2b = Iota(d, 2); const auto v3 = Iota(d, 3); const auto mask_false = MaskFalse(d); const auto mask_true = MaskTrue(d); HWY_ASSERT_MASK_EQ(d, mask_false, Eq(v2, v3)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq(v3, v2)); HWY_ASSERT_MASK_EQ(d, mask_true, Eq(v2, v2)); HWY_ASSERT_MASK_EQ(d, mask_true, Eq(v2, v2b)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne(v2, v3)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne(v3, v2)); HWY_ASSERT_MASK_EQ(d, mask_false, Ne(v2, v2)); HWY_ASSERT_MASK_EQ(d, mask_false, Ne(v2, v2b)); } }; HWY_NOINLINE void TestAllEquality() { ForAllTypes(ForPartialVectors()); } // a > b should be true, verify that for Gt/Lt and with swapped args. template void EnsureGreater(D d, TFromD a, TFromD b, const char* file, int line) { const auto mask_false = MaskFalse(d); const auto mask_true = MaskTrue(d); const auto va = Set(d, a); const auto vb = Set(d, b); AssertMaskEqual(d, mask_true, Gt(va, vb), file, line); AssertMaskEqual(d, mask_false, Lt(va, vb), file, line); // Swapped order AssertMaskEqual(d, mask_false, Gt(vb, va), file, line); AssertMaskEqual(d, mask_true, Lt(vb, va), file, line); // Also ensure irreflexive AssertMaskEqual(d, mask_false, Gt(va, va), file, line); AssertMaskEqual(d, mask_false, Gt(vb, vb), file, line); AssertMaskEqual(d, mask_false, Lt(va, va), file, line); AssertMaskEqual(d, mask_false, Lt(vb, vb), file, line); } #define HWY_ENSURE_GREATER(d, a, b) EnsureGreater(d, a, b, __FILE__, __LINE__) // a >= b should be true, verify that for Ge/Le and with swapped args. template void EnsureGreaterOrEqual(D d, TFromD a, TFromD b, const char* file, int line) { const auto mask_true = MaskTrue(d); const auto va = Set(d, a); const auto vb = Set(d, b); const auto mask_eq = Eq(va, vb); AssertMaskEqual(d, mask_true, Ge(va, vb), file, line); AssertMaskEqual(d, mask_eq, Le(va, vb), file, line); // Swapped order AssertMaskEqual(d, mask_eq, Ge(vb, va), file, line); AssertMaskEqual(d, mask_true, Le(vb, va), file, line); // va >= va, vb >= vb, va <= va, and vb <= vb should all be true if // both a and b are non-NaN values AssertMaskEqual(d, mask_true, Ge(va, va), file, line); AssertMaskEqual(d, mask_true, Ge(vb, vb), file, line); AssertMaskEqual(d, mask_true, Le(va, va), file, line); AssertMaskEqual(d, mask_true, Le(vb, vb), file, line); } #define HWY_ENSURE_GREATER_OR_EQUAL(d, a, b) \ EnsureGreaterOrEqual(d, a, b, __FILE__, __LINE__) struct TestStrictUnsigned { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const T max = LimitsMax(); const Vec v0 = Zero(d); const Vec v2 = And(Iota(d, 2), Set(d, 255)); // 0..255 const Mask mask_false = MaskFalse(d); // Individual values of interest HWY_ENSURE_GREATER(d, 2, 1); HWY_ENSURE_GREATER(d, 1, 0); HWY_ENSURE_GREATER(d, 128, 127); HWY_ENSURE_GREATER(d, max, max / 2); HWY_ENSURE_GREATER(d, max, 1); HWY_ENSURE_GREATER(d, max, 0); // Also use Iota to ensure lanes are independent HWY_ASSERT_MASK_EQ(d, mask_false, Lt(v2, v0)); HWY_ASSERT_MASK_EQ(d, mask_false, Gt(v0, v2)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt(v0, v0)); HWY_ASSERT_MASK_EQ(d, mask_false, Gt(v0, v0)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt(v2, v2)); HWY_ASSERT_MASK_EQ(d, mask_false, Gt(v2, v2)); } }; HWY_NOINLINE void TestAllStrictUnsigned() { ForUnsignedTypes(ForPartialVectors()); } struct TestWeakUnsigned { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const T max = LimitsMax(); const Vec v0 = Zero(d); const Vec v1 = Set(d, 1u); const Vec v2 = And(Iota(d, 2), Set(d, 255u)); // 0..255 const Mask mask_true = MaskTrue(d); // Individual values of interest HWY_ENSURE_GREATER_OR_EQUAL(d, 2, 2); HWY_ENSURE_GREATER_OR_EQUAL(d, 2, 1); HWY_ENSURE_GREATER_OR_EQUAL(d, 1, 1); HWY_ENSURE_GREATER_OR_EQUAL(d, 1, 0); HWY_ENSURE_GREATER_OR_EQUAL(d, 0, 0); HWY_ENSURE_GREATER_OR_EQUAL(d, 128, 127); HWY_ENSURE_GREATER_OR_EQUAL(d, 128, 128); HWY_ENSURE_GREATER_OR_EQUAL(d, 127, 127); HWY_ENSURE_GREATER_OR_EQUAL(d, max, max); HWY_ENSURE_GREATER_OR_EQUAL(d, max, max / 2); HWY_ENSURE_GREATER_OR_EQUAL(d, max, 1); HWY_ENSURE_GREATER_OR_EQUAL(d, max, 0); // Also use Iota to ensure lanes are independent const auto mask_v2_is_eq_to_v0 = Eq(v2, v0); HWY_ASSERT_MASK_EQ(d, mask_v2_is_eq_to_v0, Le(v2, v0)); HWY_ASSERT_MASK_EQ(d, mask_v2_is_eq_to_v0, Ge(v0, v2)); HWY_ASSERT_MASK_EQ(d, mask_true, Le(v0, v0)); HWY_ASSERT_MASK_EQ(d, mask_true, Ge(v0, v0)); HWY_ASSERT_MASK_EQ(d, mask_true, Le(v2, v2)); HWY_ASSERT_MASK_EQ(d, mask_true, Ge(v2, v2)); const auto v2_plus_1 = Add(v2, v1); HWY_ASSERT_MASK_EQ(d, Lt(v2, v2_plus_1), Le(v2, v2_plus_1)); HWY_ASSERT_MASK_EQ(d, Gt(v2, v2_plus_1), Ge(v2, v2_plus_1)); HWY_ASSERT_MASK_EQ(d, Lt(v2_plus_1, v2), Le(v2_plus_1, v2)); HWY_ASSERT_MASK_EQ(d, Gt(v2_plus_1, v2), Ge(v2_plus_1, v2)); const auto v2_minus_1 = Sub(v2, v1); HWY_ASSERT_MASK_EQ(d, Lt(v2, v2_minus_1), Le(v2, v2_minus_1)); HWY_ASSERT_MASK_EQ(d, Gt(v2, v2_minus_1), Ge(v2, v2_minus_1)); HWY_ASSERT_MASK_EQ(d, Lt(v2_minus_1, v2), Le(v2_minus_1, v2)); HWY_ASSERT_MASK_EQ(d, Gt(v2_minus_1, v2), Ge(v2_minus_1, v2)); } }; HWY_NOINLINE void TestAllWeakUnsigned() { ForUnsignedTypes(ForPartialVectors()); } struct TestStrictInt { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const T min = LimitsMin(); const T max = LimitsMax(); const Vec v0 = Zero(d); const Vec v2 = And(Iota(d, 2), Set(d, 127)); // 0..127 const Vec vn = Sub(Neg(v2), Set(d, 1)); // -1..-128 const Mask mask_false = MaskFalse(d); const Mask mask_true = MaskTrue(d); // Individual values of interest HWY_ENSURE_GREATER(d, 2, 1); HWY_ENSURE_GREATER(d, 1, 0); HWY_ENSURE_GREATER(d, 0, -1); HWY_ENSURE_GREATER(d, -1, -2); HWY_ENSURE_GREATER(d, max, max / 2); HWY_ENSURE_GREATER(d, max, 1); HWY_ENSURE_GREATER(d, max, 0); HWY_ENSURE_GREATER(d, max, -1); HWY_ENSURE_GREATER(d, max, min); HWY_ENSURE_GREATER(d, 0, min); HWY_ENSURE_GREATER(d, min / 2, min); // Also use Iota to ensure lanes are independent HWY_ASSERT_MASK_EQ(d, mask_true, Gt(v2, vn)); HWY_ASSERT_MASK_EQ(d, mask_true, Lt(vn, v2)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt(v2, vn)); HWY_ASSERT_MASK_EQ(d, mask_false, Gt(vn, v2)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt(v0, v0)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt(v2, v2)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt(vn, vn)); HWY_ASSERT_MASK_EQ(d, mask_false, Gt(v0, v0)); HWY_ASSERT_MASK_EQ(d, mask_false, Gt(v2, v2)); HWY_ASSERT_MASK_EQ(d, mask_false, Gt(vn, vn)); } }; // S-SSE3 bug (#795): same upper, differing MSB in lower struct TestStrictInt64 { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto m0 = MaskFalse(d); const auto m1 = MaskTrue(d); HWY_ASSERT_MASK_EQ(d, m0, Lt(Set(d, 0x380000000LL), Set(d, 0x300000001LL))); HWY_ASSERT_MASK_EQ(d, m1, Lt(Set(d, 0xF00000000LL), Set(d, 0xF80000000LL))); HWY_ASSERT_MASK_EQ(d, m1, Lt(Set(d, 0xF00000000LL), Set(d, 0xF80000001LL))); } }; HWY_NOINLINE void TestAllStrictInt() { ForSignedTypes(ForPartialVectors()); ForPartialVectors()(int64_t()); } struct TestWeakInt { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const T min = LimitsMin(); const T max = LimitsMax(); const Vec v0 = Zero(d); const Vec v1 = Set(d, 1); const Vec v2 = And(Iota(d, 2), Set(d, 127)); // 0..127 const Vec vn = Sub(Neg(v2), Set(d, 1)); // -1..-128 const auto mask_false = MaskFalse(d); const auto mask_true = MaskTrue(d); // Individual values of interest HWY_ENSURE_GREATER_OR_EQUAL(d, 2, 2); HWY_ENSURE_GREATER_OR_EQUAL(d, 2, 1); HWY_ENSURE_GREATER_OR_EQUAL(d, 1, 1); HWY_ENSURE_GREATER_OR_EQUAL(d, 1, 0); HWY_ENSURE_GREATER_OR_EQUAL(d, 0, 0); HWY_ENSURE_GREATER_OR_EQUAL(d, 0, -1); HWY_ENSURE_GREATER_OR_EQUAL(d, -1, -1); HWY_ENSURE_GREATER_OR_EQUAL(d, -1, -2); HWY_ENSURE_GREATER_OR_EQUAL(d, -2, -2); HWY_ENSURE_GREATER_OR_EQUAL(d, max, max); HWY_ENSURE_GREATER_OR_EQUAL(d, max, max / 2); HWY_ENSURE_GREATER_OR_EQUAL(d, max, 1); HWY_ENSURE_GREATER_OR_EQUAL(d, max, 0); HWY_ENSURE_GREATER_OR_EQUAL(d, max, -1); HWY_ENSURE_GREATER_OR_EQUAL(d, max, min); HWY_ENSURE_GREATER_OR_EQUAL(d, 0, min); HWY_ENSURE_GREATER_OR_EQUAL(d, min / 2, min); HWY_ENSURE_GREATER_OR_EQUAL(d, min, min); // Also use Iota to ensure lanes are independent HWY_ASSERT_MASK_EQ(d, mask_true, Ge(v2, vn)); HWY_ASSERT_MASK_EQ(d, mask_true, Le(vn, v2)); HWY_ASSERT_MASK_EQ(d, mask_false, Le(v2, vn)); HWY_ASSERT_MASK_EQ(d, mask_false, Ge(vn, v2)); HWY_ASSERT_MASK_EQ(d, mask_true, Le(v0, v0)); HWY_ASSERT_MASK_EQ(d, mask_true, Le(v2, v2)); HWY_ASSERT_MASK_EQ(d, mask_true, Le(vn, vn)); HWY_ASSERT_MASK_EQ(d, mask_true, Ge(v0, v0)); HWY_ASSERT_MASK_EQ(d, mask_true, Ge(v2, v2)); HWY_ASSERT_MASK_EQ(d, mask_true, Ge(vn, vn)); const auto v2_plus_1 = Add(v2, v1); HWY_ASSERT_MASK_EQ(d, Lt(v2, v2_plus_1), Le(v2, v2_plus_1)); HWY_ASSERT_MASK_EQ(d, Gt(v2, v2_plus_1), Ge(v2, v2_plus_1)); HWY_ASSERT_MASK_EQ(d, Lt(v2_plus_1, v2), Le(v2_plus_1, v2)); HWY_ASSERT_MASK_EQ(d, Gt(v2_plus_1, v2), Ge(v2_plus_1, v2)); const auto v2_minus_1 = Sub(v2, v1); HWY_ASSERT_MASK_EQ(d, Lt(v2, v2_minus_1), Le(v2, v2_minus_1)); HWY_ASSERT_MASK_EQ(d, Gt(v2, v2_minus_1), Ge(v2, v2_minus_1)); HWY_ASSERT_MASK_EQ(d, Lt(v2_minus_1, v2), Le(v2_minus_1, v2)); HWY_ASSERT_MASK_EQ(d, Gt(v2_minus_1, v2), Ge(v2_minus_1, v2)); } }; HWY_NOINLINE void TestAllWeakInt() { ForSignedTypes(ForPartialVectors()); } struct TestStrictFloat { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const T huge_pos = ConvertScalarTo(sizeof(T) >= 4 ? 1E36 : 1E4); const T huge_neg = -huge_pos; const Vec v0 = Zero(d); const Vec v2 = Iota(d, 2); const Vec vn = Neg(v2); const Mask mask_false = MaskFalse(d); const Mask mask_true = MaskTrue(d); // Individual values of interest HWY_ENSURE_GREATER(d, 2, 1); HWY_ENSURE_GREATER(d, 1, 0); HWY_ENSURE_GREATER(d, 0, -1); HWY_ENSURE_GREATER(d, -1, -2); HWY_ENSURE_GREATER(d, huge_pos, 1); HWY_ENSURE_GREATER(d, huge_pos, 0); HWY_ENSURE_GREATER(d, huge_pos, -1); HWY_ENSURE_GREATER(d, huge_pos, huge_neg); HWY_ENSURE_GREATER(d, 0, huge_neg); // Also use Iota to ensure lanes are independent HWY_ASSERT_MASK_EQ(d, mask_true, Gt(v2, vn)); HWY_ASSERT_MASK_EQ(d, mask_true, Lt(vn, v2)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt(v2, vn)); HWY_ASSERT_MASK_EQ(d, mask_false, Gt(vn, v2)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt(v0, v0)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt(v2, v2)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt(vn, vn)); HWY_ASSERT_MASK_EQ(d, mask_false, Gt(v0, v0)); HWY_ASSERT_MASK_EQ(d, mask_false, Gt(v2, v2)); HWY_ASSERT_MASK_EQ(d, mask_false, Gt(vn, vn)); } }; HWY_NOINLINE void TestAllStrictFloat() { ForFloatTypes(ForPartialVectors()); } struct TestWeakFloat { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const Vec v2 = Iota(d, 2); const Vec vn = Iota(d, -ConvertScalarTo(Lanes(d))); const Mask mask_false = MaskFalse(d); const Mask mask_true = MaskTrue(d); HWY_ASSERT_MASK_EQ(d, mask_true, Ge(v2, v2)); HWY_ASSERT_MASK_EQ(d, mask_true, Le(vn, vn)); HWY_ASSERT_MASK_EQ(d, mask_true, Ge(v2, vn)); HWY_ASSERT_MASK_EQ(d, mask_true, Le(vn, v2)); HWY_ASSERT_MASK_EQ(d, mask_false, Le(v2, vn)); HWY_ASSERT_MASK_EQ(d, mask_false, Ge(vn, v2)); } }; HWY_NOINLINE void TestAllWeakFloat() { ForFloatTypes(ForPartialVectors()); } template static HWY_NOINLINE Vec Make128(D d, uint64_t hi, uint64_t lo) { alignas(16) uint64_t in[2]; in[0] = lo; in[1] = hi; return LoadDup128(d, in); } struct TestLt128 { template HWY_NOINLINE void operator()(T /*unused*/, D d) { using V = Vec; const V v00 = Zero(d); const V v01 = Make128(d, 0, 1); const V v10 = Make128(d, 1, 0); const V v11 = Add(v01, v10); const auto mask_false = MaskFalse(d); const auto mask_true = MaskTrue(d); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128(d, v00, v00)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128(d, v01, v01)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128(d, v10, v10)); HWY_ASSERT_MASK_EQ(d, mask_true, Lt128(d, v00, v01)); HWY_ASSERT_MASK_EQ(d, mask_true, Lt128(d, v01, v10)); HWY_ASSERT_MASK_EQ(d, mask_true, Lt128(d, v01, v11)); // Reversed order HWY_ASSERT_MASK_EQ(d, mask_false, Lt128(d, v01, v00)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128(d, v10, v01)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128(d, v11, v01)); // Also check 128-bit blocks are independent const V iota = Iota(d, 1); HWY_ASSERT_MASK_EQ(d, mask_true, Lt128(d, iota, Add(iota, v01))); HWY_ASSERT_MASK_EQ(d, mask_true, Lt128(d, iota, Add(iota, v10))); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128(d, Add(iota, v01), iota)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128(d, Add(iota, v10), iota)); // Max value const V vm = Make128(d, LimitsMax(), LimitsMax()); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128(d, vm, vm)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128(d, vm, v00)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128(d, vm, v01)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128(d, vm, v10)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128(d, vm, v11)); HWY_ASSERT_MASK_EQ(d, mask_true, Lt128(d, v00, vm)); HWY_ASSERT_MASK_EQ(d, mask_true, Lt128(d, v01, vm)); HWY_ASSERT_MASK_EQ(d, mask_true, Lt128(d, v10, vm)); HWY_ASSERT_MASK_EQ(d, mask_true, Lt128(d, v11, vm)); } }; HWY_NOINLINE void TestAllLt128() { ForGEVectors<128, TestLt128>()(uint64_t()); } struct TestLt128Upper { template HWY_NOINLINE void operator()(T /*unused*/, D d) { using V = Vec; const V v00 = Zero(d); const V v01 = Make128(d, 0, 1); const V v10 = Make128(d, 1, 0); const V v11 = Add(v01, v10); const auto mask_false = MaskFalse(d); const auto mask_true = MaskTrue(d); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128Upper(d, v00, v00)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128Upper(d, v01, v01)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128Upper(d, v10, v10)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128Upper(d, v00, v01)); HWY_ASSERT_MASK_EQ(d, mask_true, Lt128Upper(d, v01, v10)); HWY_ASSERT_MASK_EQ(d, mask_true, Lt128Upper(d, v01, v11)); // Reversed order HWY_ASSERT_MASK_EQ(d, mask_false, Lt128Upper(d, v01, v00)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128Upper(d, v10, v01)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128Upper(d, v11, v01)); // Also check 128-bit blocks are independent const V iota = Iota(d, 1); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128Upper(d, iota, Add(iota, v01))); HWY_ASSERT_MASK_EQ(d, mask_true, Lt128Upper(d, iota, Add(iota, v10))); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128Upper(d, Add(iota, v01), iota)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128Upper(d, Add(iota, v10), iota)); // Max value const V vm = Make128(d, LimitsMax(), LimitsMax()); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128Upper(d, vm, vm)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128Upper(d, vm, v00)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128Upper(d, vm, v01)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128Upper(d, vm, v10)); HWY_ASSERT_MASK_EQ(d, mask_false, Lt128Upper(d, vm, v11)); HWY_ASSERT_MASK_EQ(d, mask_true, Lt128Upper(d, v00, vm)); HWY_ASSERT_MASK_EQ(d, mask_true, Lt128Upper(d, v01, vm)); HWY_ASSERT_MASK_EQ(d, mask_true, Lt128Upper(d, v10, vm)); HWY_ASSERT_MASK_EQ(d, mask_true, Lt128Upper(d, v11, vm)); } }; HWY_NOINLINE void TestAllLt128Upper() { ForGEVectors<128, TestLt128Upper>()(uint64_t()); } struct TestEq128 { // Also Ne128 template HWY_NOINLINE void operator()(T /*unused*/, D d) { using V = Vec; const V v00 = Zero(d); const V v01 = Make128(d, 0, 1); const V v10 = Make128(d, 1, 0); const V v11 = Add(v01, v10); const auto mask_false = MaskFalse(d); const auto mask_true = MaskTrue(d); HWY_ASSERT_MASK_EQ(d, mask_true, Eq128(d, v00, v00)); HWY_ASSERT_MASK_EQ(d, mask_true, Eq128(d, v01, v01)); HWY_ASSERT_MASK_EQ(d, mask_true, Eq128(d, v10, v10)); HWY_ASSERT_MASK_EQ(d, mask_false, Ne128(d, v00, v00)); HWY_ASSERT_MASK_EQ(d, mask_false, Ne128(d, v01, v01)); HWY_ASSERT_MASK_EQ(d, mask_false, Ne128(d, v10, v10)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128(d, v00, v01)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128(d, v01, v10)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128(d, v01, v11)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128(d, v00, v01)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128(d, v01, v10)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128(d, v01, v11)); // Reversed order HWY_ASSERT_MASK_EQ(d, mask_false, Eq128(d, v01, v00)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128(d, v10, v01)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128(d, v11, v01)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128(d, v01, v00)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128(d, v10, v01)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128(d, v11, v01)); // Also check 128-bit blocks are independent const V iota = Iota(d, 1); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128(d, iota, Add(iota, v01))); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128(d, iota, Add(iota, v10))); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128(d, Add(iota, v01), iota)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128(d, Add(iota, v10), iota)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128(d, iota, Add(iota, v01))); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128(d, iota, Add(iota, v10))); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128(d, Add(iota, v01), iota)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128(d, Add(iota, v10), iota)); // Max value const V vm = Make128(d, LimitsMax(), LimitsMax()); HWY_ASSERT_MASK_EQ(d, mask_true, Eq128(d, vm, vm)); HWY_ASSERT_MASK_EQ(d, mask_false, Ne128(d, vm, vm)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128(d, vm, v00)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128(d, vm, v01)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128(d, vm, v10)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128(d, vm, v11)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128(d, v00, vm)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128(d, v01, vm)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128(d, v10, vm)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128(d, v11, vm)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128(d, vm, v00)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128(d, vm, v01)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128(d, vm, v10)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128(d, vm, v11)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128(d, v00, vm)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128(d, v01, vm)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128(d, v10, vm)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128(d, v11, vm)); } }; HWY_NOINLINE void TestAllEq128() { ForGEVectors<128, TestEq128>()(uint64_t()); } struct TestEq128Upper { // Also Ne128Upper template HWY_NOINLINE void operator()(T /*unused*/, D d) { using V = Vec; const V v00 = Zero(d); const V v01 = Make128(d, 0, 1); const V v10 = Make128(d, 1, 0); const V v11 = Add(v01, v10); const auto mask_false = MaskFalse(d); const auto mask_true = MaskTrue(d); HWY_ASSERT_MASK_EQ(d, mask_true, Eq128Upper(d, v00, v00)); HWY_ASSERT_MASK_EQ(d, mask_true, Eq128Upper(d, v01, v01)); HWY_ASSERT_MASK_EQ(d, mask_true, Eq128Upper(d, v10, v10)); HWY_ASSERT_MASK_EQ(d, mask_false, Ne128Upper(d, v00, v00)); HWY_ASSERT_MASK_EQ(d, mask_false, Ne128Upper(d, v01, v01)); HWY_ASSERT_MASK_EQ(d, mask_false, Ne128Upper(d, v10, v10)); HWY_ASSERT_MASK_EQ(d, mask_true, Eq128Upper(d, v00, v01)); HWY_ASSERT_MASK_EQ(d, mask_false, Ne128Upper(d, v00, v01)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128Upper(d, v01, v10)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128Upper(d, v01, v11)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128Upper(d, v01, v10)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128Upper(d, v01, v11)); // Reversed order HWY_ASSERT_MASK_EQ(d, mask_true, Eq128Upper(d, v01, v00)); HWY_ASSERT_MASK_EQ(d, mask_false, Ne128Upper(d, v01, v00)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128Upper(d, v10, v01)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128Upper(d, v11, v01)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128Upper(d, v10, v01)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128Upper(d, v11, v01)); // Also check 128-bit blocks are independent const V iota = Iota(d, 1); HWY_ASSERT_MASK_EQ(d, mask_true, Eq128Upper(d, iota, Add(iota, v01))); HWY_ASSERT_MASK_EQ(d, mask_false, Ne128Upper(d, iota, Add(iota, v01))); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128Upper(d, iota, Add(iota, v10))); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128Upper(d, iota, Add(iota, v10))); HWY_ASSERT_MASK_EQ(d, mask_true, Eq128Upper(d, Add(iota, v01), iota)); HWY_ASSERT_MASK_EQ(d, mask_false, Ne128Upper(d, Add(iota, v01), iota)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128Upper(d, Add(iota, v10), iota)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128Upper(d, Add(iota, v10), iota)); // Max value const V vm = Make128(d, LimitsMax(), LimitsMax()); HWY_ASSERT_MASK_EQ(d, mask_true, Eq128Upper(d, vm, vm)); HWY_ASSERT_MASK_EQ(d, mask_false, Ne128Upper(d, vm, vm)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128Upper(d, vm, v00)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128Upper(d, vm, v01)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128Upper(d, vm, v10)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128Upper(d, vm, v11)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128Upper(d, v00, vm)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128Upper(d, v01, vm)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128Upper(d, v10, vm)); HWY_ASSERT_MASK_EQ(d, mask_false, Eq128Upper(d, v11, vm)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128Upper(d, vm, v00)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128Upper(d, vm, v01)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128Upper(d, vm, v10)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128Upper(d, vm, v11)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128Upper(d, v00, vm)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128Upper(d, v01, vm)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128Upper(d, v10, vm)); HWY_ASSERT_MASK_EQ(d, mask_true, Ne128Upper(d, v11, vm)); } }; HWY_NOINLINE void TestAllEq128Upper() { ForGEVectors<128, TestEq128Upper>()(uint64_t()); } // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE namespace hwy { HWY_BEFORE_TEST(HwyCompareTest); HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllEquality); HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllStrictUnsigned); HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllStrictInt); HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllStrictFloat); HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllWeakUnsigned); HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllWeakInt); HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllWeakFloat); HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllLt128); HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllLt128Upper); HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllEq128); HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllEq128Upper); } // namespace hwy #endif