// Copyright 2019 Google LLC // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Tests some ops specific to floating-point types (Div, Round etc.) #include #include // std::ceil, std::floor #include "hwy/base.h" #undef HWY_TARGET_INCLUDE #define HWY_TARGET_INCLUDE "tests/float_test.cc" #include "hwy/foreach_target.h" // IWYU pragma: keep #include "hwy/highway.h" #include "hwy/tests/test_util-inl.h" HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { HWY_NOINLINE void TestAllF16FromF32() { const FixedTag d1; // +/- 0 HWY_ASSERT_EQ(0, BitCastScalar(hwy::F16FromF32(0.0f))); HWY_ASSERT_EQ(0x8000, BitCastScalar(hwy::F16FromF32(-0.0f))); // smallest f32 subnormal HWY_ASSERT_EQ(0, BitCastScalar(hwy::F16FromF32(5.87747175411E-39f))); HWY_ASSERT_EQ(0x8000, BitCastScalar(hwy::F16FromF32(-5.87747175411E-39f))); // largest f16 subnormal HWY_ASSERT_EQ(0x3FF, BitCastScalar(hwy::F16FromF32(6.0975552E-5f))); HWY_ASSERT_EQ(0x83FF, BitCastScalar(hwy::F16FromF32(-6.0975552E-5f))); // smallest normalized f16 HWY_ASSERT_EQ(0x400, BitCastScalar(hwy::F16FromF32(6.103515625E-5f))); HWY_ASSERT_EQ(0x8400, BitCastScalar(hwy::F16FromF32(-6.103515625E-5f))); // rounding to nearest even HWY_ASSERT_EQ((15 << 10) + 0, // round down to even: 0[10..0] => 0 BitCastScalar(hwy::F16FromF32(1.00048828125f))); HWY_ASSERT_EQ((15 << 10) + 1, // round up: 0[1..1] => 1 BitCastScalar(hwy::F16FromF32(1.00097644329f))); HWY_ASSERT_EQ((15 << 10) + 2, // round up to even: 1[10..0] => 10 BitCastScalar(hwy::F16FromF32(1.00146484375f))); // greater than f16 max => inf HWY_ASSERT_EQ(0x7C00, BitCastScalar(hwy::F16FromF32(7E4f))); HWY_ASSERT_EQ(0xFC00, BitCastScalar(hwy::F16FromF32(-7E4f))); // infinity HWY_ASSERT_EQ(0x7C00, BitCastScalar(hwy::F16FromF32(GetLane(Inf(d1))))); HWY_ASSERT_EQ(0xFC00, BitCastScalar(hwy::F16FromF32(-GetLane(Inf(d1))))); // NaN HWY_ASSERT_EQ(0x7FFF, BitCastScalar(hwy::F16FromF32(GetLane(NaN(d1))))); HWY_ASSERT_EQ(0xFFFF, BitCastScalar(hwy::F16FromF32(-GetLane(NaN(d1))))); } HWY_NOINLINE void TestAllF32FromF16() { const FixedTag d1; // +/- 0 HWY_ASSERT_EQ(0.0f, hwy::F32FromF16(BitCastScalar(uint16_t{0}))); HWY_ASSERT_EQ(-0.0f, hwy::F32FromF16(BitCastScalar(uint16_t{0x8000}))); // largest f16 subnormal HWY_ASSERT_EQ(6.0975552E-5f, hwy::F32FromF16(BitCastScalar(uint16_t{0x3FF}))); HWY_ASSERT_EQ(-6.0975552E-5f, hwy::F32FromF16(BitCastScalar(uint16_t{0x83FF}))); // smallest normalized f16 HWY_ASSERT_EQ(6.103515625E-5f, hwy::F32FromF16(BitCastScalar(uint16_t{0x400}))); HWY_ASSERT_EQ(-6.103515625E-5f, hwy::F32FromF16(BitCastScalar(uint16_t{0x8400}))); // infinity HWY_ASSERT_EQ(GetLane(Inf(d1)), hwy::F32FromF16(BitCastScalar(uint16_t{0x7C00}))); HWY_ASSERT_EQ(-GetLane(Inf(d1)), hwy::F32FromF16(BitCastScalar(uint16_t{0xFC00}))); // NaN HWY_ASSERT_EQ(GetLane(NaN(d1)), hwy::F32FromF16(BitCastScalar(uint16_t{0x7FFF}))); HWY_ASSERT_EQ(-GetLane(NaN(d1)), hwy::F32FromF16(BitCastScalar(uint16_t{0xFFFF}))); } struct TestDiv { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto v = Iota(d, -2); const auto v1 = Set(d, ConvertScalarTo(1)); // Unchanged after division by 1. HWY_ASSERT_VEC_EQ(d, v, Div(v, v1)); const size_t N = Lanes(d); auto expected = AllocateAligned(N); HWY_ASSERT(expected); for (size_t i = 0; i < N; ++i) { expected[i] = ConvertScalarTo((static_cast(i) - 2.0) / 2.0); } HWY_ASSERT_VEC_EQ(d, expected.get(), Div(v, Set(d, ConvertScalarTo(2)))); } }; HWY_NOINLINE void TestAllDiv() { ForFloatTypes(ForPartialVectors()); } struct TestApproximateReciprocal { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto v = Iota(d, -2); const auto nonzero = IfThenElse(Eq(v, Zero(d)), Set(d, ConvertScalarTo(1)), v); const size_t N = Lanes(d); auto input = AllocateAligned(N); auto actual = AllocateAligned(N); HWY_ASSERT(input && actual); Store(nonzero, d, input.get()); Store(ApproximateReciprocal(nonzero), d, actual.get()); double max_l1 = 0.0; double worst_expected = 0.0; double worst_actual = 0.0; for (size_t i = 0; i < N; ++i) { const double expected = 1.0 / input[i]; const double l1 = ScalarAbs(expected - actual[i]); if (l1 > max_l1) { max_l1 = l1; worst_expected = expected; worst_actual = actual[i]; } } const double abs_worst_expected = ScalarAbs(worst_expected); if (abs_worst_expected > 1E-5) { const double max_rel = max_l1 / abs_worst_expected; fprintf(stderr, "max l1 %f rel %f (%f vs %f)\n", max_l1, max_rel, worst_expected, worst_actual); HWY_ASSERT(max_rel < 0.004); } } }; HWY_NOINLINE void TestAllApproximateReciprocal() { ForFloatTypes(ForPartialVectors()); } struct TestSquareRoot { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto vi = Iota(d, 0); HWY_ASSERT_VEC_EQ(d, vi, Sqrt(Mul(vi, vi))); } }; HWY_NOINLINE void TestAllSquareRoot() { ForFloatTypes(ForPartialVectors()); } struct TestReciprocalSquareRoot { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const Vec v = Set(d, ConvertScalarTo(123.0f)); const size_t N = Lanes(d); auto lanes = AllocateAligned(N); HWY_ASSERT(lanes); Store(ApproximateReciprocalSqrt(v), d, lanes.get()); for (size_t i = 0; i < N; ++i) { T err = ConvertScalarTo(ConvertScalarTo(lanes[i]) - 0.090166f); if (err < ConvertScalarTo(0)) err = -err; if (static_cast(err) >= 4E-4) { HWY_ABORT("Lane %d (%d): actual %f err %f\n", static_cast(i), static_cast(N), static_cast(lanes[i]), static_cast(err)); } } } }; HWY_NOINLINE void TestAllReciprocalSquareRoot() { ForFloatTypes(ForPartialVectors()); } template AlignedFreeUniquePtr RoundTestCases(T /*unused*/, D d, size_t& padded) { const T eps = Epsilon(); const T huge = ConvertScalarTo(sizeof(T) >= 4 ? 1E34 : 3E4); const T test_cases[] = { // +/- 1 ConvertScalarTo(1), ConvertScalarTo(-1), // +/- 0 ConvertScalarTo(0), ConvertScalarTo(-0), // near 0 ConvertScalarTo(0.4), ConvertScalarTo(-0.4), // +/- integer ConvertScalarTo(4), ConvertScalarTo(-32), // positive near limit ConvertScalarTo(MantissaEnd() - ConvertScalarTo(1.5)), ConvertScalarTo(MantissaEnd() + ConvertScalarTo(1.5)), // negative near limit ConvertScalarTo(-MantissaEnd() - ConvertScalarTo(1.5)), ConvertScalarTo(-MantissaEnd() + ConvertScalarTo(1.5)), // positive tiebreak ConvertScalarTo(1.5), ConvertScalarTo(2.5), // negative tiebreak ConvertScalarTo(-1.5), ConvertScalarTo(-2.5), // positive +/- delta ConvertScalarTo(2.0001), ConvertScalarTo(3.9999), // negative +/- delta ConvertScalarTo(-999.9999), ConvertScalarTo(-998.0001), // positive +/- epsilon ConvertScalarTo(ConvertScalarTo(1) + eps), ConvertScalarTo(ConvertScalarTo(1) - eps), // negative +/- epsilon ConvertScalarTo(ConvertScalarTo(-1) + eps), ConvertScalarTo(ConvertScalarTo(-1) - eps), // +/- huge (but still fits in float) huge, -huge, // +/- infinity GetLane(Inf(d)), GetLane(Neg(Inf(d))), // qNaN GetLane(NaN(d))}; const size_t kNumTestCases = sizeof(test_cases) / sizeof(test_cases[0]); const size_t N = Lanes(d); padded = RoundUpTo(kNumTestCases, N); // allow loading whole vectors auto in = AllocateAligned(padded); auto expected = AllocateAligned(padded); HWY_ASSERT(in && expected); CopyBytes(test_cases, in.get(), kNumTestCases * sizeof(T)); ZeroBytes(in.get() + kNumTestCases, (padded - kNumTestCases) * sizeof(T)); return in; } struct TestRound { template HWY_NOINLINE void operator()(T t, D d) { size_t padded; auto in = RoundTestCases(t, d, padded); auto expected = AllocateAligned(padded); HWY_ASSERT(expected); for (size_t i = 0; i < padded; ++i) { // Avoid [std::]round, which does not round to nearest *even*. // NOTE: std:: version from C++11 cmath is not defined in RVV GCC, see // https://lists.freebsd.org/pipermail/freebsd-current/2014-January/048130.html // Cast to f32/64 because nearbyint does not support _Float16. #if HWY_HAVE_FLOAT64 const double f = ConvertScalarTo(in[i]); #else const float f = ConvertScalarTo(in[i]); #endif expected[i] = ConvertScalarTo(nearbyint(f)); } for (size_t i = 0; i < padded; i += Lanes(d)) { HWY_ASSERT_VEC_EQ(d, &expected[i], Round(Load(d, &in[i]))); } } }; HWY_NOINLINE void TestAllRound() { ForFloatTypes(ForPartialVectors()); } struct TestNearestInt { template HWY_NOINLINE void operator()(TF tf, const DF df) { using TI = MakeSigned; const RebindToSigned di; size_t padded; auto in = RoundTestCases(tf, df, padded); auto expected = AllocateAligned(padded); HWY_ASSERT(expected); constexpr double kMax = static_cast(LimitsMax()); for (size_t i = 0; i < padded; ++i) { if (ScalarIsNaN(in[i])) { // We replace NaN with 0 below (no_nan) expected[i] = 0; } else if (ScalarIsInf(in[i]) || static_cast(ScalarAbs(in[i])) >= kMax) { // Avoid undefined result for lrintf expected[i] = std::signbit(in[i]) ? LimitsMin() : LimitsMax(); } else { expected[i] = static_cast(lrintf(ConvertScalarTo(in[i]))); } } for (size_t i = 0; i < padded; i += Lanes(df)) { const auto v = Load(df, &in[i]); const auto no_nan = IfThenElse(Eq(v, v), v, Zero(df)); HWY_ASSERT_VEC_EQ(di, &expected[i], NearestInt(no_nan)); } } }; HWY_NOINLINE void TestAllNearestInt() { ForPartialVectors()(float()); } struct TestTrunc { template HWY_NOINLINE void operator()(T t, D d) { size_t padded; auto in = RoundTestCases(t, d, padded); auto expected = AllocateAligned(padded); HWY_ASSERT(expected); for (size_t i = 0; i < padded; ++i) { // NOTE: std:: version from C++11 cmath is not defined in RVV GCC, see // https://lists.freebsd.org/pipermail/freebsd-current/2014-January/048130.html // Cast to double because trunc does not support _Float16. expected[i] = ConvertScalarTo(trunc(ConvertScalarTo(in[i]))); } for (size_t i = 0; i < padded; i += Lanes(d)) { HWY_ASSERT_VEC_EQ(d, &expected[i], Trunc(Load(d, &in[i]))); } } }; HWY_NOINLINE void TestAllTrunc() { ForFloatTypes(ForPartialVectors()); } struct TestCeil { template HWY_NOINLINE void operator()(T t, D d) { size_t padded; auto in = RoundTestCases(t, d, padded); auto expected = AllocateAligned(padded); HWY_ASSERT(expected); for (size_t i = 0; i < padded; ++i) { // Cast to double because ceil does not support _Float16. expected[i] = ConvertScalarTo(std::ceil(ConvertScalarTo(in[i]))); } for (size_t i = 0; i < padded; i += Lanes(d)) { HWY_ASSERT_VEC_EQ(d, &expected[i], Ceil(Load(d, &in[i]))); } } }; HWY_NOINLINE void TestAllCeil() { ForFloatTypes(ForPartialVectors()); } struct TestFloor { template HWY_NOINLINE void operator()(T t, D d) { size_t padded; auto in = RoundTestCases(t, d, padded); auto expected = AllocateAligned(padded); HWY_ASSERT(expected); for (size_t i = 0; i < padded; ++i) { // Cast to double because floor does not support _Float16. expected[i] = ConvertScalarTo(std::floor(ConvertScalarTo(in[i]))); } for (size_t i = 0; i < padded; i += Lanes(d)) { HWY_ASSERT_VEC_EQ(d, &expected[i], Floor(Load(d, &in[i]))); } } }; HWY_NOINLINE void TestAllFloor() { ForFloatTypes(ForPartialVectors()); } struct TestAbsDiff { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const size_t N = Lanes(d); auto in_lanes_a = AllocateAligned(N); auto in_lanes_b = AllocateAligned(N); auto out_lanes = AllocateAligned(N); HWY_ASSERT(in_lanes_a && in_lanes_b && out_lanes); for (size_t i = 0; i < N; ++i) { in_lanes_a[i] = ConvertScalarTo((i ^ 1u) << i); in_lanes_b[i] = ConvertScalarTo(i << i); out_lanes[i] = ConvertScalarTo( ScalarAbs(ConvertScalarTo(in_lanes_a[i] - in_lanes_b[i]))); } const auto a = Load(d, in_lanes_a.get()); const auto b = Load(d, in_lanes_b.get()); const auto expected = Load(d, out_lanes.get()); HWY_ASSERT_VEC_EQ(d, expected, AbsDiff(a, b)); HWY_ASSERT_VEC_EQ(d, expected, AbsDiff(b, a)); } }; HWY_NOINLINE void TestAllAbsDiff() { ForFloatTypes(ForPartialVectors()); } // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE namespace hwy { HWY_BEFORE_TEST(HwyFloatTest); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllF16FromF32); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllF32FromF16); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllDiv); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllApproximateReciprocal); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllSquareRoot); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllReciprocalSquareRoot); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllRound); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllNearestInt); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllTrunc); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllCeil); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllFloor); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllAbsDiff); } // namespace hwy #endif