// Copyright 2019 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Target-specific helper functions for use by *_test.cc.

#include <stddef.h>
#include <string.h>  // memset

// IWYU pragma: begin_exports
#include "hwy/aligned_allocator.h"
#include "hwy/base.h"
#include "hwy/detect_targets.h"
#include "hwy/per_target.h"
#include "hwy/targets.h"
#include "hwy/tests/hwy_gtest.h"
#include "hwy/tests/test_util.h"
// IWYU pragma: end_exports

// After test_util (also includes highway.h)
#include "hwy/print-inl.h"

// Per-target include guard
// clang-format off
#if defined(HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_) == defined(HWY_TARGET_TOGGLE)  // NOLINT
// clang-format on
#ifdef HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
#undef HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
#else
#define HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
#endif

HWY_BEFORE_NAMESPACE();
namespace hwy {
namespace HWY_NAMESPACE {

// Like Iota, but avoids wrapping around to negative integers.
template <class D, HWY_IF_FLOAT_D(D)>
HWY_INLINE Vec<D> PositiveIota(D d) {
  return Iota(d, 1);
}
template <class D, HWY_IF_NOT_FLOAT_D(D)>
HWY_INLINE Vec<D> PositiveIota(D d) {
  const auto vi = Iota(d, 1);
  return Max(And(vi, Set(d, LimitsMax<TFromD<D>>())),
             Set(d, static_cast<TFromD<D>>(1)));
}

// Same as Iota, but supports bf16. This is possibly too expensive for general
// use, but fine for tests.
template <class D, typename First, HWY_IF_NOT_SPECIAL_FLOAT_D(D)>
VFromD<D> IotaForSpecial(D d, First first) {
  return Iota(d, first);
}

#if HWY_HAVE_FLOAT16
template <class D, typename First, HWY_IF_F16_D(D)>
VFromD<D> IotaForSpecial(D d, First first) {
  return Iota(d, first);
}
#else   // !HWY_HAVE_FLOAT16
template <class D, typename First, HWY_IF_F16_D(D), HWY_IF_V_SIZE_GT_D(D, 8)>
VFromD<D> IotaForSpecial(D d, First first) {
  const Repartition<float, D> df;
  const size_t NW = Lanes(d) / 2;
  const Half<D> dh;
  const float first2 = static_cast<float>(first) + static_cast<float>(NW);
  return Combine(d, DemoteTo(dh, Iota(df, first2)),
                 DemoteTo(dh, Iota(df, first)));
  // TODO(janwas): enable when supported for f16
  // return OrderedDemote2To(d, Iota(df, first), Iota(df, first + NW));
}

// For partial vectors, a single f32 vector is enough, and the prior overload
// might not be able to Repartition.
template <class D, typename First, HWY_IF_F16_D(D), HWY_IF_V_SIZE_LE_D(D, 8)>
VFromD<D> IotaForSpecial(D d, First first) {
  const Rebind<float, D> df;
  return DemoteTo(d, Iota(df, first));
}
#endif  // HWY_HAVE_FLOAT16

template <class D, typename First, HWY_IF_BF16_D(D), HWY_IF_V_SIZE_GT_D(D, 8)>
VFromD<D> IotaForSpecial(D d, First first) {
  const Repartition<float, D> df;
  const float first1 = ConvertScalarTo<float>(first);
  const float first2 = first1 + static_cast<float>(Lanes(d) / 2);
  return OrderedDemote2To(d, Iota(df, first1), Iota(df, first2));
}

// For partial vectors, a single f32 vector is enough, and the prior overload
// might not be able to Repartition.
template <class D, typename First, HWY_IF_BF16_D(D), HWY_IF_V_SIZE_LE_D(D, 8),
          HWY_IF_LANES_GT_D(D, 1)>
VFromD<D> IotaForSpecial(D d, First first) {
  const Rebind<float, D> df;
  return DemoteTo(d, Iota(df, first));
}

// OrderedDemote2To does not work for single lanes, so special-case that.
template <class D, typename First, HWY_IF_BF16_D(D), HWY_IF_LANES_D(D, 1)>
VFromD<D> IotaForSpecial(D d, First first) {
  const Rebind<float, D> df;
  return DemoteTo(d, Set(df, static_cast<float>(first)));
}

// Compare expected array to vector.
// TODO(b/287462770): inline to work around incorrect SVE codegen.
template <class D, typename T = TFromD<D>>
HWY_INLINE void AssertVecEqual(D d, const T* expected, Vec<D> actual,
                               const char* filename, const int line) {
  const size_t N = Lanes(d);
  auto actual_lanes = AllocateAligned<T>(N);
  Store(actual, d, actual_lanes.get());

  const auto info = hwy::detail::MakeTypeInfo<T>();
  const char* target_name = hwy::TargetName(HWY_TARGET);
  hwy::detail::AssertArrayEqual(info, expected, actual_lanes.get(), N,
                                target_name, filename, line);
}

// Compare expected vector to vector.
// TODO(b/287462770): inline to work around incorrect SVE codegen.
template <class D, typename T = TFromD<D>>
HWY_INLINE void AssertVecEqual(D d, Vec<D> expected, Vec<D> actual,
                               const char* filename, int line) {
  const size_t N = Lanes(d);
  auto expected_lanes = AllocateAligned<T>(N);
  auto actual_lanes = AllocateAligned<T>(N);
  Store(expected, d, expected_lanes.get());
  Store(actual, d, actual_lanes.get());

  const auto info = hwy::detail::MakeTypeInfo<T>();
  const char* target_name = hwy::TargetName(HWY_TARGET);
  hwy::detail::AssertArrayEqual(info, expected_lanes.get(), actual_lanes.get(),
                                N, target_name, filename, line);
}

// Only checks the valid mask elements (those whose index < Lanes(d)).
template <class D>
HWY_NOINLINE void AssertMaskEqual(D d, VecArg<Mask<D>> a, VecArg<Mask<D>> b,
                                  const char* filename, int line) {
  // lvalues prevented MSAN failure in farm_sve.
  const Vec<D> va = VecFromMask(d, a);
  const Vec<D> vb = VecFromMask(d, b);
  AssertVecEqual(d, va, vb, filename, line);

  const char* target_name = hwy::TargetName(HWY_TARGET);
  AssertEqual(CountTrue(d, a), CountTrue(d, b), target_name, filename, line);
  AssertEqual(AllTrue(d, a), AllTrue(d, b), target_name, filename, line);
  AssertEqual(AllFalse(d, a), AllFalse(d, b), target_name, filename, line);

  const size_t N = Lanes(d);
#if HWY_TARGET == HWY_SCALAR
  const Rebind<uint8_t, D> d8;
#else
  const Repartition<uint8_t, D> d8;
#endif
  const size_t N8 = Lanes(d8);
  auto bits_a = AllocateAligned<uint8_t>(HWY_MAX(size_t{8}, N8));
  auto bits_b = AllocateAligned<uint8_t>(size_t{HWY_MAX(8, N8)});
  memset(bits_a.get(), 0, N8);
  memset(bits_b.get(), 0, N8);
  const size_t num_bytes_a = StoreMaskBits(d, a, bits_a.get());
  const size_t num_bytes_b = StoreMaskBits(d, b, bits_b.get());
  AssertEqual(num_bytes_a, num_bytes_b, target_name, filename, line);
  size_t i = 0;
  // First check whole bytes (if that many elements are still valid)
  for (; i < N / 8; ++i) {
    if (bits_a[i] != bits_b[i]) {
      fprintf(stderr, "Mismatch in byte %d: %d != %d\n", static_cast<int>(i),
              bits_a[i], bits_b[i]);
      Print(d8, "expect", Load(d8, bits_a.get()), 0, N8);
      Print(d8, "actual", Load(d8, bits_b.get()), 0, N8);
      hwy::Abort(filename, line, "Masks not equal");
    }
  }
  // Then the valid bit(s) in the last byte.
  const size_t remainder = N % 8;
  if (remainder != 0) {
    const int mask = (1 << remainder) - 1;
    const int valid_a = bits_a[i] & mask;
    const int valid_b = bits_b[i] & mask;
    if (valid_a != valid_b) {
      fprintf(stderr, "Mismatch in last byte %d: %d != %d\n",
              static_cast<int>(i), valid_a, valid_b);
      Print(d8, "expect", Load(d8, bits_a.get()), 0, N8);
      Print(d8, "actual", Load(d8, bits_b.get()), 0, N8);
      hwy::Abort(filename, line, "Masks not equal");
    }
  }
}
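
// Example sketch (hypothetical test body, not part of this header; `d` and
// the comparison threshold are assumptions for illustration): when lanes are
// in ascending order, e.g. from PositiveIota above, a comparison mask equals
// a FirstN mask with the same count, so the two can be cross-checked:
//   const auto v = PositiveIota(d);
//   const auto m_lt = Lt(v, Set(d, ConvertScalarTo<TFromD<D>>(4)));
//   const auto m_first = FirstN(d, CountTrue(d, m_lt));
//   AssertMaskEqual(d, m_lt, m_first, __FILE__, __LINE__);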
// Only sets valid elements (those whose index < Lanes(d)). This helps catch
// tests that are not masking off the (undefined) upper mask elements.
//
// TODO(janwas): with HWY_NOINLINE GCC zeros the upper half of AVX2 masks.
template <class D>
HWY_INLINE Mask<D> MaskTrue(const D d) {
  return FirstN(d, Lanes(d));
}

// MaskFalse is now implemented in x86_128-inl.h on AVX3, arm_sve-inl.h on SVE,
// rvv-inl.h on RVV, and generic_ops-inl.h on all other targets

#ifndef HWY_ASSERT_EQ

#define HWY_ASSERT_EQ(expected, actual)                                     \
  hwy::AssertEqual(expected, actual, hwy::TargetName(HWY_TARGET), __FILE__, \
                   __LINE__)

#define HWY_ASSERT_ARRAY_EQ(expected, actual, count)                          \
  hwy::AssertArrayEqual(expected, actual, count, hwy::TargetName(HWY_TARGET), \
                        __FILE__, __LINE__)

#define HWY_ASSERT_STRING_EQ(expected, actual)                          \
  hwy::AssertStringEqual(expected, actual, hwy::TargetName(HWY_TARGET), \
                         __FILE__, __LINE__)

#define HWY_ASSERT_VEC_EQ(d, expected, actual) \
  AssertVecEqual(d, expected, actual, __FILE__, __LINE__)

#define HWY_ASSERT_MASK_EQ(d, expected, actual) \
  AssertMaskEqual(d, expected, actual, __FILE__, __LINE__)

#endif  // HWY_ASSERT_EQ

namespace detail {

// Helpers for instantiating tests with combinations of lane types / counts.

// Calls Test for each CappedTag<T, N> where N is in [kMinArg, kMul * kMinArg]
// and the resulting Lanes() is in [min_lanes, max_lanes]. The upper bound
// is required to ensure capped vectors remain extendable. Implemented by
// recursively halving kMul until it is zero.
template <typename T, size_t kMul, size_t kMinArg, class Test, int kPow2 = 0>
struct ForeachCappedR {
  static void Do(size_t min_lanes, size_t max_lanes) {
    const CappedTag<T, kMul * kMinArg, kPow2> d;

    // If we already don't have enough lanes, stop.
    const size_t lanes = Lanes(d);
    if (lanes < min_lanes) return;

    if (lanes <= max_lanes) {
      Test()(T(), d);
    }
    ForeachCappedR<T, kMul / 2, kMinArg, Test, kPow2>::Do(min_lanes,
                                                          max_lanes);
  }
};

// Base case to stop the recursion.
template <typename T, size_t kMinArg, class Test, int kPow2>
struct ForeachCappedR<T, 0, kMinArg, Test, kPow2> {
  static void Do(size_t, size_t) {}
};

#if HWY_HAVE_SCALABLE

template <typename T>
constexpr int MinPow2() {
  // Highway follows RVV LMUL in that the smallest fraction is 1/8th (encoded
  // as kPow2 == -3). The fraction also must not result in zero lanes for the
  // smallest possible vector size, which is 128 bits even on RISC-V (with the
  // application processor profile).
  return HWY_MAX(-3, -static_cast<int>(CeilLog2(16 / sizeof(T))));
}

constexpr int MaxPow2() {
#if HWY_TARGET == HWY_RVV
  // Only RVV allows multiple vector registers.
  return 3;  // LMUL=8
#else
  // For all other platforms, we cannot exceed a full vector.
  return 0;
#endif
}

// Iterates kPow2 up to and including kMaxPow2. Below we specialize for
// valid=false to stop the iteration. The ForeachPow2Trim enables shorter
// argument lists, but use ForeachPow2 when you want to specify the actual min.
template <typename T, int kPow2, int kMaxPow2, bool valid, class Test>
struct ForeachPow2 {
  static void Do(size_t min_lanes) {
    const ScalableTag<T, kPow2> d;
    static_assert(MinPow2<T>() <= kPow2 && kPow2 <= MaxPow2(), "");

    if (Lanes(d) >= min_lanes) {
      Test()(T(), d);
    } else {
      fprintf(stderr, "%d lanes < %d: T=%d pow=%d\n",
              static_cast<int>(Lanes(d)), static_cast<int>(min_lanes),
              static_cast<int>(sizeof(T)), kPow2);
      HWY_ASSERT(min_lanes != 1);
    }

    ForeachPow2<T, kPow2 + 1, kMaxPow2, (kPow2 + 1) <= kMaxPow2, Test>::Do(
        min_lanes);
  }
};

// Base case to stop the iteration.
template <typename T, int kPow2, int kMaxPow2, class Test>
struct ForeachPow2<T, kPow2, kMaxPow2, false, Test> {
  static void Do(size_t) {}
};

// Iterates kPow2 over [MinPow2() + kAddMin, MaxPow2() - kSubMax].
// This is a wrapper that shortens argument lists, allowing users to skip the
// MinPow2 and MaxPow2. Nonzero kAddMin implies a minimum LMUL, and nonzero
// kSubMax reduces the maximum LMUL (e.g. for type promotions, where the result
// is larger, thus the input cannot already use the maximum LMUL).
template <typename T, int kAddMin, int kSubMax, class Test>
using ForeachPow2Trim =
    ForeachPow2<T, MinPow2<T>() + kAddMin, MaxPow2() - kSubMax,
                MinPow2<T>() + kAddMin <= MaxPow2() - kSubMax, Test>;

#else
// ForeachCappedR already handled all possible sizes.
#endif  // HWY_HAVE_SCALABLE

}  // namespace detail

// These 'adapters' call a test for all possible N or kPow2 subject to
// constraints such as "vectors must be extendable" or "vectors >= 128 bits".
// They may be called directly, or via For*Types. Note that for an adapter C,
// `C<Test>(T())` does not call the test - the correct invocation is
// `C<Test>()(T())`, or preferably `ForAllTypes(C<Test>())`. We check at
// runtime that operator() is called to prevent such bugs. Note that this is
// not thread-safe, but that is fine because adapters are typically local
// variables.
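
// Example sketch of the intended pattern (functor and test names are
// hypothetical, for illustration only; ForPartialVectors and ForAllTypes are
// defined further below):
//   struct TestMaxIdentity {
//     template <typename T, class D>
//     HWY_NOINLINE void operator()(T /*unused*/, D d) {
//       const auto v = PositiveIota(d);
//       HWY_ASSERT_VEC_EQ(d, v, Max(v, v));
//       HWY_ASSERT_EQ(Lanes(d), CountTrue(d, MaskTrue(d)));
//     }
//   };
//   HWY_NOINLINE void TestAllMaxIdentity() {
//     ForAllTypes(ForPartialVectors<TestMaxIdentity>());
//   }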
// Calls Test for all powers of two in [1, Lanes(d) * (RVV? 2 : 1)]. For
// interleaved_test; RVV segments are limited to 8 registers, so we can only
// go up to LMUL=2.
template <class Test>
class ForMaxPow2 {
  mutable bool called_ = false;

 public:
  ~ForMaxPow2() {
    if (!called_) {
      HWY_ABORT("Test is incorrect, ensure operator() is called");
    }
  }

  template <typename T>
  void operator()(T /*unused*/) const {
    called_ = true;
#if HWY_TARGET == HWY_SCALAR
    detail::ForeachCappedR<T, 1, 1, Test>::Do(1, 1);
#else
    detail::ForeachCappedR<T, HWY_LANES(T), 1, Test>::Do(
        1, Lanes(ScalableTag<T>()));

#if HWY_TARGET == HWY_RVV
    // To get LMUL=2 (kPow2=1), 2 is what we subtract from MaxPow2()=3.
    detail::ForeachPow2Trim<T, 0, 2, Test>::Do(1);
#elif HWY_HAVE_SCALABLE
    detail::ForeachPow2Trim<T, 0, 0, Test>::Do(1);
#endif
#endif  // HWY_TARGET == HWY_SCALAR
  }
};

// Calls Test for all powers of two in [1, Lanes(d) >> kPow2]. This is for
// ops that widen their input, e.g. Combine (not supported by HWY_SCALAR).
template <class Test, int kPow2 = 1>
class ForExtendableVectors {
  mutable bool called_ = false;

 public:
  ~ForExtendableVectors() {
    if (!called_) {
      HWY_ABORT("Test is incorrect, ensure operator() is called");
    }
  }

  template <typename T>
  void operator()(T /*unused*/) const {
    called_ = true;
    constexpr size_t kMaxCapped = HWY_LANES(T);
    // Skip CappedTag that are already full vectors.
    const size_t max_lanes = Lanes(ScalableTag<T>()) >> kPow2;
    (void)kMaxCapped;
    (void)max_lanes;
#if HWY_TARGET == HWY_SCALAR
    // not supported
#else
    constexpr size_t kMul = kMaxCapped >> kPow2;
    constexpr size_t kMinArg = size_t{1} << kPow2;
    detail::ForeachCappedR<T, kMul, kMinArg, Test>::Do(1, max_lanes);
#if HWY_HAVE_SCALABLE
    detail::ForeachPow2Trim<T, 0, kPow2, Test>::Do(1);
#endif
#endif  // HWY_SCALAR
  }
};

// Calls Test for all power-of-two N in [1 << kPow2, Lanes(d)]. This is for
// ops that narrow their input, e.g. UpperHalf.
template <class Test, int kPow2 = 1>
class ForShrinkableVectors {
  mutable bool called_ = false;

 public:
  ~ForShrinkableVectors() {
    if (!called_) {
      HWY_ABORT("Test is incorrect, ensure operator() is called");
    }
  }

  template <typename T>
  void operator()(T /*unused*/) const {
    called_ = true;
    constexpr size_t kMinLanes = size_t{1} << kPow2;
    constexpr size_t kMaxCapped = HWY_LANES(T);
    // For shrinking, an upper limit is unnecessary.
    constexpr size_t max_lanes = kMaxCapped;

    (void)kMinLanes;
    (void)max_lanes;
#if HWY_TARGET == HWY_SCALAR
    // not supported
#elif HWY_HAVE_SCALABLE
    detail::ForeachPow2Trim<T, kPow2, 0, Test>::Do(kMinLanes);
#else
    detail::ForeachCappedR<T, (kMaxCapped >> kPow2), kMinLanes, Test>::Do(
        kMinLanes, max_lanes);
#endif  // HWY_TARGET == HWY_SCALAR
  }
};
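
// Example sketch (hypothetical functor, for illustration only): UpperHalf
// halves the vector once, so its test can be instantiated via
// ForShrinkableVectors with the default kPow2 = 1, which guarantees at least
// two lanes so that Half<D> is valid:
//   struct TestUpperHalf {
//     template <typename T, class D>
//     HWY_NOINLINE void operator()(T /*unused*/, D d) {
//       const Half<D> dh;
//       const auto v = PositiveIota(d);
//       // Splitting and re-joining the halves must reproduce the input.
//       HWY_ASSERT_VEC_EQ(d, v,
//                         Combine(d, UpperHalf(dh, v), LowerHalf(dh, v)));
//     }
//   };
//   HWY_NOINLINE void TestAllUpperHalf() {
//     ForAllTypes(ForShrinkableVectors<TestUpperHalf>());
//   }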
// Calls Test for all supported power of two vectors of at least kMinBits.
// Examples: AES or 64x64 require 128 bits, casts may require 64 bits.
template <size_t kMinBits, class Test>
class ForGEVectors {
  mutable bool called_ = false;

 public:
  ~ForGEVectors() {
    if (!called_) {
      HWY_ABORT("Test is incorrect, ensure operator() is called");
    }
  }

  template <typename T>
  void operator()(T /*unused*/) const {
    called_ = true;
    constexpr size_t kMaxCapped = HWY_LANES(T);
    constexpr size_t kMinLanes = kMinBits / 8 / sizeof(T);
    // An upper limit is unnecessary.
    constexpr size_t max_lanes = kMaxCapped;
    (void)max_lanes;
#if HWY_TARGET == HWY_SCALAR
    (void)kMinLanes;  // not supported
#else
    detail::ForeachCappedR<T, kMaxCapped / kMinLanes, kMinLanes, Test>::Do(
        kMinLanes, max_lanes);
#if HWY_HAVE_SCALABLE
    // Can be 0 (handled below) if kMinBits > 128.
    constexpr size_t kRatio = 128 / kMinBits;
    constexpr int kMinPow2 =
        kRatio == 0 ? 0 : -static_cast<int>(CeilLog2(kRatio));
    constexpr bool kValid = kMinPow2 <= detail::MaxPow2();
    detail::ForeachPow2<T, kMinPow2, detail::MaxPow2(), kValid, Test>::Do(
        kMinLanes);
#endif
#endif  // HWY_TARGET == HWY_SCALAR
  }
};

template <class Test>
using ForGE128Vectors = ForGEVectors<128, Test>;

// Calls Test for all N that can be promoted (not the same as Extendable
// because HWY_SCALAR has one lane). Also used for ZipLower, but not ZipUpper.
template <class Test, int kPow2 = 1>
class ForPromoteVectors {
  mutable bool called_ = false;

 public:
  ~ForPromoteVectors() {
    if (!called_) {
      HWY_ABORT("Test is incorrect, ensure operator() is called");
    }
  }

  template <typename T>
  void operator()(T /*unused*/) const {
    called_ = true;
    constexpr size_t kFactor = size_t{1} << kPow2;
    static_assert(kFactor >= 2 && kFactor * sizeof(T) <= sizeof(uint64_t), "");
    constexpr size_t kMaxCapped = HWY_LANES(T);
    // Skip CappedTag that are already full vectors.
    const size_t max_lanes = Lanes(ScalableTag<T>()) >> kPow2;
    (void)kMaxCapped;
    (void)max_lanes;
#if HWY_TARGET == HWY_SCALAR
    detail::ForeachCappedR<T, 1, 1, Test>::Do(1, 1);
#else
    using DLargestFrom = CappedTag<T, (kMaxCapped >> kPow2) * kFactor, -kPow2>;
    static_assert(HWY_MAX_LANES_D(DLargestFrom) <= (kMaxCapped >> kPow2),
                  "HWY_MAX_LANES_D(DLargestFrom) must be less than or equal "
                  "to (kMaxCapped >> kPow2)");
    detail::ForeachCappedR<T, (kMaxCapped >> kPow2), kFactor, Test,
                           -kPow2>::Do(1, max_lanes);
#if HWY_HAVE_SCALABLE
    detail::ForeachPow2Trim<T, 0, kPow2, Test>::Do(1);
#endif
#endif  // HWY_SCALAR
  }
};

// Calls Test for all N that can be demoted (not the same as Shrinkable
// because HWY_SCALAR has one lane, and a one-lane vector with a lane size of
// at least 2 bytes can always be demoted to a vector with a smaller lane
// type).
template <class Test, int kPow2 = 1>
class ForDemoteVectors {
  mutable bool called_ = false;

 public:
  ~ForDemoteVectors() {
    if (!called_) {
      HWY_ABORT("Test is incorrect, ensure operator() is called");
    }
  }

  template <typename T>
  void operator()(T /*unused*/) const {
    called_ = true;
#if HWY_HAVE_SCALABLE
    // kMinTVecPow2 is the smallest Pow2 for a vector with lane type T that is
    // supported by detail::ForeachPow2Trim.
    constexpr int kMinTVecPow2 = detail::MinPow2<T>();
    // detail::MinPow2<T>() + kMinPow2Adj is the smallest Pow2 for a vector
    // with lane type T that can be demoted to a vector with a lane size of
    // (sizeof(T) >> kPow2).
    constexpr int kMinPow2Adj = HWY_MAX(-3 - kMinTVecPow2 + kPow2, 0);
    detail::ForeachPow2Trim<T, kMinPow2Adj, 0, Test>::Do(1);

    // On targets with scalable vectors, detail::ForeachCappedR below only
    // needs to be executed for vectors with fewer than
    // Lanes(ScalableTag<T>()) lanes, as full vectors were already checked by
    // the detail::ForeachPow2Trim above.
    constexpr size_t kMaxCapped = HWY_LANES(T) >> 1;
    const size_t max_lanes = Lanes(ScalableTag<T>()) >> 1;
#else
    // On targets where HWY_HAVE_SCALABLE is 0, any vector with HWY_LANES(T)
    // or fewer lanes can always be demoted to a vector with a smaller lane
    // type.
    constexpr size_t kMaxCapped = HWY_LANES(T);
    const size_t max_lanes = kMaxCapped;
#endif
    detail::ForeachCappedR<T, kMaxCapped, 1, Test>::Do(1, max_lanes);
  }
};
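
// Example sketch (hypothetical functor, for illustration only): a test of
// DemoteTo from int32_t to int16_t uses ForDemoteVectors (default kPow2 = 1,
// i.e. halving the lane size) so the narrower Rebind tag is always valid:
//   struct TestDemoteToI16 {
//     template <typename T, class D>  // T is int32_t here
//     HWY_NOINLINE void operator()(T /*unused*/, D d) {
//       const Rebind<int16_t, D> d16;
//       const auto v = Iota(d, 1);  // small values, no saturation expected
//       HWY_ASSERT_VEC_EQ(d16, Iota(d16, 1), DemoteTo(d16, v));
//     }
//   };
//   HWY_NOINLINE void TestAllDemoteToI16() {
//     ForDemoteVectors<TestDemoteToI16>()(int32_t());
//   }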
// For LowerHalf/Quarter.
template <class Test, int kPow2 = 1>
class ForHalfVectors {
  mutable bool called_ = false;

 public:
  ~ForHalfVectors() {
    if (!called_) {
      HWY_ABORT("Test is incorrect, ensure operator() is called");
    }
  }

  template <typename T>
  void operator()(T /*unused*/) const {
    called_ = true;
#if HWY_TARGET == HWY_SCALAR
    detail::ForeachCappedR<T, 1, 1, Test>::Do(1, 1);
#else
    constexpr size_t kMinLanes = size_t{1} << kPow2;
    // For shrinking, an upper limit is unnecessary.
    constexpr size_t kMaxCapped = HWY_LANES(T);
    detail::ForeachCappedR<T, (kMaxCapped >> kPow2), kMinLanes, Test>::Do(
        kMinLanes, kMaxCapped);

// TODO(janwas): call Extendable if kMinLanes check not required?
#if HWY_HAVE_SCALABLE
    detail::ForeachPow2Trim<T, kPow2, 0, Test>::Do(kMinLanes);
#endif
#endif  // HWY_TARGET == HWY_SCALAR
  }
};

// Calls Test for all power-of-two N in [1, Lanes(d)]. This is the default
// for ops that neither narrow nor widen their input, nor require 128 bits.
template <class Test>
class ForPartialVectors {
  mutable bool called_ = false;

 public:
  ~ForPartialVectors() {
    if (!called_) {
      HWY_ABORT("Test is incorrect, ensure operator() is called");
    }
  }

  template <typename T>
  void operator()(T t) const {
    called_ = true;
#if HWY_TARGET == HWY_SCALAR
    (void)t;
    detail::ForeachCappedR<T, 1, 1, Test>::Do(1, 1);
#else
    ForExtendableVectors<Test, 0>()(t);
#endif
  }
};

// ForPartialFixedOrFullScalableVectors calls Test for each D where
// MaxLanes(D()) == MaxLanes(DFromV<VFromD<D>>()).
#if HWY_HAVE_SCALABLE
template <class Test>
class ForPartialFixedOrFullScalableVectors {
  mutable bool called_ = false;

 public:
  ~ForPartialFixedOrFullScalableVectors() {
    if (!called_) {
      HWY_ABORT("Test is incorrect, ensure operator() is called");
    }
  }

  template <typename T>
  void operator()(T /*t*/) const {
    called_ = true;
#if HWY_TARGET == HWY_RVV
    constexpr int kMinPow2 = -3 + static_cast<int>(CeilLog2(sizeof(T)));
    constexpr int kMaxPow2 = 3;
#else
    constexpr int kMinPow2 = 0;
    constexpr int kMaxPow2 = 0;
#endif
    detail::ForeachPow2<T, kMinPow2, kMaxPow2, (kMinPow2 <= kMaxPow2),
                        Test>::Do(1);
  }
};
#elif HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128
template <class Test>
using ForPartialFixedOrFullScalableVectors =
    ForGEVectors<HWY_MAX_BYTES * 8, Test>;
#else
template <class Test>
using ForPartialFixedOrFullScalableVectors = ForPartialVectors<Test>;
#endif

// Type lists to shorten call sites:

template <class Func>
void ForSignedTypes(const Func& func) {
  func(int8_t());
  func(int16_t());
  func(int32_t());
#if HWY_HAVE_INTEGER64
  func(int64_t());
#endif
}

template <class Func>
void ForUnsignedTypes(const Func& func) {
  func(uint8_t());
  func(uint16_t());
  func(uint32_t());
#if HWY_HAVE_INTEGER64
  func(uint64_t());
#endif
}

template <class Func>
void ForIntegerTypes(const Func& func) {
  ForSignedTypes(func);
  ForUnsignedTypes(func);
}

template <class Func>
void ForFloat16Types(const Func& func) {
#if HWY_HAVE_FLOAT16
  func(float16_t());
#else
  (void)func;
#endif
}

template <class Func>
void ForFloat64Types(const Func& func) {
#if HWY_HAVE_FLOAT64
  func(double());
#else
  (void)func;
#endif
}
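
// Example sketch (hypothetical functor, for illustration only): the float
// type lists expand to nothing when the corresponding HWY_HAVE_* macro is 0,
// so a double-only test can be registered unconditionally:
//   HWY_NOINLINE void TestAllF64MulAdd() {
//     ForFloat64Types(ForPartialVectors<TestF64MulAdd>());
//   }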
// `#if HWY_HAVE_FLOAT*` is sufficient for tests using static dispatch. In
// sort_test we also use dynamic dispatch, so there we call the For*Dynamic
// functions which also check hwy::HaveFloat*.
template <class Func>
void ForFloat16TypesDynamic(const Func& func) {
#if HWY_HAVE_FLOAT16
  if (hwy::HaveFloat16()) {
    func(float16_t());
  }
#else
  (void)func;
#endif
}

template <class Func>
void ForFloat64TypesDynamic(const Func& func) {
#if HWY_HAVE_FLOAT64
  if (hwy::HaveFloat64()) {
    func(double());
  }
#else
  (void)func;
#endif
}

template <class Func>
void ForFloat3264Types(const Func& func) {
  func(float());
  ForFloat64Types(func);
}

template <class Func>
void ForFloatTypes(const Func& func) {
  ForFloat16Types(func);
  ForFloat3264Types(func);
}

template <class Func>
void ForFloatTypesDynamic(const Func& func) {
  ForFloat16TypesDynamic(func);
  func(float());
  ForFloat64TypesDynamic(func);
}

template <class Func>
void ForAllTypes(const Func& func) {
  ForIntegerTypes(func);
  ForFloatTypes(func);
}

// For ops that are also unconditionally available for bfloat16_t/float16_t.
template <class Func>
void ForSpecialTypes(const Func& func) {
  func(float16_t());
  func(bfloat16_t());
}

template <class Func>
void ForAllTypesAndSpecial(const Func& func) {
  ForAllTypes(func);
  ForSpecialTypes(func);
}

template <class Func>
void ForUI8(const Func& func) {
  func(uint8_t());
  func(int8_t());
}

template <class Func>
void ForUI16(const Func& func) {
  func(uint16_t());
  func(int16_t());
}

template <class Func>
void ForUIF16(const Func& func) {
  ForUI16(func);
  ForFloat16Types(func);
}

template <class Func>
void ForUI32(const Func& func) {
  func(uint32_t());
  func(int32_t());
}

template <class Func>
void ForUIF32(const Func& func) {
  ForUI32(func);
  func(float());
}

template <class Func>
void ForUI64(const Func& func) {
#if HWY_HAVE_INTEGER64
  func(uint64_t());
  func(int64_t());
#endif
}

template <class Func>
void ForUIF64(const Func& func) {
  ForUI64(func);
  ForFloat64Types(func);
}

template <class Func>
void ForUI3264(const Func& func) {
  ForUI32(func);
  ForUI64(func);
}

template <class Func>
void ForUIF3264(const Func& func) {
  ForUIF32(func);
  ForUIF64(func);
}

template <class Func>
void ForU816(const Func& func) {
  func(uint8_t());
  func(uint16_t());
}

template <class Func>
void ForI816(const Func& func) {
  func(int8_t());
  func(int16_t());
}

template <class Func>
void ForU163264(const Func& func) {
  func(uint16_t());
  func(uint32_t());
#if HWY_HAVE_INTEGER64
  func(uint64_t());
#endif
}

template <class Func>
void ForUI163264(const Func& func) {
  ForUI16(func);
  ForUI3264(func);
}

template <class Func>
void ForUIF163264(const Func& func) {
  ForUIF16(func);
  ForUIF3264(func);
}

// For tests that involve loops, adjust the trip count so that emulated tests
// finish quickly (but always at least 2 iterations to ensure some diversity).
constexpr size_t AdjustedReps(size_t max_reps) {
#if HWY_ARCH_RVV
  return HWY_MAX(max_reps / 32, 2);
#elif HWY_IS_DEBUG_BUILD
  return HWY_MAX(max_reps / 8, 2);
#elif HWY_ARCH_ARM
  return HWY_MAX(max_reps / 4, 2);
#elif HWY_COMPILER_MSVC
  return HWY_MAX(max_reps / 2, 2);
#else
  return HWY_MAX(max_reps, 2);
#endif
}

// Same as above, but the loop trip count will be 1 << max_pow2.
constexpr size_t AdjustedLog2Reps(size_t max_pow2) {
  // If "negative" (unsigned wraparound), use original.
#if HWY_ARCH_RVV
  return HWY_MIN(max_pow2 - 4, max_pow2);
#elif HWY_IS_DEBUG_BUILD
  return HWY_MIN(max_pow2 - 1, max_pow2);
#elif HWY_ARCH_ARM
  return HWY_MIN(max_pow2 - 1, max_pow2);
#else
  return max_pow2;
#endif
}

// NOLINTNEXTLINE(google-readability-namespace-comments)
}  // namespace HWY_NAMESPACE
}  // namespace hwy
HWY_AFTER_NAMESPACE();

#endif  // per-target include guard