// Copyright 2022 Google LLC // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Per-target include guard #if defined(HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_) == \ defined(HWY_TARGET_TOGGLE) #ifdef HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_ #undef HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_ #else #define HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_ #endif #include #include "hwy/highway.h" HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { // These functions avoid having to write a loop plus remainder handling in the // (unfortunately still common) case where arrays are not aligned/padded. If the // inputs are known to be aligned/padded, it is more efficient to write a single // loop using Load(). We do not provide a TransformAlignedPadded because it // would be more verbose than such a loop. // // Func is either a functor with a templated operator()(d, v[, v1[, v2]]), or a // generic lambda if using C++14. The d argument is the same as was passed to // the Generate etc. functions. Due to apparent limitations of Clang, it is // currently necessary to add HWY_ATTR before the opening { of the lambda to // avoid errors about "always_inline function .. requires target". // // We do not check HWY_MEM_OPS_MIGHT_FAULT because LoadN/StoreN do not fault. // Fills `out[0, count)` with the vectors returned by `func(d, index_vec)`, // where `index_vec` is `Vec>`. On the first call to `func`, // the value of its lane i is i, and increases by `Lanes(d)` after every call. // Note that some of these indices may be `>= count`, but the elements that // `func` returns in those lanes will not be written to `out`. template > void Generate(D d, T* HWY_RESTRICT out, size_t count, const Func& func) { const RebindToUnsigned du; using TU = TFromD; const size_t N = Lanes(d); size_t idx = 0; Vec vidx = Iota(du, 0); if (count >= N) { for (; idx <= count - N; idx += N) { StoreU(func(d, vidx), d, out + idx); vidx = Add(vidx, Set(du, static_cast(N))); } } // `count` was a multiple of the vector length `N`: already done. if (HWY_UNLIKELY(idx == count)) return; const size_t remaining = count - idx; HWY_DASSERT(0 != remaining && remaining < N); StoreN(func(d, vidx), d, out + idx, remaining); } // Calls `func(d, v)` for each input vector; out of bound lanes with index i >= // `count` are instead taken from `no[i % Lanes(d)]`. template > void Foreach(D d, const T* HWY_RESTRICT in, const size_t count, const Vec no, const Func& func) { const size_t N = Lanes(d); size_t idx = 0; if (count >= N) { for (; idx <= count - N; idx += N) { const Vec v = LoadU(d, in + idx); func(d, v); } } // `count` was a multiple of the vector length `N`: already done. if (HWY_UNLIKELY(idx == count)) return; const size_t remaining = count - idx; HWY_DASSERT(0 != remaining && remaining < N); const Vec v = LoadNOr(no, d, in + idx, remaining); func(d, v); } // Replaces `inout[idx]` with `func(d, inout[idx])`. Example usage: multiplying // array elements by a constant. template > void Transform(D d, T* HWY_RESTRICT inout, size_t count, const Func& func) { const size_t N = Lanes(d); size_t idx = 0; if (count >= N) { for (; idx <= count - N; idx += N) { const Vec v = LoadU(d, inout + idx); StoreU(func(d, v), d, inout + idx); } } // `count` was a multiple of the vector length `N`: already done. if (HWY_UNLIKELY(idx == count)) return; const size_t remaining = count - idx; HWY_DASSERT(0 != remaining && remaining < N); const Vec v = LoadN(d, inout + idx, remaining); StoreN(func(d, v), d, inout + idx, remaining); } // Replaces `inout[idx]` with `func(d, inout[idx], in1[idx])`. Example usage: // multiplying array elements by those of another array. template > void Transform1(D d, T* HWY_RESTRICT inout, size_t count, const T* HWY_RESTRICT in1, const Func& func) { const size_t N = Lanes(d); size_t idx = 0; if (count >= N) { for (; idx <= count - N; idx += N) { const Vec v = LoadU(d, inout + idx); const Vec v1 = LoadU(d, in1 + idx); StoreU(func(d, v, v1), d, inout + idx); } } // `count` was a multiple of the vector length `N`: already done. if (HWY_UNLIKELY(idx == count)) return; const size_t remaining = count - idx; HWY_DASSERT(0 != remaining && remaining < N); const Vec v = LoadN(d, inout + idx, remaining); const Vec v1 = LoadN(d, in1 + idx, remaining); StoreN(func(d, v, v1), d, inout + idx, remaining); } // Replaces `inout[idx]` with `func(d, inout[idx], in1[idx], in2[idx])`. Example // usage: FMA of elements from three arrays, stored into the first array. template > void Transform2(D d, T* HWY_RESTRICT inout, size_t count, const T* HWY_RESTRICT in1, const T* HWY_RESTRICT in2, const Func& func) { const size_t N = Lanes(d); size_t idx = 0; if (count >= N) { for (; idx <= count - N; idx += N) { const Vec v = LoadU(d, inout + idx); const Vec v1 = LoadU(d, in1 + idx); const Vec v2 = LoadU(d, in2 + idx); StoreU(func(d, v, v1, v2), d, inout + idx); } } // `count` was a multiple of the vector length `N`: already done. if (HWY_UNLIKELY(idx == count)) return; const size_t remaining = count - idx; HWY_DASSERT(0 != remaining && remaining < N); const Vec v = LoadN(d, inout + idx, remaining); const Vec v1 = LoadN(d, in1 + idx, remaining); const Vec v2 = LoadN(d, in2 + idx, remaining); StoreN(func(d, v, v1, v2), d, inout + idx, remaining); } template > void Replace(D d, T* HWY_RESTRICT inout, size_t count, T new_t, T old_t) { const size_t N = Lanes(d); const Vec old_v = Set(d, old_t); const Vec new_v = Set(d, new_t); size_t idx = 0; if (count >= N) { for (; idx <= count - N; idx += N) { Vec v = LoadU(d, inout + idx); StoreU(IfThenElse(Eq(v, old_v), new_v, v), d, inout + idx); } } // `count` was a multiple of the vector length `N`: already done. if (HWY_UNLIKELY(idx == count)) return; const size_t remaining = count - idx; HWY_DASSERT(0 != remaining && remaining < N); const Vec v = LoadN(d, inout + idx, remaining); StoreN(IfThenElse(Eq(v, old_v), new_v, v), d, inout + idx, remaining); } template > void ReplaceIf(D d, T* HWY_RESTRICT inout, size_t count, T new_t, const Func& func) { const size_t N = Lanes(d); const Vec new_v = Set(d, new_t); size_t idx = 0; if (count >= N) { for (; idx <= count - N; idx += N) { Vec v = LoadU(d, inout + idx); StoreU(IfThenElse(func(d, v), new_v, v), d, inout + idx); } } // `count` was a multiple of the vector length `N`: already done. if (HWY_UNLIKELY(idx == count)) return; const size_t remaining = count - idx; HWY_DASSERT(0 != remaining && remaining < N); const Vec v = LoadN(d, inout + idx, remaining); StoreN(IfThenElse(func(d, v), new_v, v), d, inout + idx, remaining); } // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #endif // HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_