/*! \file ducc0/infra/mav.h
 *  Classes for dealing with multidimensional arrays
 *
 *  \copyright Copyright (C) 2019-2023 Max-Planck-Society
 *  \author Martin Reinecke
 */

/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later */

/*
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its contributors
  may be used to endorse or promote products derived from this software
  without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
 *  This code is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This code is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this code; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#ifndef DUCC0_MAV_H
#define DUCC0_MAV_H

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <numeric>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
#include "ducc0/infra/error_handling.h"
#include "ducc0/infra/aligned_array.h"
#include "ducc0/infra/misc_utils.h"
#include "ducc0/infra/threading.h"

namespace ducc0 {

namespace detail_mav {

using namespace std;
// the next line is necessary to address some sloppy name choices in hipSYCL
using std::min, std::max;

struct uninitialized_dummy {};
constexpr uninitialized_dummy UNINITIALIZED;

template<typename T> class cmembuf
  {
  protected:
    shared_ptr<vector<T>> ptr;
    shared_ptr<quick_array<T>> rawptr;
    const T *d;

    cmembuf(const T *d_, const cmembuf &other)
      : ptr(other.ptr), rawptr(other.rawptr), d(d_) {}

    // externally owned data pointer
    cmembuf(const T *d_)
      : d(d_) {}
    // share another memory buffer, but read-only
    cmembuf(const cmembuf &other)
      : ptr(other.ptr), rawptr(other.rawptr), d(other.d) {}
    cmembuf(size_t sz)
      : ptr(make_shared<vector<T>>(sz)), d(ptr->data()) {}
#if 1
    cmembuf(size_t sz, uninitialized_dummy)
      : rawptr(make_shared<quick_array<T>>(sz)), d(rawptr->data()) {}
#else
    // "poison" the array with a fixed value; use for debugging
    cmembuf(size_t sz, uninitialized_dummy)
      : rawptr(make_shared<quick_array<T>>(sz)), d(rawptr->data())
      { for (size_t i=0; i<sz; ++i) (*rawptr)[i] = T(42); }
#endif
    // take over another memory buffer
    cmembuf(cmembuf &&other) = default;

  public:
    cmembuf(): d(nullptr) {}
    void assign(const cmembuf &other)
      {
      ptr = other.ptr;
      rawptr = other.rawptr;
      d = other.d;
      }
    // read access to element #i
    template<typename I> const T &raw(I i) const
      { return d[i]; }
    // read access to data area
    const T *data() const
      { return d; }
  };

constexpr size_t MAXIDX=~(size_t(0));

struct slice
  {
  size_t beg, end;
  ptrdiff_t step;
  slice() : beg(0), end(MAXIDX), step(1) {}
  slice(size_t idx) : beg(idx), end(idx), step(1) {}
  slice(size_t beg_, size_t end_, ptrdiff_t step_=1)
    : beg(beg_), end(end_), step(step_)
    {
    // FIXME: add sanity checks here
    }

  size_t size(size_t shp) const
    {
    if (beg==end) return 0;
    if (step>0) return (min(shp,end)-beg+step-1)/step;
    // negative step
    if (end==MAXIDX) return (beg-step)/(-step);
    return (beg-end-step-1)/(-step);
    }
  };
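// Example (illustrative sketch, not part of the interface): how `slice`
// selects indices along a dimension of extent 10.
//   slice()              -> indices 0,1,...,9 (full range)
//   slice(3)             -> beg==end marks the single index 3; the sliced
//                           dimension is removed from the result
//   slice(1, 7, 2)       -> indices 1, 3, 5
//   slice(8, MAXIDX, -2) -> indices 8, 6, 4, 2, 0 (negative step)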
/// Helper class containing shape and stride information of an `fmav` object
class fmav_info
  {
  public:
    /// vector of nonnegative integers for storing the array shape
    using shape_t = vector<size_t>;
    /// vector of integers for storing the array strides
    using stride_t = vector<ptrdiff_t>;

  protected:
    shape_t shp;
    stride_t str;
    size_t sz;

    static stride_t shape2stride(const shape_t &shp)
      {
      auto ndim = shp.size();
      // MR using the static_cast just to avoid a GCC warning
      // stride_t res(ndim);
      stride_t res(static_cast<size_t>(ndim));
      if (ndim==0) return res;
      res[ndim-1]=1;
      for (size_t i=2; i<=ndim; ++i)
        res[ndim-i] = res[ndim-i+1]*ptrdiff_t(shp[ndim-i+1]);
      return res;
      }
    template<typename... Ns> ptrdiff_t getIdx(size_t dim, size_t n, Ns... ns) const
      { return str[dim]*ptrdiff_t(n) + getIdx(dim+1, ns...); }
    ptrdiff_t getIdx(size_t dim, size_t n) const
      { return str[dim]*ptrdiff_t(n); }
    ptrdiff_t getIdx(size_t /*dim*/) const
      { return 0; }

  public:
    /// Constructs a 1D object with all extents and strides set to zero.
    fmav_info() : shp(1,0), str(1,0), sz(0) {}
    /// Constructs an object with the given shape and stride.
    fmav_info(const shape_t &shape_, const stride_t &stride_)
      : shp(shape_), str(stride_),
        sz(accumulate(shp.begin(),shp.end(),size_t(1),multiplies<>()))
      {
      MR_assert(shp.size()==str.size(), "dimensions mismatch");
      }
    /// Constructs an object with the given shape and computes the strides
    /// automatically, assuming a C-contiguous memory layout.
    fmav_info(const shape_t &shape_)
      : fmav_info(shape_, shape2stride(shape_)) {}
    void assign(const fmav_info &other)
      {
      shp = other.shp;
      str = other.str;
      sz = other.sz;
      }
    /// Returns the dimensionality of the object.
    size_t ndim() const { return shp.size(); }
    /// Returns the total number of entries in the object.
    size_t size() const { return sz; }
    /// Returns the shape of the object.
    const shape_t &shape() const { return shp; }
    /// Returns the length along dimension \a i.
    size_t shape(size_t i) const { return shp[i]; }
    /// Returns the strides of the object.
    const stride_t &stride() const { return str; }
    /// Returns the stride along dimension \a i.
    const ptrdiff_t &stride(size_t i) const { return str[i]; }
    /// Returns true iff the last dimension has stride 1.
    /** Typically used for optimization purposes. */
    bool last_contiguous() const
      { return ((ndim()==0) || (str.back()==1)); }
    /** Returns true iff the object is C-contiguous, i.e. if the stride of the
     *  last dimension is 1, the stride for the next-to-last dimension is the
     *  shape of the last dimension etc. */
    bool contiguous() const
      {
      auto ndim = shp.size();
      ptrdiff_t stride=1;
      for (size_t i=0; i<ndim; ++i)
        {
        if (str[ndim-1-i]!=stride) return false;
        stride *= ptrdiff_t(shp[ndim-1-i]);
        }
      return true;
      }
    /// Returns true iff this->shape and \a other.shape match.
    bool conformable(const fmav_info &other) const
      { return shp==other.shp; }
    /// Returns the one-dimensional index of an entry from the given
    /// multi-dimensional index tuple, taking strides into account.
    template<typename... Ns> ptrdiff_t idx(Ns... ns) const
      {
      MR_assert(ndim()==sizeof...(ns), "incorrect number of indices");
      return getIdx(0, ns...);
      }
    ptrdiff_t idx(const shape_t &ns) const
      {
      MR_assert(ndim()==ns.size(), "incorrect number of indices");
      size_t res = 0;
      for (size_t i=0; i<ndim(); ++i)
        res += str[i]*ns[i];
      return res;
      }
    template<typename RAiter> ptrdiff_t idxval(RAiter beg, RAiter end) const
      {
      MR_assert(ndim()==size_t(end-beg), "incorrect number of indices");
      size_t res = 0;
      for (size_t i=0; i<ndim(); ++i, ++beg)
        res += str[i]* (*beg);
      return res;
      }
    fmav_info extend_and_broadcast(const shape_t &shp2, size_t firstaxis) const
      {
      MR_assert(shp2.size()>=shp.size(), "cannot reduce dimensionality");
      stride_t newstr(shp2.size(), 0);
      for (size_t i=0; i<shp.size(); ++i)
        {
        MR_assert(shp2[i+firstaxis]==shp[i], "axis length mismatch");
        newstr[i+firstaxis] = str[i];
        }
      return fmav_info(shp2, newstr);
      }
    fmav_info extend_and_broadcast(const shape_t &new_shape, const shape_t &axpos) const
      {
      MR_assert(new_shape.size()>=ndim(), "new shape smaller than original one");
      MR_assert(axpos.size()==ndim(), "bad axpos size");
      stride_t new_stride(new_shape.size(), 0);
      vector<uint8_t> used(new_shape.size(),0);
      for (size_t i=0; i<ndim(); ++i)
        {
        MR_assert(axpos[i]<new_shape.size(), "bad axis position");
        MR_assert(!used[axpos[i]], "repeated axis position");
        used[axpos[i]] = 1;
        MR_assert(new_shape[axpos[i]]==shp[i], "axis length mismatch");
        new_stride[axpos[i]] = str[i];
        }
      return fmav_info(new_shape, new_stride);
      }
    fmav_info transpose() const
      { return fmav_info({shp.rbegin(), shp.rend()}, {str.rbegin(), str.rend()}); }

  protected:
    auto subdata(const vector<slice> &slices) const
      {
      auto ndim = shp.size();
      shape_t nshp(ndim);
      stride_t nstr(ndim);
      MR_assert(slices.size()==ndim, "incorrect number of slices");
      size_t n0=0;
      for (auto x:slices) if (x.beg==x.end) ++n0;
      ptrdiff_t nofs=0;
      nshp.resize(ndim-n0);
      nstr.resize(ndim-n0);
      for (size_t i=0, i2=0; i<ndim; ++i)
        {
        nofs += ptrdiff_t(slices[i].beg)*str[i];
        if (slices[i].beg!=slices[i].end)  // dimension is kept
          {
          nshp[i2] = slices[i].size(shp[i]);
          nstr[i2] = slices[i].step*str[i];
          ++i2;
          }
        }
      return make_tuple(fmav_info(nshp, nstr), nofs);
      }
  };
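// Example (illustrative sketch): for a C-contiguous array of shape (2,3,4),
// shape2stride() produces strides (12,4,1), so
//   fmav_info({2,3,4}).idx(1,2,3) == 1*12 + 2*4 + 3*1 == 23.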
/// Helper class containing shape and stride information of a `mav` object
template<size_t ndim> class mav_info
  {
  public:
    /// Fixed-size array of nonnegative integers for storing the array shape
    using shape_t = array<size_t, ndim>;
    /// Fixed-size array of integers for storing the array strides
    using stride_t = array<ptrdiff_t, ndim>;

  protected:
    shape_t shp;
    stride_t str;
    size_t sz;

    static stride_t shape2stride(const shape_t &shp)
      {
      stride_t res;
      if (ndim==0) return res;
      res[ndim-1]=1;
      for (size_t i=2; i<=ndim; ++i)
        res[ndim-i] = res[ndim-i+1]*ptrdiff_t(shp[ndim-i+1]);
      return res;
      }
    template<typename... Ns> ptrdiff_t getIdx(size_t dim, size_t n, Ns... ns) const
      { return str[dim]*n + getIdx(dim+1, ns...); }
    ptrdiff_t getIdx(size_t dim, size_t n) const
      { return str[dim]*n; }
    ptrdiff_t getIdx(size_t /*dim*/) const
      { return 0; }

  public:
    /// Constructs an object with all extents and strides set to zero.
    mav_info() : sz(0)
      {
      for (size_t i=0; i<ndim; ++i)
        { shp[i]=0; str[i]=0; }
      }
    /// Constructs an object with the given shape and stride.
    mav_info(const shape_t &shape_, const stride_t &stride_)
      : shp(shape_), str(stride_),
        sz(accumulate(shp.begin(),shp.end(),size_t(1),multiplies<>())) {}
    /// Constructs an object with the given shape and computes the strides
    /// automatically, assuming a C-contiguous memory layout.
    mav_info(const shape_t &shape_)
      : mav_info(shape_, shape2stride(shape_)) {}
    mav_info(const fmav_info &inp)
      {
      MR_assert(inp.ndim()==ndim, "dimensionality mismatch");
      sz=1;
      for (size_t i=0; i<ndim; ++i)
        {
        shp[i] = inp.shape(i);
        sz *= shp[i];
        str[i] = inp.stride(i);
        }
      }
    void assign(const mav_info &other)
      {
      shp = other.shp;
      str = other.str;
      sz = other.sz;
      }
    /// Returns the total number of entries in the object.
    size_t size() const { return sz; }
    /// Returns the shape of the object.
    const shape_t &shape() const { return shp; }
    /// Returns the length along dimension \a i.
    size_t shape(size_t i) const { return shp[i]; }
    /// Returns the strides of the object.
    const stride_t &stride() const { return str; }
    /// Returns the stride along dimension \a i.
    const ptrdiff_t &stride(size_t i) const { return str[i]; }
    /// Returns true iff the last dimension has stride 1.
    /** Typically used for optimization purposes. */
    bool last_contiguous() const
      { return ((ndim==0) || (str.back()==1)); }
    /** Returns true iff the object is C-contiguous, i.e. if the stride of the
     *  last dimension is 1, the stride for the next-to-last dimension is the
     *  shape of the last dimension etc. */
    bool contiguous() const
      {
      ptrdiff_t stride=1;
      for (size_t i=0; i<ndim; ++i)
        {
        if (str[ndim-1-i]!=stride) return false;
        stride *= ptrdiff_t(shp[ndim-1-i]);
        }
      return true;
      }
    /// Returns true iff this->shape and \a other.shape match.
    bool conformable(const mav_info &other) const
      { return shp==other.shp; }
    /// Returns true iff this->shape and \a other match.
    bool conformable(const shape_t &other) const
      { return shp==other; }
    /// Returns the one-dimensional index of an entry from the given
    /// multi-dimensional index tuple, taking strides into account.
    template<typename... Ns> ptrdiff_t idx(Ns... ns) const
      {
      static_assert(ndim==sizeof...(ns), "incorrect number of indices");
      return getIdx(0, ns...);
      }
    mav_info transpose() const
      {
      shape_t shp2;
      stride_t str2;
      for (size_t i=0; i<ndim; ++i)
        {
        shp2[i] = shp[ndim-1-i];
        str2[i] = str[ndim-1-i];
        }
      return mav_info(shp2, str2);
      }
    mav_info<ndim+1> prepend_1() const
      {
      typename mav_info<ndim+1>::shape_t newshp;
      typename mav_info<ndim+1>::stride_t newstr;
      newshp[0] = 1;
      newstr[0] = 0;
      for (size_t i=0; i<ndim; ++i)
        {
        newshp[i+1] = shp[i];
        newstr[i+1] = str[i];
        }
      return mav_info<ndim+1>(newshp, newstr);
      }

  protected:
    template<size_t nd2> auto subdata(const vector<slice> &slices) const
      {
      MR_assert(slices.size()==ndim, "bad number of slices");
      array<size_t, nd2> nshp;
      array<ptrdiff_t, nd2> nstr;
      // unnecessary, but gcc warns otherwise
      for (size_t i=0; i<nd2; ++i)
        { nshp[i]=0; nstr[i]=0; }
      size_t n0=0;
      for (auto x:slices) if (x.beg==x.end) ++n0;
      MR_assert(n0+nd2==ndim, "bad number of subtracted dimensions");
      ptrdiff_t nofs=0;
      for (size_t i=0, i2=0; i<ndim; ++i)
        {
        nofs += ptrdiff_t(slices[i].beg)*str[i];
        if (slices[i].beg!=slices[i].end)  // dimension is kept
          {
          nshp[i2] = slices[i].size(shp[i]);
          nstr[i2] = slices[i].step*str[i];
          ++i2;
          }
        }
      return make_tuple(mav_info<nd2>(nshp, nstr), nofs);
      }
  };
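// Example (illustrative sketch): mav_info is the fixed-dimensionality
// analogue of fmav_info; e.g. mav_info<2>({{3,4}}).idx(1,2) == 1*4 + 2*1 == 6,
// and calling idx() with the wrong number of indices fails at compile time.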
template<typename T> class cfmav: public fmav_info, public cmembuf<T>
  {
  protected:
    using tbuf = cmembuf<T>;
    using tinfo = fmav_info;
    using fmav_info::idx;

  public:
    using typename tinfo::shape_t;
    using typename tinfo::stride_t;
    using tbuf::raw, tbuf::data;

  protected:
    cfmav(const shape_t &shp_)
      : tinfo(shp_), tbuf(size()) {}
    cfmav(const shape_t &shp_, uninitialized_dummy)
      : tinfo(shp_), tbuf(size(), UNINITIALIZED) {}
    cfmav(const shape_t &shp_, const stride_t &str_, uninitialized_dummy)
      : tinfo(shp_, str_), tbuf(size(), UNINITIALIZED)
      {
      ptrdiff_t ofs=0;
      for (size_t i=0; i<ndim(); ++i)
        ofs += (ptrdiff_t(shp[i])-1)*str[i];
      MR_assert(ofs+1==ptrdiff_t(size()), "array is not compact");
      }
    cfmav(const fmav_info &info, const T *d_, const tbuf &buf)
      : tinfo(info), tbuf(d_, buf) {}
    cfmav(const fmav_info &info, const tbuf &buf)
      : tinfo(info), tbuf(buf) {}

  public:
    cfmav() {}
    cfmav(const T *d_, const shape_t &shp_, const stride_t &str_)
      : tinfo(shp_, str_), tbuf(d_) {}
    cfmav(const T *d_, const shape_t &shp_)
      : tinfo(shp_), tbuf(d_) {}
    cfmav(const T *d_, const fmav_info &info)
      : tinfo(info), tbuf(d_) {}
    cfmav(const tbuf &buf, const shape_t &shp_, const stride_t &str_)
      : tinfo(shp_, str_), tbuf(buf) {}

    void assign(const cfmav &other)
      {
      fmav_info::assign(other);
      cmembuf<T>::assign(other);
      }
    // no-op. Needed for template tricks.
    cfmav to_fmav() const { return *this; }

    template<typename... Ns> const T &operator()(Ns... ns) const
      { return raw(idx(ns...)); }
    const T &operator()(const shape_t &ns) const
      { return raw(idx(ns)); }
    template<typename RAiter> const T &val(RAiter beg, RAiter end) const
      { return raw(idxval(beg, end)); }

    cfmav subarray(const vector<slice> &slices) const
      {
      auto [ninfo, nofs] = subdata(slices);
      return cfmav(ninfo, tbuf::d+nofs, *this);
      }
    cfmav extend_and_broadcast(const shape_t &new_shape, const shape_t &axpos) const
      { return cfmav(fmav_info::extend_and_broadcast(new_shape, axpos), *this); }
    cfmav extend_and_broadcast(const shape_t &new_shape, size_t firstaxis) const
      { return cfmav(fmav_info::extend_and_broadcast(new_shape, firstaxis), *this); }
    cfmav transpose() const
      {
      return cfmav(static_cast<const tinfo *>(this)->transpose(),
                   *static_cast<const tbuf *>(this));
      }
  };

template<typename T> cfmav<T> subarray
  (const cfmav<T> &arr, const vector<slice> &slices)
  { return arr.subarray(slices); }

template<typename T> class vfmav: public cfmav<T>
  {
  protected:
    using tbuf = cmembuf<T>;
    using tinfo = fmav_info;
    using tinfo::shp, tinfo::str;
    using fmav_info::idx;

  public:
    using typename tinfo::shape_t;
    using typename tinfo::stride_t;
    using tinfo::size, tinfo::shape, tinfo::stride;

  protected:
    vfmav(const fmav_info &info, tbuf &buf)
      : cfmav<T>(info, buf) {}
    vfmav(const fmav_info &info, T *d_, tbuf &buf)
      : cfmav<T>(info, d_, buf) {}

  public:
    using tbuf::raw, tbuf::data, tinfo::ndim;
    vfmav() {}
    vfmav(T *d_, const fmav_info &info)
      : cfmav<T>(d_, info) {}
    vfmav(T *d_, const shape_t &shp_, const stride_t &str_)
      : cfmav<T>(d_, shp_, str_) {}
    vfmav(T *d_, const shape_t &shp_)
      : cfmav<T>(d_, shp_) {}
    vfmav(const shape_t &shp_)
      : cfmav<T>(shp_) {}
    vfmav(const shape_t &shp_, uninitialized_dummy)
      : cfmav<T>(shp_, UNINITIALIZED) {}
    vfmav(const shape_t &shp_, const stride_t &str_, uninitialized_dummy)
      : cfmav<T>(shp_, str_, UNINITIALIZED) {}
    vfmav(tbuf &buf, const shape_t &shp_, const stride_t &str_)
      : cfmav<T>(buf, shp_, str_) {}

    using cfmav<T>::data;
    T *data()
      { return const_cast<T *>(tbuf::d); }
    using cfmav<T>::raw;
    template<typename I> T &raw(I i)
      { return data()[i]; }

    // no-op. Needed for template tricks.
    using cfmav<T>::to_fmav;
    vfmav to_fmav() { return *this; }

    void assign(const vfmav &other)
      {
      fmav_info::assign(other);
      cmembuf<T>::assign(other);
      }

    using cfmav<T>::operator();
    template<typename... Ns> T &operator()(Ns... ns)
      { return raw(idx(ns...)); }
    T &operator()(const shape_t &ns)
      { return raw(idx(ns)); }
    using cfmav<T>::val;
    template<typename RAiter> T &val(RAiter beg, RAiter end)
      { return raw(idxval(beg, end)); }

    vfmav subarray(const vector<slice> &slices)
      {
      auto [ninfo, nofs] = tinfo::subdata(slices);
      return vfmav(ninfo, data()+nofs, *this);
      }
    /** Returns a writable fmav with the specified shape.
     *  The strides are chosen in such a way that critical strides (multiples
     *  of 4096 bytes) along any dimension are avoided, by enlarging the
     *  allocated memory slightly if necessary.
     *  The array data is default-initialized. */
    static vfmav build_noncritical(const shape_t &shape)
      {
      auto ndim = shape.size();
      auto shape2 = noncritical_shape(shape, sizeof(T));
      vfmav tmp(shape2);
      vector<slice> slc(ndim);
      for (size_t i=0; i<ndim; ++i) slc[i] = slice(0, shape[i]);
      return tmp.subarray(slc);
      }
    /** Like the variant above, but the array data is not initialized. */
    static vfmav build_noncritical(const shape_t &shape, uninitialized_dummy)
      {
      auto ndim = shape.size();
      if (ndim<=1) return vfmav(shape, UNINITIALIZED);
      auto shape2 = noncritical_shape(shape, sizeof(T));
      vfmav tmp(shape2, UNINITIALIZED);
      vector<slice> slc(ndim);
      for (size_t i=0; i<ndim; ++i) slc[i] = slice(0, shape[i]);
      return tmp.subarray(slc);
      }
    vfmav transpose()
      {
      return vfmav(static_cast<const tinfo *>(this)->transpose(),
                   *static_cast<tbuf *>(this));
      }
  };

template<typename T> vfmav<T> subarray
  (vfmav<T> &arr, const vector<slice> &slices)
  { return arr.subarray(slices); }
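// Example (illustrative usage sketch, hypothetical variables): constructing
// and viewing flat-dimensionality arrays.
//   vfmav<double> arr({100, 200});           // owning, value-initialized
//   auto blk = arr.subarray({slice(0, 50), slice(10, 20)});  // 50x10 view
//   std::vector<double> ext(12);
//   vfmav<double> wrap(ext.data(), {3, 4});  // non-owning view of ext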
template<typename T, size_t ndim> class cmav: public mav_info<ndim>, public cmembuf<T>
  {
  protected:
    template<typename T2, size_t nd2> friend class cmav;
    template<typename T2, size_t nd2> friend class vmav;

    using tinfo = mav_info<ndim>;
    using tbuf = cmembuf<T>;
    using tinfo::shp, tinfo::str;

  public:
    using typename tinfo::shape_t;
    using typename tinfo::stride_t;
    using tbuf::raw, tbuf::data;
    using tinfo::contiguous, tinfo::size, tinfo::idx, tinfo::conformable;

  protected:
    cmav() {}
    cmav(const shape_t &shp_, uninitialized_dummy)
      : tinfo(shp_), tbuf(size(), UNINITIALIZED) {}
    cmav(const shape_t &shp_)
      : tinfo(shp_), tbuf(size()) {}
    cmav(const tbuf &buf, const shape_t &shp_, const stride_t &str_)
      : tinfo(shp_, str_), tbuf(buf) {}
    cmav(const tinfo &info, const T *d_, const tbuf &buf)
      : tinfo(info), tbuf(d_, buf) {}
    cmav(const tinfo &info, const tbuf &buf)
      : tinfo(info), tbuf(buf) {}

  public:
    cmav(const T *d_, const shape_t &shp_, const stride_t &str_)
      : tinfo(shp_, str_), tbuf(d_) {}
    cmav(const T *d_, const shape_t &shp_)
      : tinfo(shp_), tbuf(d_) {}
    cmav(const cfmav<T> &inp)
      : tinfo(inp), tbuf(inp) {}

    void assign(const cmav &other)
      {
      mav_info<ndim>::assign(other);
      cmembuf<T>::assign(other);
      }
    operator cfmav<T>() const
      {
      return cfmav<T>(*this, {shp.begin(), shp.end()}, {str.begin(), str.end()});
      }
    // Needed for template tricks.
    cfmav<T> to_fmav() const { return operator cfmav<T>(); }

    template<typename... Ns> const T &operator()(Ns... ns) const
      { return raw(idx(ns...)); }
    template<size_t nd2> cmav<T, nd2> subarray(const vector<slice> &slices) const
      {
      auto [ninfo, nofs] = tinfo::template subdata<nd2>(slices);
      return cmav<T, nd2>(ninfo, tbuf::d+nofs, *this);
      }

    static cmav build_uniform(const shape_t &shape, const T &value)
      {
      // Don't do this at home!
      shape_t tshp;
      tshp.fill(1);
      cmav tmp(tshp);
      const_cast<T &>(tmp.raw(0)) = value;
      stride_t nstr;
      nstr.fill(0);
      return cmav(tmp, shape, nstr);
      }
    cmav transpose() const
      {
      return cmav(static_cast<const tinfo *>(this)->transpose(),
                  *static_cast<const tbuf *>(this));
      }
    cmav<T, ndim+1> prepend_1() const
      {
      return cmav<T, ndim+1>(static_cast<const tinfo *>(this)->prepend_1(),
                             *static_cast<const tbuf *>(this));
      }
    template<size_t nd2> cmav<T, nd2> reinterpret
      (const typename cmav<T, nd2>::shape_t &newshp,
       const typename cmav<T, nd2>::stride_t &newstr) const
      {
      return cmav<T, nd2>(*static_cast<const tbuf *>(this), newshp, newstr);
      }
  };

template<size_t nd2, typename T, size_t ndim> cmav<T, nd2> subarray
  (const cmav<T, ndim> &arr, const vector<slice> &slices)
  { return arr.template subarray<nd2>(slices); }
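// Example (illustrative sketch): build_uniform() fakes a large constant array
// by pointing a zero-stride view at a single stored element, so no memory
// proportional to `shape` is allocated.
//   auto ones = cmav<float, 2>::build_uniform({{1000, 1000}}, 1.f);
//   // ones(i, j) == 1.f for all i, j, backed by one element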
template<typename T, size_t ndim> class vmav: public cmav<T, ndim>
  {
  protected:
    template<typename T2, size_t nd2> friend class vmav;

    using parent = cmav<T, ndim>;
    using tinfo = mav_info<ndim>;
    using tbuf = cmembuf<T>;
    using tinfo::shp, tinfo::str;

  public:
    using typename tinfo::shape_t;
    using typename tinfo::stride_t;
    using tbuf::raw, tbuf::data;
    using tinfo::contiguous, tinfo::size, tinfo::idx, tinfo::conformable;

  protected:
    vmav(const tinfo &info, T *d_, tbuf &buf)
      : parent(info, d_, buf) {}
    vmav(const tinfo &info, tbuf &buf)
      : parent(info, buf) {}
    vmav(const tbuf &buf, const shape_t &shp_, const stride_t &str_)
      : parent(buf, shp_, str_) {}

  public:
    vmav() {}
    vmav(T *d_, const shape_t &shp_, const stride_t &str_)
      : parent(d_, shp_, str_) {}
    vmav(T *d_, const shape_t &shp_)
      : parent(d_, shp_) {}
    vmav(const shape_t &shp_)
      : parent(shp_) {}
    vmav(const shape_t &shp_, uninitialized_dummy)
      : parent(shp_, UNINITIALIZED) {}
    vmav(vfmav<T> &inp)
      : parent(inp) {}

    void assign(vmav &other)
      { parent::assign(other); }
    void dealloc()
      {
      vmav empty;
      assign(empty);
      }
    operator vfmav<T>()
      {
      return vfmav<T>(*this, {shp.begin(), shp.end()}, {str.begin(), str.end()});
      }
    // Needed for template tricks.
    using cmav<T, ndim>::to_fmav;
    vfmav<T> to_fmav() { return operator vfmav<T>(); }

    using parent::operator();
    template<typename... Ns> T &operator()(Ns... ns)
      { return const_cast<T &>(parent::operator()(ns...)); }

    template<size_t nd2> vmav<T, nd2> subarray(const vector<slice> &slices)
      {
      auto [ninfo, nofs] = tinfo::template subdata<nd2>(slices);
      return vmav<T, nd2>(ninfo, data()+nofs, *this);
      }

    using parent::data;
    T *data()
      { return const_cast<T *>(tbuf::d); }
    // read/write access to element #i
    using parent::raw;
    template<typename I> T &raw(I i)
      { return data()[i]; }

    static vmav build_empty()
      {
      shape_t nshp;
      nshp.fill(0);
      return vmav(static_cast<T *>(nullptr), nshp);
      }
    static vmav build_noncritical(const shape_t &shape)
      {
      auto shape2 = noncritical_shape(shape, sizeof(T));
      vmav tmp(shape2);
      vector<slice> slc(ndim);
      for (size_t i=0; i<ndim; ++i) slc[i] = slice(0, shape[i]);
      return tmp.template subarray<ndim>(slc);
      }
    static vmav build_noncritical(const shape_t &shape, uninitialized_dummy)
      {
      if (ndim<=1) return vmav(shape, UNINITIALIZED);
      auto shape2 = noncritical_shape(shape, sizeof(T));
      vmav tmp(shape2, UNINITIALIZED);
      vector<slice> slc(ndim);
      for (size_t i=0; i<ndim; ++i) slc[i] = slice(0, shape[i]);
      return tmp.template subarray<ndim>(slc);
      }
    vmav transpose()
      {
      return vmav(static_cast<const tinfo *>(this)->transpose(),
                  *static_cast<tbuf *>(this));
      }
    vmav<T, ndim+1> prepend_1()
      {
      return vmav<T, ndim+1>(static_cast<const tinfo *>(this)->prepend_1(),
                             *static_cast<tbuf *>(this));
      }
    template<size_t nd2> vmav<T, nd2> reinterpret
      (const typename vmav<T, nd2>::shape_t &newshp,
       const typename vmav<T, nd2>::stride_t &newstr)
      {
      return vmav<T, nd2>(*static_cast<tbuf *>(this), newshp, newstr);
      }
  };

template<size_t nd2, typename T, size_t ndim> vmav<T, nd2> subarray
  (vmav<T, ndim> &arr, const vector<slice> &slices)
  { return arr.template subarray<nd2>(slices); }

// various operations involving fmav objects of the same shape -- experimental

DUCC0_NOINLINE tuple<fmav_info::shape_t, vector<fmav_info::stride_t>, size_t, size_t>
  multiprep(const vector<fmav_info> &info, const vector<size_t> &tsizes);
DUCC0_NOINLINE tuple<fmav_info::shape_t, vector<fmav_info::stride_t>>
  multiprep(const vector<fmav_info> &info);

template<typename Ttuple> constexpr inline size_t tuplelike_size()
  { return tuple_size_v<remove_reference_t<Ttuple>>; }

template<typename Func, typename Ttuple, size_t... Is>
inline void call_with_tuple_impl(Func &&func, const Ttuple& tuple,
  index_sequence<Is...>)
  { func(std::forward<typename tuple_element<Is, Ttuple>::type>(get<Is>(tuple))...); }
template<typename Func, typename Ttuple> inline void call_with_tuple
  (Func &&func, Ttuple &&tuple)
  {
  call_with_tuple_impl(std::forward<Func>(func), tuple,
                       make_index_sequence<tuplelike_size<Ttuple>()>());
  }
template<typename Func, typename Ttuple, size_t... Is>
inline void call_with_tuple2_impl(Func &&func, const Ttuple& tuple,
  index_sequence<Is...>)
  { func(get<Is>(tuple)...); }
template<typename Func, typename Ttuple> inline void call_with_tuple2
  (Func &&func, Ttuple &&tuple)
  {
  call_with_tuple2_impl(std::forward<Func>(func), tuple,
                        make_index_sequence<tuplelike_size<Ttuple>()>());
  }

template<typename...Ts, typename Func, size_t... Is>
inline auto tuple_transform_impl(tuple<Ts...> const& inputs, Func &&func,
  index_sequence<Is...>)
  { return tuple<invoke_result_t<Func, Ts>...>{func(get<Is>(inputs))...}; }
template<typename... Ts, typename Func>
inline auto tuple_transform(tuple<Ts...> const& inputs, Func &&func)
  {
  return tuple_transform_impl(inputs, std::forward<Func>(func),
                              make_index_sequence<sizeof...(Ts)>{});
  }
template<typename...Ts, typename Func, size_t... Is>
inline void tuple_for_each_impl(tuple<Ts...> &tpl, Func &&func,
  index_sequence<Is...>)
  { (func(get<Is>(tpl)), ...); }
template<typename... Ts, typename Func>
inline void tuple_for_each(tuple<Ts...> &tpl, Func &&func)
  {
  tuple_for_each_impl(tpl, std::forward<Func>(func),
                      make_index_sequence<sizeof...(Ts)>{});
  }
template<typename...Ts, typename Func, size_t... Is>
inline void tuple_for_each_impl(const tuple<Ts...> &tpl, Func &&func,
  index_sequence<Is...>)
  { (func(get<Is>(tpl)), ...); }
template<typename... Ts, typename Func>
inline void tuple_for_each(const tuple<Ts...> &tpl, Func &&func)
  {
  tuple_for_each_impl(tpl, std::forward<Func>(func),
                      make_index_sequence<sizeof...(Ts)>{});
  }
template<typename...Ts, typename Func, size_t... Is>
inline auto tuple_transform_idx_impl(const tuple<Ts...> &inputs,
  Func &&func, index_sequence<Is...>)
  {
  return tuple<invoke_result_t<Func, Ts, size_t>...>
    {func(get<Is>(inputs), Is)...};
  }
template<typename... Ts, typename Func>
inline auto tuple_transform_idx(const tuple<Ts...> &inputs, Func &&func)
  {
  return tuple_transform_idx_impl(inputs, std::forward<Func>(func),
                                  make_index_sequence<sizeof...(Ts)>{});
  }
template<typename...Ts, typename Func, size_t... Is>
inline void tuple_for_each_idx_impl(tuple<Ts...> &tpl, Func &&func,
  index_sequence<Is...>)
  { (func(get<Is>(tpl), Is), ...); }
template<typename... Ts, typename Func>
inline void tuple_for_each_idx(tuple<Ts...> &tpl, Func &&func)
  {
  tuple_for_each_idx_impl(tpl, std::forward<Func>(func),
                          make_index_sequence<sizeof...(Ts)>{});
  }
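// Example (illustrative sketch): tuple_transform maps a callable over a
// tuple; the apply helpers below use it to turn a tuple of arrays into a
// tuple of raw data pointers that are then advanced in lockstep, e.g.
//   auto ptrs = tuple_transform(forward_as_tuple(a, b),
//                               [](auto &&arg) { return arg.data(); });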
template<typename Ttuple> inline auto to_ref (const Ttuple &tuple)
  {
  return tuple_transform(tuple,[](auto &&ptr) ->
    typename std::add_lvalue_reference_t<decltype(*ptr)> { return *ptr; });
  }

template<typename Ttuple> inline Ttuple update_pointers (const Ttuple &ptrs,
  const vector<vector<ptrdiff_t>> &str, size_t idim, size_t i)
  {
  return tuple_transform_idx(ptrs, [i,idim,&str](auto &&ptr, size_t idx)
    { return ptr + i*str[idx][idim]; });
  }

template<typename Ttuple> inline Ttuple update_pointers_contiguous
  (const Ttuple &ptrs, size_t i)
  { return tuple_transform(ptrs, [i](auto &&ptr) { return ptr+i; }); }
template<typename Ttuple> inline void advance_contiguous (Ttuple &ptrs)
  { tuple_for_each(ptrs, [](auto &&ptr) { ++ptr; }); }
template<typename Ttuple> inline void advance (Ttuple &ptrs,
  const vector<vector<ptrdiff_t>> &str, size_t idim)
  {
  tuple_for_each_idx(ptrs, [idim,&str](auto &&ptr, size_t idx)
    { ptr += str[idx][idim]; });
  }
template<typename Ttuple> inline void advance_by_n (Ttuple &ptrs,
  const vector<vector<ptrdiff_t>> &str, size_t idim, size_t n)
  {
  tuple_for_each_idx(ptrs, [idim,n,&str](auto &&ptr, size_t idx)
    { ptr += n*str[idx][idim]; });
  }

template<typename Ttuple, typename Func>
  DUCC0_NOINLINE void applyHelper_block(size_t idim, const vector<size_t> &shp,
    const vector<vector<ptrdiff_t>> &str, size_t bsi, size_t bsj,
    const Ttuple &ptrs, Func &&func)
  {
  auto leni=shp[idim], lenj=shp[idim+1];
  size_t nbi = (leni+bsi-1)/bsi;
  size_t nbj = (lenj+bsj-1)/bsj;
  for (size_t bi=0; bi<nbi; ++bi)
    for (size_t bj=0; bj<nbj; ++bj)
      {
      // move to the first element of the current block
      auto locptrs = update_pointers(ptrs, str, idim, bi*bsi);
      locptrs = update_pointers(locptrs, str, idim+1, bj*bsj);
      for (size_t i=bi*bsi; i<min(leni,(bi+1)*bsi); ++i, advance(locptrs, str, idim))
        {
        auto locptrs2 = locptrs;
        for (size_t j=bj*bsj; j<min(lenj,(bj+1)*bsj); ++j, advance(locptrs2, str, idim+1))
          call_with_tuple(func, to_ref(locptrs2));
        }
      }
  }

template<typename Ttuple, typename Func>
  DUCC0_NOINLINE void applyHelper(size_t idim, const vector<size_t> &shp,
    const vector<vector<ptrdiff_t>> &str, size_t block0, size_t block1,
    const Ttuple &ptrs, Func &&func, bool last_contiguous)
  {
  auto len = shp[idim];
  if ((idim+2==shp.size()) && (block0!=0))  // we should do blocking
    applyHelper_block(idim, shp, str, block0, block1, ptrs, func);
  else if (idim+1<shp.size())  // not at last dimension yet
    {
    auto locptrs(ptrs);
    for (size_t i=0; i<len; ++i, advance(locptrs, str, idim))
      applyHelper(idim+1, shp, str, block0, block1, locptrs, func,
                  last_contiguous);
    }
  else  // last dimension
    {
    auto locptrs(ptrs);
    if (last_contiguous)
      for (size_t i=0; i<len; ++i, advance_contiguous(locptrs))
        call_with_tuple(func, to_ref(locptrs));
    else
      for (size_t i=0; i<len; ++i, advance(locptrs, str, idim))
        call_with_tuple(func, to_ref(locptrs));
    }
  }

template<typename Ttuple, typename Func>
  inline void applyHelper(const vector<size_t> &shp,
    const vector<vector<ptrdiff_t>> &str, size_t block0, size_t block1,
    const Ttuple &ptrs, Func &&func, size_t nthreads, bool last_contiguous)
  {
  if (shp.size()==0)
    call_with_tuple(std::forward<Func>(func), to_ref(ptrs));
  else if (nthreads==1)
    applyHelper(0, shp, str, block0, block1, ptrs, std::forward<Func>(func),
                last_contiguous);
  else
    execParallel(shp[0], nthreads, [&](size_t lo, size_t hi)
      {
      auto locptrs = update_pointers(ptrs, str, 0, lo);
      auto locshp(shp);
      locshp[0] = hi-lo;
      applyHelper(0, locshp, str, block0, block1, locptrs, func,
                  last_contiguous);
      });
  }

template<typename Func, typename... Targs>
  void mav_apply(Func &&func, int nthreads, Targs... args)
  {
  vector<fmav_info> infos;
  (infos.push_back(args), ...);
  vector<size_t> tsizes;
  (tsizes.push_back(sizeof(args.data()[0])), ...);
  auto [shp, str, block0, block1] = multiprep(infos, tsizes);
  bool last_contiguous = true;
  if (shp.size()>0)
    for (const auto &s:str)
      last_contiguous &= (s.back()==1);
  auto ptrs = tuple_transform(forward_as_tuple(args...),
    [](auto &&arg){return arg.data();});
  applyHelper(shp, str, block0, block1, ptrs, std::forward<Func>(func),
              nthreads, last_contiguous);
  }
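// Example (illustrative usage sketch, hypothetical variables): element-wise
// update of conformable arrays on 4 threads; multiprep() merges and reorders
// the dimensions for cache-friendly traversal before the loops run.
//   vmav<double, 2> a({100, 100}), b({100, 100});
//   mav_apply([](double &x, const double &y) { x = 2*y; }, 4, a, b);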
DUCC0_NOINLINE tuple<fmav_info::shape_t, vector<fmav_info::stride_t>>
  multiprep_noopt(const vector<fmav_info> &info);

template<typename Func, typename Arg, typename Ttuple, size_t... Is>
inline void call_with_tuple_arg_impl(Func &&func, Arg &&arg,
  const Ttuple& tuple, index_sequence<Is...>)
  { func(std::forward<typename tuple_element<Is, Ttuple>::type>(get<Is>(tuple))..., arg); }
template<typename Func, typename Arg, typename Ttuple>
  inline void call_with_tuple_arg (Func &&func, Arg &&arg, Ttuple &&tuple)
  {
  call_with_tuple_arg_impl(std::forward<Func>(func), arg, tuple,
                           make_index_sequence<tuplelike_size<Ttuple>()>());
  }

template<typename Ttuple, typename Func>
  DUCC0_NOINLINE void applyHelper_with_index(size_t idim,
    const vector<size_t> &shp, const vector<vector<ptrdiff_t>> &str,
    const Ttuple &ptrs, Func &&func, vector<size_t> &index)
  {
  auto len = shp[idim];
  if (idim+1<shp.size())  // not at last dimension yet
    {
    auto locptrs(ptrs);
    auto idxbak = index[idim];
    for (size_t i=0; i<len; ++i, advance(locptrs, str, idim), ++index[idim])
      applyHelper_with_index(idim+1, shp, str, locptrs, func, index);
    index[idim] = idxbak;
    }
  else  // last dimension
    {
    auto locptrs(ptrs);
    auto idxbak = index[idim];
    for (size_t i=0; i<len; ++i, advance(locptrs, str, idim), ++index[idim])
      call_with_tuple_arg(func, const_cast<const vector<size_t> &>(index),
                          to_ref(locptrs));
    index[idim] = idxbak;
    }
  }

template<typename Ttuple, typename Func>
  inline void applyHelper_with_index(const vector<size_t> &shp,
    const vector<vector<ptrdiff_t>> &str, const Ttuple &ptrs, Func &&func,
    size_t nthreads, vector<size_t> &index)
  {
  if (shp.size()==0)
    call_with_tuple_arg(std::forward<Func>(func),
      const_cast<const vector<size_t> &>(index), to_ref(ptrs));
  else if (nthreads==1)
    applyHelper_with_index(0, shp, str, ptrs, std::forward<Func>(func), index);
  else
    execParallel(shp[0], nthreads, [&](size_t lo, size_t hi)
      {
      auto locptrs = update_pointers(ptrs, str, 0, lo);
      auto locshp(shp);
      locshp[0] = hi-lo;
      auto locidx(index);
      locidx[0]=lo;
      applyHelper_with_index(0, locshp, str, locptrs, func, locidx);
      });
  }

template<typename Func, typename... Targs>
  void mav_apply_with_index(Func &&func, int nthreads, Targs... args)
  {
  vector<fmav_info> infos;
  (infos.push_back(args), ...);
  auto [shp, str] = multiprep_noopt(infos);
  vector<size_t> index(shp.size(), 0);
  auto ptrs = tuple_transform(forward_as_tuple(args...),
    [](auto &&arg){return arg.data();});
  applyHelper_with_index(shp, str, ptrs, std::forward<Func>(func), nthreads,
                         index);
  }
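// Example (illustrative sketch): mav_apply_with_index passes the current
// multi-dimensional index to the callable as an extra final argument.
//   vmav<double, 2> a({10, 10});
//   mav_apply_with_index([](double &x, const vector<size_t> &idx)
//     { x = double(idx[0] + idx[1]); }, 1, a);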
template<typename T, size_t ndim> class mavref
  {
  private:
    const mav_info<ndim> &info;
    T *d;

  public:
    using shape_t = typename mav_info<ndim>::shape_t;
    using stride_t = typename mav_info<ndim>::stride_t;
    mavref(const mav_info<ndim> &info_, T *d_) : info(info_), d(d_) {}
    template<typename... Ns> T &operator()(Ns... ns) const
      { return d[info.idx(ns...)]; }
    /// Returns the total number of entries in the object.
    size_t size() const { return info.size(); }
    /// Returns the shape of the object.
    const shape_t &shape() const { return info.shape(); }
    /// Returns the length along dimension \a i.
    size_t shape(size_t i) const { return info.shape(i); }
    /// Returns the strides of the object.
    const stride_t &stride() const { return info.stride(); }
    /// Returns the stride along dimension \a i.
    const ptrdiff_t &stride(size_t i) const { return info.stride(i); }
    /// Returns true iff the last dimension has stride 1.
    /** Typically used for optimization purposes. */
    bool last_contiguous() const
      { return info.last_contiguous(); }
    /** Returns true iff the object is C-contiguous, i.e. if the stride of the
     *  last dimension is 1, the stride for the next-to-last dimension is the
     *  shape of the last dimension etc. */
    bool contiguous() const
      { return info.contiguous(); }
    /// Returns true iff this->shape and \a other.shape match.
    bool conformable(const mavref &other) const
      { return shape()==other.shape(); }
  };

template<typename T, size_t ndim>
  mavref<T, ndim> make_mavref(const mav_info<ndim> &info_, T *d_)
  { return mavref<T, ndim>(info_, d_); }

template<typename...T1, typename...T2, typename Func, size_t... Is>
inline auto tuple_transform2_impl(const tuple<T1...> &i1, const tuple<T2...> &i2,
  Func &&func, index_sequence<Is...>)
  { return tuple<invoke_result_t<Func, T1, T2>...>{func(get<Is>(i1),get<Is>(i2))...}; }
template<typename... T1, typename... T2, typename Func>
inline auto tuple_transform2(const tuple<T1...> &i1, const tuple<T2...> &i2,
  Func &&func)
  {
  return tuple_transform2_impl(i1, i2, std::forward<Func>(func),
                               make_index_sequence<sizeof...(T1)>{});
  }

template<typename Tptrs, typename Tinfos>
  auto make_mavrefs(const Tptrs &ptrs, const Tinfos &infos)
  {
  return tuple_transform2(ptrs, infos, [](auto &&ptr, auto &&info)
    { return make_mavref(info, ptr); });
  }

template<size_t ndim> auto make_infos(const fmav_info &info)
  {
  if constexpr(ndim>0)
    MR_assert(ndim<=info.ndim(), "bad dimensionality");
  auto iterdim = info.ndim()-ndim;
  fmav_info fout({info.shape().begin(),info.shape().begin()+iterdim},
                 {info.stride().begin(),info.stride().begin()+iterdim});

  typename mav_info<ndim>::shape_t shp;
  typename mav_info<ndim>::stride_t str;
  if constexpr (ndim>0)  // just to silence compiler warnings
    for (size_t i=0; i<ndim; ++i)
      {
      shp[i] = info.shape(iterdim+i);
      str[i] = info.stride(iterdim+i);
      }
  mav_info<ndim> iout(shp, str);
  return make_tuple(fout, iout);
  }

template<typename Tptrs, typename Tinfos, typename Func>
  DUCC0_NOINLINE void flexible_mav_applyHelper(size_t idim,
    const vector<size_t> &shp, const vector<vector<ptrdiff_t>> &str,
    const Tptrs &ptrs, const Tinfos &infos, Func &&func)
  {
  auto len = shp[idim];
  auto locptrs(ptrs);
  if (idim+1<shp.size())
    for (size_t i=0; i<len; ++i, advance(locptrs, str, idim))
      flexible_mav_applyHelper(idim+1, shp, str, locptrs, infos, func);
  else
    for (size_t i=0; i<len; ++i, advance(locptrs, str, idim))
      call_with_tuple2(func, make_mavrefs(locptrs, infos));
  }

template<typename Tptrs, typename Tinfos, typename Func>
  DUCC0_NOINLINE void flexible_mav_applyHelper(const vector<size_t> &shp,
    const vector<vector<ptrdiff_t>> &str, const Tptrs &ptrs,
    const Tinfos &infos, Func &&func, size_t nthreads)
  {
  if (shp.size()==0)
    call_with_tuple2(func, make_mavrefs(ptrs, infos));
  else if (nthreads==1)
    flexible_mav_applyHelper(0, shp, str, ptrs, infos, std::forward<Func>(func));
  else
    execParallel(shp[0], nthreads, [&](size_t lo, size_t hi)
      {
      auto locptrs = update_pointers(ptrs, str, 0, lo);
      auto locshp(shp);
      locshp[0] = hi-lo;
      flexible_mav_applyHelper(0, locshp, str, locptrs, infos, func);
      });
  }

template<size_t ndim> struct Xdim { static constexpr size_t dim=ndim; };

template<typename Ttuple, typename Tdim, typename Func>
  void xflexible_mav_apply(const Ttuple &tuple, const Tdim &dim, Func &&func,
    size_t nthreads)
  {
  auto fullinfos = tuple_transform2(tuple, dim,
    [](const auto &arg, const auto &dim)
    { return make_infos<remove_reference_t<decltype(dim)>::dim>(fmav_info(arg)); });
  vector<fmav_info> iter_infos;
  tuple_for_each(fullinfos,
    [&iter_infos](const auto &entry){iter_infos.push_back(get<0>(entry));});
  auto [shp, str] = multiprep(iter_infos);
  auto infos2 = tuple_transform(fullinfos,
    [](const auto &arg) { return get<1>(arg); });
  auto ptrs = tuple_transform(tuple, [](auto &&arg){return arg.data();});
  flexible_mav_applyHelper(shp, str, ptrs, infos2, std::forward<Func>(func),
                           nthreads);
  }

template<size_t nd0, typename T0, typename Func>
  void flexible_mav_apply(Func &&func, size_t nthreads, T0 &&m0)
  {
  xflexible_mav_apply(forward_as_tuple(m0),
                      forward_as_tuple(Xdim<nd0>()),
                      std::forward<Func>(func), nthreads);
  }

template<size_t nd0, size_t nd1, typename T0, typename T1, typename Func>
  void flexible_mav_apply(Func &&func, size_t nthreads, T0 &&m0, T1 &&m1)
  {
  xflexible_mav_apply(forward_as_tuple(m0, m1),
                      forward_as_tuple(Xdim<nd0>(), Xdim<nd1>()),
                      std::forward<Func>(func), nthreads);
  }

template<size_t nd0, size_t nd1, size_t nd2,
         typename T0, typename T1, typename T2, typename Func>
  void flexible_mav_apply(Func &&func, size_t nthreads, T0 &&m0, T1 &&m1,
    T2 &&m2)
  {
  xflexible_mav_apply(forward_as_tuple(m0, m1, m2),
                      forward_as_tuple(Xdim<nd0>(), Xdim<nd1>(), Xdim<nd2>()),
                      std::forward<Func>(func), nthreads);
  }
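// Example (illustrative sketch, hypothetical variables): flexible_mav_apply
// iterates over the leading dimensions and hands the callable mavref views
// of the trailing ones; here every call sees one 1D row of `a` together with
// the matching 0D entry of `b`.
//   vmav<double, 2> a({8, 16});
//   vmav<double, 1> b({8});
//   flexible_mav_apply<1, 0>([](const auto &row, const auto &scal)
//     { scal() = row(0); }, 1, a, b);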
}

using detail_mav::UNINITIALIZED;
using detail_mav::fmav_info;
using detail_mav::mav_info;
using detail_mav::slice;
using detail_mav::MAXIDX;
using detail_mav::cfmav;
using detail_mav::vfmav;
using detail_mav::cmav;
using detail_mav::vmav;
using detail_mav::subarray;
using detail_mav::mav_apply;
using detail_mav::mav_apply_with_index;
using detail_mav::flexible_mav_apply;

}

#endif