/* * This code is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this code; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* Copyright (C) 2020-2022 Max-Planck-Society Author: Martin Reinecke */ #ifndef DUCC0_GRIDDING_KERNEL_H #define DUCC0_GRIDDING_KERNEL_H #include #include #include #include #include #include #include #include #include #include "ducc0/infra/useful_macros.h" #include "ducc0/infra/error_handling.h" #include "ducc0/infra/threading.h" #include "ducc0/math/gl_integrator.h" #include "ducc0/math/constants.h" namespace ducc0 { namespace detail_gridding_kernel { using namespace std; vector getCoeffs(size_t W, size_t D, const function &func); /*! A GriddingKernel is considered to be a symmetric real-valued function defined on the interval [-1; 1]. This range is subdivided into W equal-sized parts. */ class GriddingKernel { public: virtual ~GriddingKernel() {} virtual size_t support() const = 0; /* Computes the correction function at a given coordinate. Useful coordinates lie in the range [0; 0.5]. */ virtual double corfunc(double x) const = 0; /* Computes the correction function values at a coordinates [0, dx, 2*dx, ..., (n-1)*dx] */ virtual vector corfunc(size_t n, double dx, int nthreads=1) const = 0; /* Returns the kernel's value at x */ virtual double eval(double x) const = 0; }; class KernelCorrection { protected: vector x, wgtpsi; size_t supp; public: /* Compute correction factors for gridding kernel This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */ template [[gnu::always_inline]] T corfunc(T v) const { T tmp=0; for (size_t i=0; i corfunc(size_t n, double dx, int nthreads=1) const { vector res(n); // The commented lines would add vectorization support, // but this doesn't appear beneficial. // constexpr size_t vlen = native_simd::size(); // native_simd itimesdx; // for (size_t i=0; i &X() const { return x; } const vector &Wgtpsi() const { return wgtpsi; } size_t Supp() const { return supp; } }; class GLFullCorrection: public KernelCorrection { public: GLFullCorrection(size_t W, const function &func) { supp = W; size_t p = size_t(1.5*W)+2; GL_Integrator integ(2*p); x = integ.coordsSymmetric(); wgtpsi = integ.weightsSymmetric(); for (size_t i=0; i coeff; KernelCorrection corr; public: PolynomialKernel(size_t W_, size_t D_, const function &func, const KernelCorrection &corr_) : W(W_), D(D_), coeff(getCoeffs(W_, D_, func)), corr(corr_) {} virtual size_t support() const { return W; } virtual double corfunc(double x) const { return corr.corfunc(x); } /* Computes the correction function values at a coordinates [0, dx, 2*dx, ..., (n-1)*dx] */ virtual vector corfunc(size_t n, double dx, int nthreads=1) const { return corr.corfunc(n, dx, nthreads); } const vector &Coeff() const { return coeff; } size_t degree() const { return D; } const KernelCorrection &Corr() const { return corr; } double eval(double x) const { if (abs(x)>=1) return 0.; double xrel = W*0.5*(x+1.); size_t nth = size_t(xrel); nth = min(nth, W-1); double locx = ((xrel-nth)-0.5)*2; // should be in [-1; 1] double res = coeff[nth]; for (size_t i=1; i<=D; ++i) res = res*locx+coeff[i*W+nth]; return res; } }; /*! This class is initialized with a \a PolynomialKernel object and provides low level methods for extremely fast kernel evaluations. */ template class TemplateKernel { private: static constexpr auto D=W+3+(W&1); using T = typename Tsimd::value_type; using Tvl = typename Tsimd::Tv; static constexpr auto vlen = Tsimd::size(); static constexpr auto nvec = (W+vlen-1)/vlen; std::array coeff; const T *scoeff; static constexpr auto sstride = nvec*vlen; void transferCoeffs(const vector &input, size_t d_input) { auto ofs = D-d_input; if (ofs>0) for (size_t i=0; i(&coeff[0])) { MR_assert(W==krn.support(), "support mismatch"); MR_assert(D>=krn.degree(), "degree mismatch"); transferCoeffs(krn.Coeff(), krn.degree()); } constexpr size_t support() const { return W; } double eval(double x) const { if (abs(x)>=1) return 0.; double xrel = W*0.5*(x+1.); size_t nth = size_t(xrel); nth = min(nth, W-1); double locx = ((xrel-nth)-0.5)*2; // should be in [-1; 1] double res = scoeff[nth]; for (size_t i=1; i<=D; ++i) res = res*locx+scoeff[i*sstride+nth]; return res; } [[gnu::always_inline]] void eval2s(T x, T y, T z, size_t nth, Tsimd * DUCC0_RESTRICT res) const { z = (z-nth)*2+(W-1); T x2=x*x, y2=y*y, z2=z*z; if constexpr (nvec==1) { Tvl tvalx = coeff[0], tvaly = coeff[0], tvalz = coeff[0]; Tvl tvalx2 = coeff[1], tvaly2 = coeff[1], tvalz2 = coeff[1]; for (size_t j=2; j selectKernel(size_t idx); const KernelParams &getKernel(size_t idx); template constexpr inline size_t Wmax() { return is_same::value ? 8 : 16; } extern const vector KernelDB; /*! Returns the 2-parameter ES kernel for the given oversampling factor, * dimensionality, and error that has the smallest support. */ template auto selectKernel(double ofactor, size_t ndim, double epsilon) { constexpr bool singleprec = is_same::value; size_t Wmin = Wmax(); size_t idx = KernelDB.size(); for (size_t i=0; i auto getAvailableKernels(double epsilon, size_t ndim, double ofactor_min=1.1, double ofactor_max=2.6) { constexpr bool singleprec = is_same::value; vector ofc(20, ofactor_max); vector idx(20, KernelDB.size()); size_t Wlim = Wmax(); for (size_t i=0; i=ofactor_min)) { ofc[W] = ofactor; idx[W] = i; } } vector res; for (auto v: idx) if (v