/* ----------------------------------------------------------------------------- * Programmer(s): Daniel McGreer, Cody Balos @ LLNL * ----------------------------------------------------------------------------- * SUNDIALS Copyright Start * Copyright (c) 2002-2024, Lawrence Livermore National Security * and Southern Methodist University. * All rights reserved. * * See the top-level LICENSE and NOTICE files for details. * * SPDX-License-Identifier: BSD-3-Clause * SUNDIALS Copyright End * ----------------------------------------------------------------------------- * This is the header file an NVector implementation using Kokkos. * ---------------------------------------------------------------------------*/ #ifndef _NVECTOR_KOKKOS_HPP #define _NVECTOR_KOKKOS_HPP #include #include #include namespace sundials { namespace kokkos { // Forward declaration template class Vector; // Get the Kokkos vector wrapped by an N_Vector template inline VectorType* GetVec(N_Vector v) { return static_cast(v->content); } // ============================================================================= // Everything in the implementation (impl) namespace is private and should not // be referred to directly in user code. // ============================================================================= namespace impl { /* N_Vector API ops */ inline N_Vector_ID N_VGetVectorID_Kokkos(N_Vector v) { return SUNDIALS_NVEC_KOKKOS; } template sunindextype N_VGetLength_Kokkos(N_Vector v) { auto vec{GetVec(v)}; return static_cast(vec->Length()); } template sunrealtype* N_VGetArrayPointer_Kokkos(N_Vector v) { auto vec{GetVec(v)}; return vec->HostView().data(); } template sunrealtype* N_VGetDeviceArrayPointer_Kokkos(N_Vector v) { auto vec{GetVec(v)}; return vec->View().data(); } template N_Vector N_VClone_Kokkos(N_Vector w) { auto vec{GetVec(w)}; auto new_vec{new VectorType(*vec)}; return new_vec->Convert(); } template void N_VDestroy_Kokkos(N_Vector v) { auto vec{GetVec(v)}; delete vec; return; } template void N_VPrint_Kokkos(N_Vector v) { auto vec{GetVec(v)}; } template void N_VPrintFile_Kokkos(N_Vector v, FILE* outfile) { auto vec{GetVec(v)}; } /* OPTIONAL local reduction kernels (no parallel communication) */ template sunrealtype N_VWSqrSumLocal_Kokkos(N_Vector x, N_Vector w) { auto xvec{GetVec(x)}; auto xdata{xvec->View()}; auto wvec{GetVec(w)}; auto wdata{wvec->View()}; using size_type = typename VectorType::size_type; sunrealtype gpu_result{0.0}; Kokkos::parallel_reduce( "N_VWSqrSumLocal", typename VectorType::range_policy(0, xvec->Length()), KOKKOS_LAMBDA(const size_type i, sunrealtype& update) { update += (xdata(i) * wdata(i) * xdata(i) * wdata(i)); }, gpu_result); return gpu_result; } template sunrealtype N_VWSqrSumMaskLocal_Kokkos(N_Vector x, N_Vector w, N_Vector id) { auto xvec{GetVec(x)}; auto xdata{xvec->View()}; auto wvec{GetVec(w)}; auto wdata{wvec->View()}; auto idvec{GetVec(id)}; auto iddata{idvec->View()}; using size_type = typename VectorType::size_type; sunrealtype gpu_result{0.0}; Kokkos::parallel_reduce( "N_VWSqrSumMaskLocal", typename VectorType::range_policy(0, xvec->Length()), KOKKOS_LAMBDA(const size_type i, sunrealtype& update) { if (iddata(i) > sunrealtype{0.0}) update += (xdata(i) * wdata(i) * xdata(i) * wdata(i)); }, gpu_result); return gpu_result; } /* standard vector operations */ template void N_VAbs_Kokkos(N_Vector x, N_Vector z) { auto xvec{GetVec(x)}; auto xdata{xvec->View()}; auto zvec{GetVec(z)}; auto zdata{zvec->View()}; using size_type = typename VectorType::size_type; Kokkos::parallel_for( "N_VAbs", typename VectorType::range_policy(0, xvec->Length()), KOKKOS_LAMBDA(const size_type i) { zdata(i) = std::abs(xdata(i)); }); } template void N_VAddConst_Kokkos(N_Vector x, sunrealtype b, N_Vector z) { auto xvec{GetVec(x)}; auto xdata{xvec->View()}; auto zvec{GetVec(z)}; auto zdata{zvec->View()}; using size_type = typename VectorType::size_type; Kokkos::parallel_for( "N_VAddConst", typename VectorType::range_policy(0, xvec->Length()), KOKKOS_LAMBDA(const size_type i) { zdata(i) = xdata(i) + b; }); } template void N_VCompare_Kokkos(sunrealtype c, N_Vector x, N_Vector z) { auto xvec{GetVec(x)}; auto xdata{xvec->View()}; auto zvec{GetVec(z)}; auto zdata{zvec->View()}; using size_type = typename VectorType::size_type; Kokkos::parallel_for( "N_VCompare", typename VectorType::range_policy(0, xvec->Length()), KOKKOS_LAMBDA(const size_type i) { zdata(i) = std::abs(xdata(i)) >= c ? sunrealtype{1.0} : sunrealtype{0.0}; }); } template void N_VConst_Kokkos(sunrealtype c, N_Vector z) { auto zvec{GetVec(z)}; auto zdata{zvec->View()}; using size_type = typename VectorType::size_type; Kokkos::parallel_for( "N_VConst", typename VectorType::range_policy(0, zvec->Length()), KOKKOS_LAMBDA(const size_type i) { zdata(i) = c; }); } template sunbooleantype N_VConstrMask_Kokkos(N_Vector c, N_Vector x, N_Vector m) { auto cvec{GetVec(c)}; auto cdata{cvec->View()}; auto xvec{GetVec(x)}; auto xdata{xvec->View()}; auto mvec{GetVec(m)}; auto mdata{mvec->View()}; using size_type = typename VectorType::size_type; sunrealtype sum{0.0}; Kokkos::parallel_reduce( "N_VConstrMask", typename VectorType::range_policy(0, mvec->Length()), KOKKOS_LAMBDA(const size_type i, sunrealtype& update) { bool test = (std::abs(cdata(i)) > sunrealtype{1.5} && cdata(i) * xdata(i) <= sunrealtype{0.0}) || (std::abs(cdata(i)) > sunrealtype{0.5} && cdata(i) * xdata(i) < sunrealtype{0.0}); mdata(i) = test ? sunrealtype{1.0} : sunrealtype{0.0}; update += mdata(i); }, sum); return (sum < sunrealtype{0.5}); } template void N_VDiv_Kokkos(N_Vector x, N_Vector y, N_Vector z) { auto xvec{GetVec(x)}; auto yvec{GetVec(y)}; auto zvec{GetVec(z)}; auto xdata{xvec->View()}; auto ydata{yvec->View()}; auto zdata{zvec->View()}; using size_type = typename VectorType::size_type; Kokkos::parallel_for( "N_VDiv", typename VectorType::range_policy(0, xvec->Length()), KOKKOS_LAMBDA(const size_type i) { zdata(i) = xdata(i) / ydata(i); }); } template sunrealtype N_VDotProd_Kokkos(N_Vector x, N_Vector y) { auto xvec{GetVec(x)}; auto yvec{GetVec(y)}; auto xdata{xvec->View()}; auto ydata{yvec->View()}; using size_type = typename VectorType::size_type; sunrealtype gpu_result{0.0}; Kokkos::parallel_reduce( "N_VDotProd", typename VectorType::range_policy(0, xvec->Length()), KOKKOS_LAMBDA(const size_type i, sunrealtype& update) { update += xdata(i) * ydata(i); }, gpu_result); return gpu_result; } template void N_VInv_Kokkos(N_Vector x, N_Vector z) { auto xvec{GetVec(x)}; auto xdata{xvec->View()}; auto zvec{GetVec(z)}; auto zdata{zvec->View()}; using size_type = typename VectorType::size_type; Kokkos::parallel_for( "N_VInv", typename VectorType::range_policy(0, xvec->Length()), KOKKOS_LAMBDA(const size_type i) { zdata(i) = sunrealtype{1.0} / xdata(i); }); } template sunbooleantype N_VInvTest_Kokkos(N_Vector x, N_Vector z) { auto xvec{GetVec(x)}; auto xdata{xvec->View()}; auto zvec{GetVec(z)}; auto zdata{zvec->View()}; using size_type = typename VectorType::size_type; sunrealtype minimum{0.0}; Kokkos::parallel_reduce( "N_VInvTest", typename VectorType::range_policy(0, xvec->Length()), KOKKOS_LAMBDA(const size_type i, sunrealtype& update) { if (xdata(i) == sunrealtype{0.0}) { update += sunrealtype{1.0}; } else { zdata(i) = sunrealtype{1.0} / xdata(i); } }, minimum); return (minimum < sunrealtype{0.5}); } template sunrealtype N_VL1Norm_Kokkos(N_Vector x) { auto xvec{GetVec(x)}; auto xdata{xvec->View()}; using size_type = typename VectorType::size_type; sunrealtype gpu_result{0.0}; Kokkos::parallel_reduce( "N_VL1Norm", typename VectorType::range_policy(0, xvec->Length()), KOKKOS_LAMBDA(const size_type i, sunrealtype& update) { update += (std::abs(xdata(i))); }, gpu_result); return gpu_result; } template void N_VLinearSum_Kokkos(sunrealtype a, N_Vector x, sunrealtype b, N_Vector y, N_Vector z) { auto xvec{GetVec(x)}; auto yvec{GetVec(y)}; auto zvec{GetVec(z)}; auto xdata{xvec->View()}; auto ydata{yvec->View()}; auto zdata{zvec->View()}; using size_type = typename VectorType::size_type; Kokkos::parallel_for( "N_VLinearSum", typename VectorType::range_policy(0, xvec->Length()), KOKKOS_LAMBDA(const size_type i) { zdata(i) = a * xdata(i) + b * ydata(i); }); } template sunrealtype N_VMaxNorm_Kokkos(N_Vector x) { auto xvec{GetVec(x)}; auto xdata{xvec->View()}; using size_type = typename VectorType::size_type; sunrealtype gpu_result{0.0}; Kokkos::parallel_reduce( "N_VMaxNorm", typename VectorType::range_policy(0, xvec->Length()), KOKKOS_LAMBDA(const size_type i, sunrealtype& update) { if (std::abs(xdata(i)) > update) update = std::abs(xdata(i)); }, Kokkos::Max(gpu_result)); return gpu_result; } template sunrealtype N_VMin_Kokkos(N_Vector x) { auto xvec{GetVec(x)}; auto xdata{xvec->View()}; using size_type = typename VectorType::size_type; sunrealtype gpu_result{std::numeric_limits::max()}; Kokkos::parallel_reduce( "N_VMin", typename VectorType::range_policy(0, xvec->Length()), KOKKOS_LAMBDA(const size_type i, sunrealtype& update) { if (xdata(i) < update) update = xdata(i); }, Kokkos::Min(gpu_result)); return gpu_result; } template sunrealtype N_VMinQuotient_Kokkos(N_Vector num, N_Vector denom) { auto nvec{GetVec(num)}; auto ndata{nvec->View()}; auto dvec{GetVec(denom)}; auto ddata{dvec->View()}; using size_type = typename VectorType::size_type; sunrealtype gpu_result{std::numeric_limits::max()}; Kokkos::parallel_reduce( "N_VMinQuotient", typename VectorType::range_policy(0, nvec->Length()), KOKKOS_LAMBDA(const size_type i, sunrealtype& update) { if (ddata(i) != sunrealtype{0.0}) { if ((ndata(i) / ddata(i)) < update) update = ndata(i) / ddata(i); } }, Kokkos::Min(gpu_result)); return gpu_result; } template void N_VProd_Kokkos(N_Vector x, N_Vector y, N_Vector z) { auto xvec{GetVec(x)}; auto yvec{GetVec(y)}; auto zvec{GetVec(z)}; auto xdata{xvec->View()}; auto ydata{yvec->View()}; auto zdata{zvec->View()}; using size_type = typename VectorType::size_type; Kokkos::parallel_for( "N_VProd", typename VectorType::range_policy(0, xvec->Length()), KOKKOS_LAMBDA(const size_type i) { zdata(i) = xdata(i) * ydata(i); }); } template void N_VScale_Kokkos(sunrealtype c, N_Vector x, N_Vector z) { auto xvec{GetVec(x)}; auto xdata{xvec->View()}; auto zvec{GetVec(z)}; auto zdata{zvec->View()}; using size_type = typename VectorType::size_type; Kokkos::parallel_for( "N_VScale", typename VectorType::range_policy(0, xvec->Length()), KOKKOS_LAMBDA(const size_type i) { zdata(i) = c * xdata(i); }); } template sunrealtype N_VWL2Norm_Kokkos(N_Vector x, N_Vector w) { return std::sqrt(impl::N_VWSqrSumLocal_Kokkos(x, w)); } template sunrealtype N_VWrmsNorm_Kokkos(N_Vector x, N_Vector w) { auto xvec{GetVec(x)}; return std::sqrt(impl::N_VWSqrSumLocal_Kokkos(x, w) / static_cast(xvec->Length())); } template sunrealtype N_VWrmsNormMask_Kokkos(N_Vector x, N_Vector w, N_Vector id) { auto xvec{GetVec(x)}; return std::sqrt(impl::N_VWSqrSumMaskLocal_Kokkos(x, w, id) / static_cast(xvec->Length())); } } // namespace impl // ============================================================================= // Public namespace // ============================================================================= template class Vector : public sundials::impl::BaseNVector, public sundials::ConvertibleTo { public: using view_type = Kokkos::View; using size_type = typename view_type::size_type; using host_view_type = typename view_type::HostMirror; using memory_space = MemorySpace; using exec_space = typename MemorySpace::execution_space; using range_policy = Kokkos::RangePolicy; // Default constructor Vector() = default; Vector(size_type length, SUNContext sunctx) : sundials::impl::BaseNVector(sunctx), view_("Vector device view", length), host_view_(Kokkos::create_mirror_view(view_)) { initNvector(); } Vector(view_type view, SUNContext sunctx) : sundials::impl::BaseNVector(sunctx), view_(view), host_view_(Kokkos::create_mirror_view(view_)) { initNvector(); } Vector(view_type view, host_view_type host_view, SUNContext sunctx) : sundials::impl::BaseNVector(sunctx), view_(view), host_view_(host_view) { initNvector(); } // Move constructor Vector(Vector&& that_vector) noexcept : sundials::impl::BaseNVector(std::move(that_vector)), view_(std::move(that_vector.view_)), host_view_(std::move(that_vector.host_view_)) { initNvector(); } // Copy constructor Vector(const Vector& that_vector) : sundials::impl::BaseNVector(that_vector), view_("Vector device view", that_vector.Length()), host_view_(Kokkos::create_mirror_view(view_)) { initNvector(); } // Move assignment Vector& operator=(Vector&& rhs) noexcept { view_ = std::move(rhs.view_); host_view_ = std::move(rhs.host_view_); sundials::impl::BaseNVector::operator=(std::move(rhs)); return *this; } // Copy assignment Vector& operator=(const Vector& rhs) { view_ = Kokkos::View("Vector device view", rhs.Length()); host_view_ = Kokkos::create_mirror_view(view_); sundials::impl::BaseNVector::operator=(rhs); return *this; } // Default destructor virtual ~Vector() = default; // Accessors size_type Length() const { return static_cast(view_.extent(0)); } view_type View() { return view_; } host_view_type HostView() { return host_view_; } // Override ConvertibleTo operations operator N_Vector() override { return object_.get(); } operator N_Vector() const override { return object_.get(); } N_Vector Convert() override { return object_.get(); } N_Vector Convert() const override { return object_.get(); } private: view_type view_; host_view_type host_view_; void initNvector() { using this_type = Vector; this->object_->content = this; /* constructors, destructors, and utility operations */ this->object_->ops->nvclone = impl::N_VClone_Kokkos; this->object_->ops->nvdestroy = impl::N_VDestroy_Kokkos; this->object_->ops->nvgetarraypointer = impl::N_VGetArrayPointer_Kokkos; this->object_->ops->nvgetdevicearraypointer = impl::N_VGetDeviceArrayPointer_Kokkos; this->object_->ops->nvgetlength = impl::N_VGetLength_Kokkos; this->object_->ops->nvgetvectorid = impl::N_VGetVectorID_Kokkos; /* standard vector operations */ this->object_->ops->nvabs = impl::N_VAbs_Kokkos; this->object_->ops->nvaddconst = impl::N_VAddConst_Kokkos; this->object_->ops->nvcompare = impl::N_VCompare_Kokkos; this->object_->ops->nvconst = impl::N_VConst_Kokkos; this->object_->ops->nvconstrmask = impl::N_VConstrMask_Kokkos; this->object_->ops->nvdiv = impl::N_VDiv_Kokkos; this->object_->ops->nvdotprod = impl::N_VDotProd_Kokkos; this->object_->ops->nvinv = impl::N_VInv_Kokkos; this->object_->ops->nvinvtest = impl::N_VInvTest_Kokkos; this->object_->ops->nvl1norm = impl::N_VL1Norm_Kokkos; this->object_->ops->nvlinearsum = impl::N_VLinearSum_Kokkos; this->object_->ops->nvmaxnorm = impl::N_VMaxNorm_Kokkos; this->object_->ops->nvmin = impl::N_VMin_Kokkos; this->object_->ops->nvminquotient = impl::N_VMinQuotient_Kokkos; this->object_->ops->nvprod = impl::N_VProd_Kokkos; this->object_->ops->nvscale = impl::N_VScale_Kokkos; this->object_->ops->nvwl2norm = impl::N_VWL2Norm_Kokkos; this->object_->ops->nvwrmsnorm = impl::N_VWrmsNorm_Kokkos; this->object_->ops->nvwrmsnormmask = impl::N_VWrmsNormMask_Kokkos; /* local reduction operations */ this->object_->ops->nvconstrmasklocal = impl::N_VConstrMask_Kokkos; this->object_->ops->nvdotprodlocal = impl::N_VDotProd_Kokkos; this->object_->ops->nvinvtestlocal = impl::N_VInvTest_Kokkos; this->object_->ops->nvl1normlocal = impl::N_VL1Norm_Kokkos; this->object_->ops->nvmaxnormlocal = impl::N_VMaxNorm_Kokkos; this->object_->ops->nvminlocal = impl::N_VMin_Kokkos; this->object_->ops->nvminquotientlocal = impl::N_VMinQuotient_Kokkos; this->object_->ops->nvwsqrsumlocal = impl::N_VWSqrSumLocal_Kokkos; this->object_->ops->nvwsqrsummasklocal = impl::N_VWSqrSumMaskLocal_Kokkos; } }; template void CopyToDevice(N_Vector v) { auto vec{GetVec(v)}; CopyToDevice(*vec); } template void CopyFromDevice(N_Vector v) { auto vec{GetVec(v)}; CopyFromDevice(*vec); } template void CopyToDevice(VectorType& v) { Kokkos::deep_copy(v.View(), v.HostView()); } template void CopyFromDevice(VectorType& v) { Kokkos::deep_copy(v.HostView(), v.View()); } template view_type GetView(N_Vector v) { auto vec{GetVec(v)}; return vec->View(); } template host_view_type GetHostView(N_Vector v) { auto vec{GetVec(v)}; return vec->HostView(); } } // namespace kokkos } // namespace sundials #endif