// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced // at the Lawrence Livermore National Laboratory. All Rights reserved. See files // LICENSE and NOTICE for details. LLNL-CODE-806117. // // This file is part of the MFEM library. For more information and source code // availability visit https://mfem.org. // // MFEM is free software; you can redistribute it and/or modify it under the // terms of the BSD-3 license. We welcome feedback and contributions, see file // CONTRIBUTING.md for details. #ifndef MFEM_TEMPLATE_BILINEAR_FORM #define MFEM_TEMPLATE_BILINEAR_FORM #include "../config/tconfig.hpp" #include "../linalg/simd.hpp" #include "../linalg/ttensor.hpp" #include "bilinearform.hpp" #include "tevaluator.hpp" #include "teltrans.hpp" #include "tcoefficient.hpp" #include "fespace.hpp" namespace mfem { /** @brief Templated bilinear form class, cf. bilinearform.?pp @tparam meshType typically TMesh, which is templated on FE type @tparam solFESpace eg. H1_FiniteElementSpace @tparam IR integration rule, typically TIntegrationRule, which is further templated on element geometry @tparam IntegratorType typically a TIntegrator, which is templated on a kernel, eg. TDiffusionKernel or TMassKernel. This describes what actual problem you solve. @tparam solVecLayout_t describes how degrees of freedom are laid out, scalar or vector, column/row major, etc. @tparam complex_t data type for solution dofs @tparam real_t data type for mesh nodes, solution basis, and mesh basis */ template > class TBilinearForm : public Operator { public: typedef impl_traits_t impl_traits_type; protected: typedef complex_t complex_type; typedef real_t real_type; typedef typename meshType::FE_type meshFE_type; typedef ShapeEvaluator meshShapeEval; typedef typename solFESpace::FE_type solFE_type; typedef ShapeEvaluator solShapeEval; typedef solVecLayout_t solVecLayout_type; static const int dim = meshType::dim; static const int sdim = meshType::space_dim; static const int dofs = solFE_type::dofs; static const int vdim = solVecLayout_t::vec_dim; static const int qpts = IR::qpts; static const int AB = impl_traits_t::align_bytes; static const int SS = impl_traits_t::simd_size; static const int BE = impl_traits_t::batch_size; static const int TE = SS*BE; typedef typename impl_traits_t::vcomplex_t vcomplex_t; typedef typename impl_traits_t::vreal_t vreal_t; /// @name IntegratorType defines several internal types ///@{ typedef IntegratorType integ_t; /// coeff_t might be TConstantCoefficient or TFunctionCoefficient, for example typedef typename integ_t::coefficient_type coeff_t; /// kernel_t may be TDiffusionKernel or TMassKernel typedef typename integ_t::template kernel::type kernel_t; /// p_assembled_t is something like a TTensor or TMatrix for partial assembly typedef typename kernel_t::template p_asm_data::type p_assembled_t; /// f_assembled_t is something like a TTensor or TMatrix for full assembly typedef typename kernel_t::template f_asm_data::type f_assembled_t; ///@} typedef typename kernel_t::template CoefficientEval::Type coeff_eval_t; typedef TElementTransformation Trans_t; struct T_result { static const int EvalOps = Trans_t::template Get::EvalOps; typedef typename Trans_t::template Result Type; }; typedef FieldEvaluator solFieldEval; /** @brief Contains matrix sizes, type of kernel (ElementMatrix is templated on a kernel, e.g. ElementMatrix::Compute may be AssembleGradGrad()). */ struct S_spec { typedef typename solFieldEval::template Spec Spec; typedef typename Spec::DataType DataType; typedef typename Spec::ElementMatrix ElementMatrix; }; // Data members meshType mesh; meshShapeEval meshEval; solFE_type sol_fe; solShapeEval solEval; mutable solFESpace solFES; solVecLayout_t solVecLayout; IR int_rule; coeff_t coeff; Memory assembled_data; const FiniteElementSpace &in_fes; public: TBilinearForm(const IntegratorType &integ, const FiniteElementSpace &sol_fes) : Operator(sol_fes.GetNDofs()*vdim), mesh(*sol_fes.GetMesh()), meshEval(mesh.fe), sol_fe(*sol_fes.FEColl()), solEval(sol_fe), solFES(sol_fe, sol_fes), solVecLayout(sol_fes), int_rule(), coeff(integ.coeff), assembled_data(), in_fes(sol_fes) { assembled_data.Reset(AB == 64 ? MemoryType::HOST_64 : AB == 32 ? MemoryType::HOST_32 : MemoryType::HOST); } virtual ~TBilinearForm() { assembled_data.Delete(); } /// Get the input finite element space prolongation matrix virtual const Operator *GetProlongation() const { return ((FiniteElementSpace &)in_fes).GetProlongationMatrix(); } /// Get the input finite element space restriction matrix virtual const Operator *GetRestriction() const { return ((FiniteElementSpace &)in_fes).GetRestrictionMatrix(); } virtual void Mult(const Vector &x, Vector &y) const { if (!assembled_data.Empty()) { MultAssembled(x, y); } else { MultUnassembled(x, y); } } // complex_t = double void MultUnassembled(const Vector &x, Vector &y) const { y = 0.0; // For better performance, create stack copies of solFES, and solEval // inside 'solFEval'. The element-transformation 'T' also copies the // meshFES, meshEval, etc internally. // Is performance actually better with this implementation? Trans_t T(mesh, meshEval); solFieldEval solFEval(solFES, solEval, solVecLayout, x.GetData(), y.GetData()); coeff_eval_t wQ(int_rule, coeff); const int NE = mesh.GetNE(); for (int el = 0; el < NE; el += TE) { #if 0 typename S_spec::DataType R; solFEval.Eval(el, R); typename T_result::Type F; T.Eval(el, F); #else typename T_result::Type F; T.Eval(el, F); typename S_spec::DataType R; solFEval.Eval(el, R); #endif typename coeff_eval_t::result_t res; wQ.Eval(F, res); for (int k = 0; k < BE; k++) { kernel_t::Action(k, F, wQ, res, R); } solFEval.template Assemble(R); } } /// Partial assembly of quadrature point data void Assemble() { Trans_t T(mesh, meshEval); coeff_eval_t wQ(int_rule, coeff); const int NE = mesh.GetNE(); if (assembled_data.Empty()) { const int size = ((NE+TE-1)/TE)*BE; assembled_data.New(size, assembled_data.GetMemoryType()); } for (int el = 0; el < NE; el += TE) { typename T_result::Type F; T.Eval(el, F); typename coeff_eval_t::result_t res; wQ.Eval(F, res); for (int k = 0; k < BE; k++) { kernel_t::Assemble(k, F, wQ, res, assembled_data[el/SS+k]); } } } inline MFEM_ALWAYS_INLINE void ElementAddMultAssembled(int el, solFieldEval &solFEval) const { typename S_spec::DataType R; solFEval.Eval(el, R); for (int k = 0; k < BE; k++) { kernel_t::MultAssembled(k, assembled_data[el/SS+k], R); } solFEval.template Assemble(R); } // complex_t = double void MultAssembled(const Vector &x, Vector &y) const { y = 0.0; solFieldEval solFEval(solFES, solEval, solVecLayout, x.GetData(), y.GetData()); const int NE = mesh.GetNE(); for (int el = 0; el < NE; el += TE) { ElementAddMultAssembled(el, solFEval); } } #ifdef MFEM_TEMPLATE_ENABLE_SERIALIZE // complex_t = double void TestElementwiseExtractAssemble(const Vector &x, Vector &y) const { y = 0.0; solVecLayout_type solVecLayoutLoc(this->solVecLayout); solFESpace solFESLoc(this->solFES); TTensor3 xy_dof; const int NE = mesh.GetNE(); for (int el = 0; el < NE; el += TE) { solFESLoc.SetElement(el); solFESLoc.VectorExtract(solVecLayoutLoc, x, xy_dof.layout, xy_dof); solFESLoc.VectorAssemble(xy_dof.layout, xy_dof, solVecLayoutLoc, y); } } // real_t = double void SerializeNodes(Vector &sNodes) const { typedef typename meshType::FESpace_type meshFESpace; meshFESpace meshFES(mesh.t_fes); typedef TTensor3 lnodes_t; const int NE = mesh.GetNE(); // TODO: How do we make sure that this array is aligned properly, AND the // compiler knows that it is aligned? => ALIGN_32|ALIGN_64 when ready const int NVE = (NE+TE-1)/TE; vreal_t *vsNodes = new vreal_t[lnodes_t::size*NVE]; sNodes.NewDataAndSize(vsNodes[0].vec, (lnodes_t::size*SS)*NVE); sNodes.MakeDataOwner(); for (int el = 0; el < NE; el += TE) { meshFES.SetElement(el); meshFES.VectorExtract(mesh.node_layout, mesh.Nodes, lnodes_t::layout, vsNodes); vsNodes += lnodes_t::size; } } /// Partial assembly from "serialized" nodes // real_t = double void AssembleFromSerializedNodes(const Vector &sNodes) { Trans_t T(mesh, meshEval); coeff_eval_t wQ(int_rule, coeff); const int NE = mesh.GetNE(); if (assembled_data.Empty()) { const int size = ((NE+TE-1)/TE)*BE; assembled_data.New(size, assembled_data.GetMemoryType()); } const vreal_t *vsNodes = (const vreal_t*)(sNodes.GetData()); for (int el = 0; el < NE; el += TE) { typename T_result::Type F; T.EvalSerialized(el, vsNodes, F); typename coeff_eval_t::result_t res; wQ.Eval(F, res); for (int k = 0; k < BE; k++) { kernel_t::Assemble(k, F, wQ, res, assembled_data[el/SS+k]); } } } // complex_t = double void Serialize(const Vector &x, Vector &sx) const { typedef TTensor3 vdof_data_t; solVecLayout_t solVecLayoutLoc(this->solVecLayout); solFESpace solFESLoc(this->solFES); const int NE = mesh.GetNE(); // TODO: How do we make sure that this array is aligned properly, AND // the compiler knows that it is aligned? => ALIGN_32|ALIGN_64 when ready const int NVE = (NE+TE-1)/TE; vreal_t *vsx = new vreal_t[vdof_data_t::size*NVE]; sx.NewDataAndSize(vsx[0].vec, (vdof_data_t::size*SS)*NVE); sx.MakeDataOwner(); for (int el = 0; el < NE; el += TE) { solFESLoc.SetElement(el); solFESLoc.VectorExtract(solVecLayoutLoc, x, vdof_data_t::layout, vsx); vsx += vdof_data_t::size; } } /// serialized vector sx --> serialized vector 'sy' // complex_t = double void MultAssembledSerialized(const Vector &sx, Vector &sy) const { solFieldEval solFEval(solFES, solEval, solVecLayout, NULL, NULL); const int NE = mesh.GetNE(); const vreal_t *vsx = (const vreal_t*)(sx.GetData()); vreal_t *vsy = (vreal_t*)(sy.GetData()); for (int el = 0; el < NE; el += TE) { typename S_spec::DataType R; solFEval.EvalSerialized(vsx, R); for (int k = 0; k < BE; k++) { kernel_t::MultAssembled(k, assembled_data[el/SS+k], R); } solFEval.template AssembleSerialized(R, vsy); vsx += vdim*dofs*BE; vsy += vdim*dofs*BE; } } #endif // MFEM_TEMPLATE_ENABLE_SERIALIZE /// Assemble the operator in a SparseMatrix. // complex_t = double void AssembleMatrix(SparseMatrix &M) const { Trans_t T(mesh, meshEval); solFESpace solFESLoc(this->solFES); solShapeEval solEvalLoc(this->solEval); solVecLayout_t solVecLayoutLoc(this->solVecLayout); coeff_eval_t wQ(int_rule, coeff); const int NE = mesh.GetNE(); for (int el = 0; el < NE; el += TE) { f_assembled_t asm_qpt_data[BE]; { typename T_result::Type F; T.Eval(el, F); typename coeff_eval_t::result_t res; wQ.Eval(F, res); for (int k = 0; k < BE; k++) { kernel_t::Assemble(k, F, wQ, res, asm_qpt_data[k]); } } // For now, when vdim > 1, assume block-diagonal matrix with the same // diagonal block for all components. for (int k = 0; k < BE; k++) { const int el_k = el+SS*k; if (el_k >= NE) { break; } TMatrix M_loc; S_spec::ElementMatrix::Compute( asm_qpt_data[k].layout, asm_qpt_data[k], M_loc.layout, M_loc, solEvalLoc); solFESLoc.SetElement(el_k); for (int bi = 0; bi < vdim; bi++) { solFESLoc.AssembleBlock(bi, bi, solVecLayoutLoc, M_loc, M); } } } } /// Assemble element matrices and store them as a DenseTensor object. // complex_t = double void AssembleMatrix(DenseTensor &M) const { Trans_t T(mesh, meshEval); solShapeEval solEvalLoc(this->solEval); coeff_eval_t wQ(int_rule, coeff); const int NE = mesh.GetNE(); for (int el = 0; el < NE; el += TE) { f_assembled_t asm_qpt_data[BE]; { typename T_result::Type F; T.Eval(el, F); typename coeff_eval_t::result_t res; wQ.Eval(F, res); for (int k = 0; k < BE; k++) { kernel_t::Assemble(k, F, wQ, res, asm_qpt_data[k]); } } // For now, when vdim > 1, assume block-diagonal matrix with the same // diagonal block for all components. // M is assumed to be (dof x dof x NE). for (int k = 0; k < BE; k++) { const int el_k = el+SS*k; if (el_k >= NE) { break; } TMatrix M_loc; S_spec::ElementMatrix::Compute( asm_qpt_data[k].layout, asm_qpt_data[k], M_loc.layout, M_loc, solEvalLoc); for (int s = 0; s < SS && el_k+s < NE; s++) { complex_t *M_data = M.GetData(el_k+s); for (int j = 0; j < dofs; j++) { for (int i = 0; i < dofs; i++) { M_data[j+dofs*i] = M_loc(i,j)[s]; } } } } } } /// Assemble element matrices and add them to the bilinear form // complex_t = double void AssembleBilinearForm(BilinearForm &a) const { Trans_t T(mesh, meshEval); solShapeEval solEvalLoc(this->solEval); coeff_eval_t wQ(int_rule, coeff); Array vdofs; const Array *dof_map = sol_fe.GetDofMap(); const int *dof_map_ = dof_map->GetData(); DenseMatrix M_loc_perm(dofs*vdim,dofs*vdim); // initialized with zeros const int NE = mesh.GetNE(); for (int el = 0; el < NE; el += TE) { f_assembled_t asm_qpt_data[BE]; { typename T_result::Type F; T.Eval(el, F); typename coeff_eval_t::result_t res; wQ.Eval(F, res); for (int k = 0; k < BE; k++) { kernel_t::Assemble(k, F, wQ, res, asm_qpt_data[k]); } } // For now, when vdim > 1, assume block-diagonal matrix with the same // diagonal block for all components. for (int k = 0; k < BE; k++) { const int el_k = el+SS*k; if (el_k >= NE) { break; } TMatrix M_loc; S_spec::ElementMatrix::Compute( asm_qpt_data[k].layout, asm_qpt_data[k], M_loc.layout, M_loc, solEvalLoc); if (dof_map) // switch from tensor-product ordering { for (int s = 0; s < SS && el_k+s < NE; s++) { for (int i = 0; i < dofs; i++) { for (int j = 0; j < dofs; j++) { M_loc_perm(dof_map_[i],dof_map_[j]) = M_loc(i,j)[s]; } } for (int bi = 1; bi < vdim; bi++) { M_loc_perm.CopyMN(M_loc_perm, dofs, dofs, 0, 0, bi*dofs, bi*dofs); } a.AssembleElementMatrix(el_k+s, M_loc_perm, vdofs); } } else if (SS == 1) { DenseMatrix DM(M_loc.data[0].vec, dofs, dofs); if (vdim == 1) { a.AssembleElementMatrix(el_k, DM, vdofs); } else { for (int bi = 0; bi < vdim; bi++) { M_loc_perm.CopyMN(DM, dofs, dofs, 0, 0, bi*dofs, bi*dofs); } a.AssembleElementMatrix(el_k, M_loc_perm, vdofs); } } else { for (int s = 0; s < SS && el_k+s < NE; s++) { for (int i = 0; i < dofs; i++) { for (int j = 0; j < dofs; j++) { M_loc_perm(i,j) = M_loc(i,j)[s]; } } for (int bi = 1; bi < vdim; bi++) { M_loc_perm.CopyMN(M_loc_perm, dofs, dofs, 0, 0, bi*dofs, bi*dofs); } a.AssembleElementMatrix(el_k+s, M_loc_perm, vdofs); } } } } } /// Multiplication using assembled element matrices stored as a DenseTensor. // complex_t = double void AddMult(DenseTensor &M, const Vector &x, Vector &y) const { // For now, when vdim > 1, assume block-diagonal matrix with the same // diagonal block for all components. // M is assumed to be (dof x dof x NE). solVecLayout_t solVecLayoutLoc(this->solVecLayout); const int NE = mesh.GetNE(); for (int el = 0; el < NE; el++) { TTensor3 > x_dof, y_dof; solFES.SetElement(el); solFES.VectorExtract(solVecLayoutLoc, x, x_dof.layout, x_dof); Mult_AB(TMatrix::layout, M(el).Data(), x_dof.layout.merge_23(), x_dof, y_dof.layout.merge_23(), y_dof); solFES.VectorAssemble(y_dof.layout, y_dof, solVecLayoutLoc, y); } } using Operator::AddMult; }; } // namespace mfem #endif // MFEM_TEMPLATE_BILINEAR_FORM