// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced // at the Lawrence Livermore National Laboratory. All Rights reserved. See files // LICENSE and NOTICE for details. LLNL-CODE-806117. // // This file is part of the MFEM library. For more information and source code // availability visit https://mfem.org. // // MFEM is free software; you can redistribute it and/or modify it under the // terms of the BSD-3 license. We welcome feedback and contributions, see file // CONTRIBUTING.md for details. #include "lor_rt.hpp" #include "lor_util.hpp" #include "../../linalg/dtensor.hpp" #include "../../general/forall.hpp" namespace mfem { template void BatchedLOR_RT::Assemble2D() { const int nel_ho = fes_ho.GetNE(); static constexpr int nv = 4; static constexpr int ne = 4; static constexpr int dim = 2; static constexpr int ddm2 = (dim*(dim+1))/2; static constexpr int ngeom = ddm2 + 1; static constexpr int o = ORDER; static constexpr int op1 = ORDER + 1; static constexpr int ndof_per_el = dim*o*op1; static constexpr int nnz_per_row = 7; static constexpr int sz_local_mat = ne*ne; const bool const_mq = c1.Size() == 1; const auto MQ = const_mq ? Reshape(c1.Read(), 1, 1, 1) : Reshape(c1.Read(), op1, op1, nel_ho); const bool const_dq = c2.Size() == 1; const auto DQ = const_dq ? Reshape(c2.Read(), 1, 1, 1) : Reshape(c2.Read(), op1, op1, nel_ho); sparse_ij.SetSize(nnz_per_row*ndof_per_el*nel_ho); auto V = Reshape(sparse_ij.Write(), nnz_per_row, o*op1, dim, nel_ho); auto X = X_vert.Read(); mfem::forall_2D(nel_ho, ORDER, ORDER, [=] MFEM_HOST_DEVICE (int iel_ho) { MFEM_FOREACH_THREAD(iy,y,o) { MFEM_FOREACH_THREAD(ix,x,op1) { for (int c=0; c<2; ++c) { for (int j=0; j Q(Q_, ngeom, 2, 2); DeviceTensor<2> local_mat(local_mat_, ne, ne); // local_mat is the local (dense) stiffness matrix for (int i=0; i(X, iel_ho, kx, ky, vx, vy); for (int iqx=0; iqx<2; ++iqx) { for (int iqy=0; iqy<2; ++iqy) { const double x = iqx; const double y = iqy; const double w = 1.0/4.0; double J_[2*2]; DeviceTensor<2> J(J_, 2, 2); Jacobian2D(x, y, vx, vy, J); const double detJ = Det2D(J); const double w_detJ = w/detJ; Q(0,iqy,iqx) = w_detJ * (J(0,0)*J(0,0) + J(1,0)*J(1,0)); // 1,1 Q(1,iqy,iqx) = w_detJ * (J(0,0)*J(0,1) + J(1,0)*J(1,1)); // 1,2 Q(2,iqy,iqx) = w_detJ * (J(0,1)*J(0,1) + J(1,1)*J(1,1)); // 2,2 Q(3,iqy,iqx) = w_detJ; } } for (int iqx=0; iqx<2; ++iqx) { for (int iqy=0; iqy<2; ++iqy) { const double mq = const_mq ? MQ(0,0,0) : MQ(kx+iqx, ky+iqy, iel_ho); const double dq = const_dq ? DQ(0,0,0) : DQ(kx+iqx, ky+iqy, iel_ho); // Loop over x,y components. c=0 => x, c=1 => y for (int cj=0; cj ii_loc) { continue; } double val = 0.0; val += bxi*bxj*Q(0,iqy,iqx); val += byi*bxj*Q(1,iqy,iqx); val += bxi*byj*Q(1,iqy,iqx); val += byi*byj*Q(2,iqy,iqx); val *= mq; val += dq*div_j*div_i*Q(3,iqy,iqx); local_mat(ii_loc, jj_loc) += val; } } } } } } // Assemble the local matrix into the macro-element sparse matrix // in a format similar to coordinate format. The (I,J) arrays // are implicit (not stored explicitly). for (int ii_loc=0; ii_loc 0) ? i0-1 : i0; const int j0_end = (cj_rel == 0) ? ((i0 < o) ? i0+1 : i0) : ((i0 < o) ? i0 : i0-1); const int j1_begin = i1; const int j1_end = (cj_rel == 0) ? i1 : i1+1; for (int j0=j0_begin; j0<=j0_end; ++j0) { const int d0 = 1 + j0 - i0; for (int j1=j1_begin; j1<=j1_end; ++j1) { const int d1 = j1 - i1; int jj_lex[2]; jj_lex[id0] = j0; jj_lex[id1] = j1; const int jj_el = j_off + jj_lex[0] + jj_lex[1]*nxj; const int jj_off = (cj_rel == 0) ? d0 : 3 + d0 + 2*d1; map(jj_off, ii_el) = jj_el; } } } } } } } template void BatchedLOR_RT::Assemble3D() { const int nel_ho = fes_ho.GetNE(); static constexpr int nv = 8; // number of vertices in hexahedron static constexpr int nf = 6; // number of faces in hexahedron static constexpr int dim = 3; static constexpr int ddm2 = (dim*(dim+1))/2; static constexpr int ngeom = ddm2 + 1; // number of geometric factors stored static constexpr int o = ORDER; static constexpr int op1 = ORDER + 1; static constexpr int ndof_per_el = dim*o*o*op1; static constexpr int nnz_per_row = 11; static constexpr int sz_local_mat = nf*nf; const bool const_mq = c1.Size() == 1; const auto MQ = const_mq ? Reshape(c1.Read(), 1, 1, 1, 1) : Reshape(c1.Read(), op1, op1, op1, nel_ho); const bool const_dq = c2.Size() == 1; const auto DQ = const_dq ? Reshape(c2.Read(), 1, 1, 1, 1) : Reshape(c2.Read(), op1, op1, op1, nel_ho); sparse_ij.SetSize(nnz_per_row*ndof_per_el*nel_ho); auto V = Reshape(sparse_ij.Write(), nnz_per_row, o*o*op1, dim, nel_ho); auto X = X_vert.Read(); // Last thread dimension is lowered to avoid "too many resources" error mfem::forall_3D(nel_ho, ORDER, ORDER, (ORDER>6)?4:ORDER, [=] MFEM_HOST_DEVICE (int iel_ho) { MFEM_FOREACH_THREAD(iz,z,o) { MFEM_FOREACH_THREAD(iy,y,o) { MFEM_FOREACH_THREAD(ix,x,op1) { for (int c=0; c Q(Q_, ngeom, 2, 2, 2); double local_mat_[sz_local_mat]; DeviceTensor<2> local_mat(local_mat_, nf, nf); for (int i=0; i(X, iel_ho, kx, ky, kz, vx, vy, vz); for (int iqz=0; iqz<2; ++iqz) { for (int iqy=0; iqy<2; ++iqy) { for (int iqx=0; iqx<2; ++iqx) { const double x = iqx; const double y = iqy; const double z = iqz; const double w = 1.0/8.0; double J_[3*3]; DeviceTensor<2> J(J_, 3, 3); Jacobian3D(x, y, z, vx, vy, vz, J); const double detJ = Det3D(J); const double w_detJ = w/detJ; Q(0,iqz,iqy,iqx) = w_detJ*(J(0,0)*J(0,0)+J(1,0)*J(1,0)+J(2,0)*J(2,0)); // 1,1 Q(1,iqz,iqy,iqx) = w_detJ*(J(0,1)*J(0,0)+J(1,1)*J(1,0)+J(2,1)*J(2,0)); // 2,1 Q(2,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,0)+J(1,2)*J(1,0)+J(2,2)*J(2,0)); // 3,1 Q(3,iqz,iqy,iqx) = w_detJ*(J(0,1)*J(0,1)+J(1,1)*J(1,1)+J(2,1)*J(2,1)); // 2,2 Q(4,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,1)+J(1,2)*J(1,1)+J(2,2)*J(2,1)); // 3,2 Q(5,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,2)+J(1,2)*J(1,2)+J(2,2)*J(2,2)); // 3,3 Q(6,iqz,iqy,iqx) = w_detJ; } } } for (int iqz=0; iqz<2; ++iqz) { for (int iqy=0; iqy<2; ++iqy) { for (int iqx=0; iqx<2; ++iqx) { const double mq = const_mq ? MQ(0,0,0,0) : MQ(kx+iqx, ky+iqy, kz+iqz, iel_ho); const double dq = const_dq ? DQ(0,0,0,0) : DQ(kx+iqx, ky+iqy, kz+iqz, iel_ho); // Loop over x,y,z components. 0 => x, 1 => y, 2 => z for (int cj=0; cj ii_loc) { continue; } const double div_div = Q(6,iqz,iqy,iqx)*div_i*div_j; double basis_basis = 0.0; basis_basis += Q(0,iqz,iqy,iqx)*basis_i[0]*basis_j[0]; basis_basis += Q(1,iqz,iqy,iqx)*(basis_i[0]*basis_j[1] + basis_i[1]*basis_j[0]); basis_basis += Q(2,iqz,iqy,iqx)*(basis_i[0]*basis_j[2] + basis_i[2]*basis_j[0]); basis_basis += Q(3,iqz,iqy,iqx)*basis_i[1]*basis_j[1]; basis_basis += Q(4,iqz,iqy,iqx)*(basis_i[1]*basis_j[2] + basis_i[2]*basis_j[1]); basis_basis += Q(5,iqz,iqy,iqx)*basis_i[2]*basis_j[2]; const double val = dq*div_div + mq*basis_basis; // const double val = 1.0; local_mat(ii_loc, jj_loc) += val; } } } } } } } // Assemble the local matrix into the macro-element sparse matrix // The nonzeros of the macro-element sparse matrix are ordered as // follows: // // The axes are ordered relative to the direction of the basis // vector, e.g. for x-vectors, the axes are (x,y,z), for // y-vectors the axes are (y,z,x), and for z-vectors the axes are // (z,x,y). // // The nonzeros are then given in "rotated lexicographic" // ordering, according to these axes. for (int ii_loc=0; ii_loc 0) ? i0-1 : i0; const int j0_end = (cj_rel == 0) ? ((i0 < o) ? i0+1 : i0) : ((i0 < o) ? i0 : i0-1); const int j1_begin = i1; const int j1_end = (cj_rel == 1) ? i1+1 : i1; const int j2_begin = i2; const int j2_end = (cj_rel == 2) ? i2+1 : i2; for (int j0=j0_begin; j0<=j0_end; ++j0) { const int d0 = 1 + j0 - i0; for (int j1=j1_begin; j1<=j1_end; ++j1) { const int d1 = j1 - i1; for (int j2=j2_begin; j2<=j2_end; ++j2) { const int d2 = j2 - i2; int jj_lex[3]; jj_lex[id0] = j0; jj_lex[id1] = j1; jj_lex[id2] = j2; const int jj_el = j_off + jj_lex[0] + jj_lex[1]*nxj + jj_lex[2]*nxj*nyj; int jj_off; if (cj_rel == 0) { jj_off = d0; } else if (cj_rel == 1) { jj_off = 3 + d0 + 2*d1; } else /* if (cj_rel == 2) */ { jj_off = 7 + d0 + 2*d2; } map(jj_off, ii_el) = jj_el; } } } } } } } } } // Explicit template instantiations template void BatchedLOR_RT::Assemble2D<1>(); template void BatchedLOR_RT::Assemble2D<2>(); template void BatchedLOR_RT::Assemble2D<3>(); template void BatchedLOR_RT::Assemble2D<4>(); template void BatchedLOR_RT::Assemble2D<5>(); template void BatchedLOR_RT::Assemble2D<6>(); template void BatchedLOR_RT::Assemble2D<7>(); template void BatchedLOR_RT::Assemble2D<8>(); template void BatchedLOR_RT::Assemble3D<1>(); template void BatchedLOR_RT::Assemble3D<2>(); template void BatchedLOR_RT::Assemble3D<3>(); template void BatchedLOR_RT::Assemble3D<4>(); template void BatchedLOR_RT::Assemble3D<5>(); template void BatchedLOR_RT::Assemble3D<6>(); template void BatchedLOR_RT::Assemble3D<7>(); template void BatchedLOR_RT::Assemble3D<8>(); BatchedLOR_RT::BatchedLOR_RT(BilinearForm &a, FiniteElementSpace &fes_ho_, Vector &X_vert_, Vector &sparse_ij_, Array &sparse_mapping_) : BatchedLORKernel(fes_ho_, X_vert_, sparse_ij_, sparse_mapping_) { ProjectLORCoefficient(a, c1); ProjectLORCoefficient(a, c2); } } // namespace mfem