// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
// LICENSE and NOTICE for details. LLNL-CODE-806117.
//
// This file is part of the MFEM library. For more information and source code
// availability visit https://mfem.org.
//
// MFEM is free software; you can redistribute it and/or modify it under the
// terms of the BSD-3 license. We welcome feedback and contributions, see file
// CONTRIBUTING.md for details.

#include "lor_nd.hpp"
#include "lor_util.hpp"
#include "../../linalg/dtensor.hpp"
#include "../../general/forall.hpp"

namespace mfem
{

template <int ORDER>
void BatchedLOR_ND::Assemble2D()
{
   const int nel_ho = fes_ho.GetNE();

   static constexpr int nv = 4;
   static constexpr int ne = 4;
   static constexpr int dim = 2;
   static constexpr int ddm2 = (dim*(dim+1))/2;
   static constexpr int ngeom = ddm2 + 1;
   static constexpr int o = ORDER;
   static constexpr int op1 = ORDER + 1;
   static constexpr int ndof_per_el = dim*o*op1;
   static constexpr int nnz_per_row = 7;
   static constexpr int sz_local_mat = ne*ne;

   const bool const_mq = c1.Size() == 1;
   const auto MQ = const_mq
                   ? Reshape(c1.Read(), 1, 1, 1)
                   : Reshape(c1.Read(), op1, op1, nel_ho);
   const bool const_dq = c2.Size() == 1;
   const auto DQ = const_dq
                   ? Reshape(c2.Read(), 1, 1, 1)
                   : Reshape(c2.Read(), op1, op1, nel_ho);

   sparse_ij.SetSize(nnz_per_row*ndof_per_el*nel_ho);
   auto V = Reshape(sparse_ij.Write(), nnz_per_row, o*op1, dim, nel_ho);

   auto X = X_vert.Read();

   mfem::forall_2D(nel_ho, ORDER, ORDER, [=] MFEM_HOST_DEVICE (int iel_ho)
   {
      // Assemble a sparse matrix over the macro-element by looping over each
      // subelement.
      // V(j,ix,iy) stores the jth nonzero in the row of the sparse matrix
      // corresponding to local DOF (ix, iy).
      MFEM_FOREACH_THREAD(iy,y,o)
      {
         MFEM_FOREACH_THREAD(ix,x,op1)
         {
            for (int c=0; c<2; ++c)
            {
               for (int j=0; j<nnz_per_row; ++j)
               {
                  V(j, ix + iy*op1, c, iel_ho) = 0.0;
               }
            }
         }
      }
      MFEM_SYNC_THREAD;

      // Loop over the LOR subelements of the macro-element.
      MFEM_FOREACH_THREAD(ky,y,ORDER)
      {
         MFEM_FOREACH_THREAD(kx,x,ORDER)
         {
            // Geometric factors at the vertices of the subelement, which are
            // used as quadrature points.
            double Q_[ngeom*nv];
            double local_mat_[sz_local_mat];

            DeviceTensor<3> Q(Q_, ngeom, 2, 2);
            DeviceTensor<2> local_mat(local_mat_, ne, ne);

            // local_mat is the local (dense) stiffness matrix
            for (int i=0; i<sz_local_mat; ++i) { local_mat_[i] = 0.0; }

            double vx[nv], vy[nv];
            LORVertexCoordinates2D<ORDER>(X, iel_ho, kx, ky, vx, vy);

            for (int iqx=0; iqx<2; ++iqx)
            {
               for (int iqy=0; iqy<2; ++iqy)
               {
                  const double x = iqx;
                  const double y = iqy;
                  const double w = 1.0/4.0;

                  double J_[2*2];
                  DeviceTensor<2> J(J_, 2, 2);

                  Jacobian2D(x, y, vx, vy, J);

                  const double detJ = Det2D(J);
                  const double w_detJ = w/detJ;

                  Q(0,iqy,iqx) = w_detJ * (J(0,1)*J(0,1) + J(1,1)*J(1,1)); // 1,1
                  Q(1,iqy,iqx) = -w_detJ * (J(0,1)*J(0,0) + J(1,1)*J(1,0)); // 1,2
                  Q(2,iqy,iqx) = w_detJ * (J(0,0)*J(0,0) + J(1,0)*J(1,0)); // 2,2
                  Q(3,iqy,iqx) = w_detJ;
               }
            }

            for (int iqx=0; iqx<2; ++iqx)
            {
               for (int iqy=0; iqy<2; ++iqy)
               {
                  const double mq = const_mq ? MQ(0,0,0) : MQ(kx+iqx, ky+iqy, iel_ho);
                  const double dq = const_dq ? DQ(0,0,0) : DQ(kx+iqx, ky+iqy, iel_ho);
                  // Loop over x,y components. c=0 => x, c=1 => y
                  for (int cj=0; cj<dim; ++cj)
                  {
                     for (int bj=0; bj<2; ++bj) // 2 edges per component
                     {
                        // Curl of the trial edge function (piecewise constant)
                        const double curl_j = ((cj == 0) ? 1 : -1)*((bj == 0) ? 1 : -1);
                        // Components of the trial edge function evaluated at
                        // the quadrature point (iqx, iqy)
                        const double bxj = (cj == 0) ? ((bj == iqy) ? 1 : 0) : 0;
                        const double byj = (cj == 1) ? ((bj == iqx) ? 1 : 0) : 0;

                        const int jj_loc = bj + 2*cj;

                        for (int ci=0; ci<dim; ++ci)
                        {
                           for (int bi=0; bi<2; ++bi)
                           {
                              const double curl_i = ((ci == 0) ? 1 : -1)*((bi == 0) ? 1 : -1);
                              const double bxi = (ci == 0) ? ((bi == iqy) ? 1 : 0) : 0;
                              const double byi = (ci == 1) ? ((bi == iqx) ? 1 : 0) : 0;

                              const int ii_loc = bi + 2*ci;

                              // Only store the lower-triangular part of the
                              // local matrix (it is symmetric).
                              if (jj_loc > ii_loc) { continue; }

                              double val = 0.0;
                              val += bxi*bxj*Q(0,iqy,iqx);
                              val += byi*bxj*Q(1,iqy,iqx);
                              val += bxi*byj*Q(1,iqy,iqx);
                              val += byi*byj*Q(2,iqy,iqx);
                              val *= mq;
                              val += dq*curl_i*curl_j*Q(3,iqy,iqx);

                              local_mat(ii_loc, jj_loc) += val;
                           }
                        }
                     }
                  }
               }
            }

            // Assemble the local matrix into the macro-element sparse matrix
            // in a format similar to coordinate format. The (I,J) arrays
            // are implicit (not stored explicitly).
            for (int ii_loc=0; ii_loc<ne; ++ii_loc)
            {
               const int ci = ii_loc/2;
               const int bi = ii_loc%2;
               const int ix = (ci == 0) ? 0 : bi;
               const int iy = (ci == 0) ? bi : 0;

               const int ii = kx + ix + (ky + iy)*((ci == 0) ? o : op1);

               for (int jj_loc=0; jj_loc<ne; ++jj_loc)
               {
                  const int cj = jj_loc/2;
                  const int bj = jj_loc%2;

                  // Offset of the nonzero within the row; the layout must
                  // match the host-side mapping loop below.
                  const int jj_off = (ci == cj) ? (bj - bi + 1) : 3 + bj + 2*(1 - bi);

                  // Symmetry: only the lower triangle of local_mat is stored.
                  const double val = (jj_loc <= ii_loc)
                                     ? local_mat(ii_loc, jj_loc)
                                     : local_mat(jj_loc, ii_loc);
                  AtomicAdd(V(jj_off, ii, ci, iel_ho), val);
               }
            }
         }
      }
   });

   sparse_mapping.SetSize(nnz_per_row*ndof_per_el);
   sparse_mapping = -1;
   auto map = Reshape(sparse_mapping.HostReadWrite(), nnz_per_row, ndof_per_el);
   for (int ci=0; ci<2; ++ci)
   {
      // i1 indexes the edge along its own direction, i2 across it.
      for (int i1=0; i1<o; ++i1)
      {
         for (int i2=0; i2<op1; ++i2)
         {
            const int ii_el = (ci == 0) ? i1 + i2*o : i2 + i1*op1 + o*op1;
            for (int cj=0; cj<2; ++cj)
            {
               const int j1_begin = (ci == cj) ? i1 : ((i2 > 0) ? i2-1 : i2);
               const int j1_end = (ci == cj) ? i1 : ((i2 < o) ? i2 : i2-1);
               const int j2_begin = (ci == cj) ? ((i2 > 0) ? i2-1 : i2) : i1;
               const int j2_end = (ci == cj) ? ((i2 < o) ? i2+1 : i2) : i1+1;
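
               // Each row of the sparse matrix has at most nnz_per_row = 7
               // nonzeros: offsets 0-2 hold the neighboring edges of the same
               // component (j1 == i1, j2 in {i2-1, i2, i2+1}), and offsets
               // 3-6 hold the neighboring edges of the other component.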
               for (int j1=j1_begin; j1<=j1_end; ++j1)
               {
                  for (int j2=j2_begin; j2<=j2_end; ++j2)
                  {
                     const int jj_el = (cj == 0) ? j1 + j2*o : j2 + j1*op1 + o*op1;
                     int jj_off = (ci == cj) ? (j2-i2+1) : 3 + (j2-i1) + 2*(j1-i2+1);
                     map(jj_off, ii_el) = jj_el;
                  }
               }
            }
         }
      }
   }
}

template <int ORDER>
void BatchedLOR_ND::Assemble3D()
{
   const int nel_ho = fes_ho.GetNE();

   static constexpr int nv = 8; // number of vertices in hexahedron
   static constexpr int ne = 12; // number of edges in hexahedron
   static constexpr int dim = 3;
   static constexpr int ddm2 = (dim*(dim+1))/2;
   static constexpr int ngeom = 2*ddm2; // number of geometric factors stored
   static constexpr int o = ORDER;
   static constexpr int op1 = ORDER + 1;
   static constexpr int ndof_per_el = dim*o*op1*op1;
   static constexpr int nnz_per_row = 33;
   static constexpr int sz_local_mat = ne*ne;

   const bool const_mq = c1.Size() == 1;
   const auto MQ = const_mq
                   ? Reshape(c1.Read(), 1, 1, 1, 1)
                   : Reshape(c1.Read(), op1, op1, op1, nel_ho);
   const bool const_dq = c2.Size() == 1;
   const auto DQ = const_dq
                   ? Reshape(c2.Read(), 1, 1, 1, 1)
                   : Reshape(c2.Read(), op1, op1, op1, nel_ho);

   sparse_ij.SetSize(nnz_per_row*ndof_per_el*nel_ho);
   auto V = Reshape(sparse_ij.Write(), nnz_per_row, o*op1*op1, dim, nel_ho);

   auto X = X_vert.Read();

   // Last thread dimension is lowered to avoid "too many resources" error
   mfem::forall_3D(nel_ho, ORDER, ORDER, (ORDER>6)?4:ORDER,
                   [=] MFEM_HOST_DEVICE (int iel_ho)
   {
      // Zero the nonzeros of the macro-element sparse matrix before
      // accumulating the subelement contributions.
      MFEM_FOREACH_THREAD(iz,z,o)
      {
         MFEM_FOREACH_THREAD(iy,y,op1)
         {
            MFEM_FOREACH_THREAD(ix,x,op1)
            {
               for (int c=0; c<dim; ++c)
               {
                  for (int j=0; j<nnz_per_row; ++j)
                  {
                     V(j, ix + iy*op1 + iz*op1*op1, c, iel_ho) = 0.0;
                  }
               }
            }
         }
      }
      MFEM_SYNC_THREAD;

      // Loop over the LOR subelements of the macro-element.
      MFEM_FOREACH_THREAD(kz,z,ORDER)
      {
         MFEM_FOREACH_THREAD(ky,y,ORDER)
         {
            MFEM_FOREACH_THREAD(kx,x,ORDER)
            {
               // Geometric factors at the vertices of the subelement, which
               // are used as quadrature points.
               double Q_[ngeom*nv];
               DeviceTensor<4> Q(Q_, ngeom, 2, 2, 2);

               double local_mat_[sz_local_mat];
               DeviceTensor<2> local_mat(local_mat_, ne, ne);
               for (int i=0; i<sz_local_mat; ++i) { local_mat_[i] = 0.0; }

               double vx[nv], vy[nv], vz[nv];
               LORVertexCoordinates3D<ORDER>(X, iel_ho, kx, ky, kz, vx, vy, vz);

               for (int iqz=0; iqz<2; ++iqz)
               {
                  for (int iqy=0; iqy<2; ++iqy)
                  {
                     for (int iqx=0; iqx<2; ++iqx)
                     {
                        const double x = iqx;
                        const double y = iqy;
                        const double z = iqz;
                        const double w = 1.0/8.0;

                        double J_[3*3];
                        DeviceTensor<2> J(J_, 3, 3);

                        Jacobian3D(x, y, z, vx, vy, vz, J);

                        const double detJ = Det3D(J);
                        const double w_detJ = w/detJ;

                        // adj(J)
                        double A_[3*3];
                        DeviceTensor<2> A(A_, 3, 3);
                        Adjugate3D(J, A);

                        // w adj(J) adj(J)^T / det(J)
                        Q(0,iqz,iqy,iqx) = w_detJ*(A(0,0)*A(0,0)+A(0,1)*A(0,1)+A(0,2)*A(0,2)); // 1,1
                        Q(1,iqz,iqy,iqx) = w_detJ*(A(0,0)*A(1,0)+A(0,1)*A(1,1)+A(0,2)*A(1,2)); // 2,1
                        Q(2,iqz,iqy,iqx) = w_detJ*(A(0,0)*A(2,0)+A(0,1)*A(2,1)+A(0,2)*A(2,2)); // 3,1
                        Q(3,iqz,iqy,iqx) = w_detJ*(A(1,0)*A(1,0)+A(1,1)*A(1,1)+A(1,2)*A(1,2)); // 2,2
                        Q(4,iqz,iqy,iqx) = w_detJ*(A(1,0)*A(2,0)+A(1,1)*A(2,1)+A(1,2)*A(2,2)); // 3,2
                        Q(5,iqz,iqy,iqx) = w_detJ*(A(2,0)*A(2,0)+A(2,1)*A(2,1)+A(2,2)*A(2,2)); // 3,3
                        // w J^T J / det(J)
                        Q(6,iqz,iqy,iqx) = w_detJ*(J(0,0)*J(0,0)+J(1,0)*J(1,0)+J(2,0)*J(2,0)); // 1,1
                        Q(7,iqz,iqy,iqx) = w_detJ*(J(0,0)*J(0,1)+J(1,0)*J(1,1)+J(2,0)*J(2,1)); // 2,1
                        Q(8,iqz,iqy,iqx) = w_detJ*(J(0,0)*J(0,2)+J(1,0)*J(1,2)+J(2,0)*J(2,2)); // 3,1
                        Q(9,iqz,iqy,iqx) = w_detJ*(J(0,1)*J(0,1)+J(1,1)*J(1,1)+J(2,1)*J(2,1)); // 2,2
                        Q(10,iqz,iqy,iqx) = w_detJ*(J(0,1)*J(0,2)+J(1,1)*J(1,2)+J(2,1)*J(2,2)); // 3,2
                        Q(11,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,2)+J(1,2)*J(1,2)+J(2,2)*J(2,2)); // 3,3
                     }
                  }
               }

               for (int iqz=0; iqz<2; ++iqz)
               {
                  for (int iqy=0; iqy<2; ++iqy)
                  {
                     for (int iqx=0; iqx<2; ++iqx)
                     {
                        const double mq = const_mq ? MQ(0,0,0,0) : MQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
                        const double dq = const_dq ? DQ(0,0,0,0) : DQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
                        // Loop over x,y,z components. 0 => x, 1 => y, 2 => z
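                        // Each lowest-order Nedelec edge function on the
                        // reference cube has a single nonzero component
                        // (along its edge direction), equal to the product of
                        // two hat functions in the transverse directions; its
                        // curl has zero component along the edge direction.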
                        for (int cj=0; cj<dim; ++cj)
                        {
                           // Rotated axes for the trial component: the edge
                           // direction is jd0, the transverse axes are jd1
                           // and jd2.
                           const int jd0 = cj;
                           const int jd1 = (cj + 1)%3;
                           const int jd2 = (cj + 2)%3;

                           // Quadrature point coordinates along the
                           // transverse axes.
                           const double jq1 = (jd1 == 0) ? iqx : ((jd1 == 1) ? iqy : iqz);
                           const double jq2 = (jd2 == 0) ? iqx : ((jd2 == 1) ? iqy : iqz);

                           for (int bj=0; bj<4; ++bj) // 4 edges per component
                           {
                              const int bj1 = bj%2;
                              const int bj2 = bj/2;

                              double basis_j[3];
                              basis_j[jd0] = ((bj1 == 0) ? (1 - jq1) : jq1)
                                             *((bj2 == 0) ? (1 - jq2) : jq2);
                              basis_j[jd1] = 0.0;
                              basis_j[jd2] = 0.0;

                              double curl_j[3];
                              curl_j[jd0] = 0.0;
                              curl_j[jd1] = ((bj1 == 0) ? (1 - jq1) : jq1)
                                            *((bj2 == 0) ? -1 : 1);
                              curl_j[jd2] = ((bj1 == 0) ? 1 : -1)
                                            *((bj2 == 0) ? (1 - jq2) : jq2);

                              const int jj_loc = bj + 4*cj;

                              for (int ci=0; ci<dim; ++ci)
                              {
                                 const int id0 = ci;
                                 const int id1 = (ci + 1)%3;
                                 const int id2 = (ci + 2)%3;

                                 const double iq1 = (id1 == 0) ? iqx : ((id1 == 1) ? iqy : iqz);
                                 const double iq2 = (id2 == 0) ? iqx : ((id2 == 1) ? iqy : iqz);

                                 for (int bi=0; bi<4; ++bi)
                                 {
                                    const int bi1 = bi%2;
                                    const int bi2 = bi/2;

                                    double basis_i[3];
                                    basis_i[id0] = ((bi1 == 0) ? (1 - iq1) : iq1)
                                                   *((bi2 == 0) ? (1 - iq2) : iq2);
                                    basis_i[id1] = 0.0;
                                    basis_i[id2] = 0.0;

                                    double curl_i[3];
                                    curl_i[id0] = 0.0;
                                    curl_i[id1] = ((bi1 == 0) ? (1 - iq1) : iq1)
                                                  *((bi2 == 0) ? -1 : 1);
                                    curl_i[id2] = ((bi1 == 0) ? 1 : -1)
                                                  *((bi2 == 0) ? (1 - iq2) : iq2);

                                    const int ii_loc = bi + 4*ci;

                                    // Only store the lower-triangular part of
                                    // the local matrix (it is symmetric).
                                    if (jj_loc > ii_loc) { continue; }

                                    double curl_curl = 0.0;
                                    curl_curl += Q(6,iqz,iqy,iqx)*curl_i[0]*curl_j[0];
                                    curl_curl += Q(7,iqz,iqy,iqx)*(curl_i[0]*curl_j[1] + curl_i[1]*curl_j[0]);
                                    curl_curl += Q(8,iqz,iqy,iqx)*(curl_i[0]*curl_j[2] + curl_i[2]*curl_j[0]);
                                    curl_curl += Q(9,iqz,iqy,iqx)*curl_i[1]*curl_j[1];
                                    curl_curl += Q(10,iqz,iqy,iqx)*(curl_i[1]*curl_j[2] + curl_i[2]*curl_j[1]);
                                    curl_curl += Q(11,iqz,iqy,iqx)*curl_i[2]*curl_j[2];

                                    double basis_basis = 0.0;
                                    basis_basis += Q(0,iqz,iqy,iqx)*basis_i[0]*basis_j[0];
                                    basis_basis += Q(1,iqz,iqy,iqx)*(basis_i[0]*basis_j[1] + basis_i[1]*basis_j[0]);
                                    basis_basis += Q(2,iqz,iqy,iqx)*(basis_i[0]*basis_j[2] + basis_i[2]*basis_j[0]);
                                    basis_basis += Q(3,iqz,iqy,iqx)*basis_i[1]*basis_j[1];
                                    basis_basis += Q(4,iqz,iqy,iqx)*(basis_i[1]*basis_j[2] + basis_i[2]*basis_j[1]);
                                    basis_basis += Q(5,iqz,iqy,iqx)*basis_i[2]*basis_j[2];

                                    const double val = dq*curl_curl + mq*basis_basis;

                                    local_mat(ii_loc, jj_loc) += val;
                                 }
                              }
                           }
                        }
                     }
                  }
               }

               // Assemble the local matrix into the macro-element sparse
               // matrix. The nonzeros of the macro-element sparse matrix are
               // ordered as follows:
               //
               // The axes are ordered relative to the direction of the basis
               // vector, e.g. for x-vectors, the axes are (x,y,z), for
               // y-vectors the axes are (y,z,x), and for z-vectors the axes
               // are (z,x,y).
               //
               // The nonzeros are then given in "rotated lexicographic"
               // ordering, according to these axes.
               for (int ii_loc=0; ii_loc<ne; ++ii_loc)
               {
                  const int ci = ii_loc/4;
                  const int bi = ii_loc%4;

                  const int id0 = ci;
                  const int id1 = (ci + 1)%3;
                  const int id2 = (ci + 2)%3;

                  // (i0, i1, i2): offsets of the test edge within the
                  // subelement along the rotated axes (id0, id1, id2).
                  const int i0 = 0;
                  const int i1 = bi%2;
                  const int i2 = bi/2;

                  int ii_lex[3];
                  ii_lex[id0] = i0;
                  ii_lex[id1] = i1;
                  ii_lex[id2] = i2;

                  // Number of edges of component ci along the x and y axes.
                  const int nxi = (ci == 0) ? o : op1;
                  const int nyi = (ci == 1) ? o : op1;

                  const int ii = kx + ii_lex[0] + (ky + ii_lex[1])*nxi
                                 + (kz + ii_lex[2])*nxi*nyi;

                  for (int jj_loc=0; jj_loc<ne; ++jj_loc)
                  {
                     const int cj = jj_loc/4;
                     const int bj = jj_loc%4;

                     // Component of the trial edge relative to the rotated
                     // axes of the test edge: 0 => parallel, 1 => along id1,
                     // 2 => along id2.
                     const int cj_rel = (cj - ci + 3)%3;

                     const int j1 = bj%2;
                     const int j2 = bj/2;

                     // Offset of the nonzero within the row, using the same
                     // "rotated lexicographic" layout as the host-side
                     // mapping loop below.
                     int jj_off;
                     if (cj_rel == 0)
                     {
                        jj_off = (j1 - i1 + 1) + 3*(j2 - i2 + 1);
                     }
                     else if (cj_rel == 1)
                     {
                        jj_off = 9 + j2 + 2*(1 - i1) + 4*(j1 - i2 + 1);
                     }
                     else // cj_rel == 2
                     {
                        jj_off = 21 + j1 + 2*(j2 - i1 + 1) + 6*(1 - i2);
                     }

                     // Symmetry: only the lower triangle of local_mat is
                     // stored.
                     const double val = (jj_loc <= ii_loc)
                                        ? local_mat(ii_loc, jj_loc)
                                        : local_mat(jj_loc, ii_loc);
                     AtomicAdd(V(jj_off, ii, ci, iel_ho), val);
                  }
               }
            }
         }
      }
   });

   sparse_mapping.SetSize(nnz_per_row*ndof_per_el);
   sparse_mapping = -1;
   auto map = Reshape(sparse_mapping.HostReadWrite(), nnz_per_row, ndof_per_el);
   for (int ci=0; ci<dim; ++ci)
   {
      // Rotated axes for edges of component ci: the edge direction is id0,
      // the transverse axes are id1 and id2.
      const int id0 = ci;
      const int id1 = (ci + 1)%3;
      const int id2 = (ci + 2)%3;

      // Number of edges of component ci along the x and y axes, and the
      // offset of this component in the element-local DOF ordering.
      const int nxi = (ci == 0) ? o : op1;
      const int nyi = (ci == 1) ? o : op1;
      const int i_off = ci*o*op1*op1;

      // (i0, i1, i2): indices of the edge along the rotated axes.
      for (int i0=0; i0<o; ++i0)
      {
         for (int i1=0; i1<op1; ++i1)
         {
            for (int i2=0; i2<op1; ++i2)
            {
               int ii_lex[3];
               ii_lex[id0] = i0;
               ii_lex[id1] = i1;
               ii_lex[id2] = i2;
               const int ii_el = i_off + ii_lex[0] + ii_lex[1]*nxi
                                 + ii_lex[2]*nxi*nyi;

               for (int cj_rel=0; cj_rel<dim; ++cj_rel)
               {
                  const int cj = (ci + cj_rel)%3;
                  const int j_off = cj*o*op1*op1;

                  const int nxj = (cj == 0) ? o : op1;
                  const int nyj = (cj == 1) ? o : op1;

                  const int j0_begin = i0;
                  const int j0_end = (cj_rel == 0) ? i0 : i0 + 1;
                  const int j1_begin = (i1 > 0) ? i1-1 : i1;
                  const int j1_end = (cj_rel == 1)
                                     ? ((i1 < o) ? i1 : i1-1)
                                     : ((i1 < o) ? i1+1 : i1);
                  const int j2_begin = (i2 > 0) ? i2-1 : i2;
                  const int j2_end = (cj_rel == 2)
                                     ? ((i2 < o) ? i2 : i2-1)
                                     : ((i2 < o) ? i2+1 : i2);

                  for (int j0=j0_begin; j0<=j0_end; ++j0)
                  {
                     const int d0 = j0 - i0;
                     for (int j1=j1_begin; j1<=j1_end; ++j1)
                     {
                        const int d1 = j1 - i1 + 1;
                        for (int j2=j2_begin; j2<=j2_end; ++j2)
                        {
                           const int d2 = j2 - i2 + 1;
                           int jj_lex[3];
                           jj_lex[id0] = j0;
                           jj_lex[id1] = j1;
                           jj_lex[id2] = j2;
                           const int jj_el = j_off + jj_lex[0] + jj_lex[1]*nxj
                                             + jj_lex[2]*nxj*nyj;
                           int jj_off;
                           if (cj_rel == 0) { jj_off = d1 + 3*d2; }
                           else if (cj_rel == 1) { jj_off = 9 + d0 + 2*d1 + 4*d2; }
                           else /* if (cj_rel == 2) */ { jj_off = 21 + d0 + 2*d1 + 6*d2; }
                           map(jj_off, ii_el) = jj_el;
                        }
                     }
                  }
               }
            }
         }
      }
   }
}

// Explicit template instantiations
template void BatchedLOR_ND::Assemble2D<1>();
template void BatchedLOR_ND::Assemble2D<2>();
template void BatchedLOR_ND::Assemble2D<3>();
template void BatchedLOR_ND::Assemble2D<4>();
template void BatchedLOR_ND::Assemble2D<5>();
template void BatchedLOR_ND::Assemble2D<6>();
template void BatchedLOR_ND::Assemble2D<7>();
template void BatchedLOR_ND::Assemble2D<8>();
template void BatchedLOR_ND::Assemble3D<1>();
template void BatchedLOR_ND::Assemble3D<2>();
template void BatchedLOR_ND::Assemble3D<3>();
template void BatchedLOR_ND::Assemble3D<4>();
template void BatchedLOR_ND::Assemble3D<5>();
template void BatchedLOR_ND::Assemble3D<6>();
template void BatchedLOR_ND::Assemble3D<7>();
template void BatchedLOR_ND::Assemble3D<8>();

BatchedLOR_ND::BatchedLOR_ND(BilinearForm &a,
                             FiniteElementSpace &fes_ho_,
                             Vector &X_vert_,
                             Vector &sparse_ij_,
                             Array<int> &sparse_mapping_)
   : BatchedLORKernel(fes_ho_, X_vert_, sparse_ij_, sparse_mapping_)
{
   // c1 holds the coefficient of the mass term (MQ above), c2 the
   // coefficient of the curl-curl term (DQ above).
   ProjectLORCoefficient<VectorFEMassIntegrator>(a, c1);
   ProjectLORCoefficient<CurlCurlIntegrator>(a, c2);
}

} // namespace mfem