/* //@HEADER // ************************************************************************ // // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation // // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // 3. Neither the name of the Corporation nor the names of the // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER */ #ifndef KOKKOS_EXPLICITFUNCTORS_HPP #define KOKKOS_EXPLICITFUNCTORS_HPP #include #include #include namespace Explicit { template struct Fields { static const int NumStates = 2 ; static const int SpatialDim = 3 ; static const int ElemNodeCount = 8 ; // Indices for full 3x3 tensor: static const int K_F_XX = 0 ; static const int K_F_YY = 1 ; static const int K_F_ZZ = 2 ; static const int K_F_XY = 3 ; static const int K_F_YZ = 4 ; static const int K_F_ZX = 5 ; static const int K_F_YX = 6 ; static const int K_F_ZY = 7 ; static const int K_F_XZ = 8 ; // Indexes into a 3 by 3 symmetric tensor stored as a length 6 vector static const int K_S_XX = 0 ; static const int K_S_YY = 1 ; static const int K_S_ZZ = 2 ; static const int K_S_XY = 3 ; static const int K_S_YZ = 4 ; static const int K_S_ZX = 5 ; static const int K_S_YX = 3 ; static const int K_S_ZY = 4 ; static const int K_S_XZ = 5 ; // Indexes into a 3 by 3 skew symmetric tensor stored as a length 3 vector static const int K_V_XY = 0 ; static const int K_V_YZ = 1 ; static const int K_V_ZX = 2 ; typedef Device execution_space ; typedef typename execution_space::size_type size_type ; typedef HybridFEM::FEMesh FEMesh ; typedef typename FEMesh::node_coords_type node_coords_type ; typedef typename FEMesh::elem_node_ids_type elem_node_ids_type ; typedef typename FEMesh::node_elem_ids_type node_elem_ids_type ; typedef typename Kokkos::ParallelDataMap parallel_data_map ; typedef Kokkos::View< double[][ SpatialDim ][ NumStates ] , execution_space > geom_state_array_type ; typedef Kokkos::View< Scalar[][ SpatialDim ] , execution_space > geom_array_type ; typedef Kokkos::View< Scalar[] , execution_space > array_type ; typedef Kokkos::View< Scalar , execution_space > scalar_type ; typedef Kokkos::View< Scalar[][ 6 ] , execution_space > elem_sym_tensor_type ; typedef Kokkos::View< Scalar[][ 9 ] , execution_space > elem_tensor_type ; typedef Kokkos::View< Scalar[][ 9 ][ NumStates ] , execution_space > elem_tensor_state_type ; typedef Kokkos::View< Scalar[][ SpatialDim ][ ElemNodeCount ] , execution_space > elem_node_geom_type ; // Parameters: const int num_nodes ; const int num_nodes_owned ; const int num_elements ; const Scalar lin_bulk_visc; const Scalar quad_bulk_visc; const Scalar two_mu; const Scalar bulk_modulus; const Scalar density; // Mesh: const elem_node_ids_type elem_node_connectivity ; const node_elem_ids_type node_elem_connectivity ; const node_coords_type model_coords ; // Compute: const scalar_type dt ; const scalar_type prev_dt ; const geom_state_array_type displacement ; const geom_state_array_type velocity ; const geom_array_type acceleration ; const geom_array_type internal_force ; const array_type nodal_mass ; const array_type elem_mass ; const array_type internal_energy ; const elem_sym_tensor_type stress_new ; const elem_tensor_state_type rotation ; const elem_node_geom_type element_force ; const elem_tensor_type vel_grad ; const elem_sym_tensor_type stretch ; const elem_sym_tensor_type rot_stretch ; Fields( const FEMesh & mesh, Scalar arg_lin_bulk_visc, Scalar arg_quad_bulk_visc, Scalar youngs_modulus, Scalar poissons_ratio, Scalar arg_density ) : num_nodes( mesh.parallel_data_map.count_owned + mesh.parallel_data_map.count_receive ) , num_nodes_owned( mesh.parallel_data_map.count_owned ) , num_elements( mesh.elem_node_ids.dimension_0() ) , lin_bulk_visc( arg_lin_bulk_visc ) , quad_bulk_visc( arg_quad_bulk_visc ) , two_mu(youngs_modulus/(1.0+poissons_ratio)) , bulk_modulus(youngs_modulus/(3*(1.0-2.0*poissons_ratio))) , density(arg_density) // mesh , elem_node_connectivity( mesh.elem_node_ids ) // ( num_elements , ElemNodeCount ) , node_elem_connectivity( mesh.node_elem_ids ) // ( num_nodes , ... ) , model_coords( mesh.node_coords ) // ( num_nodes , 3 ) // compute with input/output , dt( "dt" ) , prev_dt( "prev_dt" ) , displacement( "displacement" , num_nodes ) , velocity( "velocity" , num_nodes ) , acceleration( "acceleration" , num_nodes_owned ) , internal_force( "internal_force" , num_nodes_owned ) , nodal_mass( "nodal_mass" , num_nodes_owned ) , elem_mass( "elem_mass" , num_elements ) , internal_energy( "internal_energy" , num_elements ) , stress_new( "stress_new" , num_elements ) // temporary arrays , rotation( "rotation" , num_elements ) , element_force( "element_force" , num_elements ) , vel_grad( "vel_grad" , num_elements ) , stretch( "stretch" , num_elements ) , rot_stretch( "rot_stretch" , num_elements ) { } }; //---------------------------------------------------------------------------- template< typename Scalar , class DeviceType > KOKKOS_INLINE_FUNCTION Scalar dot8( const Scalar * a , const Scalar * b ) { return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3] + a[4] * b[4] + a[5] * b[5] + a[6] * b[6] + a[7] * b[7] ; } template< typename Scalar , class DeviceType > KOKKOS_INLINE_FUNCTION void comp_grad( const Scalar * const x , const Scalar * const y , const Scalar * const z, Scalar * const grad_x , Scalar * const grad_y , Scalar * const grad_z ) { // calc X difference vectors Scalar R42=(x[3] - x[1]); Scalar R52=(x[4] - x[1]); Scalar R54=(x[4] - x[3]); Scalar R63=(x[5] - x[2]); Scalar R83=(x[7] - x[2]); Scalar R86=(x[7] - x[5]); Scalar R31=(x[2] - x[0]); Scalar R61=(x[5] - x[0]); Scalar R74=(x[6] - x[3]); Scalar R72=(x[6] - x[1]); Scalar R75=(x[6] - x[4]); Scalar R81=(x[7] - x[0]); Scalar t1=(R63 + R54); Scalar t2=(R61 + R74); Scalar t3=(R72 + R81); Scalar t4 =(R86 + R42); Scalar t5 =(R83 + R52); Scalar t6 =(R75 + R31); // Calculate Y gradient from X and Z data grad_y[0] = (z[1] * t1) - (z[2] * R42) - (z[3] * t5) + (z[4] * t4) + (z[5] * R52) - (z[7] * R54); grad_y[1] = (z[2] * t2) + (z[3] * R31) - (z[0] * t1) - (z[5] * t6) + (z[6] * R63) - (z[4] * R61); grad_y[2] = (z[3] * t3) + (z[0] * R42) - (z[1] * t2) - (z[6] * t4) + (z[7] * R74) - (z[5] * R72); grad_y[3] = (z[0] * t5) - (z[1] * R31) - (z[2] * t3) + (z[7] * t6) + (z[4] * R81) - (z[6] * R83); grad_y[4] = (z[5] * t3) + (z[6] * R86) - (z[7] * t2) - (z[0] * t4) - (z[3] * R81) + (z[1] * R61); grad_y[5] = (z[6] * t5) - (z[4] * t3) - (z[7] * R75) + (z[1] * t6) - (z[0] * R52) + (z[2] * R72); grad_y[6] = (z[7] * t1) - (z[5] * t5) - (z[4] * R86) + (z[2] * t4) - (z[1] * R63) + (z[3] * R83); grad_y[7] = (z[4] * t2) - (z[6] * t1) + (z[5] * R75) - (z[3] * t6) - (z[2] * R74) + (z[0] * R54); // calc Z difference vectors R42=(z[3] - z[1]); R52=(z[4] - z[1]); R54=(z[4] - z[3]); R63=(z[5] - z[2]); R83=(z[7] - z[2]); R86=(z[7] - z[5]); R31=(z[2] - z[0]); R61=(z[5] - z[0]); R74=(z[6] - z[3]); R72=(z[6] - z[1]); R75=(z[6] - z[4]); R81=(z[7] - z[0]); t1=(R63 + R54); t2=(R61 + R74); t3=(R72 + R81); t4 =(R86 + R42); t5 =(R83 + R52); t6 =(R75 + R31); // Calculate X gradient from Y and Z data grad_x[0] = (y[1] * t1) - (y[2] * R42) - (y[3] * t5) + (y[4] * t4) + (y[5] * R52) - (y[7] * R54); grad_x[1] = (y[2] * t2) + (y[3] * R31) - (y[0] * t1) - (y[5] * t6) + (y[6] * R63) - (y[4] * R61); grad_x[2] = (y[3] * t3) + (y[0] * R42) - (y[1] * t2) - (y[6] * t4) + (y[7] * R74) - (y[5] * R72); grad_x[3] = (y[0] * t5) - (y[1] * R31) - (y[2] * t3) + (y[7] * t6) + (y[4] * R81) - (y[6] * R83); grad_x[4] = (y[5] * t3) + (y[6] * R86) - (y[7] * t2) - (y[0] * t4) - (y[3] * R81) + (y[1] * R61); grad_x[5] = (y[6] * t5) - (y[4] * t3) - (y[7] * R75) + (y[1] * t6) - (y[0] * R52) + (y[2] * R72); grad_x[6] = (y[7] * t1) - (y[5] * t5) - (y[4] * R86) + (y[2] * t4) - (y[1] * R63) + (y[3] * R83); grad_x[7] = (y[4] * t2) - (y[6] * t1) + (y[5] * R75) - (y[3] * t6) - (y[2] * R74) + (y[0] * R54); // calc Y difference vectors R42=(y[3] - y[1]); R52=(y[4] - y[1]); R54=(y[4] - y[3]); R63=(y[5] - y[2]); R83=(y[7] - y[2]); R86=(y[7] - y[5]); R31=(y[2] - y[0]); R61=(y[5] - y[0]); R74=(y[6] - y[3]); R72=(y[6] - y[1]); R75=(y[6] - y[4]); R81=(y[7] - y[0]); t1=(R63 + R54); t2=(R61 + R74); t3=(R72 + R81); t4 =(R86 + R42); t5 =(R83 + R52); t6 =(R75 + R31); // Calculate Z gradient from X and Y data grad_z[0] = (x[1] * t1) - (x[2] * R42) - (x[3] * t5) + (x[4] * t4) + (x[5] * R52) - (x[7] * R54); grad_z[1] = (x[2] * t2) + (x[3] * R31) - (x[0] * t1) - (x[5] * t6) + (x[6] * R63) - (x[4] * R61); grad_z[2] = (x[3] * t3) + (x[0] * R42) - (x[1] * t2) - (x[6] * t4) + (x[7] * R74) - (x[5] * R72); grad_z[3] = (x[0] * t5) - (x[1] * R31) - (x[2] * t3) + (x[7] * t6) + (x[4] * R81) - (x[6] * R83); grad_z[4] = (x[5] * t3) + (x[6] * R86) - (x[7] * t2) - (x[0] * t4) - (x[3] * R81) + (x[1] * R61); grad_z[5] = (x[6] * t5) - (x[4] * t3) - (x[7] * R75) + (x[1] * t6) - (x[0] * R52) + (x[2] * R72); grad_z[6] = (x[7] * t1) - (x[5] * t5) - (x[4] * R86) + (x[2] * t4) - (x[1] * R63) + (x[3] * R83); grad_z[7] = (x[4] * t2) - (x[6] * t1) + (x[5] * R75) - (x[3] * t6) - (x[2] * R74) + (x[0] * R54); } //---------------------------------------------------------------------------- template< typename Scalar , class DeviceType > struct initialize_element { typedef DeviceType execution_space ; typedef Explicit::Fields< Scalar , execution_space > Fields ; typename Fields::elem_node_ids_type elem_node_connectivity ; typename Fields::node_coords_type model_coords ; typename Fields::elem_sym_tensor_type stretch ; typename Fields::elem_tensor_state_type rotation ; typename Fields::array_type elem_mass ; const Scalar density ; initialize_element( const Fields & mesh_fields ) : elem_node_connectivity( mesh_fields.elem_node_connectivity ) , model_coords( mesh_fields.model_coords ) , stretch( mesh_fields.stretch ) , rotation( mesh_fields.rotation ) , elem_mass( mesh_fields.elem_mass ) , density( mesh_fields.density ) {} KOKKOS_INLINE_FUNCTION void operator()( int ielem )const { const int K_XX = 0 ; const int K_YY = 1 ; const int K_ZZ = 2 ; const Scalar ONE12TH = 1.0 / 12.0 ; Scalar x[ Fields::ElemNodeCount ]; Scalar y[ Fields::ElemNodeCount ]; Scalar z[ Fields::ElemNodeCount ]; Scalar grad_x[ Fields::ElemNodeCount ]; Scalar grad_y[ Fields::ElemNodeCount ]; Scalar grad_z[ Fields::ElemNodeCount ]; for ( int i = 0 ; i < Fields::ElemNodeCount ; ++i ) { const int n = elem_node_connectivity( ielem , i ); x[i] = model_coords( n , 0 ); y[i] = model_coords( n , 1 ); z[i] = model_coords( n , 2 ); } comp_grad( x, y, z, grad_x, grad_y, grad_z); stretch(ielem,K_XX) = 1 ; stretch(ielem,K_YY) = 1 ; stretch(ielem,K_ZZ) = 1 ; rotation(ielem,K_XX,0) = 1 ; rotation(ielem,K_YY,0) = 1 ; rotation(ielem,K_ZZ,0) = 1 ; rotation(ielem,K_XX,1) = 1 ; rotation(ielem,K_YY,1) = 1 ; rotation(ielem,K_ZZ,1) = 1 ; elem_mass(ielem) = ONE12TH * density * dot8( x , grad_x ); } static void apply( const Fields & mesh_fields ) { initialize_element op( mesh_fields ); Kokkos::parallel_for( mesh_fields.num_elements , op ); } }; template struct initialize_node { typedef DeviceType execution_space ; typedef Explicit::Fields< Scalar , execution_space > Fields ; typename Fields::node_elem_ids_type node_elem_connectivity ; typename Fields::array_type nodal_mass ; typename Fields::array_type elem_mass ; static const int ElemNodeCount = Fields::ElemNodeCount ; initialize_node( const Fields & mesh_fields ) : node_elem_connectivity( mesh_fields.node_elem_connectivity ) , nodal_mass( mesh_fields.nodal_mass ) , elem_mass( mesh_fields.elem_mass ) {} KOKKOS_INLINE_FUNCTION void operator()( int inode )const { const int begin = node_elem_connectivity.row_map[inode]; const int end = node_elem_connectivity.row_map[inode+1]; Scalar node_mass = 0; for(int i = begin; i != end; ++i) { const int elem_id = node_elem_connectivity.entries( i , 0 ); node_mass += elem_mass(elem_id); } nodal_mass(inode) = node_mass / ElemNodeCount ; } static void apply( const Fields & mesh_fields ) { initialize_node op( mesh_fields ); Kokkos::parallel_for( mesh_fields.num_nodes_owned , op ); } }; //---------------------------------------------------------------------------- template struct grad { typedef DeviceType execution_space ; typedef Explicit::Fields< Scalar , execution_space > Fields ; static const int ElemNodeCount = Fields::ElemNodeCount ; static const int K_F_XX = Fields::K_F_XX ; static const int K_F_YY = Fields::K_F_YY ; static const int K_F_ZZ = Fields::K_F_ZZ ; static const int K_F_XY = Fields::K_F_XY ; static const int K_F_YZ = Fields::K_F_YZ ; static const int K_F_ZX = Fields::K_F_ZX ; static const int K_F_YX = Fields::K_F_YX ; static const int K_F_ZY = Fields::K_F_ZY ; static const int K_F_XZ = Fields::K_F_XZ ; // Global arrays used by this functor. const typename Fields::elem_node_ids_type elem_node_connectivity ; const typename Fields::node_coords_type model_coords ; const typename Fields::geom_state_array_type displacement ; const typename Fields::geom_state_array_type velocity ; const typename Fields::elem_tensor_type vel_grad ; const typename Fields::scalar_type dt ; const int current_state; const int previous_state; // Constructor on the Host to populate this device functor. // All array view copies are shallow. grad( const Fields & fields, const int arg_current_state, const int arg_previous_state) : elem_node_connectivity( fields.elem_node_connectivity) , model_coords( fields.model_coords) , displacement( fields.displacement) , velocity( fields.velocity) , vel_grad( fields.vel_grad) , dt( fields.dt) , current_state(arg_current_state) , previous_state(arg_previous_state) { } //-------------------------------------------------------------------------- // Calculate Velocity Gradients KOKKOS_INLINE_FUNCTION void v_grad( int ielem, Scalar * vx, Scalar * vy, Scalar * vz, Scalar * grad_x, Scalar * grad_y, Scalar * grad_z, Scalar inv_vol) const { const int K_F_XX = Fields::K_F_XX ; const int K_F_YY = Fields::K_F_YY ; const int K_F_ZZ = Fields::K_F_ZZ ; const int K_F_XY = Fields::K_F_XY ; const int K_F_YZ = Fields::K_F_YZ ; const int K_F_ZX = Fields::K_F_ZX ; const int K_F_YX = Fields::K_F_YX ; const int K_F_ZY = Fields::K_F_ZY ; const int K_F_XZ = Fields::K_F_XZ ; vel_grad(ielem, K_F_XX) = inv_vol * dot8( vx , grad_x ); vel_grad(ielem, K_F_YX) = inv_vol * dot8( vy , grad_x ); vel_grad(ielem, K_F_ZX) = inv_vol * dot8( vz , grad_x ); vel_grad(ielem, K_F_XY) = inv_vol * dot8( vx , grad_y ); vel_grad(ielem, K_F_YY) = inv_vol * dot8( vy , grad_y ); vel_grad(ielem, K_F_ZY) = inv_vol * dot8( vz , grad_y ); vel_grad(ielem, K_F_XZ) = inv_vol * dot8( vx , grad_z ); vel_grad(ielem, K_F_YZ) = inv_vol * dot8( vy , grad_z ); vel_grad(ielem, K_F_ZZ) = inv_vol * dot8( vz , grad_z ); } //-------------------------------------------------------------------------- // Functor operator() which calls the three member functions. KOKKOS_INLINE_FUNCTION void operator()( int ielem )const { const int X = 0 ; const int Y = 1 ; const int Z = 2 ; const Scalar dt_scale = -0.5 * dt(); // declare and reuse local data for frequently accessed data to // reduce global memory reads and writes. Scalar x[8], y[8], z[8]; Scalar vx[8], vy[8], vz[8]; Scalar grad_x[8], grad_y[8], grad_z[8]; // Read global velocity once and use many times // via local registers / L1 cache. // store the velocity information in local memory before using, // so it can be returned for other functions to use // Read global coordinates and velocity once and use many times // via local registers / L1 cache. // load X coordinate information and move by half time step for ( int i = 0 ; i < ElemNodeCount ; ++i ) { const int n = elem_node_connectivity( ielem , i ); vx[i] = velocity( n , X , current_state ); vy[i] = velocity( n , Y , current_state ); vz[i] = velocity( n , Z , current_state ); x[i] = model_coords( n , X ) + displacement( n , X , current_state ) + dt_scale * vx[i]; y[i] = model_coords( n , Y ) + displacement( n , Y , current_state ) + dt_scale * vy[i]; z[i] = model_coords( n , Z ) + displacement( n , Z , current_state ) + dt_scale * vz[i]; } comp_grad( x, y, z, grad_x, grad_y, grad_z); // Calculate hexahedral volume from x model_coords and gradient information const Scalar inv_vol = 1.0 / dot8( x , grad_x ); v_grad(ielem, vx, vy, vz, grad_x, grad_y, grad_z, inv_vol); } static void apply( const Fields & fields , const int arg_current_state , const int arg_previous_state ) { grad op( fields, arg_current_state , arg_previous_state ); Kokkos::parallel_for( fields.num_elements , op ); } }; //---------------------------------------------------------------------------- template struct decomp_rotate { typedef DeviceType execution_space ; typedef Explicit::Fields< Scalar , execution_space > Fields ; static const int ElemNodeCount = Fields::ElemNodeCount ; static const int K_F_XX = Fields::K_F_XX ; static const int K_F_YY = Fields::K_F_YY ; static const int K_F_ZZ = Fields::K_F_ZZ ; static const int K_F_XY = Fields::K_F_XY ; static const int K_F_YZ = Fields::K_F_YZ ; static const int K_F_ZX = Fields::K_F_ZX ; static const int K_F_YX = Fields::K_F_YX ; static const int K_F_ZY = Fields::K_F_ZY ; static const int K_F_XZ = Fields::K_F_XZ ; static const int K_S_XX = Fields::K_S_XX ; static const int K_S_YY = Fields::K_S_YY ; static const int K_S_ZZ = Fields::K_S_ZZ ; static const int K_S_XY = Fields::K_S_XY ; static const int K_S_YZ = Fields::K_S_YZ ; static const int K_S_ZX = Fields::K_S_ZX ; static const int K_S_YX = Fields::K_S_YX ; static const int K_S_ZY = Fields::K_S_ZY ; static const int K_S_XZ = Fields::K_S_XZ ; static const int K_V_XY = Fields::K_V_XY ; static const int K_V_YZ = Fields::K_V_YZ ; static const int K_V_ZX = Fields::K_V_ZX ; // Global arrays used by this functor. const typename Fields::elem_tensor_state_type rotation ; const typename Fields::elem_tensor_type vel_grad ; const typename Fields::elem_sym_tensor_type stretch ; const typename Fields::elem_sym_tensor_type rot_stretch ; const typename Fields::scalar_type dt_value ; const int current_state; const int previous_state; decomp_rotate( const Fields & mesh_fields , const int arg_current_state, const int arg_previous_state) : rotation( mesh_fields.rotation ) , vel_grad( mesh_fields.vel_grad ) , stretch( mesh_fields.stretch ) , rot_stretch( mesh_fields.rot_stretch ) , dt_value( mesh_fields.dt) , current_state( arg_current_state) , previous_state(arg_previous_state) {} static void apply( const Fields & mesh_fields , const int arg_current_state , const int arg_previous_state ) { decomp_rotate op( mesh_fields , arg_current_state , arg_previous_state ); Kokkos::parallel_for( mesh_fields.num_elements , op ); } KOKKOS_INLINE_FUNCTION void additive_decomp(int ielem, Scalar * v_gr, Scalar * str_ten) const { // In addition to calculating stretching_tensor, // use this as an opportunity to load global // variables into a local space for ( int i = 0 ; i < 9 ; ++i ) { v_gr[i] = vel_grad( ielem , i ); } // // Symmetric part // str_ten[K_S_XX] = v_gr[K_F_XX]; str_ten[K_S_YY] = v_gr[K_F_YY]; str_ten[K_S_ZZ] = v_gr[K_F_ZZ]; str_ten[K_S_XY] = 0.5*(v_gr[K_F_XY] + v_gr[K_F_YX]); str_ten[K_S_YZ] = 0.5*(v_gr[K_F_YZ] + v_gr[K_F_ZY]); str_ten[K_S_ZX] = 0.5*(v_gr[K_F_ZX] + v_gr[K_F_XZ]); } KOKKOS_INLINE_FUNCTION void polar_decomp(int ielem, Scalar * v_gr, Scalar * str_ten, Scalar * str, Scalar * vort, Scalar * rot_old, Scalar * rot_new)const { const Scalar dt = dt_value(); const Scalar dt_half = 0.5 * dt; // Skew Symmetric part vort[K_V_XY] = 0.5*(v_gr[K_F_XY] - v_gr[K_F_YX]); vort[K_V_YZ] = 0.5*(v_gr[K_F_YZ] - v_gr[K_F_ZY]); vort[K_V_ZX] = 0.5*(v_gr[K_F_ZX] - v_gr[K_F_XZ]); // calculate the rates of rotation via gauss elimination. for ( int i = 0 ; i < 6 ; ++i ) { str[i] = stretch(ielem, i); } Scalar z1 = str_ten[K_S_XY] * str[K_S_ZX] - str_ten[K_S_ZX] * str[K_S_XY] + str_ten[K_S_YY] * str[K_S_YZ] - str_ten[K_S_YZ] * str[K_S_YY] + str_ten[K_S_YZ] * str[K_S_ZZ] - str_ten[K_S_ZZ] * str[K_S_YZ]; Scalar z2 = str_ten[K_S_ZX] * str[K_S_XX] - str_ten[K_S_XX] * str[K_S_ZX] + str_ten[K_S_YZ] * str[K_S_XY] - str_ten[K_S_XY] * str[K_S_YZ] + str_ten[K_S_ZZ] * str[K_S_ZX] - str_ten[K_S_ZX] * str[K_S_ZZ]; Scalar z3 = str_ten[K_S_XX] * str[K_S_XY] - str_ten[K_S_XY] * str[K_S_XX] + str_ten[K_S_XY] * str[K_S_YY] - str_ten[K_S_YY] * str[K_S_XY] + str_ten[K_S_ZX] * str[K_S_YZ] - str_ten[K_S_YZ] * str[K_S_ZX]; // forward elimination const Scalar a1inv = 1.0 / (str[K_S_YY] + str[K_S_ZZ]); const Scalar a4BYa1 = -1 * str[K_S_XY] * a1inv; const Scalar a2inv = 1.0 / (str[K_S_ZZ] + str[K_S_XX] + str[K_S_XY] * a4BYa1); const Scalar a5 = -str[K_S_YZ] + str[K_S_ZX] * a4BYa1; z2 -= z1 * a4BYa1; Scalar a6BYa1 = -1 * str[K_S_ZX] * a1inv; const Scalar a5BYa2 = a5 * a2inv; z3 -= z1 * a6BYa1 - z2 * a5BYa2; // backward substitution - z3 /= (str[K_S_XX] + str[K_S_YY] + str[K_S_ZX] * a6BYa1 + a5 * a5BYa2); z2 = (z2 - a5 * z3) * a2inv; z1 = (z1*a1inv - a6BYa1 * z3 -a4BYa1 * z2); // calculate rotation rates - recall that spin_rate is an asymmetric tensor, // so compute spin rate vector as dual of spin rate tensor, // i.e w_i = e_ijk * spin_rate_jk z1 += vort[K_V_YZ]; z2 += vort[K_V_ZX]; z3 += vort[K_V_XY]; // update rotation tensor: // 1) premultiply old rotation tensor to get right-hand side. for ( int i = 0 ; i < 9 ; ++i ) { rot_old[i] = rotation(ielem, i, previous_state); } Scalar r_XX = rot_old[K_F_XX] + dt_half*( z3 * rot_old[K_F_YX] - z2 * rot_old[K_F_ZX] ); Scalar r_YX = rot_old[K_F_YX] + dt_half*( z1 * rot_old[K_F_ZX] - z3 * rot_old[K_F_XX] ); Scalar r_ZX = rot_old[K_F_ZX] + dt_half*( z2 * rot_old[K_F_XX] - z1 * rot_old[K_F_YX] ); Scalar r_XY = rot_old[K_F_XY] + dt_half*( z3 * rot_old[K_F_YY] - z2 * rot_old[K_F_ZY] ); Scalar r_YY = rot_old[K_F_YY] + dt_half*( z1 * rot_old[K_F_ZY] - z3 * rot_old[K_F_XY] ); Scalar r_ZY = rot_old[K_F_ZY] + dt_half*( z2 * rot_old[K_F_XY] - z1 * rot_old[K_F_YY] ); Scalar r_XZ = rot_old[K_F_XZ] + dt_half*( z3 * rot_old[K_F_YZ] - z2 * rot_old[K_F_ZZ] ); Scalar r_YZ = rot_old[K_F_YZ] + dt_half*( z1 * rot_old[K_F_ZZ] - z3 * rot_old[K_F_XZ] ); Scalar r_ZZ = rot_old[K_F_ZZ] + dt_half*( z2 * rot_old[K_F_XZ] - z1 * rot_old[K_F_YZ] ); // 2) solve for new rotation tensor via gauss elimination. // forward elimination - Scalar a12 = - dt_half * z3; Scalar a13 = dt_half * z2; Scalar b32 = - dt_half * z1; Scalar a22inv = 1.0 / (1.0 + a12 * a12); Scalar a13a12 = a13*a12; Scalar a23 = b32 + a13a12; r_YX += r_XX * a12; r_YY += r_XY * a12; r_YZ += r_XZ * a12; b32 = (b32 - a13a12) * a22inv; r_ZX += r_XX * a13 + r_YX * b32; r_ZY += r_XY * a13 + r_YY * b32; r_ZZ += r_XZ * a13 + r_YZ * b32; // backward substitution - const Scalar a33inv = 1.0 / (1.0 + a13 * a13 + a23 * b32); rot_new[K_F_ZX] = r_ZX * a33inv; rot_new[K_F_ZY] = r_ZY * a33inv; rot_new[K_F_ZZ] = r_ZZ * a33inv; rot_new[K_F_YX] = ( r_YX - rot_new[K_F_ZX] * a23 ) * a22inv; rot_new[K_F_YY] = ( r_YY - rot_new[K_F_ZY] * a23 ) * a22inv; rot_new[K_F_YZ] = ( r_YZ - rot_new[K_F_ZZ] * a23 ) * a22inv; rot_new[K_F_XX] = r_XX - rot_new[K_F_ZX] * a13 - rot_new[K_F_YX] * a12; rot_new[K_F_XY] = r_XY - rot_new[K_F_ZY] * a13 - rot_new[K_F_YY] * a12; rot_new[K_F_XZ] = r_XZ - rot_new[K_F_ZZ] * a13 - rot_new[K_F_YZ] * a12; for ( int i = 0 ; i < 9 ; ++i ) { rotation(ielem, i, current_state) = rot_new[i] ; } // update stretch tensor in the new configuration - const Scalar a1 = str_ten[K_S_XY] + vort[K_V_XY]; const Scalar a2 = str_ten[K_S_YZ] + vort[K_V_YZ]; const Scalar a3 = str_ten[K_S_ZX] + vort[K_V_ZX]; const Scalar b1 = str_ten[K_S_ZX] - vort[K_V_ZX]; const Scalar b2 = str_ten[K_S_XY] - vort[K_V_XY]; const Scalar b3 = str_ten[K_S_YZ] - vort[K_V_YZ]; const Scalar s_XX = str[K_S_XX]; const Scalar s_YY = str[K_S_YY]; const Scalar s_ZZ = str[K_S_ZZ]; const Scalar s_XY = str[K_S_XY]; const Scalar s_YZ = str[K_S_YZ]; const Scalar s_ZX = str[K_S_ZX]; str[K_S_XX] += dt * (str_ten[K_S_XX] * s_XX + ( a1 + z3 ) * s_XY + ( b1 - z2 ) * s_ZX); str[K_S_YY] += dt * (str_ten[K_S_YY] * s_YY + ( a2 + z1 ) * s_YZ + ( b2 - z3 ) * s_XY); str[K_S_ZZ] += dt * (str_ten[K_S_ZZ] * s_ZZ + ( a3 + z2 ) * s_ZX + ( b3 - z1 ) * s_YZ); str[K_S_XY] += dt * (str_ten[K_S_XX] * s_XY + ( a1 ) * s_YY + ( b1 ) * s_YZ - z3 * s_XX + z1 * s_ZX); str[K_S_YZ] += dt * (str_ten[K_S_YY] * s_YZ + ( a2 ) * s_ZZ + ( b2 ) * s_ZX - z1 * s_YY + z2 * s_XY); str[K_S_ZX] += dt * (str_ten[K_S_ZZ] * s_ZX + ( a3 ) * s_XX + ( b3 ) * s_XY - z2 * s_ZZ + z3 * s_YZ); } KOKKOS_INLINE_FUNCTION void rotate_tensor(int ielem, Scalar * str_ten, Scalar * str, Scalar * rot_new)const { Scalar t[9]; Scalar rot_str[6]; // Rotated stretch t[0] = str_ten[K_S_XX]*rot_new[K_F_XX] + str_ten[K_S_XY]*rot_new[K_F_YX] + str_ten[K_S_XZ]*rot_new[K_F_ZX]; t[1] = str_ten[K_S_YX]*rot_new[K_F_XX] + str_ten[K_S_YY]*rot_new[K_F_YX] + str_ten[K_S_YZ]*rot_new[K_F_ZX]; t[2] = str_ten[K_S_ZX]*rot_new[K_F_XX] + str_ten[K_S_ZY]*rot_new[K_F_YX] + str_ten[K_S_ZZ]*rot_new[K_F_ZX]; t[3] = str_ten[K_S_XX]*rot_new[K_F_XY] + str_ten[K_S_XY]*rot_new[K_F_YY] + str_ten[K_S_XZ]*rot_new[K_F_ZY]; t[4] = str_ten[K_S_YX]*rot_new[K_F_XY] + str_ten[K_S_YY]*rot_new[K_F_YY] + str_ten[K_S_YZ]*rot_new[K_F_ZY]; t[5] = str_ten[K_S_ZX]*rot_new[K_F_XY] + str_ten[K_S_ZY]*rot_new[K_F_YY] + str_ten[K_S_ZZ]*rot_new[K_F_ZY]; t[6] = str_ten[K_S_XX]*rot_new[K_F_XZ] + str_ten[K_S_XY]*rot_new[K_F_YZ] + str_ten[K_S_XZ]*rot_new[K_F_ZZ]; t[7] = str_ten[K_S_YX]*rot_new[K_F_XZ] + str_ten[K_S_YY]*rot_new[K_F_YZ] + str_ten[K_S_YZ]*rot_new[K_F_ZZ]; t[8] = str_ten[K_S_ZX]*rot_new[K_F_XZ] + str_ten[K_S_ZY]*rot_new[K_F_YZ] + str_ten[K_S_ZZ]*rot_new[K_F_ZZ]; rot_str[ K_S_XX ] = rot_new[K_F_XX] * t[0] + rot_new[K_F_YX] * t[1] + rot_new[K_F_ZX] * t[2]; rot_str[ K_S_YY ] = rot_new[K_F_XY] * t[3] + rot_new[K_F_YY] * t[4] + rot_new[K_F_ZY] * t[5]; rot_str[ K_S_ZZ ] = rot_new[K_F_XZ] * t[6] + rot_new[K_F_YZ] * t[7] + rot_new[K_F_ZZ] * t[8]; rot_str[ K_S_XY ] = rot_new[K_F_XX] * t[3] + rot_new[K_F_YX] * t[4] + rot_new[K_F_ZX] * t[5]; rot_str[ K_S_YZ ] = rot_new[K_F_XY] * t[6] + rot_new[K_F_YY] * t[7] + rot_new[K_F_ZY] * t[8]; rot_str[ K_S_ZX ] = rot_new[K_F_XZ] * t[0] + rot_new[K_F_YZ] * t[1] + rot_new[K_F_ZZ] * t[2]; for ( int i = 0 ; i < 6 ; ++i ) { rot_stretch(ielem, i) = rot_str[i] ; } for ( int i = 0 ; i < 6 ; ++i ) { stretch(ielem, i) = str[i] ; } } KOKKOS_INLINE_FUNCTION void operator()( int ielem )const { // Local scratch space to avoid multiple // accesses to global memory. Scalar str_ten[6]; // Stretching tensor Scalar str[6]; // Stretch Scalar rot_old[9]; // Rotation old Scalar rot_new[9]; // Rotation new Scalar vort[3]; // Vorticity Scalar v_gr[9]; // Velocity gradient additive_decomp(ielem, v_gr, str_ten); polar_decomp(ielem, v_gr, str_ten, str, vort, rot_old, rot_new); rotate_tensor(ielem, str_ten, str, rot_new); } }; //---------------------------------------------------------------------------- template struct internal_force { typedef DeviceType execution_space ; typedef Explicit::Fields< Scalar , execution_space > Fields ; static const int ElemNodeCount = Fields::ElemNodeCount ; static const int K_F_XX = Fields::K_F_XX ; static const int K_F_YY = Fields::K_F_YY ; static const int K_F_ZZ = Fields::K_F_ZZ ; static const int K_F_XY = Fields::K_F_XY ; static const int K_F_YZ = Fields::K_F_YZ ; static const int K_F_ZX = Fields::K_F_ZX ; static const int K_F_YX = Fields::K_F_YX ; static const int K_F_ZY = Fields::K_F_ZY ; static const int K_F_XZ = Fields::K_F_XZ ; static const int K_S_XX = Fields::K_S_XX ; static const int K_S_YY = Fields::K_S_YY ; static const int K_S_ZZ = Fields::K_S_ZZ ; static const int K_S_XY = Fields::K_S_XY ; static const int K_S_YZ = Fields::K_S_YZ ; static const int K_S_ZX = Fields::K_S_ZX ; static const int K_S_YX = Fields::K_S_YX ; static const int K_S_ZY = Fields::K_S_ZY ; static const int K_S_XZ = Fields::K_S_XZ ; //-------------------------------------------------------------------------- // Reduction: typedef Scalar value_type; KOKKOS_INLINE_FUNCTION static void init(value_type &update) { update = 1.0e32; } KOKKOS_INLINE_FUNCTION static void join( volatile value_type & update, const volatile value_type & source ) { update = update < source ? update : source; } // Final serial processing of reduction value: KOKKOS_INLINE_FUNCTION void final( value_type & result ) const { prev_dt() = dt() ; dt() = result ; }; //-------------------------------------------------------------------------- // Global arrays used by this functor. const typename Fields::elem_node_ids_type elem_node_connectivity ; const typename Fields::node_coords_type model_coords ; const typename Fields::scalar_type dt ; const typename Fields::scalar_type prev_dt ; const typename Fields::geom_state_array_type displacement ; const typename Fields::geom_state_array_type velocity ; const typename Fields::array_type elem_mass ; const typename Fields::array_type internal_energy ; const typename Fields::elem_sym_tensor_type stress_new ; const typename Fields::elem_node_geom_type element_force ; const typename Fields::elem_tensor_state_type rotation ; const typename Fields::elem_sym_tensor_type rot_stretch ; const Scalar two_mu; const Scalar bulk_modulus; const Scalar lin_bulk_visc; const Scalar quad_bulk_visc; const Scalar user_dt; const int current_state; internal_force( const Fields & mesh_fields, const Scalar arg_user_dt, const int arg_current_state ) : elem_node_connectivity( mesh_fields.elem_node_connectivity ) , model_coords( mesh_fields.model_coords ) , dt( mesh_fields.dt ) , prev_dt( mesh_fields.prev_dt ) , displacement( mesh_fields.displacement ) , velocity( mesh_fields.velocity ) , elem_mass( mesh_fields.elem_mass ) , internal_energy( mesh_fields.internal_energy ) , stress_new( mesh_fields.stress_new ) , element_force( mesh_fields.element_force ) , rotation( mesh_fields.rotation ) , rot_stretch( mesh_fields.rot_stretch ) , two_mu( mesh_fields.two_mu ) , bulk_modulus( mesh_fields.bulk_modulus ) , lin_bulk_visc( mesh_fields.lin_bulk_visc ) , quad_bulk_visc( mesh_fields.quad_bulk_visc ) , user_dt( arg_user_dt ) , current_state( arg_current_state ) {} static void apply( const Fields & mesh_fields , const Scalar arg_user_dt, const int arg_current_state ) { internal_force op_force( mesh_fields , arg_user_dt , arg_current_state ); Kokkos::parallel_reduce( mesh_fields.num_elements, op_force ); } //-------------------------------------------------------------------------- KOKKOS_INLINE_FUNCTION void rotate_tensor_backward(int ielem , const Scalar * const s_n , Scalar * const rot_stress )const { const int rot_state = current_state ; // 1 ; // t : temporary variables // s_n : stress_new in local memory space // r_n : rotation_new in local memory space Scalar t[9], r_n[9]; r_n[0] = rotation(ielem, 0, rot_state ); r_n[1] = rotation(ielem, 1, rot_state ); r_n[2] = rotation(ielem, 2, rot_state ); r_n[3] = rotation(ielem, 3, rot_state ); r_n[4] = rotation(ielem, 4, rot_state ); r_n[5] = rotation(ielem, 5, rot_state ); r_n[6] = rotation(ielem, 6, rot_state ); r_n[7] = rotation(ielem, 7, rot_state ); r_n[8] = rotation(ielem, 8, rot_state ); t[0] = s_n[K_S_XX]*r_n[K_F_XX]+ s_n[K_S_XY]*r_n[K_F_XY]+ s_n[K_S_XZ]*r_n[K_F_XZ]; t[1] = s_n[K_S_YX]*r_n[K_F_XX]+ s_n[K_S_YY]*r_n[K_F_XY]+ s_n[K_S_YZ]*r_n[K_F_XZ]; t[2] = s_n[K_S_ZX]*r_n[K_F_XX]+ s_n[K_S_ZY]*r_n[K_F_XY]+ s_n[K_S_ZZ]*r_n[K_F_XZ]; t[3] = s_n[K_S_XX]*r_n[K_F_YX]+ s_n[K_S_XY]*r_n[K_F_YY]+ s_n[K_S_XZ]*r_n[K_F_YZ]; t[4] = s_n[K_S_YX]*r_n[K_F_YX]+ s_n[K_S_YY]*r_n[K_F_YY]+ s_n[K_S_YZ]*r_n[K_F_YZ]; t[5] = s_n[K_S_ZX]*r_n[K_F_YX]+ s_n[K_S_ZY]*r_n[K_F_YY]+ s_n[K_S_ZZ]*r_n[K_F_YZ]; t[6] = s_n[K_S_XX]*r_n[K_F_ZX]+ s_n[K_S_XY]*r_n[K_F_ZY]+ s_n[K_S_XZ]*r_n[K_F_ZZ]; t[7] = s_n[K_S_YX]*r_n[K_F_ZX]+ s_n[K_S_YY]*r_n[K_F_ZY]+ s_n[K_S_YZ]*r_n[K_F_ZZ]; t[8] = s_n[K_S_ZX]*r_n[K_F_ZX]+ s_n[K_S_ZY]*r_n[K_F_ZY]+ s_n[K_S_ZZ]*r_n[K_F_ZZ]; rot_stress[ K_S_XX ] = r_n[K_F_XX]*t[0] + r_n[K_F_XY]*t[1] + r_n[K_F_XZ]*t[2]; rot_stress[ K_S_YY ] = r_n[K_F_YX]*t[3] + r_n[K_F_YY]*t[4] + r_n[K_F_YZ]*t[5]; rot_stress[ K_S_ZZ ] = r_n[K_F_ZX]*t[6] + r_n[K_F_ZY]*t[7] + r_n[K_F_ZZ]*t[8]; rot_stress[ K_S_XY ] = r_n[K_F_XX]*t[3] + r_n[K_F_XY]*t[4] + r_n[K_F_XZ]*t[5]; rot_stress[ K_S_YZ ] = r_n[K_F_YX]*t[6] + r_n[K_F_YY]*t[7] + r_n[K_F_YZ]*t[8]; rot_stress[ K_S_ZX ] = r_n[K_F_ZX]*t[0] + r_n[K_F_ZY]*t[1] + r_n[K_F_ZZ]*t[2]; } //-------------------------------------------------------------------------- KOKKOS_INLINE_FUNCTION void comp_force(int ielem, const Scalar * const vx , const Scalar * const vy , const Scalar * const vz , const Scalar * const grad_x , const Scalar * const grad_y , const Scalar * const grad_z , Scalar * total_stress12th ) const { Scalar internal_energy_inc = 0 ; for(int inode = 0; inode < 8; ++inode) { const Scalar fx = total_stress12th[K_S_XX] * grad_x[inode] + total_stress12th[K_S_XY] * grad_y[inode] + total_stress12th[K_S_XZ] * grad_z[inode] ; element_force(ielem, 0, inode) = fx ; const Scalar fy = total_stress12th[K_S_YX] * grad_x[inode] + total_stress12th[K_S_YY] * grad_y[inode] + total_stress12th[K_S_YZ] * grad_z[inode] ; element_force(ielem, 1, inode) = fy ; const Scalar fz = total_stress12th[K_S_ZX] * grad_x[inode] + total_stress12th[K_S_ZY] * grad_y[inode] + total_stress12th[K_S_ZZ] * grad_z[inode] ; element_force(ielem, 2, inode) = fz ; internal_energy_inc += fx * vx[inode] + fy * vy[inode] + fz * vz[inode] ; } internal_energy(ielem) = internal_energy_inc ; } //---------------------------------------------------------------------------- KOKKOS_INLINE_FUNCTION void get_stress(int ielem , Scalar * const s_n ) const { const int kxx = 0; const int kyy = 1; const int kzz = 2; const int kxy = 3; const int kyz = 4; const int kzx = 5; const Scalar e = (rot_stretch(ielem,kxx)+rot_stretch(ielem,kyy)+rot_stretch(ielem,kzz))/3.0; s_n[kxx] = stress_new(ielem,kxx) += dt() * (two_mu * (rot_stretch(ielem,kxx)-e)+3*bulk_modulus*e); s_n[kyy] = stress_new(ielem,kyy) += dt() * (two_mu * (rot_stretch(ielem,kyy)-e)+3*bulk_modulus*e); s_n[kzz] = stress_new(ielem,kzz) += dt() * (two_mu * (rot_stretch(ielem,kzz)-e)+3*bulk_modulus*e); s_n[kxy] = stress_new(ielem,kxy) += dt() * two_mu * rot_stretch(ielem,kxy); s_n[kyz] = stress_new(ielem,kyz) += dt() * two_mu * rot_stretch(ielem,kyz); s_n[kzx] = stress_new(ielem,kzx) += dt() * two_mu * rot_stretch(ielem,kzx); } //---------------------------------------------------------------------------- KOKKOS_INLINE_FUNCTION void operator()( int ielem, value_type & update )const { const Scalar ONE12TH = 1.0 / 12.0 ; Scalar x[8], y[8], z[8] ; Scalar vx[8], vy[8], vz[8]; Scalar grad_x[8], grad_y[8], grad_z[8]; // Position and velocity: for ( int i = 0 ; i < ElemNodeCount ; ++i ) { const int n = elem_node_connectivity(ielem,i); x[i] = model_coords(n, 0) + displacement(n, 0, current_state) ; y[i] = model_coords(n, 1) + displacement(n, 1, current_state) ; z[i] = model_coords(n, 2) + displacement(n, 2, current_state) ; vx[i] = velocity(n, 0, current_state); vy[i] = velocity(n, 1, current_state); vz[i] = velocity(n, 2, current_state); } // Gradient: comp_grad( x , y , z , grad_x , grad_y , grad_z ); const Scalar mid_vol = dot8( x , grad_x ); const Scalar shr = two_mu ; const Scalar dil = bulk_modulus + ((2.0*shr)/3.0); const Scalar aspect = 6.0 * mid_vol / ( dot8( grad_x , grad_x ) + dot8( grad_y , grad_y ) + dot8( grad_z , grad_z ) ); const Scalar dtrial = std::sqrt(elem_mass(ielem) * aspect / dil); const Scalar traced = (rot_stretch(ielem, 0) + rot_stretch(ielem, 1) + rot_stretch(ielem, 2)); const Scalar eps = traced < 0 ? (lin_bulk_visc - quad_bulk_visc * traced * dtrial) : lin_bulk_visc ; const Scalar bulkq = eps * dil * dtrial * traced; Scalar cur_time_step = dtrial * ( std::sqrt( 1.0 + eps * eps) - eps); // force fixed time step if input cur_time_step = user_dt > 0 ? user_dt : cur_time_step; update = update < cur_time_step ? update : cur_time_step; Scalar s_n[ 6 ]; get_stress( ielem, s_n ); Scalar total_stress12th[6]; // Get rotated stress: rotate_tensor_backward(ielem, s_n , total_stress12th ); total_stress12th[0] = ONE12TH*( total_stress12th[ 0 ] + bulkq ); total_stress12th[1] = ONE12TH*( total_stress12th[ 1 ] + bulkq ); total_stress12th[2] = ONE12TH*( total_stress12th[ 2 ] + bulkq ); total_stress12th[3] = ONE12TH*( total_stress12th[ 3 ] ); total_stress12th[4] = ONE12TH*( total_stress12th[ 4 ] ); total_stress12th[5] = ONE12TH*( total_stress12th[ 5 ] ); comp_force(ielem, vx, vy, vz, grad_x, grad_y, grad_z, total_stress12th); } }; //---------------------------------------------------------------------------- template struct nodal_step { typedef DeviceType execution_space ; typedef typename execution_space::size_type size_type; typedef Explicit::Fields< Scalar , execution_space > Fields ; const typename Fields::scalar_type dt ; const typename Fields::scalar_type prev_dt ; const typename Fields::node_elem_ids_type node_elem_connectivity ; const typename Fields::node_coords_type model_coords ; const typename Fields::array_type nodal_mass ; const typename Fields::geom_state_array_type displacement ; const typename Fields::geom_state_array_type velocity ; const typename Fields::geom_array_type acceleration ; const typename Fields::geom_array_type internal_force ; const typename Fields::elem_node_geom_type element_force ; const Scalar x_bc; const int current_state; const int next_state; nodal_step( const Fields & mesh_fields , const Scalar arg_x_bc, const int arg_current_state, const int arg_next_state) : dt( mesh_fields.dt ) , prev_dt( mesh_fields.prev_dt ) , node_elem_connectivity( mesh_fields.node_elem_connectivity ) , model_coords( mesh_fields.model_coords ) , nodal_mass( mesh_fields.nodal_mass ) , displacement( mesh_fields.displacement ) , velocity( mesh_fields.velocity ) , acceleration( mesh_fields.acceleration ) , internal_force( mesh_fields.internal_force ) , element_force( mesh_fields.element_force ) , x_bc( arg_x_bc ) , current_state( arg_current_state ) , next_state( arg_next_state ) { //std::cout << "finish_step dt: " << dt << std::endl; //std::cout << "finish_step prev_dt: " << prev_dt << std::endl; } static void apply( const Fields & mesh_fields , const Scalar arg_x_bc , const int arg_current_state , const int arg_next_state ) { nodal_step op( mesh_fields, arg_x_bc, arg_current_state, arg_next_state ); // Only update the owned nodes: Kokkos::parallel_for( mesh_fields.num_nodes_owned , op ); } KOKKOS_INLINE_FUNCTION void operator()(int inode) const { // Getting count as per 'CSR-like' data structure const int begin = node_elem_connectivity.row_map[inode]; const int end = node_elem_connectivity.row_map[inode+1]; double local_force[] = {0.0, 0.0, 0.0}; // Gather-sum internal force from // each element that a node is attached to. for ( int i = begin; i < end ; ++i ){ // node_elem_offset is a cumulative structure, so // node_elem_offset(inode) should be the index where // a particular row's elem_IDs begin const int nelem = node_elem_connectivity.entries( i, 0); // find the row in an element's stiffness matrix // that corresponds to inode const int elem_node_index = node_elem_connectivity.entries( i, 1); local_force[0] += element_force(nelem, 0, elem_node_index); local_force[1] += element_force(nelem, 1, elem_node_index); local_force[2] += element_force(nelem, 2, elem_node_index); } internal_force(inode, 0) = local_force[0]; internal_force(inode, 1) = local_force[1]; internal_force(inode, 2) = local_force[2]; // Acceleration: Scalar v_new[3]; Scalar a_current[3]; const Scalar tol = 1.0e-7; // If not on the boundary then: a = F / m if ( tol < fabs(model_coords(inode,0)-x_bc) ) { const Scalar m = nodal_mass( inode ); acceleration(inode,0) = a_current[0] = -local_force[0] / m ; acceleration(inode,1) = a_current[1] = -local_force[1] / m ; acceleration(inode,2) = a_current[2] = -local_force[2] / m ; } else { //enforce fixed BC acceleration(inode,0) = a_current[0] = 0; acceleration(inode,1) = a_current[1] = 0; acceleration(inode,2) = a_current[2] = 0; } // Central difference time integration: const Scalar dt_disp = dt() ; const Scalar dt_vel = ( dt() + prev_dt() ) / 2.0 ; velocity(inode,0,next_state) = v_new[0] = velocity(inode,0,current_state) + dt_vel * a_current[0]; velocity(inode,1,next_state) = v_new[1] = velocity(inode,1,current_state) + dt_vel * a_current[1]; velocity(inode,2,next_state) = v_new[2] = velocity(inode,2,current_state) + dt_vel * a_current[2]; displacement(inode,0,next_state) = displacement(inode,0,current_state) + dt_disp * v_new[0]; displacement(inode,1,next_state) = displacement(inode,1,current_state) + dt_disp * v_new[1]; displacement(inode,2,next_state) = displacement(inode,2,current_state) + dt_disp * v_new[2]; } }; //---------------------------------------------------------------------------- template< typename Scalar , class DeviceType > struct pack_state { typedef DeviceType execution_space ; typedef typename execution_space::size_type size_type ; typedef Explicit::Fields< Scalar , execution_space > Fields ; typedef typename Fields::geom_state_array_type::value_type value_type ; typedef Kokkos::View< value_type* , execution_space > buffer_type ; static const unsigned value_count = 6 ; const typename Fields::geom_state_array_type displacement ; const typename Fields::geom_state_array_type velocity ; const buffer_type output ; const size_type inode_base ; const size_type state_next ; pack_state( const buffer_type & arg_output , const Fields & mesh_fields , const size_type arg_begin , const size_type arg_state ) : displacement( mesh_fields.displacement ) , velocity( mesh_fields.velocity ) , output( arg_output ) , inode_base( arg_begin ) , state_next( arg_state ) {} static void apply( const buffer_type & arg_output , const size_type arg_begin , const size_type arg_count , const Fields & mesh_fields , const size_type arg_state ) { pack_state op( arg_output , mesh_fields , arg_begin , arg_state ); Kokkos::parallel_for( arg_count , op ); } KOKKOS_INLINE_FUNCTION void operator()( const size_type i ) const { const size_type inode = inode_base + i ; size_type j = i * value_count ; output[j++] = displacement( inode , 0 , state_next ); output[j++] = displacement( inode , 1 , state_next ); output[j++] = displacement( inode , 2 , state_next ); output[j++] = velocity( inode , 0 , state_next ); output[j++] = velocity( inode , 1 , state_next ); output[j++] = velocity( inode , 2 , state_next ); } }; template< typename Scalar , class DeviceType > struct unpack_state { typedef DeviceType execution_space ; typedef typename execution_space::size_type size_type ; typedef Explicit::Fields< Scalar , execution_space > Fields ; typedef typename Fields::geom_state_array_type::value_type value_type ; typedef Kokkos::View< value_type* , execution_space > buffer_type ; static const unsigned value_count = 6 ; const typename Fields::geom_state_array_type displacement ; const typename Fields::geom_state_array_type velocity ; const buffer_type input ; const size_type inode_base ; const size_type state_next ; unpack_state( const buffer_type & arg_input , const Fields & mesh_fields , const size_type arg_begin , const size_type arg_state ) : displacement( mesh_fields.displacement ) , velocity( mesh_fields.velocity ) , input( arg_input ) , inode_base( arg_begin ) , state_next( arg_state ) {} static void apply( const Fields & mesh_fields , const size_type arg_state , const buffer_type & arg_input , const size_type arg_begin , const size_type arg_count ) { unpack_state op( arg_input , mesh_fields , arg_begin , arg_state ); Kokkos::parallel_for( arg_count , op ); } KOKKOS_INLINE_FUNCTION void operator()( const size_type i ) const { const size_type inode = inode_base + i ; size_type j = i * value_count ; displacement( inode , 0 , state_next ) = input[j++] ; displacement( inode , 1 , state_next ) = input[j++] ; displacement( inode , 2 , state_next ) = input[j++] ; velocity( inode , 0 , state_next ) = input[j++] ; velocity( inode , 1 , state_next ) = input[j++] ; velocity( inode , 2 , state_next ) = input[j++] ; } }; } /* namespace Explicit */ #endif /* #ifndef KOKKOS_EXPLICITFUNCTORS_HPP */