/* //@HEADER // ************************************************************************ // // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation // // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // 3. Neither the name of the Corporation nor the names of the // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER */ #include #include #include namespace Test { template< class DeviceType , typename CoordScalarType = double , typename GradScalarType = float > struct HexGrad { typedef DeviceType execution_space ; typedef typename execution_space::size_type size_type ; typedef HexGrad self_type; // 3D array : ( ParallelWork , Space , Node ) enum { NSpace = 3 , NNode = 8 }; typedef Kokkos::View< CoordScalarType*[NSpace][NNode] , execution_space > elem_coord_type ; typedef Kokkos::View< GradScalarType*[NSpace][NNode] , execution_space > elem_grad_type ; elem_coord_type coords ; elem_grad_type grad_op ; enum { FLOPS = 318 }; // = 3 * ( 18 + 8 * 11 ) }; enum { READS = 18 }; enum { WRITES = 18 }; HexGrad( const elem_coord_type & arg_coords , const elem_grad_type & arg_grad_op ) : coords( arg_coords ) , grad_op( arg_grad_op ) {} KOKKOS_INLINE_FUNCTION static void grad( const CoordScalarType x[] , const CoordScalarType z[] , GradScalarType grad_y[] ) { const GradScalarType R42=(x[3] - x[1]); const GradScalarType R52=(x[4] - x[1]); const GradScalarType R54=(x[4] - x[3]); const GradScalarType R63=(x[5] - x[2]); const GradScalarType R83=(x[7] - x[2]); const GradScalarType R86=(x[7] - x[5]); const GradScalarType R31=(x[2] - x[0]); const GradScalarType R61=(x[5] - x[0]); const GradScalarType R74=(x[6] - x[3]); const GradScalarType R72=(x[6] - x[1]); const GradScalarType R75=(x[6] - x[4]); const GradScalarType R81=(x[7] - x[0]); const GradScalarType t1=(R63 + R54); const GradScalarType t2=(R61 + R74); const GradScalarType t3=(R72 + R81); const GradScalarType t4 =(R86 + R42); const GradScalarType t5 =(R83 + R52); const GradScalarType t6 =(R75 + R31); // Calculate Y gradient from X and Z data grad_y[0] = (z[1] * t1) - (z[2] * R42) - (z[3] * t5) + (z[4] * t4) + (z[5] * R52) - (z[7] * R54); grad_y[1] = (z[2] * t2) + (z[3] * R31) - (z[0] * t1) - (z[5] * t6) + (z[6] * R63) - (z[4] * R61); grad_y[2] = (z[3] * t3) + (z[0] * R42) - (z[1] * t2) - (z[6] * t4) + (z[7] * R74) - (z[5] * R72); grad_y[3] = (z[0] * t5) - (z[1] * R31) - (z[2] * t3) + (z[7] * t6) + (z[4] * R81) - (z[6] * R83); grad_y[4] = (z[5] * t3) + (z[6] * R86) - (z[7] * t2) - (z[0] * t4) - (z[3] * R81) + (z[1] * R61); grad_y[5] = (z[6] * t5) - (z[4] * t3) - (z[7] * R75) + (z[1] * t6) - (z[0] * R52) + (z[2] * R72); grad_y[6] = (z[7] * t1) - (z[5] * t5) - (z[4] * R86) + (z[2] * t4) - (z[1] * R63) + (z[3] * R83); grad_y[7] = (z[4] * t2) - (z[6] * t1) + (z[5] * R75) - (z[3] * t6) - (z[2] * R74) + (z[0] * R54); } KOKKOS_INLINE_FUNCTION void operator()( size_type ielem ) const { GradScalarType g[NNode] ; const CoordScalarType x[NNode] = { coords(ielem,0,0), coords(ielem,0,1), coords(ielem,0,2), coords(ielem,0,3), coords(ielem,0,4), coords(ielem,0,5), coords(ielem,0,6), coords(ielem,0,7) }; const CoordScalarType y[NNode] = { coords(ielem,1,0), coords(ielem,1,1), coords(ielem,1,2), coords(ielem,1,3), coords(ielem,1,4), coords(ielem,1,5), coords(ielem,1,6), coords(ielem,1,7) }; const CoordScalarType z[NNode] = { coords(ielem,2,0), coords(ielem,2,1), coords(ielem,2,2), coords(ielem,2,3), coords(ielem,2,4), coords(ielem,2,5), coords(ielem,2,6), coords(ielem,2,7) }; grad( z , y , g ); grad_op(ielem,0,0) = g[0]; grad_op(ielem,0,1) = g[1]; grad_op(ielem,0,2) = g[2]; grad_op(ielem,0,3) = g[3]; grad_op(ielem,0,4) = g[4]; grad_op(ielem,0,5) = g[5]; grad_op(ielem,0,6) = g[6]; grad_op(ielem,0,7) = g[7]; grad( x , z , g ); grad_op(ielem,1,0) = g[0]; grad_op(ielem,1,1) = g[1]; grad_op(ielem,1,2) = g[2]; grad_op(ielem,1,3) = g[3]; grad_op(ielem,1,4) = g[4]; grad_op(ielem,1,5) = g[5]; grad_op(ielem,1,6) = g[6]; grad_op(ielem,1,7) = g[7]; grad( y , x , g ); grad_op(ielem,2,0) = g[0]; grad_op(ielem,2,1) = g[1]; grad_op(ielem,2,2) = g[2]; grad_op(ielem,2,3) = g[3]; grad_op(ielem,2,4) = g[4]; grad_op(ielem,2,5) = g[5]; grad_op(ielem,2,6) = g[6]; grad_op(ielem,2,7) = g[7]; } //-------------------------------------------------------------------------- struct Init { typedef typename self_type::execution_space execution_space ; elem_coord_type coords ; Init( const elem_coord_type & arg_coords ) : coords( arg_coords ) {} KOKKOS_INLINE_FUNCTION void operator()( size_type ielem ) const { coords(ielem,0,0) = 0.; coords(ielem,1,0) = 0.; coords(ielem,2,0) = 0.; coords(ielem,0,1) = 1.; coords(ielem,1,1) = 0.; coords(ielem,2,1) = 0.; coords(ielem,0,2) = 1.; coords(ielem,1,2) = 1.; coords(ielem,2,2) = 0.; coords(ielem,0,3) = 0.; coords(ielem,1,3) = 1.; coords(ielem,2,3) = 0.; coords(ielem,0,4) = 0.; coords(ielem,1,4) = 0.; coords(ielem,2,4) = 1.; coords(ielem,0,5) = 1.; coords(ielem,1,5) = 0.; coords(ielem,2,5) = 1.; coords(ielem,0,6) = 1.; coords(ielem,1,6) = 1.; coords(ielem,2,6) = 1.; coords(ielem,0,7) = 0.; coords(ielem,1,7) = 1.; coords(ielem,2,7) = 1.; } }; //-------------------------------------------------------------------------- static double test( const int count , const int iter = 1 ) { elem_coord_type coord( "coord" , count ); elem_grad_type grad ( "grad" , count ); // Execute the parallel kernels on the arrays: double dt_min = 0 ; Kokkos::parallel_for( count , Init( coord ) ); execution_space().fence(); for ( int i = 0 ; i < iter ; ++i ) { Kokkos::Timer timer ; Kokkos::parallel_for( count , HexGrad( coord , grad ) ); execution_space().fence(); const double dt = timer.seconds(); if ( 0 == i ) dt_min = dt ; else dt_min = dt < dt_min ? dt : dt_min ; } return dt_min ; } }; template< class DeviceType > void run_test_hexgrad( int exp_beg , int exp_end, int num_trials, const char deviceTypeName[] ) { std::string label_hexgrad ; label_hexgrad.append( "\"HexGrad< double , " ); label_hexgrad.append( deviceTypeName ); label_hexgrad.append( " >\"" ); for (int i = exp_beg ; i < exp_end ; ++i) { double min_seconds = 0.0 ; double max_seconds = 0.0 ; double avg_seconds = 0.0 ; const int parallel_work_length = 1<::test(parallel_work_length) ; if ( 0 == j ) { min_seconds = seconds ; max_seconds = seconds ; } else { if ( seconds < min_seconds ) min_seconds = seconds ; if ( seconds > max_seconds ) max_seconds = seconds ; } avg_seconds += seconds ; } avg_seconds /= num_trials ; std::cout << label_hexgrad << " , " << parallel_work_length << " , " << min_seconds << " , " << ( min_seconds / parallel_work_length ) << std::endl ; } } TEST_F( default_exec, hexgrad ) { int exp_beg = 10; int exp_end = 20; int num_trials = 5; if(command_line_num_args()>1) exp_beg = atoi(command_line_arg(1)); if(command_line_num_args()>2) exp_end = atoi(command_line_arg(2)); if(command_line_num_args()>3) num_trials = atoi(command_line_arg(3)); EXPECT_NO_THROW(run_test_hexgrad< Kokkos::DefaultExecutionSpace >( exp_beg, exp_end, num_trials, Kokkos::DefaultExecutionSpace::name() )); } }