/* //@HEADER // ************************************************************************ // // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation // // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // 3. Neither the name of the Corporation nor the names of the // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER */ #include #include namespace Test { namespace { template< class ExecSpace, class ScheduleType > struct TestRange { typedef int value_type; ///< typedef required for the parallel_reduce typedef Kokkos::View< int*, ExecSpace > view_type; view_type m_flags; struct VerifyInitTag {}; struct ResetTag {}; struct VerifyResetTag {}; struct OffsetTag {}; struct VerifyOffsetTag {}; int N; static const int offset = 13; TestRange( const size_t N_ ) : m_flags( Kokkos::ViewAllocateWithoutInitializing( "flags" ), N_ ), N(N_) {} void test_for() { typename view_type::HostMirror host_flags = Kokkos::create_mirror_view( m_flags ); Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType >( 0, N ), *this ); #if defined(KOKKOS_ENABLE_PROFILING) { typedef TestRange< ExecSpace, ScheduleType > ThisType; std::string label("parallel_for"); Kokkos::Impl::ParallelConstructName< ThisType, void> pcn(label); ASSERT_EQ( pcn.get(), label ); std::string empty_label(""); Kokkos::Impl::ParallelConstructName< ThisType, void> empty_pcn(empty_label); ASSERT_EQ( empty_pcn.get(), typeid(ThisType).name() ); } #endif Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType, VerifyInitTag >( 0, N ), *this ); #if defined(KOKKOS_ENABLE_PROFILING) { typedef TestRange< ExecSpace, ScheduleType > ThisType; std::string label("parallel_for"); Kokkos::Impl::ParallelConstructName< ThisType, VerifyInitTag> pcn(label); ASSERT_EQ( pcn.get(), label ); std::string empty_label(""); Kokkos::Impl::ParallelConstructName< ThisType, VerifyInitTag> empty_pcn(empty_label); ASSERT_EQ( empty_pcn.get(), std::string(typeid(ThisType).name()) + "/" + typeid(VerifyInitTag).name() ); } #endif Kokkos::deep_copy( host_flags, m_flags ); int error_count = 0; for ( int i = 0; i < N; ++i ) { if ( int( i ) != host_flags( i ) ) ++error_count; } ASSERT_EQ( error_count, int( 0 ) ); Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType, ResetTag >( 0, N ), *this ); Kokkos::parallel_for( std::string( "TestKernelFor" ), Kokkos::RangePolicy< ExecSpace, ScheduleType, VerifyResetTag >( 0, N ), *this ); Kokkos::deep_copy( host_flags, m_flags ); error_count = 0; for ( int i = 0; i < N; ++i ) { if ( int( 2 * i ) != host_flags( i ) ) ++error_count; } ASSERT_EQ( error_count, int( 0 ) ); Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType, OffsetTag >( offset, N + offset ), *this ); Kokkos::parallel_for( std::string("TestKernelFor"), Kokkos::RangePolicy( 0, N ), *this); Kokkos::deep_copy(host_flags, m_flags); error_count = 0; for (int i = 0; i < N; ++i) { if (i + offset != host_flags(i)) ++error_count; } ASSERT_EQ(error_count, int(0)); } KOKKOS_INLINE_FUNCTION void operator()( const int i ) const { m_flags( i ) = i; } KOKKOS_INLINE_FUNCTION void operator()( const VerifyInitTag &, const int i ) const { if ( i != m_flags( i ) ) { printf( "TestRange::test_for error at %d != %d\n", i, m_flags( i ) ); } } KOKKOS_INLINE_FUNCTION void operator()( const ResetTag &, const int i ) const { m_flags( i ) = 2 * m_flags( i ); } KOKKOS_INLINE_FUNCTION void operator()( const VerifyResetTag &, const int i ) const { if ( 2 * i != m_flags( i ) ) { printf( "TestRange::test_for error at %d != %d\n", i, m_flags( i ) ); } } KOKKOS_INLINE_FUNCTION void operator()(const OffsetTag &, const int i) const { m_flags(i - offset) = i; } KOKKOS_INLINE_FUNCTION void operator()(const VerifyOffsetTag &, const int i) const { if (i + offset != m_flags(i)) { printf("TestRange::test_for error at %d != %d\n", i + offset, m_flags(i)); } } //---------------------------------------- void test_reduce( ) { int total = 0; Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType >( 0, N ), *this ); Kokkos::parallel_reduce( "TestKernelReduce", Kokkos::RangePolicy< ExecSpace, ScheduleType >( 0, N ), *this, total ); // sum( 0 .. N-1 ) ASSERT_EQ( size_t( ( N - 1 ) * ( N ) / 2 ), size_t( total ) ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace, ScheduleType, OffsetTag>( offset, N+offset ), *this, total ); // sum( 1 .. N ) ASSERT_EQ( size_t( ( N ) * ( N + 1 ) / 2 ), size_t( total ) ); } KOKKOS_INLINE_FUNCTION void operator()( const int i, value_type & update ) const { update += m_flags( i ); } KOKKOS_INLINE_FUNCTION void operator()( const OffsetTag &, const int i, value_type & update ) const { update += 1 + m_flags( i-offset ); } //---------------------------------------- void test_scan( ) { Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType >( 0, N ), *this ); Kokkos::parallel_scan( "TestKernelScan", Kokkos::RangePolicy< ExecSpace, ScheduleType, OffsetTag>( 0, N ), *this ); int total = 0; Kokkos::parallel_scan( "TestKernelScanWithTotal", Kokkos::RangePolicy< ExecSpace, ScheduleType, OffsetTag>( 0, N ), *this, total ); ASSERT_EQ( size_t( ( N - 1 ) * ( N ) / 2 ), size_t( total ) );// sum( 0 .. N-1 ) } KOKKOS_INLINE_FUNCTION void operator()( const OffsetTag &, const int i, value_type & update, bool final ) const { update += m_flags( i ); if ( final ) { if ( update != ( i * ( i + 1 ) ) / 2 ) { printf( "TestRange::test_scan error %d : %d != %d\n", i, ( i * ( i + 1 ) ) / 2, m_flags( i ) ); } } } void test_dynamic_policy() { #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) #if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION ) typedef Kokkos::RangePolicy< ExecSpace, Kokkos::Schedule > policy_t; { Kokkos::View< size_t*, ExecSpace, Kokkos::MemoryTraits > count( "Count", ExecSpace::concurrency() ); Kokkos::View< int*, ExecSpace > a( "A", N ); Kokkos::parallel_for( policy_t( 0, N ), KOKKOS_LAMBDA ( const int& i ) { for ( int k = 0; k < ( i < N / 2 ? 1 : 10000 ); k++ ) { a( i )++; } #ifdef KOKKOS_ENABLE_DEPRECATED_CODE count( ExecSpace::hardware_thread_id() )++; #else count( ExecSpace::impl_hardware_thread_id() )++; #endif }); int error = 0; Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), KOKKOS_LAMBDA( const int & i, int & lsum ) { lsum += ( a( i ) != ( i < N / 2 ? 1 : 10000 ) ); }, error ); ASSERT_EQ( error, 0 ); if ( ( ExecSpace::concurrency() > (int) 1 ) && ( N > static_cast( 4 * ExecSpace::concurrency() ) ) ) { size_t min = N; size_t max = 0; for ( int t = 0; t < ExecSpace::concurrency(); t++ ) { if ( count( t ) < min ) min = count( t ); if ( count( t ) > max ) max = count( t ); } ASSERT_TRUE( min < max ); //if ( ExecSpace::concurrency() > 2 ) { // ASSERT_TRUE( 2 * min < max ); //} } } { Kokkos::View< size_t*, ExecSpace, Kokkos::MemoryTraits > count( "Count", ExecSpace::concurrency() ); Kokkos::View< int*, ExecSpace> a( "A", N ); int sum = 0; Kokkos::parallel_reduce( policy_t( 0, N ), KOKKOS_LAMBDA( const int & i, int & lsum ) { for ( int k = 0; k < ( i < N / 2 ? 1 : 10000 ); k++ ) { a( i )++; } #ifdef KOKKOS_ENABLE_DEPRECATED_CODE count( ExecSpace::hardware_thread_id() )++; #else count( ExecSpace::impl_hardware_thread_id() )++; #endif lsum++; }, sum ); ASSERT_EQ( sum, N ); int error = 0; Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), KOKKOS_LAMBDA( const int & i, int & lsum ) { lsum += ( a( i ) != ( i < N / 2 ? 1 : 10000 ) ); }, error ); ASSERT_EQ( error, 0 ); if ( ( ExecSpace::concurrency() > (int) 1 ) && ( N > static_cast( 4 * ExecSpace::concurrency() ) ) ) { size_t min = N; size_t max = 0; for ( int t = 0; t < ExecSpace::concurrency(); t++ ) { if ( count( t ) < min ) min = count( t ); if ( count( t ) > max ) max = count( t ); } ASSERT_TRUE( min < max ); //if ( ExecSpace::concurrency() > 2 ) { // ASSERT_TRUE( 2 * min < max ); //} } } #endif #endif } }; } // namespace TEST_F( TEST_CATEGORY, range_for ) { { TestRange< TEST_EXECSPACE, Kokkos::Schedule >f(0); f.test_for(); } { TestRange< TEST_EXECSPACE, Kokkos::Schedule >f(0); f.test_for(); } { TestRange< TEST_EXECSPACE, Kokkos::Schedule >f(2); f.test_for(); } { TestRange< TEST_EXECSPACE, Kokkos::Schedule >f(3); f.test_for(); } { TestRange< TEST_EXECSPACE, Kokkos::Schedule >f(1000); f.test_for(); } { TestRange< TEST_EXECSPACE, Kokkos::Schedule >f(1001); f.test_for(); } } TEST_F( TEST_CATEGORY, range_reduce ) { { TestRange< TEST_EXECSPACE, Kokkos::Schedule >f(0); f.test_reduce(); } { TestRange< TEST_EXECSPACE, Kokkos::Schedule >f(0); f.test_reduce(); } { TestRange< TEST_EXECSPACE, Kokkos::Schedule >f(2); f.test_reduce(); } { TestRange< TEST_EXECSPACE, Kokkos::Schedule >f(3); f.test_reduce(); } { TestRange< TEST_EXECSPACE, Kokkos::Schedule >f(1000); f.test_reduce(); } { TestRange< TEST_EXECSPACE, Kokkos::Schedule >f(1001); f.test_reduce(); } } #ifndef KOKKOS_ENABLE_OPENMPTARGET TEST_F( TEST_CATEGORY, range_scan ) { { TestRange< TEST_EXECSPACE, Kokkos::Schedule >f(0); f.test_scan(); } { TestRange< TEST_EXECSPACE, Kokkos::Schedule >f(0); f.test_scan(); } #if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_ROCM) { TestRange< TEST_EXECSPACE, Kokkos::Schedule >f(0); f.test_dynamic_policy(); } #endif { TestRange< TEST_EXECSPACE, Kokkos::Schedule >f(2); f.test_scan(); } { TestRange< TEST_EXECSPACE, Kokkos::Schedule >f(3); f.test_scan(); } #if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_ROCM) { TestRange< TEST_EXECSPACE, Kokkos::Schedule >f(3); f.test_dynamic_policy(); } #endif { TestRange< TEST_EXECSPACE, Kokkos::Schedule >f(1000); f.test_scan(); } { TestRange< TEST_EXECSPACE, Kokkos::Schedule >f(1001); f.test_scan(); } #if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_ROCM) { TestRange< TEST_EXECSPACE, Kokkos::Schedule >f(1001); f.test_dynamic_policy(); } #endif } #endif } // namespace Test