/* //@HEADER // ************************************************************************ // // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation // // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // 3. Neither the name of the Corporation nor the names of the // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER */ #ifndef EXAMPLE_GROW_ARRAY #define EXAMPLE_GROW_ARRAY #include #include #include #if defined(KOKKOS_ENABLE_CUDA) #include #include #endif namespace Example { //---------------------------------------------------------------------------- template< class ExecSpace > struct SortView { template< typename ValueType > SortView( const Kokkos::View v , int begin , int end ) { std::sort( v.ptr_on_device() + begin , v.ptr_on_device() + end ); } }; #if defined(KOKKOS_ENABLE_CUDA) template<> struct SortView< Kokkos::Cuda > { template< typename ValueType > SortView( const Kokkos::View v , int begin , int end ) { thrust::sort( thrust::device_ptr( v.ptr_on_device() + begin ) , thrust::device_ptr( v.ptr_on_device() + end ) ); } }; #endif //---------------------------------------------------------------------------- template< class ExecSpace > struct GrowArrayFunctor { typedef ExecSpace execution_space ; enum { SHIFT = sizeof(int) == 8 ? 6 : 5 }; // 8 or 4 byte int enum { MASK = ( 1 << SHIFT ) - 1 }; const Kokkos::View m_search_flags ; // bit flags for values to append const Kokkos::View m_search_array ; // array to append values const Kokkos::View m_search_count ; // offset const int m_search_total ; const int m_search_team_chunk ; GrowArrayFunctor( int array_length , int search_length , int print = 1 ) : m_search_flags( "flags" , ( search_length + MASK ) >> SHIFT ) // One bit per search entry , m_search_array( "array" , array_length ) , m_search_count( "count" ) , m_search_total( search_length ) , m_search_team_chunk( 2048 ) {} KOKKOS_INLINE_FUNCTION bool flag_is_set( const int index ) const { // 64 or 32 bit integer: const int j = index >> SHIFT ; // which integer flag const int k = 1 << ( index & MASK ); // which bit in that integer const int s = ( j < int(m_search_flags.dimension_0()) ) && ( 0 != ( m_search_flags(j) & k ) ); return s ; } typedef typename Kokkos::TeamPolicy::member_type team_member ; KOKKOS_INLINE_FUNCTION void operator()( const team_member & member ) const { enum { LOCAL_BUFFER_LENGTH = 16 }; int local_buffer[ LOCAL_BUFFER_LENGTH ] ; int local_count = 0 ; // Each team searches 'm_search_team_chunk' indices. // The threads of a team must iterate together because all // threads in the team must call 'team_scan' to prevent deadlock in the team. int search_team_begin = member.league_rank() * m_search_team_chunk ; const int search_team_end = search_team_begin + m_search_team_chunk ; int k = 0 ; while ( search_team_begin < search_team_end ) { // This iteration searches [ search_team_begin .. search_team_begin + member.team_size() ] const int thread_search_index = search_team_begin + member.team_rank(); // If this thread's search index is in the range // and the flag is set, push into this thread's local buffer. if ( thread_search_index < m_search_total && flag_is_set(thread_search_index) ) { local_buffer[ local_count ] = thread_search_index ; ++local_count ; } // Move the team's search range forward search_team_begin += member.team_size(); // Striding team by team size // Count number of times a thread's buffer might have grown: ++k ; // Write buffer if end of search or a thread might have filled its buffer. if ( k == LOCAL_BUFFER_LENGTH /* A thread in my team might have filled its buffer */ || ! ( search_team_begin < search_team_end ) /* Team is at the end of its search */ ) { // Team's exclusive scan of threads' contributions, with global offset. // This thread writes its buffer into [ team_offset .. team_offset + local_count ) const int team_offset = member.team_scan( local_count , & *m_search_count ); // Copy locally buffered entries into global array: for ( int i = 0 ; i < local_count ; ++i ) { m_search_array( team_offset + i ) = local_buffer[i] ; } k = 0 ; local_count = 0 ; } } } }; template< class ExecSpace > void grow_array( int array_length , int search_length , int print = 1 ) { typedef GrowArrayFunctor< ExecSpace > FunctorType ; FunctorType functor( array_length , search_length , print ); typename Kokkos::View::HostMirror count = Kokkos::create_mirror_view( functor.m_search_count ); typename Kokkos::View::HostMirror flags = Kokkos::create_mirror_view( functor.m_search_flags ); // Set at most 'array_length' random bits over the search length. for ( int i = 0 ; i < array_length ; ++i ) { // 'lrand48()' generates random number between [0..2^31] // index = ( lrand48() * search_length ) / ( 2^31 ) const long int index = ( lrand48() * search_length ) >> 31 ; // set the bit within the flags: flags( index >> FunctorType::SHIFT ) |= ( 1 << ( index & FunctorType::MASK ) ); } Kokkos::deep_copy( functor.m_search_flags , flags ); // Each team works on 'functor.m_search_team_chunk' span of the search_length Kokkos::TeamPolicy< ExecSpace > work( /* #teams */ ( search_length + functor.m_search_team_chunk - 1 ) / functor.m_search_team_chunk , /* threads/team */ Kokkos::TeamPolicy< ExecSpace >::team_size_max( functor ) ); // Fill array: Kokkos::parallel_for( work , functor ); // How much was filled: Kokkos::deep_copy( count , functor.m_search_count ); // Sort array: SortView< ExecSpace >( functor.m_search_array , 0 , *count ); // Mirror the results: typename Kokkos::View::HostMirror results = Kokkos::create_mirror_view( functor.m_search_array ); Kokkos::deep_copy( results , functor.m_search_array ); // Verify results: int result_error_count = 0 ; int flags_error_count = 0 ; for ( int i = 0 ; i < *count ; ++i ) { const int index = results(i); const int entry = index >> FunctorType::SHIFT ; const int bit = 1 << ( index & FunctorType::MASK ); const bool flag = 0 != ( flags( entry ) & bit ); if ( ! flag ) { if ( print ) std::cerr << "result( " << i << " : " << index << " )"; ++result_error_count ; } flags( entry ) &= ~bit ; // Clear that verified bit } for ( int i = 0 ; i < int(flags.dimension_0()) ; ++i ) { // If any uncleared bits then an error if ( flags(i) ) { if ( print ) std::cerr << "flags( " << i << " : " << flags(i) << " )" ; ++flags_error_count ; } } if ( result_error_count || flags_error_count ) { std::cerr << std::endl << "Example::GrowArrayFunctor( " << array_length << " , " << search_length << " ) result_error_count( " << result_error_count << " )" << " ) flags_error_count( " << flags_error_count << " )" << std::endl ; } } } // namespace Example //---------------------------------------------------------------------------- #endif /* #ifndef EXAMPLE_GROW_ARRAY */