#include #include #include namespace Test { namespace { template struct SpaceInstance { static ExecSpace create() { return ExecSpace(); } static void destroy(ExecSpace&) { } static bool overlap() { return false; } }; #ifndef KOKKOS_ENABLE_DEBUG #ifdef KOKKOS_ENABLE_CUDA template<> struct SpaceInstance { static Kokkos::Cuda create() { cudaStream_t stream; cudaStreamCreate(&stream); return Kokkos::Cuda(stream); } static void destroy(Kokkos::Cuda& space) { cudaStream_t stream = space.cuda_stream(); cudaStreamDestroy(stream); } static bool overlap() { bool value = true; auto local_rank_str = std::getenv("CUDA_LAUNCH_BLOCKING"); if(local_rank_str) { value = (std::atoi(local_rank_str)==0); } return value; } }; #endif #endif } struct FunctorRange { int M,R; Kokkos::View a; FunctorRange(int M_, int R_, Kokkos::View a_):M(M_),R(R_),a(a_){} KOKKOS_INLINE_FUNCTION void operator() (const int i) const { for(int r=0;r a; FunctorMDRange(int M_, int R_, Kokkos::View a_):M(M_),R(R_),a(a_){} KOKKOS_INLINE_FUNCTION void operator() (const int i, const int) const { for(int j=0;j a; FunctorTeam(int M_, int R_, Kokkos::View a_):M(M_),R(R_),a(a_){} KOKKOS_INLINE_FUNCTION void operator() (const Kokkos::TeamPolicy::member_type& team) const { int i = team.league_rank(); for(int r=0;r a; FunctorRangeReduce(int M_, int R_, Kokkos::View a_):M(M_),R(R_),a(a_){} KOKKOS_INLINE_FUNCTION void operator() (const int i, double& tmp) const { for(int r=0;r a; FunctorMDRangeReduce(int M_, int R_, Kokkos::View a_):M(M_),R(R_),a(a_){} KOKKOS_INLINE_FUNCTION void operator() (const int i, const int, double& tmp) const { for(int j=0;j a; FunctorTeamReduce(int M_, int R_, Kokkos::View a_):M(M_),R(R_),a(a_){} KOKKOS_INLINE_FUNCTION void operator() (const Kokkos::TeamPolicy::member_type& team, double& tmp) const { int i = team.league_rank(); for(int r=0;r::create(); TEST_EXECSPACE space2 = SpaceInstance::create(); Kokkos::View a("A",N,M); FunctorRange f(M,R,a); FunctorRangeReduce fr(M,R,a); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel0", Kokkos::RangePolicy(0,N), FunctorRange(M,R,a)); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel1", Kokkos::Experimental::require( Kokkos::RangePolicy(space1,0,N), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , f); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel2", Kokkos::Experimental::require( Kokkos::RangePolicy(space2,0,N), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , f); Kokkos::fence(); Kokkos::Timer timer; Kokkos::parallel_for("default_exec::overlap_range_policy::kernel3", Kokkos::Experimental::require( Kokkos::RangePolicy(space,0,N), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , f); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel4", Kokkos::Experimental::require( Kokkos::RangePolicy(space,0,N), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , f); Kokkos::fence(); timer.reset(); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel5", Kokkos::Experimental::require( Kokkos::RangePolicy(space1,0,N), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , FunctorRange(M,R,a)); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel6", Kokkos::Experimental::require( Kokkos::RangePolicy(space2,0,N), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , FunctorRange(M,R,a)); Kokkos::fence(); double time_overlap = timer.seconds(); timer.reset(); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel7", Kokkos::Experimental::require( Kokkos::RangePolicy(space,0,N), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , f); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel8", Kokkos::Experimental::require( Kokkos::RangePolicy(space,0,N), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , f); Kokkos::fence(); double time_end = timer.seconds(); if(SpaceInstance::overlap()) { ASSERT_TRUE( (time_end > 1.5*time_overlap) ); } printf("Time RangePolicy: NonOverlap: %lf Time Overlap: %lf\n",time_end,time_overlap); Kokkos::View result("result"); Kokkos::View result1("result1"); Kokkos::View result2("result2"); Kokkos::View h_result("h_result"); Kokkos::View h_result1("h_result1"); Kokkos::View h_result2("h_result2"); timer.reset(); Kokkos::parallel_reduce("default_exec::overlap_range_policy::kernel_reduce", Kokkos::Experimental::require( Kokkos::RangePolicy(space,0,N), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , fr, result); Kokkos::fence(); double time_fenced = timer.seconds(); Kokkos::deep_copy(h_result,result); timer.reset(); Kokkos::parallel_reduce("default_exec::overlap_range_policy::kernel_reduce", Kokkos::Experimental::require( Kokkos::RangePolicy(space,0,N), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , fr, result); double time_not_fenced = timer.seconds(); Kokkos::fence(); if(SpaceInstance::overlap()) { ASSERT_TRUE(time_fenced>2.0*time_not_fenced); } timer.reset(); Kokkos::parallel_reduce("default_exec::overlap_range_policy::kernel_reduce", Kokkos::Experimental::require( Kokkos::RangePolicy(space,0,N), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , fr, result); Kokkos::parallel_reduce("default_exec::overlap_range_policy::kernel_reduce", Kokkos::Experimental::require( Kokkos::RangePolicy(space,0,N), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , fr, result); Kokkos::fence(); double time_no_overlapped_reduce = timer.seconds(); timer.reset(); Kokkos::parallel_reduce("default_exec::overlap_range_policy::kernel_reduce", Kokkos::Experimental::require( Kokkos::RangePolicy(space1,0,N), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , fr, result1); Kokkos::parallel_reduce("default_exec::overlap_range_policy::kernel_reduce", Kokkos::Experimental::require( Kokkos::RangePolicy(space2,0,N), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , fr, result2); Kokkos::fence(); double time_overlapped_reduce = timer.seconds(); Kokkos::deep_copy(h_result2,result2); Kokkos::deep_copy(h_result1,result1); ASSERT_EQ(h_result1(),h_result()); ASSERT_EQ(h_result2(),h_result()); if(SpaceInstance::overlap()) { ASSERT_TRUE(time_overlapped_reduce < 1.5*time_no_overlapped_reduce); } printf("Time RangePolicy Reduce: NonOverlap: %lf Time Overlap: %lf\n",time_no_overlapped_reduce,time_overlapped_reduce); SpaceInstance::destroy(space1); SpaceInstance::destroy(space2); } TEST_F( default_exec, overlap_mdrange_policy ) { int N = 200; int M = 10000; int R = 10; TEST_EXECSPACE space; TEST_EXECSPACE space1 = SpaceInstance::create(); TEST_EXECSPACE space2 = SpaceInstance::create(); Kokkos::View a("A",N,M); FunctorMDRange f(M,R,a); FunctorMDRangeReduce fr(M,R,a); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel0", Kokkos::Experimental::require( Kokkos::MDRangePolicy>({0,0},{N,R}), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , FunctorMDRange(M,R,a)); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel1", Kokkos::Experimental::require( Kokkos::MDRangePolicy>(space1,{0,0},{N,R}), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , f); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel2", Kokkos::Experimental::require( Kokkos::MDRangePolicy>(space2,{0,0},{N,R}), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , f); Kokkos::fence(); Kokkos::Timer timer; Kokkos::parallel_for("default_exec::overlap_range_policy::kernel3", Kokkos::Experimental::require( Kokkos::MDRangePolicy>(space,{0,0},{N,R}), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , f); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel4", Kokkos::Experimental::require( Kokkos::MDRangePolicy>(space,{0,0},{N,R}), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , f); Kokkos::fence(); timer.reset(); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel5", Kokkos::Experimental::require( Kokkos::MDRangePolicy>(space1,{0,0},{N,R}), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , FunctorMDRange(M,R,a)); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel6", Kokkos::Experimental::require( Kokkos::MDRangePolicy>(space2,{0,0},{N,R}), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , FunctorMDRange(M,R,a)); Kokkos::fence(); double time_overlap = timer.seconds(); timer.reset(); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel7", Kokkos::Experimental::require( Kokkos::MDRangePolicy>(space,{0,0},{N,R}), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , f); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel8", Kokkos::Experimental::require( Kokkos::MDRangePolicy>(space,{0,0},{N,R}), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , f); Kokkos::fence(); double time_end = timer.seconds(); if(SpaceInstance::overlap()) { ASSERT_TRUE( (time_end > 1.5*time_overlap) ); } printf("Time MDRangePolicy: NonOverlap: %lf Time Overlap: %lf\n",time_end,time_overlap); Kokkos::View result("result"); Kokkos::View result1("result1"); Kokkos::View result2("result2"); Kokkos::View h_result("h_result"); Kokkos::View h_result1("h_result1"); Kokkos::View h_result2("h_result2"); timer.reset(); Kokkos::parallel_reduce("default_exec::overlap_mdrange_policy::kernel_reduce", Kokkos::Experimental::require( Kokkos::MDRangePolicy>(space,{0,0},{N,R}), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , fr, result); Kokkos::fence(); double time_fenced = timer.seconds(); Kokkos::deep_copy(h_result,result); timer.reset(); Kokkos::parallel_reduce("default_exec::overlap_mdrange_policy::kernel_reduce", Kokkos::Experimental::require( Kokkos::MDRangePolicy>(space,{0,0},{N,R}), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , fr, result); double time_not_fenced = timer.seconds(); Kokkos::fence(); if(SpaceInstance::overlap()) { ASSERT_TRUE(time_fenced>2.0*time_not_fenced); } timer.reset(); Kokkos::parallel_reduce("default_exec::overlap_mdrange_policy::kernel_reduce", Kokkos::Experimental::require( Kokkos::MDRangePolicy>(space,{0,0},{N,R}), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , fr, result); Kokkos::parallel_reduce("default_exec::overlap_mdrange_policy::kernel_reduce", Kokkos::Experimental::require( Kokkos::MDRangePolicy>(space,{0,0},{N,R}), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , fr, result); Kokkos::fence(); double time_no_overlapped_reduce = timer.seconds(); timer.reset(); Kokkos::parallel_reduce("default_exec::overlap_mdrange_policy::kernel_reduce", Kokkos::Experimental::require( Kokkos::MDRangePolicy>(space1,{0,0},{N,R}), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , fr, result1); Kokkos::parallel_reduce("default_exec::overlap_mdrange_policy::kernel_reduce", Kokkos::Experimental::require( Kokkos::MDRangePolicy>(space2,{0,0},{N,R}), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , fr, result2); Kokkos::fence(); double time_overlapped_reduce = timer.seconds(); Kokkos::deep_copy(h_result2,result2); Kokkos::deep_copy(h_result1,result1); ASSERT_EQ(h_result1(),h_result()); ASSERT_EQ(h_result2(),h_result()); if(SpaceInstance::overlap()) { ASSERT_TRUE(time_overlapped_reduce < 1.5*time_no_overlapped_reduce); } printf("Time MDRangePolicy Reduce: NonOverlap: %lf Time Overlap: %lf\n",time_no_overlapped_reduce,time_overlapped_reduce); SpaceInstance::destroy(space2); SpaceInstance::destroy(space1); } TEST_F( default_exec, overlap_team_policy ) { int N = 20; int M = 1000000; int R = 10; TEST_EXECSPACE space; TEST_EXECSPACE space1 = SpaceInstance::create(); TEST_EXECSPACE space2 = SpaceInstance::create(); Kokkos::View a("A",N,M); FunctorTeam f(M,R,a); FunctorTeamReduce fr(M,R,a); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel0", Kokkos::Experimental::require( Kokkos::TeamPolicy(N,Kokkos::AUTO), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , FunctorTeam(M,R,a)); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel1", Kokkos::Experimental::require( Kokkos::TeamPolicy(space1,N,Kokkos::AUTO), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , f); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel2", Kokkos::Experimental::require( Kokkos::TeamPolicy(space2,N,Kokkos::AUTO), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , f); Kokkos::fence(); Kokkos::Timer timer; Kokkos::parallel_for("default_exec::overlap_range_policy::kernel3", Kokkos::Experimental::require( Kokkos::TeamPolicy(space,N,Kokkos::AUTO), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , f); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel4", Kokkos::Experimental::require( Kokkos::TeamPolicy(space,N,Kokkos::AUTO), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , f); Kokkos::fence(); timer.reset(); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel5", Kokkos::Experimental::require( Kokkos::TeamPolicy(space1,N,Kokkos::AUTO), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , FunctorTeam(M,R,a)); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel6", Kokkos::Experimental::require( Kokkos::TeamPolicy(space2,N,Kokkos::AUTO), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , FunctorTeam(M,R,a)); Kokkos::fence(); double time_overlap = timer.seconds(); timer.reset(); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel7", Kokkos::Experimental::require( Kokkos::TeamPolicy(space,N,Kokkos::AUTO), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , f); Kokkos::parallel_for("default_exec::overlap_range_policy::kernel8", Kokkos::Experimental::require( Kokkos::TeamPolicy(space,N,Kokkos::AUTO), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , f); Kokkos::fence(); double time_end = timer.seconds(); if(SpaceInstance::overlap()) { ASSERT_TRUE( (time_end > 1.5*time_overlap) ); } printf("Time TeamPolicy: NonOverlap: %lf Time Overlap: %lf\n",time_end,time_overlap); Kokkos::View result("result"); Kokkos::View result1("result1"); Kokkos::View result2("result2"); Kokkos::View h_result("h_result"); Kokkos::View h_result1("h_result1"); Kokkos::View h_result2("h_result2"); timer.reset(); Kokkos::parallel_reduce("default_exec::overlap_team_policy::kernel_reduce", Kokkos::Experimental::require( Kokkos::TeamPolicy(space,N,Kokkos::AUTO), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , fr, result); Kokkos::fence(); double time_fenced = timer.seconds(); Kokkos::deep_copy(h_result,result); timer.reset(); Kokkos::parallel_reduce("default_exec::overlap_team_policy::kernel_reduce", Kokkos::Experimental::require( Kokkos::TeamPolicy(space,N,Kokkos::AUTO), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , fr, result); double time_not_fenced = timer.seconds(); Kokkos::fence(); if(SpaceInstance::overlap()) { ASSERT_TRUE(time_fenced>2.0*time_not_fenced); } timer.reset(); Kokkos::parallel_reduce("default_exec::overlap_team_policy::kernel_reduce", Kokkos::Experimental::require( Kokkos::TeamPolicy(space,N,Kokkos::AUTO), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , fr, result); Kokkos::parallel_reduce("default_exec::overlap_team_policy::kernel_reduce", Kokkos::Experimental::require( Kokkos::TeamPolicy(space,N,Kokkos::AUTO), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , fr, result); Kokkos::fence(); double time_no_overlapped_reduce = timer.seconds(); timer.reset(); Kokkos::parallel_reduce("default_exec::overlap_team_policy::kernel_reduce", Kokkos::Experimental::require( Kokkos::TeamPolicy(space1,N,Kokkos::AUTO), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , fr, result1); Kokkos::parallel_reduce("default_exec::overlap_team_policy::kernel_reduce", Kokkos::Experimental::require( Kokkos::TeamPolicy(space2,N,Kokkos::AUTO), Kokkos::Experimental::WorkItemProperty::HintLightWeight) , fr, result2); Kokkos::fence(); double time_overlapped_reduce = timer.seconds(); Kokkos::deep_copy(h_result2,result2); Kokkos::deep_copy(h_result1,result1); ASSERT_EQ(h_result1(),h_result()); ASSERT_EQ(h_result2(),h_result()); if(SpaceInstance::overlap()) { ASSERT_TRUE(time_overlapped_reduce < 1.5*time_no_overlapped_reduce); } printf("Time TeamPolicy Reduce: NonOverlap: %lf Time Overlap: %lf\n",time_no_overlapped_reduce,time_overlapped_reduce); SpaceInstance::destroy(space1); SpaceInstance::destroy(space2); } }