#include #include #include template __global__ void reduce_kernel(ExecutionPolicy exec, Iterator first, Iterator last, T init, Iterator2 result) { *result = thrust::reduce(exec, first, last, init); } template void TestReduceDevice(ExecutionPolicy exec, const size_t n) { thrust::host_vector h_data = unittest::random_integers(n); thrust::device_vector d_data = h_data; thrust::device_vector d_result(1); T init = 13; T h_result = thrust::reduce(h_data.begin(), h_data.end(), init); reduce_kernel<<<1,1>>>(exec, d_data.begin(), d_data.end(), init, d_result.begin()); cudaError_t const err = cudaDeviceSynchronize(); ASSERT_EQUAL(cudaSuccess, err); ASSERT_EQUAL(h_result, d_result[0]); } template struct TestReduceDeviceSeq { void operator()(const size_t n) { TestReduceDevice(thrust::seq, n); } }; VariableUnitTest TestReduceDeviceSeqInstance; template struct TestReduceDeviceDevice { void operator()(const size_t n) { TestReduceDevice(thrust::device, n); } }; VariableUnitTest TestReduceDeviceDeviceInstance; template struct TestReduceDeviceNoSync { void operator()(const size_t n) { TestReduceDevice(thrust::cuda::par_nosync, n); } }; VariableUnitTest TestReduceDeviceNoSyncInstance; template void TestReduceCudaStreams(ExecutionPolicy policy) { typedef thrust::device_vector Vector; Vector v(3); v[0] = 1; v[1] = -2; v[2] = 3; cudaStream_t s; cudaStreamCreate(&s); auto streampolicy = policy.on(s); // no initializer ASSERT_EQUAL(thrust::reduce(streampolicy, v.begin(), v.end()), 2); // with initializer ASSERT_EQUAL(thrust::reduce(streampolicy, v.begin(), v.end(), 10), 12); cudaStreamDestroy(s); } void TestReduceCudaStreamsSync() { TestReduceCudaStreams(thrust::cuda::par); } DECLARE_UNITTEST(TestReduceCudaStreamsSync); void TestReduceCudaStreamsNoSync() { TestReduceCudaStreams(thrust::cuda::par_nosync); } DECLARE_UNITTEST(TestReduceCudaStreamsNoSync);