/**
 * \file dnn/test/cuda/rng.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#include "test/naive/rng.h"
#include "megdnn/oprs.h"
#include "test/common/tensor.h"
#include "test/cuda/fixture.h"

namespace megdnn {
namespace test {
namespace {

template <typename T>
void run_gamma(Handle* handle) {
    using ctype = typename DTypeTrait<T>::ctype;
    auto opr = handle->create_operator<GammaRNG>();

    TensorLayout ly{TensorShape{2000000 * 5}, T()};
    SyncedTensor<ctype> out(handle, ly);
    SyncedTensor<ctype> shape(handle, ly);
    SyncedTensor<ctype> scale(handle, ly);
    auto shape_ptr = shape.ptr_mutable_host();
    auto scale_ptr = scale.ptr_mutable_host();
    for (int i = 0; i < 5; ++i) {
        for (int j = 0; j < 2000000; ++j) {
            shape_ptr[i * 2000000 + j] = 2 * 0.3 * i + 0.3;
            scale_ptr[i * 2000000 + j] = i * 0.2 + 0.1;
        }
    }
    opr->exec(shape.tensornd_dev(), scale.tensornd_dev(), out.tensornd_dev(), {});

    auto ptr = out.ptr_mutable_host();
    for (int i = 0; i < 5; ++i) {
        // Gamma(shape=a, scale=b) has mean a*b and variance a*b^2.
        float a = 2 * 0.3 * i + 0.3, b = i * 0.2 + 0.1;
        float mean = a * b;
        float std = a * (b * b);
        auto stat = get_mean_var(ptr + i * 2000000, 2000000, ctype(mean));
        ASSERT_LE(std::abs(stat.first - mean), 0.01);
        ASSERT_LE(std::abs(stat.second - std), 0.01);
    }
}

template <typename T>
void run_poisson(Handle* handle) {
    using ctype = typename DTypeTrait<T>::ctype;
    auto opr = handle->create_operator<PoissonRNG>();

    TensorLayout ly{TensorShape{200000 * 5}, T()};
    SyncedTensor<ctype> out(handle, ly);
    SyncedTensor<ctype> lam(handle, ly);
    auto lam_ptr = lam.ptr_mutable_host();
    for (int i = 0; i < 5; ++i) {
        for (int j = 0; j < 200000; ++j) {
            lam_ptr[i * 200000 + j] = ctype(i + 1);
        }
    }
    opr->exec(lam.tensornd_dev(), out.tensornd_dev(), {});

    auto ptr = out.ptr_mutable_host();
    for (int i = 0; i < 5; ++i) {
        // Poisson(lambda) has mean and variance both equal to lambda.
        auto stat = get_mean_var(ptr + i * 200000, 200000, ctype(i + 1));
        ASSERT_LE(std::abs(stat.first - ctype(i + 1)), 0.01);
        ASSERT_LE(std::abs(stat.second - ctype(i + 1)), 0.01);
    }
}

template <typename T>
void run_beta(Handle* handle) {
    using ctype = typename DTypeTrait<T>::ctype;
    auto opr = handle->create_operator<BetaRNG>();

    TensorLayout ly{TensorShape{200000 * 5}, T()};
    SyncedTensor<ctype> out(handle, ly);
    SyncedTensor<ctype> alpha(handle, ly);
    SyncedTensor<ctype> beta(handle, ly);
    auto alpha_ptr = alpha.ptr_mutable_host();
    auto beta_ptr = beta.ptr_mutable_host();
    for (int i = 0; i < 5; ++i) {
        for (int j = 0; j < 200000; ++j) {
            alpha_ptr[i * 200000 + j] = 0.3 * i + 0.1;
            beta_ptr[i * 200000 + j] = 2 * i * 0.3 + 0.1;
        }
    }
    opr->exec(alpha.tensornd_dev(), beta.tensornd_dev(), out.tensornd_dev(), {});

    auto ptr = out.ptr_mutable_host();
    for (int i = 0; i < 5; ++i) {
        // Beta(a, b) has mean a/(a+b) and variance a*b/((a+b)^2 * (a+b+1)).
        float a = 0.3 * i + 0.1, b = 2 * i * 0.3 + 0.1;
        float mean = a / (a + b);
        float std = a * b / ((a + b) * (a + b) * (a + b + 1));
        auto stat = get_mean_var(ptr + i * 200000, 200000, ctype(mean));
        ASSERT_LE(std::abs(stat.first - mean), 0.01);
        ASSERT_LE(std::abs(stat.second - std), 0.01);
    }
}

template <typename T>
void run_permutation(Handle* handle) {
    using ctype = typename DTypeTrait<T>::ctype;
    size_t sample_num =
            std::min(200000, static_cast<int>(DTypeTrait<T>::max()) - 10);

    auto opr = handle->create_operator<PermutationRNG>();
    opr->param().dtype = DTypeTrait<T>::enumv;
    TensorLayout ly{TensorShape{sample_num}, T()};
    Tensor<dt_byte> workspace(
            handle, {TensorShape{opr->get_workspace_in_bytes(ly)}, dtype::Byte()});
    SyncedTensor<ctype> t(handle, ly);
    opr->exec(
            t.tensornd_dev(),
            {workspace.ptr(), workspace.layout().total_nr_elems()});

    auto ptr = t.ptr_mutable_host();
    auto size = t.layout().total_nr_elems();

    std::vector<ctype> res(size);
    int not_same = 0;
    for (size_t i = 0; i < size; ++i) {
        if ((ptr[i] - ctype(i)) >= ctype(1))
            not_same++;
        res[i] = ptr[i];
    }
    ASSERT_GT(not_same, 5000);
    std::sort(res.begin(), res.end());
    for (size_t i = 0; i < size; ++i) {
        ASSERT_LE(std::abs(res[i] - ctype(i)), 1e-8);
    }
}
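// run_shuffle checks the shuffle RNG: the source tensor is filled with
// 0..n-1, shuffled along the first axis, and each output row is compared
// against the source row selected by the returned index tensor. With
// bwd_flag set, the backward operator is run on (dst, index) and the
// recovered tensor must satisfy the same forward relation, i.e. the
// original input is restored.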
template <typename T>
void run_shuffle(Handle* handle, bool bwd_flag) {
    using ctype = typename DTypeTrait<T>::ctype;
    auto run = [&](TensorShape shape) {
        auto opr = handle->create_operator<ShuffleRNGForward>();
        TensorLayout srclay{shape, T()};
        TensorLayout dstlay{shape, T()};
        TensorLayout indexlay{TensorShape{shape[0]}, dtype::Int32()};
        Tensor<dt_byte> workspace(
                handle,
                {TensorShape{opr->get_workspace_in_bytes(srclay, dstlay, indexlay)},
                 dtype::Byte()});
        SyncedTensor<ctype> src(handle, srclay);
        SyncedTensor<ctype> dst(handle, dstlay);
        SyncedTensor<DTypeTrait<dtype::Int32>::ctype> index(handle, indexlay);
        auto sptr = src.ptr_mutable_host();
        size_t size = src.layout().total_nr_elems();
        for (size_t j = 0; j < size; ++j) {
            sptr[j] = j;
        }
        opr->exec(
                src.tensornd_dev(), dst.tensornd_dev(), index.tensornd_dev(),
                {workspace.ptr(), workspace.layout().total_nr_elems()});
        auto dptr = dst.ptr_mutable_host();
        auto iptr = index.ptr_mutable_host();
        size_t len = index.layout().total_nr_elems();
        size_t step = size / len;
        for (size_t i = 0; i < len; ++i) {
            for (size_t j = 0; j < step; ++j) {
                ASSERT_EQ(dptr[i * step + j], sptr[iptr[i] * step + j]);
            }
        }
        if (bwd_flag) {
            for (size_t j = 0; j < size; ++j) {
                sptr[j] = 0;
            }
            auto oprbwd = handle->create_operator<ShuffleRNGBackward>();
            oprbwd->exec(
                    dst.tensornd_dev(), index.tensornd_dev(), src.tensornd_dev(),
                    {workspace.ptr(), workspace.layout().total_nr_elems()});
            auto sptr_bwd = src.ptr_mutable_host();
            for (size_t i = 0; i < len; ++i) {
                for (size_t j = 0; j < step; ++j) {
                    ASSERT_EQ(dptr[i * step + j], sptr_bwd[iptr[i] * step + j]);
                }
            }
        }
    };
    run({10});
    run({6, 3});
}
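// run_dropout fills the input with ones, runs the forward dropout operator,
// and checks that every output element is either 0 or 1/(1 - drop_prob) and
// that the observed drop ratio is within 1e-2 of drop_prob. The backward
// pass (guarded by CUDNN_VERSION >= 7000) is fed an all-ones gradient and
// must reproduce the forward output.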
template <typename T>
void run_dropout(Handle* handle) {
    using ctype = typename DTypeTrait<T>::ctype;
    auto run = [&](TensorShape shape, float drop_prob) {
        auto fwd = handle->create_operator<DropoutForward>();
        auto bwd = handle->create_operator<DropoutBackward>();
        fwd->param().drop_prob = drop_prob;
        bwd->param().drop_prob = drop_prob;
        double scale = 1.0 / (1.0 - drop_prob);

        TensorLayout inp_lay{shape, T()};
        TensorLayout oup_lay{shape, T()};
        TensorLayout mask_lay{{fwd->get_mask_size_in_bytes(inp_lay)}, dtype::Byte()};
        TensorLayout doup_lay{shape, T()};
        TensorLayout dinp_lay{shape, T()};
        TensorLayout fwd_ws_lay{
                {fwd->get_workspace_in_bytes(inp_lay, oup_lay, mask_lay)},
                dtype::Byte()};
        TensorLayout bwd_ws_lay{
                {bwd->get_workspace_in_bytes(doup_lay, mask_lay, dinp_lay)},
                dtype::Byte()};

        SyncedTensor<ctype> inp(handle, inp_lay);
        SyncedTensor<ctype> oup(handle, oup_lay);
        SyncedTensor<DTypeTrait<dtype::Byte>::ctype> mask(handle, mask_lay);
        SyncedTensor<ctype> doup(handle, doup_lay);
        SyncedTensor<ctype> dinp(handle, dinp_lay);
        SyncedTensor<DTypeTrait<dtype::Byte>::ctype> fwd_ws(handle, fwd_ws_lay);
        SyncedTensor<DTypeTrait<dtype::Byte>::ctype> bwd_ws(handle, bwd_ws_lay);

        for (size_t i = 0; i < inp.layout().total_nr_elems(); ++i) {
            inp.ptr_mutable_host()[i] = 1;
            doup.ptr_mutable_host()[i] = 1;
        }

        fwd->exec(
                inp.tensornd_dev(), oup.tensornd_dev(), mask.tensornd_dev(),
                {fwd_ws.ptr_mutable_dev(), fwd_ws.layout().total_nr_elems()});

        size_t droped_cnt = 0;
        for (size_t i = 0; i < inp.layout().total_nr_elems(); ++i) {
            ASSERT_TRUE(
                    oup.ptr_host()[i] == 0 ||
                    oup.ptr_host()[i] == static_cast<ctype>(scale));
            if (oup.ptr_host()[i] == 0) {
                droped_cnt++;
            }
        }
        float real_drop = droped_cnt * 1.0 / inp.layout().total_nr_elems();
        ASSERT_LT(std::abs(drop_prob - real_drop), 1e-2);

#if CUDNN_VERSION >= 7000
        bwd->exec(
                doup.tensornd_dev(), mask.tensornd_dev(), dinp.tensornd_dev(),
                {bwd_ws.ptr_mutable_dev(), bwd_ws.layout().total_nr_elems()});

        for (size_t i = 0; i < inp.layout().total_nr_elems(); ++i) {
            ASSERT_TRUE(oup.ptr_host()[i] == dinp.ptr_host()[i]);
        }
#endif
    };

    run({32, 32, 32, 32}, 0.2);
    run({100000}, 0.3);
}

}  // anonymous namespace

TEST_F(CUDA, UNIFORM_RNG_F32) {
    auto opr = handle_cuda()->create_operator<UniformRNG>();
    opr->param().dtype = DTypeTrait<dtype::Float32>::enumv;
    SyncedTensor<> t(handle_cuda(), {TensorShape{200000}, dtype::Float32()});
    opr->exec(t.tensornd_dev(), {});

    assert_uniform_correct(t.ptr_mutable_host(), t.layout().total_nr_elems());
}

TEST_F(CUDA, GAUSSIAN_RNG_F32) {
    auto opr = handle_cuda()->create_operator<GaussianRNG>();
    opr->param().mean = 0.8;
    opr->param().std = 2.3;
    opr->param().dtype = DTypeTrait<dtype::Float32>::enumv;
    for (size_t size : {1, 200000, 200001}) {
        TensorLayout ly{{size}, dtype::Float32()};
        Tensor<dt_byte> workspace(
                handle_cuda(),
                {TensorShape{opr->get_workspace_in_bytes(ly)}, dtype::Byte()});
        SyncedTensor<> t(handle_cuda(), ly);
        opr->exec(
                t.tensornd_dev(),
                {workspace.ptr(), workspace.layout().total_nr_elems()});

        auto ptr = t.ptr_mutable_host();
        ASSERT_LE(std::abs(ptr[0] - 0.8), 2.3);

        if (size >= 1000) {
            auto stat = get_mean_var(ptr, size, 0.8f);
            ASSERT_LE(std::abs(stat.first - 0.8), 5e-3);
            ASSERT_LE(std::abs(stat.second - 2.3 * 2.3), 5e-2);
        }
    }
}

TEST_F(CUDA, GAMMA_RNG_F32) {
    run_gamma<dtype::Float32>(handle_cuda());
}

TEST_F(CUDA, GAMMA_RNG_F16) {
    run_gamma<dtype::Float16>(handle_cuda());
}

TEST_F(CUDA, POISSON_RNG_F32) {
    run_poisson<dtype::Float32>(handle_cuda());
}

TEST_F(CUDA, POISSON_RNG_F16) {
    run_poisson<dtype::Float16>(handle_cuda());
}

TEST_F(CUDA, BETA_RNG_F32) {
    run_beta<dtype::Float32>(handle_cuda());
}

TEST_F(CUDA, BETA_RNG_F16) {
    run_beta<dtype::Float16>(handle_cuda());
}

TEST_F(CUDA, PERMUTATION_RNG_F32) {
    run_permutation<dtype::Float32>(handle_cuda());
}

TEST_F(CUDA, PERMUTATION_RNG_INT32) {
    run_permutation<dtype::Int32>(handle_cuda());
}

TEST_F(CUDA, PERMUTATION_RNG_INT16) {
    run_permutation<dtype::Int16>(handle_cuda());
}

TEST_F(CUDA, SHUFFLE_RNG_F32) {
    run_shuffle<dtype::Float32>(handle_cuda(), false);
}

TEST_F(CUDA, SHUFFLE_RNG_INT32) {
    run_shuffle<dtype::Int32>(handle_cuda(), false);
}

TEST_F(CUDA, SHUFFLE_RNG_F16) {
    run_shuffle<dtype::Float16>(handle_cuda(), false);
}

TEST_F(CUDA, SHUFFLE_RNG_BWD_F32) {
    run_shuffle<dtype::Float32>(handle_cuda(), true);
}

TEST_F(CUDA, SHUFFLE_RNG_BWD_INT32) {
    run_shuffle<dtype::Int32>(handle_cuda(), true);
}

TEST_F(CUDA, SHUFFLE_RNG_BWD_F16) {
    run_shuffle<dtype::Float16>(handle_cuda(), true);
}

TEST_F(CUDA, DROPOUT_F32) {
    run_dropout<dtype::Float32>(handle_cuda());
}

TEST_F(CUDA, DROPOUT_F16) {
    run_dropout<dtype::Float16>(handle_cuda());
}

}  // namespace test
}  // namespace megdnn

// vim: syntax=cpp.doxygen