/**
 * \file dnn/test/cuda/dilated_convolution.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#include "test/cuda/fixture.h"

#include "src/cuda/cudnn_with_check.h"
#include "test/common/checker.h"
#include "test/common/convolution.h"
#include "test/common/tensor.h"
#include "test/cuda/utils.h"

using namespace megdnn;
using namespace test;
using namespace convolution;

// Two-step stringification: V1 stringizes its argument and the extra V layer
// forces macro expansion first, so V(CUDNN_MAJOR) yields e.g. "7" rather than
// the literal "CUDNN_MAJOR".
#define V1(x) #x
#define V(x) V1(x)
#define CUDNN_VERSION_STRING \
    "v" V(CUDNN_MAJOR) "." V(CUDNN_MINOR) "." V(CUDNN_PATCHLEVEL)

TEST_F(CUDA, DILATED_CONVOLUTION_FORWARD) {
    auto args = get_dilated_args();
    Checker<ConvolutionForward> checker(handle_cuda());
#if CUDNN_VERSION >= 7500
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionForward>(ExecutionPolicyAlgoName{
                    "DEFAULT",
                    {{ConvBiasForward::algo_name<ConvBiasForward::DefaultParam>(
                              "CUDNN:Convolution:CUDNN_CONVOLUTION_FWD_ALGO_"
                              "IMPLICIT_"
                              "PRECOMP_"
                              "GEMM" CUDNN_VERSION_STRING,
                              {})
                              .c_str(),
                      {}}}}));
    printf("cudnn version >= 7.5, use cudnn impl for dilated convolution\n");
#else
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionForward>(ExecutionPolicyAlgoName{
                    "DEFAULT",
                    {{ConvBiasForward::algo_name<ConvBiasForward::MatmulParam>(
                              "MATMUL", {})
                              .c_str(),
                      {{"CUBLAS", {}}}}}}));
#endif
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
    }
}

TEST_F(CUDA, DILATED_CONVOLUTION_BACKWARD_DATA) {
    std::vector<TestArg> args = get_dilated_args();
    Checker<ConvolutionBackwardData> checker(handle_cuda());
#if CUDNN_VERSION >= 7500
    checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardData>(
            "CUDNN_CONVOLUTION_BWD_DATA_ALGO_1" CUDNN_VERSION_STRING));
    printf("cudnn version >= 7.5, use cudnn impl for dilated convolution\n");
#else
    checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardData>(
            ExecutionPolicyAlgoName{"MATMUL", {{"CUBLAS", {}}}}));
#endif
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[0] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_cuda()->create_operator<Convolution>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
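        // Note: ConvolutionBackwardData takes its tensors in
        // (filter, diff, grad) order, hence {filter, dst, src} above: `dst`
        // plays the role of the output gradient and `src` is the input
        // gradient being computed.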
        // With cudnn 7.5.0 and later, CUDNN_CONVOLUTION_BWD_DATA_ALGO_1
        // produces incorrect results on compute capability 7.0 and later, so
        // the float16 test is disabled there. TODO: remove this if statement
        // once cudnn fixes the precision issue.
        if (!check_compute_capability(7, 0)) {
            src.dtype = dst.dtype = filter.dtype = dtype::Float16();
            checker.set_rng(0, &rng)
                    .set_rng(1, &rng)
                    .set_epsilon(1e-1)
                    .set_param(arg.param)
                    .exec(TensorLayoutArray{filter, dst, src});
        }
    }
    {
        // Run one large case directly to make sure a big dilated
        // backward-data problem (and its workspace allocation) executes
        // without error.
        auto handle = handle_cuda();
        auto opr = handle->create_operator<ConvolutionBackwardData>();
        param::Convolution param;
        param.stride_h = param.stride_w = 1;
        param.pad_h = param.pad_w = 2;
        param.dilate_h = param.dilate_w = 2;
        opr->param() = param;
        TensorLayout srcl({600, 512, 7, 7}, dtype::Float32()),
                filterl({512, 512, 3, 3}, dtype::Float32()),
                dstl({600, 512, 7, 7}, dtype::Float32());
        auto wsize = opr->get_workspace_in_bytes(filterl, dstl, srcl);
        Tensor<> src(handle, srcl), filter(handle, filterl), dst(handle, dstl);
        WorkspaceWrapper w(handle, wsize);
        opr->exec(filter.tensornd(), dst.tensornd(), src.tensornd(),
                  w.workspace());
        megcore_check(megcoreSynchronize(handle->megcore_computing_handle()));
    }
}

TEST_F(CUDA, DILATED_CONVOLUTION_BACKWARD_FILTER) {
    std::vector<TestArg> args = get_dilated_args();
    Checker<ConvolutionBackwardFilter> checker(handle_cuda());
#if CUDNN_VERSION >= 7500
    checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardFilter>(
            "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1" CUDNN_VERSION_STRING));
    printf("cudnn version >= 7.5, use cudnn impl for dilated convolution\n");
#else
    checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardFilter>(
            ExecutionPolicyAlgoName{"MATMUL", {{"CUBLAS", {}}}}));
#endif
    NormalRNG default_rng;
    bool first_run = true;
    for (auto&& arg : args) {
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_cuda()->create_operator<Convolution>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        float scale = 1.0f / sqrt(dst[2] * dst[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-2)
                .set_param(arg.param)
                .exec(TensorLayoutArray{src, dst, filter});
        if (!first_run) {
            src.dtype = dst.dtype = filter.dtype = dtype::Float16();
            checker.set_rng(0, &rng)
                    .set_rng(1, &rng)
                    .set_epsilon(1e-1)
                    .set_param(arg.param)
                    .exec(TensorLayoutArray{src, dst, filter});
        } else {
            // the first arg is big and float16 suffers from precision
            // problems on it, so skip the float16 run for that case
            first_run = false;
        }
    }
}

#undef CUDNN_VERSION_STRING
#undef V
#undef V1

// vim: syntax=cpp.doxygen
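
// A minimal sketch of how a single dilated forward case could be checked
// directly, without going through get_dilated_args(); the test name, shapes
// and dilation values below are illustrative assumptions, not part of the
// original suite.
TEST_F(CUDA, DILATED_CONVOLUTION_FORWARD_SMOKE) {
    Checker<ConvolutionForward> checker(handle_cuda());
    param::Convolution param;
    param.stride_h = param.stride_w = 1;
    // pad by dilate * (kernel - 1) / 2 = 2 so that a 3x3 kernel with
    // dilation 2 (effective extent 5) keeps the 7x7 spatial size
    param.pad_h = param.pad_w = 2;
    param.dilate_h = param.dilate_w = 2;
    // src is NCHW {2, 8, 7, 7}, filter is OIHW {4, 8, 3, 3}; the empty
    // trailing layout asks the checker to deduce dst
    checker.set_param(param).execs({{2, 8, 7, 7}, {4, 8, 3, 3}, {}});
}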