/**
 * \file dnn/test/rocm/convolution.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "hcc_detail/hcc_defs_prologue.h"

#include "test/common/convolution.h"

#include "megdnn/opr_param_defs.h"
#include "megdnn/oprs.h"
#include "test/common/benchmarker.h"
#include "test/common/checker.h"
#include "test/common/rng.h"
#include "test/common/tensor.h"
#include "test/common/workspace_wrapper.h"
#include "test/rocm/benchmarker.h"
#include "test/rocm/fixture.h"

#include "src/common/utils.h"
#include "src/rocm/utils.h"

namespace megdnn {
namespace test {
namespace convolution {

std::vector<TestArg> get_args_0() {
    std::vector<TestArg> args, tmp_args;
#define ADD_ARGS(NAME)            \
    tmp_args = get_args_##NAME(); \
    args.insert(args.end(), tmp_args.begin(), tmp_args.end());
    ADD_ARGS(common)
    ADD_ARGS(padding)
    ADD_ARGS(large_channel)
    ADD_ARGS(1x1)
    ADD_ARGS(large_filter)
    ADD_ARGS(exhaustive_search)
    ADD_ARGS(4x4)
    ADD_ARGS(large_channels)
    ADD_ARGS(x86_direct_case_2)
    ADD_ARGS(cudnn_5_1_failures)
    ADD_ARGS(x86_winograd_algorithm)
    ADD_ARGS(BRAIN_481)
#undef ADD_ARGS
    return args;
}

std::vector<TestArg> get_args_1() {
    return get_args_fallback_templated_impl();
}

std::vector<TestArg> get_args_2() {
    return get_args_fallback_non_templated_impl();
}

std::vector<TestArg> get_group_conv_args() {
    std::vector<TestArg> args;
    for (size_t batch_size : {2}) {
        for (size_t ih : {23}) {
            for (size_t iw : {ih + 1}) {
                for (size_t icpg : {2, 4, 8}) {
                    for (size_t ocpg : {4, 8}) {
                        for (size_t fh : {3, 5, 7}) {
                            for (size_t fw : {fh, fh + 1}) {
                                for (size_t ph : {0_z, size_t{fw / 2}}) {
                                    for (size_t sh : {1, 2}) {
                                        for (size_t dh : {1, 2}) {
                                            param::Convolution param;
                                            size_t groups = 2;
                                            param.sparse = param::Convolution::
                                                    Sparse::GROUP;
                                            param.mode = param::Convolution::
                                                    Mode::CROSS_CORRELATION;
                                            param.stride_h = param.stride_w =
                                                    sh;
                                            param.pad_h = param.pad_w = ph;
                                            param.dilate_h = param.dilate_w =
                                                    dh;
                                            args.emplace_back(
                                                    param,
                                                    TensorShape{batch_size,
                                                                icpg * groups,
                                                                ih, iw},
                                                    TensorShape{groups, ocpg,
                                                                icpg, fh, fw});
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    return args;
}
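//! A minimal sketch (not used by the tests) of how the GROUP-sparse shapes
//! generated above fit together; the helper name is hypothetical and only
//! illustrates the dst shape implied by a {groups, ocpg, icpg, fh, fw}
//! filter convolved with a {n, groups * icpg, ih, iw} src.
inline TensorShape group_conv_dst_shape(
        size_t n, size_t groups, size_t ocpg, size_t ih, size_t iw, size_t fh,
        size_t fw, size_t ph, size_t pw, size_t sh, size_t sw, size_t dh,
        size_t dw) {
    //! effective kernel extent under dilation: (f - 1) * d + 1
    size_t kh = (fh - 1) * dh + 1;
    size_t kw = (fw - 1) * dw + 1;
    //! standard convolution output size: (i + 2 * p - k) / s + 1
    size_t oh = (ih + 2 * ph - kh) / sh + 1;
    size_t ow = (iw + 2 * pw - kw) / sw + 1;
    //! each group convolves its own icpg input channels into ocpg output
    //! channels, so dst carries groups * ocpg channels in total
    return TensorShape{n, groups * ocpg, oh, ow};
}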
}  // namespace convolution

TEST_F(ROCM, CONV_GROUP) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), false);
    using namespace convolution;
    std::vector<TestArg> args = get_group_conv_args();
    Checker<ConvolutionForward> checker(handle_rocm());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
    }
}

TEST_F(ROCM, CONV_CHANNWISE) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), false);
    using namespace convolution;
    std::vector<TestArg> args = get_chanwise_args();
    Checker<ConvolutionForward> checker(handle_rocm());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        using Mode = param::Convolution::Mode;
        //! non-xcorr mode (Mode::CONVOLUTION) is not supported by MIOpen
        if (arg.param.mode == Mode::CONVOLUTION) {
            continue;
        }
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
    }
}

TEST_F(ROCM, CONVOLUTION_FORWARD_0) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), false);
    using namespace convolution;
    std::vector<TestArg> args = get_args_0();
    Checker<ConvolutionForward> checker(handle_rocm());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        using Mode = param::Convolution::Mode;
        //! non-xcorr mode (Mode::CONVOLUTION) is not supported by MIOpen
        if (arg.param.mode == Mode::CONVOLUTION) {
            continue;
        }
        float scale =
                1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_dtype(2, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
#if !MEGDNN_DISABLE_FLOAT16
        checker.set_dtype(0, dtype::Float16())
                .set_dtype(1, dtype::Float16())
                .set_dtype(2, dtype::Float16())
                .set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-1)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
#endif
    }
}

TEST_F(ROCM, CONVOLUTION_FORWARD_1) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), false);
    using namespace convolution;
    std::vector<TestArg> args = get_args_1();
    Checker<ConvolutionForward> checker(handle_rocm());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        using Mode = param::Convolution::Mode;
        //! non-xcorr mode (Mode::CONVOLUTION) is not supported by MIOpen
        if (arg.param.mode == Mode::CONVOLUTION) {
            continue;
        }
        float scale =
                1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_dtype(2, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
#if !MEGDNN_DISABLE_FLOAT16
        checker.set_dtype(0, dtype::Float16())
                .set_dtype(1, dtype::Float16())
                .set_dtype(2, dtype::Float16())
                .set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-1)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
#endif
    }
}
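//! Note on the fp16 runs in the forward tests above and below: each output
//! element accumulates IC * FH * FW products, so drawing the operands from
//! [scale, 2 * scale] with scale = 1 / sqrt(IC * FH * FW) keeps outputs
//! roughly O(1), which is presumably why the relaxed epsilon of 1e-1 is
//! sufficient for half precision.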
TEST_F(ROCM, CONVOLUTION_FORWARD_2) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), false);
    using namespace convolution;
    std::vector<TestArg> args = get_args_2();
    Checker<ConvolutionForward> checker(handle_rocm());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        using Mode = param::Convolution::Mode;
        //! non-xcorr mode (Mode::CONVOLUTION) is not supported by MIOpen
        if (arg.param.mode == Mode::CONVOLUTION) {
            continue;
        }
        float scale =
                1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_dtype(2, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
#if !MEGDNN_DISABLE_FLOAT16
        checker.set_dtype(0, dtype::Float16())
                .set_dtype(1, dtype::Float16())
                .set_dtype(2, dtype::Float16())
                .set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-1)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
#endif
    }
}

TEST_F(ROCM, CONVOLUTION_1X1_FORWARD) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), false);
    using namespace convolution;
    std::vector<TestArg> args = get_1x1_args();
    Checker<ConvolutionForward> checker(handle_rocm());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale =
                1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
    }
}

#if MEGDNN_WITH_BENCHMARK
TEST_F(ROCM, CONVOLUTION_1X1_FORWARD_ALL_ALGOS) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), true);
    using namespace convolution;
    OprProxy<ConvolutionForward> proxy{true};
    proxy.warmup_times = 1;
    proxy.exec_times = 10;
    Benchmarker<ConvolutionForward> checker(handle_rocm());
    checker.set_times(1);
    auto get_computation = [&](TestArg arg) -> float {
        megdnn_assert(arg.param.format == param::Convolution::Format::NCHW);
        size_t N = arg.src[0], IC = arg.src[1], IH = arg.src[2],
               IW = arg.src[3], OC = arg.filter[0], FH = arg.filter[2],
               FW = arg.filter[3], SH = arg.param.stride_h,
               SW = arg.param.stride_w, PH = arg.param.pad_h,
               PW = arg.param.pad_w;
        size_t OH = infer_conv_shape(IH, FH, SH, PH);
        size_t OW = infer_conv_shape(IW, FW, SW, PW);
        float flops = 2.0 * N * OC * OH * OW * IC * FH * FW;
        return flops;
    };
    std::vector<TestArg> args = get_1x1_args();
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale =
                1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_proxy(proxy)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_param(arg.param);
        float time_in_ms = checker.execs({arg.src, arg.filter, {}});
        float flops = get_computation(arg);
        printf("inp=%s,flt=%s,flops=%.2fGflo,time = %.2f ms, perf = %.2f "
               "GFLOPS\n",
               arg.src.to_string().c_str(), arg.filter.to_string().c_str(),
               flops / 1e9, time_in_ms, flops / (1e6 * time_in_ms));
    }
}
#endif
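//! Unit check for get_computation() above: infer_conv_shape() yields
//! OH = (IH + 2 * PH - FH) / SH + 1 (likewise for OW), the factor 2.0
//! counts one multiply plus one add per filter tap, and
//! flops / (1e6 * time_in_ms) == (flops / (time_in_ms * 1e-3)) / 1e9,
//! i.e. GFLOPS.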
TEST_F(ROCM, CONVOLUTION_BACKWARD_DATA_0) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), false);
    using namespace convolution;
    std::vector<TestArg> args = get_args_0();
    Checker<ConvolutionBackwardData> checker(handle_rocm());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        using Mode = param::Convolution::Mode;
        //! non-xcorr mode (Mode::CONVOLUTION) is not supported by MIOpen
        if (arg.param.mode == Mode::CONVOLUTION) {
            continue;
        }
        float scale =
                1.0f / sqrt(arg.filter[0] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_rocm()->create_operator<ConvolutionForward>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
#if !MEGDNN_DISABLE_FLOAT16
        src.dtype = dst.dtype = filter.dtype = dtype::Float16();
        checker.set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-1)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
#endif
    }
}

TEST_F(ROCM, CONVOLUTION_BACKWARD_DATA_1) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), false);
    using namespace convolution;
    std::vector<TestArg> args = get_args_1();
    Checker<ConvolutionBackwardData> checker(handle_rocm());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        using Mode = param::Convolution::Mode;
        //! non-xcorr mode (Mode::CONVOLUTION) is not supported by MIOpen
        if (arg.param.mode == Mode::CONVOLUTION) {
            continue;
        }
        float scale =
                1.0f / sqrt(arg.filter[0] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_rocm()->create_operator<ConvolutionForward>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
#if !MEGDNN_DISABLE_FLOAT16
        src.dtype = dst.dtype = filter.dtype = dtype::Float16();
        checker.set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-1)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
#endif
    }
}

TEST_F(ROCM, CONVOLUTION_BACKWARD_DATA_2) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), false);
    using namespace convolution;
    std::vector<TestArg> args = get_args_2();
    Checker<ConvolutionBackwardData> checker(handle_rocm());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        using Mode = param::Convolution::Mode;
        //! non-xcorr mode (Mode::CONVOLUTION) is not supported by MIOpen
        if (arg.param.mode == Mode::CONVOLUTION) {
            continue;
        }
        float scale =
                1.0f / sqrt(arg.filter[0] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_rocm()->create_operator<ConvolutionForward>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
#if !MEGDNN_DISABLE_FLOAT16
        src.dtype = dst.dtype = filter.dtype = dtype::Float16();
        checker.set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-1)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
#endif
    }
}
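//! In the backward-data tests above, ConvolutionBackwardData consumes
//! (filter, diff, grad), hence the layout order {filter, dst, src}: the
//! forward dst (deduced via a ConvolutionForward operator) acts as the
//! output gradient, and src is the input gradient being checked.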
TEST_F(ROCM, DISABLED_CONVOLUTION_BACKWARD_FILTER) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), false);
    using namespace convolution;
    std::vector<TestArg> args = get_args();
    Checker<ConvolutionBackwardFilter> checker(handle_rocm());
    NormalRNG default_rng;
    bool f16_checked = false;
    MEGDNN_MARK_USED_VAR(f16_checked);
    for (auto&& arg : args) {
        using Mode = param::Convolution::Mode;
        //! non-xcorr mode (Mode::CONVOLUTION) is not supported by MIOpen
        if (arg.param.mode == Mode::CONVOLUTION) {
            continue;
        }
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_rocm()->create_operator<ConvolutionForward>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        float scale = 1.0f / sqrt(dst[2] * dst[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .exec(TensorLayoutArray{src, dst, filter});
#if !MEGDNN_DISABLE_FLOAT16
        //! FIXME: MIOpen convolution backward weights for FP16 has bugs
#if 0
        //! reducing over a large f16 array may introduce significant error
        if (dst.total_nr_elems() >= 1000 && f16_checked)
            continue;
        f16_checked = true;
        src.dtype = dst.dtype = filter.dtype = dtype::Float16();
        checker.set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-1)
                .set_param(arg.param)
                .exec(TensorLayoutArray{src, dst, filter});
#endif
#endif
    }
}

#if MEGDNN_WITH_BENCHMARK
TEST_F(ROCM, CONV_FWD_BENCHMARK) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), true);
    auto benchmarker = ROCMBenchmarker<ConvolutionForward>(
            handle_rocm(), handle_naive(false));
    auto run = [&](size_t N, size_t OC, size_t IC, size_t IH, size_t IW,
                   size_t SH = 1, size_t SW = 1, size_t FH = 1, size_t FW = 1,
                   size_t PH = 0, size_t PW = 0,
                   DType dtype = dtype::Float32()) {
        benchmarker.set_dtype(0, dtype).set_dtype(1, dtype).set_dtype(2,
                                                                      dtype);
        benchmarker.set_display(true);
        ConvolutionForward::Param param;
        param.stride_h = SH;
        param.stride_w = SW;
        param.pad_h = PH;
        param.pad_w = PW;
        benchmarker.set_param(param);
        size_t OH = (IH - FH + 2 * PH) / SH + 1;
        size_t OW = (IW - FW + 2 * PW) / SW + 1;
        // warm up so MIOpen can find the best algo
        benchmarker.execs(
                {{N, IC, IH, IW}, {OC, IC, FH, FW}, {N, OC, OH, OW}});
        // do the actual benchmark
        auto time_ms = benchmarker.execs(
                {{N, IC, IH, IW}, {OC, IC, FH, FW}, {N, OC, OH, OW}});
        auto flo = (double)N * OC * IC * OH * OW * FH * FW * 2;
        auto flops = flo / (time_ms * 1e9);
        printf("%.3fG FLO, flops %.3fTFLOPS\n", flo / 1e9, flops);
    };
    run(32, 24, 16, 224, 224, 2, 2, 7, 7, 3, 3);
    run(32, 128, 32, 112, 112, 1, 1, 3, 3, 1, 1);
    run(32, 128, 128, 56, 56, 1, 1, 3, 3, 1, 1);
    run(32, 128, 256, 28, 28, 1, 1, 3, 3, 1, 1);
    run(32, 256, 256, 28, 28, 1, 1, 1, 1, 0, 0);
    run(32, 256, 256, 28, 28, 2, 2, 3, 3, 1, 1);
    run(32, 256, 256, 14, 14, 1, 1, 3, 3, 1, 1);
    run(32, 512, 512, 7, 7, 1, 1, 3, 3, 1, 1);
#if !MEGDNN_DISABLE_FLOAT16
    run(32, 256, 256, 56, 56, 1, 1, 1, 1, 0, 0, dtype::Float16());
#endif
}
#endif

}  // namespace test
}  // namespace megdnn

// vim: syntax=cpp.doxygen