/**
 * \file dnn/test/x86/pooling.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#include "test/common/pooling.h"
#include "test/common/benchmarker.h"
#include "test/common/checker.h"
#include "test/common/task_record_check.h"
#include "test/x86/fixture.h"

namespace megdnn {
namespace test {

//! Runs every case returned by pooling::get_args() through a fresh Checker
//! bound to the X86 fixture's handle; the output shape is left empty ({}).
TEST_F(X86, POOLING) {
    auto args = pooling::get_args();
    for (auto&& arg : args) {
        Checker<Pooling> checker(handle());
        checker.set_param(arg.param).exec(TensorShapeArray{arg.ishape, {}});
    }
}

//! Same cases as POOLING, but driven through TaskRecordChecker. The literal 0
//! is forwarded to its constructor unchanged (meaning defined by that class).
TEST_F(X86, POOLING_RECORD) {
    auto args = pooling::get_args();
    for (auto&& arg : args) {
        TaskRecordChecker<Pooling> checker(0);
        checker.set_param(arg.param).exec(TensorShapeArray{arg.ishape, {}});
    }
}

/*!
 * \brief Shared body of the S1POOLING88 tests.
 *
 * Checks MAX pooling in NCHW88 format with stride 1 on \p handle. Window
 * height and width each sweep 10..14, padding is half of the window, and the
 * input shape is {n, c, h, w, 8} for n in {1, 2, 4}, c in {1, 4} and
 * h, w in {10, 13, 20}.
 */
static void x86_check_s1_max_pooling_nchw88(Handle* handle) {
    Checker<Pooling> checker(handle);
    auto run = [&checker](
                       size_t WH, size_t WW, size_t PH, size_t PW, size_t SH,
                       size_t SW, size_t N, size_t C, size_t H, size_t W) {
        Pooling::Param param;
        param.format = param::Pooling::Format::NCHW88;
        param.window_h = WH;
        param.window_w = WW;
        param.pad_h = PH;
        param.pad_w = PW;
        param.stride_w = SW;
        param.stride_h = SH;
        param.mode = param::Pooling::Mode::MAX;
        checker.set_param(param);
        checker.execs({{N, C, H, W, 8}, {}});
    };

    for (size_t wh = 10; wh < 15; ++wh) {
        for (size_t ww = 10; ww < 15; ++ww) {
            for (size_t n : {1, 2, 4}) {
                for (size_t c : {1, 4}) {
                    for (size_t h : {10, 13, 20}) {
                        for (size_t w : {10, 13, 20}) {
                            run(wh, ww, wh / 2, ww / 2, 1, 1, n, c, h, w);
                        }
                    }
                }
            }
        }
    }
}

TEST_F(X86, S1POOLING88) {
    x86_check_s1_max_pooling_nchw88(handle());
}

//! Same sweep as X86.S1POOLING88, using the X86_MULTI_THREADS fixture's handle.
TEST_F(X86_MULTI_THREADS, S1POOLING88) {
    x86_check_s1_max_pooling_nchw88(handle());
}

#if MEGDNN_X86_WITH_MKL_DNN
/*!
 * \brief Shared body of the POOLING88 tests.
 *
 * Takes each case from pooling::get_args(), rewrites its input shape in place
 * to five dimensions (dimension 1 becomes ceil(dim1 / 8), dimension 4 becomes
 * 8), switches the param format to NCHW88 and executes it on \p checker.
 *
 * \tparam CheckerT any checker type offering set_param(...).exec(...), e.g.
 *         Checker<Pooling> or TaskRecordChecker<Pooling>.
 */
template <typename CheckerT>
static void x86_check_pooling_nchw88(CheckerT& checker) {
    auto args = pooling::get_args();
    for (auto&& arg : args) {
        arg.ishape.ndim = 5;
        // Ceiling division: number of 8-wide blocks needed for dimension 1.
        arg.ishape[1] = (arg.ishape[1] + 7) / 8;
        arg.ishape[4] = 8;
        arg.param.format = param::Pooling::Format::NCHW88;
        checker.set_param(arg.param).exec(TensorShapeArray{arg.ishape, {}});
    }
}

TEST_F(X86, POOLING88) {
    Checker<Pooling> checker(handle());
    x86_check_pooling_nchw88(checker);
}

TEST_F(X86, POOLING88_RECORD) {
    TaskRecordChecker<Pooling> checker(0);
    x86_check_pooling_nchw88(checker);
}

TEST_F(X86_MULTI_THREADS, POOLING88) {
    Checker<Pooling> checker(handle());
    x86_check_pooling_nchw88(checker);
}
#endif

#if MEGDNN_WITH_BENCHMARK
/*!
 * \brief Benchmarks MAX pooling on \p handle and prints one line per case.
 *
 * Input 0 is configured as QuantizedS8(1.2) and filled from a
 * UniformIntRNG(-127, 127). Each configuration is executed with
 * set_times(RUNS); the value returned by exec() is divided by RUNS and
 * printed as milliseconds, together with a throughput figure in "Gops".
 */
static void test_x86_megdnn_pooling(Handle* handle) {
    constexpr size_t RUNS = 50;
    // Must outlive the benchmarker, which only stores the raw pointer.
    auto rng = std::make_unique<UniformIntRNG>(-127, 127);

    Benchmarker<Pooling> benchmarker_pooling(handle);
    benchmarker_pooling.set_times(RUNS)
            .set_dtype(0, dtype::QuantizedS8(1.2))
            .set_display(false)
            .set_rng(0, rng.get());

    auto run = [&](uint32_t pad, uint32_t stride, uint32_t window_size,
                   size_t in_number, size_t in_channel, size_t in_height,
                   size_t in_width) {
        const param::Pooling pooling_param{
                param::Pooling::Mode::MAX, pad,         pad,        stride,
                stride,                    window_size, window_size};

        // The operator is created only to deduce the output layout, whose
        // element count feeds the operation estimate below.
        TensorLayout dst_layout;
        auto opr = handle->create_operator<Pooling>();
        opr->param() = pooling_param;
        TensorShape shape{in_number, in_channel, in_height, in_width};
        opr->deduce_layout({shape, dtype::Int8{}}, dst_layout);

        // One operation per window element per output element, in units of 1e9.
        float computation =
                dst_layout.total_nr_elems() * window_size * window_size * 1e-9;

        auto pooling_used = benchmarker_pooling.set_param(pooling_param)
                                    .exec(TensorShapeArray{shape, {}}) /
                            RUNS;
        // pooling_used is reported in ms, hence the 1e3 factor for per-second.
        float through_put = computation / pooling_used * 1e3;
        std::cout << "{" << pad << "," << stride << "," << window_size << ","
                  << in_number << "," << in_channel << "," << in_height << ","
                  << in_width << "} "
                  << "use time " << pooling_used << "ms, "
                  << "through_put " << through_put << "Gops, " << std::endl;
    };

    for (auto window_size : {2, 3})
        for (auto stride : {2})
            for (auto pad : {2})
                for (auto n : {1, 3, 4})
                    for (auto c : {1, 32, 64})
                        for (auto h_w : {12, 32, 64}) {
                            run(pad, stride, window_size, n, c, h_w, h_w);
                        }
}

TEST_F(X86, BENCHMARK_POOLING) {
    test_x86_megdnn_pooling(handle());
}

TEST_F(X86_MULTI_THREADS, BENCHMARK_POOLING) {
    test_x86_megdnn_pooling(handle());
}

//! Benchmarks a single stride-1 MAX pooling case in NCHW88 format
//! (pad 6, window 13, input {1, 32, 20, 20, 8}) and prints time/throughput.
TEST_F(X86, BENCHMARK_POOLING_MAX_S1_NCHW88) {
    constexpr size_t RUNS = 50;
    auto x86_handle = handle();
    Benchmarker<Pooling> benchmarker_pooling(x86_handle);
    benchmarker_pooling.set_times(RUNS);

    auto run = [&](uint32_t pad, uint32_t stride, uint32_t window_size,
                   size_t in_number, size_t in_channel, size_t in_height,
                   size_t in_width) {
        auto opr = x86_handle->create_operator<Pooling>();
        opr->param() = {param::Pooling::Mode::MAX, pad,         pad,        stride,
                        stride,                    window_size, window_size};
        opr->param().format = param::Pooling::Format::NCHW88;

        // in_channel is given in plain channels; the shape stores it as
        // in_channel / 8 blocks with a trailing dimension of 8.
        TensorShape shape{in_number, in_channel / 8, in_height, in_width, 8};
        TensorLayout dst_layout;
        opr->deduce_layout({shape, dtype::Float32()}, dst_layout);

        float computation =
                dst_layout.total_nr_elems() * window_size * window_size * 1e-9;
        auto pooling_used = benchmarker_pooling.set_param(opr->param())
                                    .exec(TensorShapeArray{shape, {}}) /
                            RUNS;
        float through_put = computation / pooling_used * 1e3;
        printf("profiling max pooling NCHW88 {%zu,%zu,%zu,%zu,8}\nuse time : "
               "%f ms\nthrough_put : %f Gflops\n",
               in_number, in_channel / 8, in_height, in_width, pooling_used,
               through_put);
    };

    run(6, 1, 13, 1, 32 * 8, 20, 20);
}
#endif

#if MEGDNN_X86_WITH_MKL_DNN
/*!
 * \brief Shared body of the POOLING_INT8 tests.
 *
 * For each case from pooling::get_args(), builds a fresh Checker on
 * \p handle with input 0 set to dtype::Int8 and filled from a
 * UniformIntRNG(-127, 127), then executes the case.
 */
static void x86_check_pooling_int8(Handle* handle) {
    auto args = pooling::get_args();
    for (auto&& arg : args) {
        Checker<Pooling> checker(handle);
        auto rng = std::make_unique<UniformIntRNG>(-127, 127);
        checker.set_dtype(0, dtype::Int8()).set_rng(0, rng.get());
        checker.set_param(arg.param).exec(TensorShapeArray{arg.ishape, {}});
    }
}

TEST_F(X86, POOLING_INT8) {
    x86_check_pooling_int8(handle());
}

// NOTE(review): unlike POOLING_RECORD and POOLING88_RECORD, this test has
// always used the plain Checker rather than TaskRecordChecker, so it repeats
// X86.POOLING_INT8. Behaviour is kept as-is here; confirm whether
// TaskRecordChecker was intended.
TEST_F(X86, POOLING_INT8_RECORD) {
    x86_check_pooling_int8(handle());
}

TEST_F(X86_MULTI_THREADS, POOLING_INT8) {
    x86_check_pooling_int8(handle());
}
#endif

}  // namespace test
}  // namespace megdnn

// vim: syntax=cpp.doxygen