/**
 * \file dnn/test/common/utils.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#pragma once

#include "megdnn/basic_types.h"
#include "megdnn/handle.h"
#include "src/common/utils.h"

#include <gtest/gtest.h>
#include <cmath>
#include <iostream>
#include <memory>
#include <vector>
#if MEGDNN_ENABLE_MULTI_THREADS
#include <atomic>
#include <thread>
#endif

#define megcore_check(x)                                                        \
    do {                                                                        \
        auto status = (x);                                                      \
        if (status != megcoreSuccess) {                                         \
            std::cerr << "megcore_check error: " << megcoreGetErrorName(status) \
                      << std::endl;                                             \
            megdnn_trap();                                                      \
        }                                                                       \
    } while (0)

namespace megdnn {
namespace test {

struct TaskExecutorConfig {
    //! Number of threads.
    size_t nr_thread;
    //! The core ids to bind. The size of affinity_core_set should equal
    //! nr_thread.
    std::vector<size_t> affinity_core_set;
};

class CpuDispatchChecker final : MegcoreCPUDispatcher {
    class TaskExecutor {
        using Task = megcore::CPUDispatcher::Task;
        using MultiThreadingTask = megcore::CPUDispatcher::MultiThreadingTask;
#if MEGDNN_ENABLE_MULTI_THREADS
#if defined(WIN32)
        using thread_affinity_type = DWORD;
#else  // not WIN32
#if defined(__APPLE__)
        using thread_affinity_type = int;
#else
        using thread_affinity_type = cpu_set_t;
#endif
#endif
#endif

    public:
        TaskExecutor(TaskExecutorConfig* config = nullptr);
        ~TaskExecutor();

        /*!
         * Sync all workers.
         */
        void sync();

        /*!
         * Number of threads in this thread pool, including the main thread.
         */
        size_t nr_threads() const { return m_nr_threads; }

        void add_task(const MultiThreadingTask& task, size_t parallelism);
        void add_task(const Task& task);

    private:
#if MEGDNN_ENABLE_MULTI_THREADS
        size_t m_all_task_iter = 0;
        std::atomic_int m_current_task_iter{0};

        //! Indicates whether each worker thread should work; used for main
        //! thread sync.
        std::vector<std::atomic_bool*> m_workers_flag;

        //! Whether the main thread affinity has been set.
        bool m_main_thread_affinity = false;

        //! Stop the worker threads.
        bool m_stop{false};

        MultiThreadingTask m_task;

        //! The cpu ids to bind. If m_cpu_ids is empty, none of the threads
        //! will be bound to cpus; otherwise its size should equal
        //! m_nr_threads.
        std::vector<size_t> m_cpu_ids;

        //! The previous affinity mask of the main thread.
        thread_affinity_type m_main_thread_prev_affinity_mask;

        std::vector<std::thread> m_workers;
#endif
        //! Total number of threads, including main thread.
        size_t m_nr_threads = 1;
    };
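
    //! Illustrative sketch (not part of the original header) of how the
    //! executor above is driven. It assumes the megcore task signatures
    //! Task = void() and MultiThreadingTask = void(size_t index, size_t
    //! thread_id), and a hypothetical parallelism of 16:
    //!
    //!     TaskExecutorConfig config;
    //!     config.nr_thread = 4;
    //!     config.affinity_core_set = {0, 1, 2, 3};
    //!     TaskExecutor executor{&config};
    //!     executor.add_task(
    //!             [](size_t index, size_t thread_id) {
    //!                 // handle the index-th piece of work on worker thread_id
    //!             },
    //!             16 /* parallelism */);
    //!     executor.sync();  // wait until all workers finish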

    //! track number of CpuDispatchChecker instances to avoid leaking
    class InstCounter {
        bool m_used = false;
        int m_cnt = 0, m_max_cnt = 0;

    public:
        ~InstCounter() {
            auto check = [this]() {
                ASSERT_NE(0, m_max_cnt) << "no kernel dispatched on CPU";
                ASSERT_EQ(0, m_cnt) << "leaked CpuDispatchChecker object";
            };
            if (m_used) {
                check();
            }
        }

        int& cnt() {
            m_used = true;
            m_max_cnt = std::max(m_cnt, m_max_cnt);
            return m_cnt;
        }
    };
    static InstCounter sm_inst_counter;

    bool m_recursive_dispatch = false;
#if MEGDNN_ENABLE_MULTI_THREADS
    std::atomic_size_t m_nr_call{0};
#else
    size_t m_nr_call = 0;
#endif

    std::unique_ptr<TaskExecutor> m_task_executor;

    CpuDispatchChecker(TaskExecutorConfig* config) {
        ++sm_inst_counter.cnt();
        megdnn_assert(sm_inst_counter.cnt() < 10);
        m_task_executor = std::make_unique<TaskExecutor>(config);
    }

    void dispatch(Task&& task) override {
        megdnn_assert(!m_recursive_dispatch);
        m_recursive_dispatch = true;
        ++m_nr_call;
        m_task_executor->add_task(std::move(task));
        m_recursive_dispatch = false;
    }

    void dispatch(MultiThreadingTask&& task, size_t parallelism) override {
        megdnn_assert(!m_recursive_dispatch);
        m_recursive_dispatch = true;
        ++m_nr_call;
        m_task_executor->add_task(std::move(task), parallelism);
        m_recursive_dispatch = false;
    }

    size_t nr_threads() override { return m_task_executor->nr_threads(); }

    CpuDispatchChecker() {
        ++sm_inst_counter.cnt();
        megdnn_assert(sm_inst_counter.cnt() < 10);
    }

    void sync() override {}

public:
    ~CpuDispatchChecker() {
        if (!std::uncaught_exception()) {
            megdnn_assert(!m_recursive_dispatch);
        } else {
            if (m_recursive_dispatch) {
                fprintf(stderr,
                        "CpuDispatchChecker: "
                        "detected recursive dispatch\n");
            }
            if (!m_nr_call) {
                fprintf(stderr, "CpuDispatchChecker: dispatch not called\n");
            }
        }
        --sm_inst_counter.cnt();
    }

    static std::unique_ptr<CpuDispatchChecker> make(TaskExecutorConfig* config) {
        return std::unique_ptr<CpuDispatchChecker>(new CpuDispatchChecker(config));
    }
};

std::unique_ptr<Handle> create_cpu_handle(
        int debug_level, bool check_dispatch = true,
        TaskExecutorConfig* config = nullptr);

std::unique_ptr<Handle> create_cpu_handle_with_dispatcher(
        int debug_level, const std::shared_ptr<MegcoreCPUDispatcher>& dispatcher);

static inline dt_float32 diff(dt_float32 x, dt_float32 y) {
    auto numerator = x - y;
    auto denominator = std::max(std::max(std::abs(x), std::abs(y)), 1.f);
    return numerator / denominator;
}

static inline int diff(int x, int y) {
    return x - y;
}

static inline int diff(dt_quint8 x, dt_quint8 y) {
    return x.as_uint8() - y.as_uint8();
}

static inline int diff(dt_qint32 x, dt_qint32 y) {
    return x.as_int32() - y.as_int32();
}

static inline int diff(dt_qint16 x, dt_qint16 y) {
    return x.as_int16() - y.as_int16();
}

static inline int diff(dt_qint8 x, dt_qint8 y) {
    return x.as_int8() - y.as_int8();
}

static inline int diff(dt_qint4 x, dt_qint4 y) {
    return x.as_int8() - y.as_int8();
}

static inline int diff(dt_quint4 x, dt_quint4 y) {
    return x.as_uint8() - y.as_uint8();
}

inline TensorShape cvt_src_or_dst_nchw2nhwc(const TensorShape& shape) {
    megdnn_assert(shape.ndim == 4);
    auto N = shape[0], C = shape[1], H = shape[2], W = shape[3];
    return TensorShape{N, H, W, C};
}

inline TensorShape cvt_src_or_dst_ncdhw2ndhwc(const TensorShape& shape) {
    megdnn_assert(shape.ndim == 5);
    auto N = shape[0], C = shape[1], D = shape[2], H = shape[3], W = shape[4];
    return TensorShape{N, D, H, W, C};
}
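
//! Illustrative example (not part of the original header): the conversion
//! helpers above and below only permute the entries of a TensorShape; no data
//! is moved. For a 4-dim NCHW source shape:
//!
//!     TensorShape nchw{2, 16, 32, 32};                    // N, C, H, W
//!     TensorShape nhwc = cvt_src_or_dst_nchw2nhwc(nchw);  // {2, 32, 32, 16}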

inline TensorShape cvt_filter_nchw2nhwc(const TensorShape& shape) {
    if (shape.ndim == 4) {
        auto OC = shape[0], IC = shape[1], FH = shape[2], FW = shape[3];
        return TensorShape{OC, FH, FW, IC};
    } else {
        megdnn_assert(shape.ndim == 5);
        auto G = shape[0], OC = shape[1], IC = shape[2], FH = shape[3],
             FW = shape[4];
        return TensorShape{G, OC, FH, FW, IC};
    }
}

inline TensorShape cvt_filter_ncdhw2ndhwc(const TensorShape& shape) {
    if (shape.ndim == 5) {
        auto OC = shape[0], IC = shape[1], FD = shape[2], FH = shape[3],
             FW = shape[4];
        return TensorShape{OC, FD, FH, FW, IC};
    } else {
        megdnn_assert(shape.ndim == 6);
        auto G = shape[0], OC = shape[1], IC = shape[2], FD = shape[3],
             FH = shape[4], FW = shape[5];
        return TensorShape{G, OC, FD, FH, FW, IC};
    }
}

void megdnn_sync(Handle* handle);
void* megdnn_malloc(Handle* handle, size_t size_in_bytes);
void megdnn_free(Handle* handle, void* ptr);

void megdnn_memcpy_D2H(
        Handle* handle, void* dst, const void* src, size_t size_in_bytes);
void megdnn_memcpy_H2D(
        Handle* handle, void* dst, const void* src, size_t size_in_bytes);
void megdnn_memcpy_D2D(
        Handle* handle, void* dst, const void* src, size_t size_in_bytes);

//! default implementation for DynOutMallocPolicy
class DynOutMallocPolicyImpl final : public DynOutMallocPolicy {
    Handle* m_handle;

public:
    DynOutMallocPolicyImpl(Handle* handle) : m_handle{handle} {}

    TensorND alloc_output(
            size_t id, DType dtype, const TensorShape& shape,
            void* user_data) override;

    void* alloc_workspace(size_t sz, void* user_data) override;
    void free_workspace(void* ptr, void* user_data) override;

    /*!
     * \brief make a shared_ptr which would release output memory when
     *      deleted
     * \param out output tensor allocated by alloc_output()
     */
    std::shared_ptr<void> make_output_refholder(const TensorND& out);
};

//! replace ErrorHandler::on_megdnn_error
class MegDNNError : public std::exception {
    std::string m_msg;

public:
    MegDNNError(const std::string& msg) : m_msg{msg} {}

    const char* what() const noexcept { return m_msg.c_str(); }
};

class TensorReshapeError : public MegDNNError {
public:
    using MegDNNError::MegDNNError;
};

size_t get_cpu_count();

static inline bool good_float(float val) {
    return std::isfinite(val);
}

static inline bool good_float(int) {
    return true;
}

static inline bool good_float(dt_qint8) {
    return true;
}

static inline bool good_float(dt_qint16) {
    return true;
}

static inline bool good_float(dt_quint8) {
    return true;
}

static inline bool good_float(dt_qint32) {
    return true;
}

static inline bool good_float(dt_qint4) {
    return true;
}

static inline bool good_float(dt_quint4) {
    return true;
}

// A hack for the (x + 0) promote-to-int trick on quantized dtypes.
static inline int operator+(dt_quint8 lhs, int rhs) {
    megdnn_assert(rhs == 0, "unexpected rhs");
    return lhs.as_uint8();
}

static inline int operator+(dt_qint32 lhs, int rhs) {
    megdnn_assert(rhs == 0, "unexpected rhs");
    return lhs.as_int32();
}

static inline int operator+(dt_qint8 lhs, int rhs) {
    megdnn_assert(rhs == 0, "unexpected rhs");
    return int8_t(lhs);
}

static inline int operator+(dt_qint16 lhs, int rhs) {
    megdnn_assert(rhs == 0, "unexpected rhs");
    return lhs.as_int16();
}

static inline int operator+(dt_quint4 lhs, int rhs) {
    megdnn_assert(rhs == 0, "unexpected rhs");
    return lhs.as_uint8();
}

static inline int operator+(dt_qint4 lhs, int rhs) {
    megdnn_assert(rhs == 0, "unexpected rhs");
    return lhs.as_int8();
}

}  // namespace test

static inline bool operator==(const TensorLayout& a, const TensorLayout& b) {
    return a.eq_layout(b);
}

static inline std::ostream& operator<<(
        std::ostream& ostr, const TensorLayout& layout) {
    return ostr << layout.to_string();
}
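
//! Illustrative example (not part of the original header): the two operators
//! above let gtest compare TensorLayout values directly and print them on
//! mismatch. Assuming dtype::Float32():
//!
//!     TensorLayout expected{{2, 3, 4, 5}, dtype::Float32()};
//!     TensorLayout got{{2, 3, 4, 5}, dtype::Float32()};
//!     ASSERT_EQ(expected, got);  // uses operator== and operator<< above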

//! change the image2d_pitch_alignment of naive handle in this scope
class NaivePitchAlignmentScope {
    size_t m_orig_val, m_new_val;

public:
    NaivePitchAlignmentScope(size_t alignment);
    ~NaivePitchAlignmentScope();
};

}  // namespace megdnn

// vim: syntax=cpp.doxygen