/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. // Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #pragma once #ifndef __OPENCV_CUDEV_WARP_REDUCE_HPP__ #define __OPENCV_CUDEV_WARP_REDUCE_HPP__ #include "../common.hpp" #include "../util/tuple.hpp" #include "detail/reduce.hpp" #include "detail/reduce_key_val.hpp" namespace cv { namespace cudev { //! @addtogroup cudev //! @{ // warpReduce template __device__ __forceinline__ void warpReduce(volatile T* smem, T& val, uint tid, const Op& op) { warp_reduce_detail::WarpReductor::template reduce(smem, val, tid, op); } template __device__ __forceinline__ void warpReduce(const tuple& smem, const tuple& val, uint tid, const tuple& op) { warp_reduce_detail::WarpReductor::template reduce< const tuple&, const tuple&, const tuple&>(smem, val, tid, op); } // warpReduceKeyVal template __device__ __forceinline__ void warpReduceKeyVal(volatile K* skeys, K& key, volatile V* svals, V& val, uint tid, const Cmp& cmp) { warp_reduce_key_val_detail::WarpReductor::template reduce(skeys, key, svals, val, tid, cmp); } template __device__ __forceinline__ void warpReduceKeyVal(volatile K* skeys, K& key, const tuple& svals, const tuple& val, uint tid, const Cmp& cmp) { warp_reduce_key_val_detail::WarpReductor::template reduce&, const tuple&, const Cmp&>(skeys, key, svals, val, tid, cmp); } template __device__ __forceinline__ void warpReduceKeyVal(const tuple& skeys, const tuple& key, const tuple& svals, const tuple& val, uint tid, const tuple& cmp) { warp_reduce_key_val_detail::WarpReductor::template reduce< const tuple&, const tuple&, const tuple&, const tuple&, const tuple& >(skeys, key, svals, val, tid, cmp); } // smem_tuple template __device__ __forceinline__ tuple smem_tuple(T0* t0) { return make_tuple((volatile T0*) t0); } template __device__ __forceinline__ tuple smem_tuple(T0* t0, T1* t1) { return make_tuple((volatile T0*) t0, (volatile T1*) t1); } template __device__ __forceinline__ tuple smem_tuple(T0* t0, T1* t1, T2* t2) { return make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2); } template __device__ __forceinline__ tuple smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3) { return make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3); } template __device__ __forceinline__ tuple smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4) { return make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4); } template __device__ __forceinline__ tuple smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5) { return make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5); } template __device__ __forceinline__ tuple smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6) { return make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6); } template __device__ __forceinline__ tuple smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7) { return make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7); } template __device__ __forceinline__ tuple smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8) { return make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8); } template __device__ __forceinline__ tuple smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8, T9* t9) { return make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8, (volatile T9*) t9); } //! @} }} #endif