/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. // Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #pragma once #ifndef __OPENCV_CUDEV_WARP_REDUCE_KEY_VAL_DETAIL_HPP__ #define __OPENCV_CUDEV_WARP_REDUCE_KEY_VAL_DETAIL_HPP__ #include "../../common.hpp" #include "../../util/tuple.hpp" namespace cv { namespace cudev { namespace warp_reduce_key_val_detail { // GetType template struct GetType; template struct GetType { typedef T type; }; template struct GetType { typedef T type; }; template struct GetType { typedef T type; }; // For template struct For { template __device__ static void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, uint tid) { get(smem)[tid] = get(data); For::loadToSmem(smem, data, tid); } template __device__ static void copy(const PointerTuple& svals, const ReferenceTuple& val, uint tid, uint delta) { get(svals)[tid] = get(val) = get(svals)[tid + delta]; For::copy(svals, val, tid, delta); } template __device__ static void merge(const KeyPointerTuple& skeys, const KeyReferenceTuple& key, const ValPointerTuple& svals, const ValReferenceTuple& val, const CmpTuple& cmp, uint tid, uint delta) { typename GetType::type>::type reg = get(skeys)[tid + delta]; if (get(cmp)(reg, get(key))) { get(skeys)[tid] = get(key) = reg; get(svals)[tid] = get(val) = get(svals)[tid + delta]; } For::merge(skeys, key, svals, val, cmp, tid, delta); } }; template struct For { template __device__ __forceinline__ static void loadToSmem(const PointerTuple&, const ReferenceTuple&, uint) { } template __device__ __forceinline__ static void copy(const PointerTuple&, const ReferenceTuple&, uint, uint) { } template __device__ __forceinline__ static void merge(const KeyPointerTuple&, const KeyReferenceTuple&, const ValPointerTuple&, const ValReferenceTuple&, const CmpTuple&, uint, uint) { } }; // loadToSmem template __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, uint tid) { smem[tid] = data; } template __device__ __forceinline__ void loadToSmem(const tuple& smem, const tuple& data, uint tid) { For<0, tuple_size >::value>::loadToSmem(smem, data, tid); } // copyVals template __device__ __forceinline__ void copyVals(volatile V* svals, V& val, uint tid, uint delta) { svals[tid] = val = svals[tid + delta]; } template __device__ __forceinline__ void copyVals(const tuple& svals, const tuple& val, uint tid, uint delta) { For<0, tuple_size >::value>::copy(svals, val, tid, delta); } // merge template __device__ void merge(volatile K* skeys, K& key, volatile V* svals, V& val, const Cmp& cmp, uint tid, uint delta) { K reg = skeys[tid + delta]; if (cmp(reg, key)) { skeys[tid] = key = reg; copyVals(svals, val, tid, delta); } } template __device__ void merge(volatile K* skeys, K& key, const tuple& svals, const tuple& val, const Cmp& cmp, uint tid, uint delta) { K reg = skeys[tid + delta]; if (cmp(reg, key)) { skeys[tid] = key = reg; copyVals(svals, val, tid, delta); } } template __device__ __forceinline__ void merge(const tuple& skeys, const tuple& key, const tuple& svals, const tuple& val, const tuple& cmp, uint tid, uint delta) { For<0, tuple_size >::value>::merge(skeys, key, svals, val, cmp, tid, delta); } // WarpReductor struct WarpReductor { template __device__ static void reduce(KP skeys, KR key, VP svals, VR val, uint tid, Cmp cmp) { loadToSmem(skeys, key, tid); loadToSmem(svals, val, tid); if (tid < 16) { merge(skeys, key, svals, val, cmp, tid, 16); merge(skeys, key, svals, val, cmp, tid, 8); merge(skeys, key, svals, val, cmp, tid, 4); merge(skeys, key, svals, val, cmp, tid, 2); merge(skeys, key, svals, val, cmp, tid, 1); } } }; } }} #endif