// ------------------------------------------------------------- // cuDPP -- CUDA Data Parallel Primitives library // ------------------------------------------------------------- // $Revision: 5289 $ // $Date: 2010-11-23 13:04:43 -0700 (Tue, 23 Nov 2010) $ // ------------------------------------------------------------- // This source code is distributed under the terms of license.txt in // the root directory of this source distribution. // ------------------------------------------------------------- /** * @file * cudpp_util.h * * @brief C++ utility functions and classes used internally to cuDPP */ #ifndef __CUDPP_UTIL_H__ #define __CUDPP_UTIL_H__ #ifdef WIN32 #include #endif #include #include #include #include #if (CUDA_VERSION >= 3000) #define LAUNCH_BOUNDS(x) __launch_bounds__((x)) #define LAUNCH_BOUNDS_MINBLOCKs(x, y) __launch_bounds__((x),(y)) #else #define LAUNCH_BOUNDS(x) #define LAUNCH_BOUNDS_MINBLOCKS(x, y) #endif /** @brief Determine if \a n is a power of two. * @param n Value to be checked to see if it is a power of two * @returns True if \a n is a power of two, false otherwise */ inline bool isPowerOfTwo(int n) { return ((n&(n-1))==0) ; } /** @brief Determine if an integer \a n is a multiple of an integer \a f. * @param n Multiple * @param f Factor * @returns True if \a n is a multiple of \a f, false otherwise */ inline bool isMultiple(int n, int f) { if (isPowerOfTwo(f)) return ((n&(f-1))==0); else return (n%f==0); } /** @brief Compute the smallest power of two larger than \a n. * @param n Input value * @returns The smallest power f two larger than \a n */ inline int ceilPow2(int n) { double log2n = log2((double)n); if (isPowerOfTwo(n)) return n; else return 1 << (int)ceil(log2n); } /** @brief Compute the largest power of two smaller than \a n. * @param n Input value * @returns The largest power of two smaller than \a n. */ inline int floorPow2(int n) { #ifdef WIN32 // method 2 return 1 << (int)_logb((float)n); #else // method 3 int exp; frexp((float)n, &exp); return 1 << (exp - 1); #endif } /** @brief Returns the maximum value for type \a T. * * Implemented using template specialization on \a T. */ template __host__ __device__ inline T getMax() { return 0; } /** @brief Returns the minimum value for type \a T. * * Implemented using template specialization on \a T. */ template __host__ __device__ inline T getMin() { return 0; } // type specializations for the above // getMax template <> __host__ __device__ inline int getMax() { return INT_MAX; } template <> __host__ __device__ inline unsigned int getMax() { return INT_MAX; } template <> __host__ __device__ inline float getMax() { return FLT_MAX; } template <> __host__ __device__ inline char getMax() { return (char)INT_MAX; } template <> __host__ __device__ inline unsigned char getMax() { return (unsigned char)INT_MAX; } // getMin template <> __host__ __device__ inline int getMin() { return INT_MIN; } template <> __host__ __device__ inline unsigned int getMin() { return 0; } template <> __host__ __device__ inline float getMin() { return -FLT_MAX; } template <> __host__ __device__ inline char getMin() { return (char)INT_MIN; } template <> __host__ __device__ inline unsigned char getMin() { return (unsigned char)0; } /** @brief Returns the maximum of three values. * @param a First value. * @param b Second value. * @param c Third value. * @returns The maximum of \a a, \a b and \a c. */ template inline int max3(T a, T b, T c) { return (a > b) ? ((a > c)? a : c) : ((b > c) ? b : c); } /** @brief Utility template struct for generating small vector types from scalar types * * Given a base scalar type (\c int, \c float, etc.) and a vector length (1 through 4) as * template parameters, this struct defines a vector type (\c float3, \c int4, etc.) of the * specified length and base type. For example: * \code * template * __device__ void myKernel(T *data) * { * typeToVector::Result myVec4; // create a vec4 of type T * myVec4 = (typeToVector::Result*)data[0]; // load first element of data as a vec4 * } * \endcode * * This functionality is implemented using template specialization. Currently specializations * for int, float, and unsigned int vectors of lengths 2-4 are defined. Note that this results * in types being generated at compile time -- there is no runtime cost. typeToVector is used by * the optimized scan \c __device__ functions in scan_cta.cu. */ template struct typeToVector { typedef T Result; }; template<> struct typeToVector { typedef int4 Result; }; template<> struct typeToVector { typedef uint4 Result; }; template<> struct typeToVector { typedef float4 Result; }; template<> struct typeToVector { typedef int3 Result; }; template<> struct typeToVector { typedef uint3 Result; }; template<> struct typeToVector { typedef float3 Result; }; template<> struct typeToVector { typedef int2 Result; }; template<> struct typeToVector { typedef uint2 Result; }; template<> struct typeToVector { typedef float2 Result; }; /** @brief Templatized operator class used by scan and segmented scan * * This Operator class is used to allow generic support of binary * associative operators in scan. It defines two member functions, * op() and identity(), that are used in place of + and 0 (for * example) in the scan and segmented scan code. Because this is * template code, all decisions in the code are made at compile * time, resulting in optimal operator code. Currently the operators * CUDPP_ADD, CUDPP_MULTIPLY, CUDPP_MIN, and CUDPP_MAX are supported. * Operator is implemented using template specialization for the * types \c int, \c unsigned int, and \c float. */ template class Operator { public: /** Applies the operator to operands \a a and \a b. * @param a First operand * @param b Second operand * @returns a OP b, where OP is defined by ::CUDPPOperator \a oper. */ static __device__ T op(const T a, const T b) { switch (oper) { case CUDPP_ADD: return a + b; case CUDPP_MULTIPLY: return a * b; case CUDPP_MIN: return min(a, b); case CUDPP_MAX: return max(a, b); } } /** Returns the identity element defined for type \a T */ static __device__ T identity() { return 0; } }; // specializations for different types template class Operator { public: static __device__ int op(const int a, const int b) { switch (oper) { default: case CUDPP_ADD: return a + b; case CUDPP_MULTIPLY: return a * b; case CUDPP_MIN: return min(a, b); case CUDPP_MAX: return max(a, b); } } static __device__ int identity() { switch (oper) { default: case CUDPP_ADD: return 0; case CUDPP_MULTIPLY: return 1; case CUDPP_MIN: return INT_MAX; case CUDPP_MAX: return INT_MIN; } } }; template class Operator { public: static __device__ unsigned int op(const unsigned int a, const unsigned int b) { switch (oper) { default: case CUDPP_ADD: return a + b; case CUDPP_MULTIPLY: return a * b; case CUDPP_MIN: return min(a, b); case CUDPP_MAX: return max(a, b); } } static __device__ unsigned int identity() { switch (oper) { default: case CUDPP_ADD: return 0; case CUDPP_MULTIPLY: return 1; case CUDPP_MIN: return UINT_MAX; case CUDPP_MAX: return 0; } } }; template class Operator { public: static __device__ float op(const float a, const float b) { switch (oper) { default: case CUDPP_ADD: return a + b; case CUDPP_MULTIPLY: return a * b; case CUDPP_MIN: return min(a, b); case CUDPP_MAX: return max(a, b); } } static __device__ float identity() { switch (oper) { default: case CUDPP_ADD: return 0.0f; case CUDPP_MULTIPLY: return 1.0f; case CUDPP_MIN: return FLT_MAX; case CUDPP_MAX: return -FLT_MAX; } } }; #endif // __CUDPP_UTIL_H__ // Leave this at the end of the file // Local Variables: // mode:c++ // c-file-style: "NVIDIA" // End: