/*
 * Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
 *
 * NOTICE TO LICENSEE:
 *
 * This source code and/or documentation ("Licensed Deliverables") are
 * subject to NVIDIA intellectual property rights under U.S. and
 * international Copyright laws.
 *
 * These Licensed Deliverables contained herein is PROPRIETARY and
 * CONFIDENTIAL to NVIDIA and is being provided under the terms and
 * conditions of a form of NVIDIA software license agreement by and
 * between NVIDIA and Licensee ("License Agreement") or electronically
 * accepted by Licensee. Notwithstanding any terms or conditions to
 * the contrary in the License Agreement, reproduction or disclosure
 * of the Licensed Deliverables to any third party without the express
 * written consent of NVIDIA is prohibited.
 *
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THESE LICENSED DELIVERABLES.
 *
 * U.S. Government End Users. These Licensed Deliverables are a
 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 * 1995), consisting of "commercial computer software" and "commercial
 * computer software documentation" as such terms are used in 48
 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 * U.S. Government End Users acquire the Licensed Deliverables with
 * only those rights set forth herein.
 *
 * Any use of the Licensed Deliverables in individual and commercial
 * software must include, in the user documentation and internal
 * comments to the code, the above Disclaimer and U.S. Government End
 * Users Notice.
 */

/*
 * cudnn_ops_infer : cuDNN's basic definitions and inference operations.
 */

#if !defined(CUDNN_OPS_INFER_H_)
#define CUDNN_OPS_INFER_H_

#include <cuda_runtime.h>
#include <stdint.h>

#include "cudnn_version.h"

/* These version numbers are autogenerated, do not edit manually. */
#define CUDNN_OPS_INFER_MAJOR 8
#define CUDNN_OPS_INFER_MINOR 9
#define CUDNN_OPS_INFER_PATCH 5

#if (CUDNN_OPS_INFER_MAJOR != CUDNN_MAJOR) || (CUDNN_OPS_INFER_MINOR != CUDNN_MINOR) || \
    (CUDNN_OPS_INFER_PATCH != CUDNN_PATCHLEVEL)
#error Version mismatch in cuDNN OPS INFER!!!
#endif
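
/*
 * Illustrative note (not part of the API): besides the compile-time check
 * above, an application can verify at run time that the cuDNN library it
 * loaded matches the headers it was built against, e.g.
 *
 *     int compatible = (cudnnGetVersion() == CUDNN_VERSION);
 *
 * where CUDNN_VERSION comes from cudnn_version.h and cudnnGetVersion() is
 * declared later in this header.
 */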
#ifndef CUDNNWINAPI
#ifdef _WIN32
#define CUDNNWINAPI __stdcall
#else
#define CUDNNWINAPI
#endif
#endif

/* Warnings for deprecated APIs are enabled using the CUDNN_WARN_DEPRECATED macro */
#if defined(CUDNN_WARN_DEPRECATED) && (defined(__GNUC__) || defined(__clang__))
/* GCC, Intel C/C++, Cray C/C++, CLANG, IBM XL C/C++ little endian */
#define CUDNN_DEPRECATED __attribute__((deprecated))
#elif defined(CUDNN_WARN_DEPRECATED) && defined(_MSC_VER)
/* Microsoft Visual C++ */
#define CUDNN_DEPRECATED __declspec(deprecated)
#elif defined(CUDNN_WARN_DEPRECATED) && (__cplusplus >= 201402L)
/* C++14 compilers */
#define CUDNN_DEPRECATED [[deprecated]]
#else
/* No support for the deprecated attribute */
#define CUDNN_DEPRECATED
#endif

#if defined(__cplusplus)
extern "C" {
#endif

struct cudnnContext;
typedef struct cudnnContext *cudnnHandle_t;

size_t CUDNNWINAPI
cudnnGetVersion(void);

size_t CUDNNWINAPI
cudnnGetMaxDeviceVersion(void);

/* Returns CUDA Runtime version statically linked against cudnn */
size_t CUDNNWINAPI
cudnnGetCudartVersion(void);

/*
 * CUDNN return codes
 */
typedef enum {
    CUDNN_STATUS_SUCCESS                      = 0,
    CUDNN_STATUS_NOT_INITIALIZED              = 1,
    CUDNN_STATUS_ALLOC_FAILED                 = 2,
    CUDNN_STATUS_BAD_PARAM                    = 3,
    CUDNN_STATUS_INTERNAL_ERROR               = 4,
    CUDNN_STATUS_INVALID_VALUE                = 5,
    CUDNN_STATUS_ARCH_MISMATCH                = 6,
    CUDNN_STATUS_MAPPING_ERROR                = 7,
    CUDNN_STATUS_EXECUTION_FAILED             = 8,
    CUDNN_STATUS_NOT_SUPPORTED                = 9,
    CUDNN_STATUS_LICENSE_ERROR                = 10,
    CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING = 11,
    CUDNN_STATUS_RUNTIME_IN_PROGRESS          = 12,
    CUDNN_STATUS_RUNTIME_FP_OVERFLOW          = 13,
    CUDNN_STATUS_VERSION_MISMATCH             = 14,
} cudnnStatus_t;

/* human-readable error messages */
const char *CUDNNWINAPI
cudnnGetErrorString(cudnnStatus_t status);

/* Forward definition in this version only */
typedef struct cudnnRuntimeTag_t cudnnRuntimeTag_t;

typedef enum {
    CUDNN_ERRQUERY_RAWCODE     = 0,
    CUDNN_ERRQUERY_NONBLOCKING = 1,
    CUDNN_ERRQUERY_BLOCKING    = 2,
} cudnnErrQueryMode_t;

cudnnStatus_t CUDNNWINAPI
cudnnQueryRuntimeError(cudnnHandle_t handle, cudnnStatus_t *rstatus, cudnnErrQueryMode_t mode, cudnnRuntimeTag_t *tag);

#ifndef __LIBRARY_TYPES_H__
typedef enum libraryPropertyType_t { MAJOR_VERSION, MINOR_VERSION, PATCH_LEVEL } libraryPropertyType;
#endif

cudnnStatus_t CUDNNWINAPI
cudnnGetProperty(libraryPropertyType type, int *value);

cudnnStatus_t CUDNNWINAPI
cudnnCreate(cudnnHandle_t *handle);
cudnnStatus_t CUDNNWINAPI
cudnnDestroy(cudnnHandle_t handle);
cudnnStatus_t CUDNNWINAPI
cudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId);
cudnnStatus_t CUDNNWINAPI
cudnnGetStream(cudnnHandle_t handle, cudaStream_t *streamId);

/* Data structures to represent Image/Filter and the Neural Network Layer */
typedef struct cudnnTensorStruct *cudnnTensorDescriptor_t;
typedef struct cudnnPoolingStruct *cudnnPoolingDescriptor_t;
typedef struct cudnnFilterStruct *cudnnFilterDescriptor_t;
typedef struct cudnnLRNStruct *cudnnLRNDescriptor_t;
typedef struct cudnnActivationStruct *cudnnActivationDescriptor_t;
typedef struct cudnnSpatialTransformerStruct *cudnnSpatialTransformerDescriptor_t;
typedef struct cudnnOpTensorStruct *cudnnOpTensorDescriptor_t;
typedef struct cudnnReduceTensorStruct *cudnnReduceTensorDescriptor_t;
typedef struct cudnnCTCLossStruct *cudnnCTCLossDescriptor_t;
typedef struct cudnnTensorTransformStruct *cudnnTensorTransformDescriptor_t;

/*
 * CUDNN data type
 */
typedef enum {
    CUDNN_DATA_FLOAT  = 0,
    CUDNN_DATA_DOUBLE = 1,
    CUDNN_DATA_HALF   = 2,
    CUDNN_DATA_INT8   = 3,
    CUDNN_DATA_INT32  = 4,
    CUDNN_DATA_INT8x4 = 5,
    CUDNN_DATA_UINT8  = 6,
CUDNN_DATA_UINT8x4 = 7, CUDNN_DATA_INT8x32 = 8, CUDNN_DATA_BFLOAT16 = 9, CUDNN_DATA_INT64 = 10, CUDNN_DATA_BOOLEAN = 11, CUDNN_DATA_FP8_E4M3 = 12, CUDNN_DATA_FP8_E5M2 = 13, CUDNN_DATA_FAST_FLOAT_FOR_FP8 = 14, } cudnnDataType_t; /* * CUDNN math type */ typedef enum { CUDNN_DEFAULT_MATH = 0, CUDNN_TENSOR_OP_MATH = 1, CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION = 2, CUDNN_FMA_MATH = 3, } cudnnMathType_t; /* * CUDNN propagate Nan */ typedef enum { CUDNN_NOT_PROPAGATE_NAN = 0, CUDNN_PROPAGATE_NAN = 1, } cudnnNanPropagation_t; /* * CUDNN Determinism */ typedef enum { CUDNN_NON_DETERMINISTIC = 0, CUDNN_DETERMINISTIC = 1, } cudnnDeterminism_t; /* Maximum supported number of tensor dimensions */ #define CUDNN_DIM_MAX 8 /* Create an instance of a generic Tensor descriptor */ cudnnStatus_t CUDNNWINAPI cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc); typedef enum { CUDNN_TENSOR_NCHW = 0, /* row major (wStride = 1, hStride = w) */ CUDNN_TENSOR_NHWC = 1, /* feature maps interleaved ( cStride = 1 )*/ CUDNN_TENSOR_NCHW_VECT_C = 2, /* each image point is vector of element of C, vector length in data type */ } cudnnTensorFormat_t; cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, cudnnDataType_t dataType, /* image data type */ int n, /* number of inputs (batch size) */ int c, /* number of input feature maps */ int h, /* height of input section */ int w); /* width of input section */ cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptorEx(cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, /* image data type */ int n, /* number of inputs (batch size) */ int c, /* number of input feature maps */ int h, /* height of input section */ int w, /* width of input section */ int nStride, int cStride, int hStride, int wStride); cudnnStatus_t CUDNNWINAPI cudnnGetTensor4dDescriptor(const cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t *dataType, /* image data type */ int *n, /* number of inputs (batch size) */ int *c, /* number of input feature maps */ int *h, /* height of input section */ int *w, /* width of input section */ int *nStride, int *cStride, int *hStride, int *wStride); cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptor(cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, int nbDims, const int dimA[], const int strideA[]); cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptorEx(cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, cudnnDataType_t dataType, int nbDims, const int dimA[]); cudnnStatus_t CUDNNWINAPI cudnnGetTensorNdDescriptor(const cudnnTensorDescriptor_t tensorDesc, int nbDimsRequested, cudnnDataType_t *dataType, int *nbDims, int dimA[], int strideA[]); cudnnStatus_t CUDNNWINAPI cudnnGetTensorSizeInBytes(const cudnnTensorDescriptor_t tensorDesc, size_t *size); /* PixelOffset( n, c, h, w ) = n *input_stride + c * feature_stride + h * h_stride + w * w_stride 1)Example of all images in row major order one batch of features after the other (with an optional padding on row) input_stride : c x h x h_stride feature_stride : h x h_stride h_stride : >= w ( h_stride = w if no padding) w_stride : 1 2)Example of all images in row major with features maps interleaved input_stride : c x h x h_stride feature_stride : 1 h_stride : w x c w_stride : c 3)Example of all images in column major order one batch of features after the other (with optional padding on column) input_stride : c x w x w_stride feature_stride : w x w_stride h_stride : 1 w_stride : >= h */ /* Destroy an instance of Tensor4d 
descriptor */ cudnnStatus_t CUDNNWINAPI cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc); /* Fold/unfold transforms */ typedef enum { CUDNN_TRANSFORM_FOLD = 0U, CUDNN_TRANSFORM_UNFOLD = 1U, } cudnnFoldingDirection_t; /** Create a destination descriptor for cudnnTransformTensor */ cudnnStatus_t CUDNNWINAPI cudnnInitTransformDest(const cudnnTensorTransformDescriptor_t transformDesc, const cudnnTensorDescriptor_t srcDesc, cudnnTensorDescriptor_t destDesc, size_t *destSizeInBytes); /** Create an empty tensor transform descriptor */ cudnnStatus_t CUDNNWINAPI cudnnCreateTensorTransformDescriptor(cudnnTensorTransformDescriptor_t *transformDesc); /** Initialize a previously created tensor transform descriptor. */ cudnnStatus_t CUDNNWINAPI cudnnSetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc, const uint32_t nbDims, const cudnnTensorFormat_t destFormat, const int32_t padBeforeA[], const int32_t padAfterA[], const uint32_t foldA[], const cudnnFoldingDirection_t direction); /** * Retrieves the values stored in a previously initialized tensor transform * descriptor. */ cudnnStatus_t CUDNNWINAPI cudnnGetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc, uint32_t nbDimsRequested, cudnnTensorFormat_t *destFormat, int32_t padBeforeA[], int32_t padAfterA[], uint32_t foldA[], cudnnFoldingDirection_t *direction); /** * Destroys a previously created tensor transform descriptor. */ cudnnStatus_t CUDNNWINAPI cudnnDestroyTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc); /* Tensor layout conversion helper (y = alpha * x + beta * y) */ cudnnStatus_t CUDNNWINAPI cudnnTransformTensor(cudnnHandle_t handle, const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, const cudnnTensorDescriptor_t yDesc, void *y); cudnnStatus_t CUDNNWINAPI cudnnTransformTensorEx(cudnnHandle_t handle, const cudnnTensorTransformDescriptor_t transDesc, const void *alpha, const cudnnTensorDescriptor_t srcDesc, const void *srcData, const void *beta, const cudnnTensorDescriptor_t destDesc, void *destData); /* Tensor Bias addition : C = alpha * A + beta * C */ cudnnStatus_t CUDNNWINAPI cudnnAddTensor(cudnnHandle_t handle, const void *alpha, const cudnnTensorDescriptor_t aDesc, const void *A, const void *beta, const cudnnTensorDescriptor_t cDesc, void *C); /* * CUDNN OpTensor op type */ typedef enum { CUDNN_OP_TENSOR_ADD = 0, CUDNN_OP_TENSOR_MUL = 1, CUDNN_OP_TENSOR_MIN = 2, CUDNN_OP_TENSOR_MAX = 3, CUDNN_OP_TENSOR_SQRT = 4, CUDNN_OP_TENSOR_NOT = 5, } cudnnOpTensorOp_t; cudnnStatus_t CUDNNWINAPI cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc); cudnnStatus_t CUDNNWINAPI cudnnSetOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t opTensorOp, cudnnDataType_t opTensorCompType, cudnnNanPropagation_t opTensorNanOpt); cudnnStatus_t CUDNNWINAPI cudnnGetOpTensorDescriptor(const cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t *opTensorOp, cudnnDataType_t *opTensorCompType, cudnnNanPropagation_t *opTensorNanOpt); cudnnStatus_t CUDNNWINAPI cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc); /* Tensor operation : C = op( alpha1 * A, alpha2 * B ) + beta * C */ /* B tensor is ignored for CUDNN_OP_TENSOR_SQRT, CUDNN_OP_TENSOR_NOT. 
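
   Illustrative sketch (not part of the API): handle, the tensor descriptors
   aDesc/bDesc/cDesc, and the device buffers dA/dB/dC are assumed to already
   exist. Elementwise C = 1*A + 1*B with float compute type:

       cudnnOpTensorDescriptor_t opDesc;
       float one = 1.0f, zero = 0.0f;
       cudnnCreateOpTensorDescriptor(&opDesc);
       cudnnSetOpTensorDescriptor(opDesc, CUDNN_OP_TENSOR_ADD, CUDNN_DATA_FLOAT, CUDNN_NOT_PROPAGATE_NAN);
       cudnnOpTensor(handle, opDesc, &one, aDesc, dA, &one, bDesc, dB, &zero, cDesc, dC);
       cudnnDestroyOpTensorDescriptor(opDesc);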
*/ cudnnStatus_t CUDNNWINAPI cudnnOpTensor(cudnnHandle_t handle, const cudnnOpTensorDescriptor_t opTensorDesc, const void *alpha1, const cudnnTensorDescriptor_t aDesc, const void *A, const void *alpha2, const cudnnTensorDescriptor_t bDesc, const void *B, const void *beta, const cudnnTensorDescriptor_t cDesc, void *C); /* * CUDNN ReduceTensor op type */ typedef enum { CUDNN_REDUCE_TENSOR_ADD = 0, CUDNN_REDUCE_TENSOR_MUL = 1, CUDNN_REDUCE_TENSOR_MIN = 2, CUDNN_REDUCE_TENSOR_MAX = 3, CUDNN_REDUCE_TENSOR_AMAX = 4, CUDNN_REDUCE_TENSOR_AVG = 5, CUDNN_REDUCE_TENSOR_NORM1 = 6, CUDNN_REDUCE_TENSOR_NORM2 = 7, CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS = 8, } cudnnReduceTensorOp_t; /* * CUDNN ReduceTensor indices type */ typedef enum { CUDNN_REDUCE_TENSOR_NO_INDICES = 0, CUDNN_REDUCE_TENSOR_FLATTENED_INDICES = 1, } cudnnReduceTensorIndices_t; /* * CUDNN tensor indices type size (all unsigned) * Currently not supported, default is 32 bit unsigned. */ typedef enum { CUDNN_32BIT_INDICES = 0, CUDNN_64BIT_INDICES = 1, CUDNN_16BIT_INDICES = 2, CUDNN_8BIT_INDICES = 3, } cudnnIndicesType_t; cudnnStatus_t CUDNNWINAPI cudnnCreateReduceTensorDescriptor(cudnnReduceTensorDescriptor_t *reduceTensorDesc); cudnnStatus_t CUDNNWINAPI cudnnSetReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc, cudnnReduceTensorOp_t reduceTensorOp, cudnnDataType_t reduceTensorCompType, cudnnNanPropagation_t reduceTensorNanOpt, cudnnReduceTensorIndices_t reduceTensorIndices, cudnnIndicesType_t reduceTensorIndicesType); cudnnStatus_t CUDNNWINAPI cudnnGetReduceTensorDescriptor(const cudnnReduceTensorDescriptor_t reduceTensorDesc, cudnnReduceTensorOp_t *reduceTensorOp, cudnnDataType_t *reduceTensorCompType, cudnnNanPropagation_t *reduceTensorNanOpt, cudnnReduceTensorIndices_t *reduceTensorIndices, cudnnIndicesType_t *reduceTensorIndicesType); cudnnStatus_t CUDNNWINAPI cudnnDestroyReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc); /* Helper function to return the minimum size of the index space to be passed to the reduction given the input and * output tensors */ cudnnStatus_t CUDNNWINAPI cudnnGetReductionIndicesSize(cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, size_t *sizeInBytes); /* Helper function to return the minimum size of the workspace to be passed to the reduction given the input and output * tensors */ cudnnStatus_t CUDNNWINAPI cudnnGetReductionWorkspaceSize(cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, size_t *sizeInBytes); /* Tensor operation : C = reduce op( alpha * A ) + beta * C */ /* The NaN propagation enum applies to only the min and max reduce ops; the other reduce ops propagate NaN as usual. */ /* The indices space is ignored for reduce ops other than min or max. 
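
   Illustrative sketch (not part of the API): handle, aDesc, cDesc and the
   device buffers dA, dC are assumed to exist, with cDesc sized 1 along each
   reduced dimension. Query the workspace, then average A into C:

       cudnnReduceTensorDescriptor_t reduceDesc;
       size_t wsBytes = 0;
       void *dWorkspace = NULL;
       float one = 1.0f, zero = 0.0f;
       cudnnCreateReduceTensorDescriptor(&reduceDesc);
       cudnnSetReduceTensorDescriptor(reduceDesc, CUDNN_REDUCE_TENSOR_AVG, CUDNN_DATA_FLOAT,
                                      CUDNN_NOT_PROPAGATE_NAN, CUDNN_REDUCE_TENSOR_NO_INDICES,
                                      CUDNN_32BIT_INDICES);
       cudnnGetReductionWorkspaceSize(handle, reduceDesc, aDesc, cDesc, &wsBytes);
       cudaMalloc(&dWorkspace, wsBytes);
       cudnnReduceTensor(handle, reduceDesc, NULL, 0, dWorkspace, wsBytes,
                         &one, aDesc, dA, &zero, cDesc, dC);
       cudaFree(dWorkspace);
       cudnnDestroyReduceTensorDescriptor(reduceDesc);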
*/ cudnnStatus_t CUDNNWINAPI cudnnReduceTensor(cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, void *indices, size_t indicesSizeInBytes, void *workspace, size_t workspaceSizeInBytes, const void *alpha, const cudnnTensorDescriptor_t aDesc, const void *A, const void *beta, const cudnnTensorDescriptor_t cDesc, void *C); /* Set all values of a tensor to a given value : y[i] = value[0] */ cudnnStatus_t CUDNNWINAPI cudnnSetTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *valuePtr); /* Scale all values of a tensor by a given factor : y[i] = alpha * y[i] */ cudnnStatus_t CUDNNWINAPI cudnnScaleTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *alpha); /* Create an instance of FilterStruct */ cudnnStatus_t CUDNNWINAPI cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc); cudnnStatus_t CUDNNWINAPI cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc, cudnnDataType_t dataType, /* image data type */ cudnnTensorFormat_t format, int k, /* number of output feature maps */ int c, /* number of input feature maps */ int h, /* height of each input filter */ int w); /* width of each input filter */ cudnnStatus_t CUDNNWINAPI cudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc, cudnnDataType_t *dataType, /* image data type */ cudnnTensorFormat_t *format, int *k, /* number of output feature maps */ int *c, /* number of input feature maps */ int *h, /* height of each input filter */ int *w); /* width of each input filter */ cudnnStatus_t CUDNNWINAPI cudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc, cudnnDataType_t dataType, /* image data type */ cudnnTensorFormat_t format, int nbDims, const int filterDimA[]); cudnnStatus_t CUDNNWINAPI cudnnGetFilterNdDescriptor(const cudnnFilterDescriptor_t filterDesc, int nbDimsRequested, cudnnDataType_t *dataType, /* image data type */ cudnnTensorFormat_t *format, int *nbDims, int filterDimA[]); cudnnStatus_t CUDNNWINAPI cudnnGetFilterSizeInBytes(const cudnnFilterDescriptor_t filterDesc, size_t *size); cudnnStatus_t CUDNNWINAPI cudnnTransformFilter(cudnnHandle_t handle, const cudnnTensorTransformDescriptor_t transDesc, const void *alpha, const cudnnFilterDescriptor_t srcDesc, const void *srcData, const void *beta, const cudnnFilterDescriptor_t destDesc, void *destData); cudnnStatus_t CUDNNWINAPI cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc); /* * softmax algorithm */ typedef enum { CUDNN_SOFTMAX_FAST = 0, /* straightforward implementation */ CUDNN_SOFTMAX_ACCURATE = 1, /* subtract max from every point to avoid overflow */ CUDNN_SOFTMAX_LOG = 2 } cudnnSoftmaxAlgorithm_t; typedef enum { CUDNN_SOFTMAX_MODE_INSTANCE = 0, /* compute the softmax over all C, H, W for each N */ CUDNN_SOFTMAX_MODE_CHANNEL = 1 /* compute the softmax over all C for each H, W, N */ } cudnnSoftmaxMode_t; /* Softmax functions: All of the form "output = alpha * Op(inputs) + beta * output" */ /* Function to perform forward softmax */ cudnnStatus_t CUDNNWINAPI cudnnSoftmaxForward(cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, const cudnnTensorDescriptor_t yDesc, void *y); /* * pooling mode */ typedef enum { CUDNN_POOLING_MAX = 0, CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING = 1, /* count for average includes padded values */ CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING = 2, /* count for average does not include padded values */ 
CUDNN_POOLING_MAX_DETERMINISTIC = 3 } cudnnPoolingMode_t; /* Create an instance of pooling descriptor */ cudnnStatus_t CUDNNWINAPI cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc); cudnnStatus_t CUDNNWINAPI cudnnSetPooling2dDescriptor(cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t mode, cudnnNanPropagation_t maxpoolingNanOpt, int windowHeight, int windowWidth, int verticalPadding, int horizontalPadding, int verticalStride, int horizontalStride); cudnnStatus_t CUDNNWINAPI cudnnGetPooling2dDescriptor(const cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t *mode, cudnnNanPropagation_t *maxpoolingNanOpt, int *windowHeight, int *windowWidth, int *verticalPadding, int *horizontalPadding, int *verticalStride, int *horizontalStride); cudnnStatus_t CUDNNWINAPI cudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc, const cudnnPoolingMode_t mode, const cudnnNanPropagation_t maxpoolingNanOpt, int nbDims, const int windowDimA[], const int paddingA[], const int strideA[]); cudnnStatus_t CUDNNWINAPI cudnnGetPoolingNdDescriptor(const cudnnPoolingDescriptor_t poolingDesc, int nbDimsRequested, cudnnPoolingMode_t *mode, cudnnNanPropagation_t *maxpoolingNanOpt, int *nbDims, int windowDimA[], int paddingA[], int strideA[]); cudnnStatus_t CUDNNWINAPI cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, const cudnnTensorDescriptor_t inputTensorDesc, int nbDims, int outputTensorDimA[]); cudnnStatus_t CUDNNWINAPI cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, const cudnnTensorDescriptor_t inputTensorDesc, int *n, int *c, int *h, int *w); /* Destroy an instance of pooling descriptor */ cudnnStatus_t CUDNNWINAPI cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc); /* Pooling functions: All of the form "output = alpha * Op(inputs) + beta * output" */ /* Function to perform forward pooling */ cudnnStatus_t CUDNNWINAPI cudnnPoolingForward(cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, const cudnnTensorDescriptor_t yDesc, void *y); /* * activation mode */ typedef enum { CUDNN_ACTIVATION_SIGMOID = 0, CUDNN_ACTIVATION_RELU = 1, CUDNN_ACTIVATION_TANH = 2, CUDNN_ACTIVATION_CLIPPED_RELU = 3, CUDNN_ACTIVATION_ELU = 4, CUDNN_ACTIVATION_IDENTITY = 5, CUDNN_ACTIVATION_SWISH = 6 } cudnnActivationMode_t; /* Activation functions: All of the form "output = alpha * Op(inputs) + beta * output" */ cudnnStatus_t CUDNNWINAPI cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc); cudnnStatus_t CUDNNWINAPI cudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t mode, cudnnNanPropagation_t reluNanOpt, double coef); /* ceiling for clipped RELU, alpha for ELU */ cudnnStatus_t CUDNNWINAPI cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t *mode, cudnnNanPropagation_t *reluNanOpt, double *coef); /* ceiling for clipped RELU, alpha for ELU */ cudnnStatus_t CUDNNWINAPI cudnnSetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double swish_beta); cudnnStatus_t CUDNNWINAPI cudnnGetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double *swish_beta); cudnnStatus_t CUDNNWINAPI cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc); /* Function to perform forward activation */ cudnnStatus_t CUDNNWINAPI cudnnActivationForward(cudnnHandle_t handle, 
cudnnActivationDescriptor_t activationDesc, const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, const cudnnTensorDescriptor_t yDesc, void *y); /* * Create an instance of LRN (Local Response Normalization) descriptor * Uses lrnN=5, lrnAlpha=1e-4, lrnBeta=0.75, lrnK=2.0 as defaults from Krizhevsky'12 ImageNet paper */ cudnnStatus_t CUDNNWINAPI cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc); #define CUDNN_LRN_MIN_N 1 /* minimum allowed lrnN */ #define CUDNN_LRN_MAX_N 16 /* maximum allowed lrnN */ #define CUDNN_LRN_MIN_K 1e-5 /* minimum allowed lrnK */ #define CUDNN_LRN_MIN_BETA 0.01 /* minimum allowed lrnBeta */ /* LRN layer mode */ typedef enum { CUDNN_LRN_CROSS_CHANNEL_DIM1 = 0, /* Normalize across tensor's dimA[1] dimension */ } cudnnLRNMode_t; /* * Uses a window [center-lookBehind, center+lookAhead], where * lookBehind = floor( (lrnN-1)/2 ), lookAhead = lrnN-lookBehind-1. * Values of double parameters cast to tensor data type. */ cudnnStatus_t CUDNNWINAPI cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned lrnN, double lrnAlpha, double lrnBeta, double lrnK); /* * Retrieve the settings currently stored in an LRN layer descriptor * Any of the provided pointers can be NULL (no corresponding value will be returned) */ cudnnStatus_t CUDNNWINAPI cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned *lrnN, double *lrnAlpha, double *lrnBeta, double *lrnK); /* Destroy an instance of LRN descriptor */ cudnnStatus_t CUDNNWINAPI cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc); /* LRN functions: output = alpha * normalize(x) + beta * old_y */ /* LRN cross-channel forward computation. Double parameters cast to tensor data type */ cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelForward(cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, const cudnnTensorDescriptor_t yDesc, void *y); typedef enum { CUDNN_DIVNORM_PRECOMPUTED_MEANS = 0, } cudnnDivNormMode_t; /* LCN/divisive normalization functions: y = alpha * normalize(x) + beta * y */ cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationForward(cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnDivNormMode_t mode, const void *alpha, const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */ const void *x, const void *means, /* if NULL, means are assumed to be zero */ void *temp, void *temp2, const void *beta, const cudnnTensorDescriptor_t yDesc, void *y); typedef enum { /* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */ CUDNN_BATCHNORM_PER_ACTIVATION = 0, /* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */ CUDNN_BATCHNORM_SPATIAL = 1, /* * bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors). * May be faster than CUDNN_BATCHNORM_SPATIAL but imposes some limits on the range of values */ CUDNN_BATCHNORM_SPATIAL_PERSISTENT = 2, } cudnnBatchNormMode_t; #define CUDNN_BN_MIN_EPSILON 0.0 /* Minimum epsilon allowed to be used in the Batch Normalization formula */ /* * Derives a tensor descriptor from layer data descriptor for BatchNormalization * scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for * bnScaleBiasMeanVarDesc and bnScaleBiasDiffDesc in Batch Normalization forward and backward functions. 
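 *
 * Illustrative sketch (not part of the API): handle, xDesc, yDesc and the
 * device buffers dX, dY, dScale, dBias, dMean, dVar are assumed to exist,
 * the last four laid out as described by the derived descriptor. Derive the
 * parameter descriptor, then run spatial batch norm in inference mode using
 * cudnnBatchNormalizationForwardInference() declared further below:
 *
 *     float one = 1.0f, zero = 0.0f;
 *     cudnnTensorDescriptor_t bnDesc;
 *     cudnnCreateTensorDescriptor(&bnDesc);
 *     cudnnDeriveBNTensorDescriptor(bnDesc, xDesc, CUDNN_BATCHNORM_SPATIAL);
 *     cudnnBatchNormalizationForwardInference(handle, CUDNN_BATCHNORM_SPATIAL,
 *         &one, &zero, xDesc, dX, yDesc, dY, bnDesc,
 *         dScale, dBias, dMean, dVar, 1e-5);
 *     cudnnDestroyTensorDescriptor(bnDesc);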
 */
cudnnStatus_t CUDNNWINAPI
cudnnDeriveBNTensorDescriptor(cudnnTensorDescriptor_t derivedBnDesc,
                              const cudnnTensorDescriptor_t xDesc,
                              cudnnBatchNormMode_t mode);

typedef enum {
    CUDNN_BATCHNORM_OPS_BN                = 0, /* do batch normalization only */
    CUDNN_BATCHNORM_OPS_BN_ACTIVATION     = 1, /* do batchNorm, then activation */
    CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION = 2, /* do batchNorm, then elemWiseAdd, then activation */
} cudnnBatchNormOps_t;

/*
 * Performs Batch Normalization during Inference:
 * y[i] = bnScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + bnBias[k]
 * with bnScale, bnBias, runningMean, runningInvVariance tensors indexed
 * according to spatial or per-activation mode. Refer to
 * cudnnBatchNormalizationForwardTraining (declared in cudnn_ops_train.h) for
 * notes on function arguments.
 */
cudnnStatus_t CUDNNWINAPI
cudnnBatchNormalizationForwardInference(cudnnHandle_t handle,
                                        cudnnBatchNormMode_t mode,
                                        const void *alpha, /* alpha[0] = result blend factor */
                                        const void *beta,  /* beta[0] = dest layer blend factor */
                                        const cudnnTensorDescriptor_t xDesc,
                                        const void *x, /* NxCxHxW */
                                        const cudnnTensorDescriptor_t yDesc,
                                        void *y, /* NxCxHxW */
                                        const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
                                        const void *bnScale,
                                        const void *bnBias,
                                        const void *estimatedMean,
                                        const void *estimatedVariance,
                                        double epsilon);

typedef enum {
    /* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */
    CUDNN_NORM_PER_ACTIVATION = 0,

    /* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
    CUDNN_NORM_PER_CHANNEL = 1,
} cudnnNormMode_t;

typedef enum { CUDNN_NORM_ALGO_STANDARD = 0, CUDNN_NORM_ALGO_PERSIST = 1 } cudnnNormAlgo_t;

/*
 * Derives a tensor descriptor from layer data descriptor for Normalization
 * scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
 * normScaleBiasMeanVarDesc and normScaleBiasDiffDesc in Normalization forward and backward functions.
 */
cudnnStatus_t CUDNNWINAPI
cudnnDeriveNormTensorDescriptor(cudnnTensorDescriptor_t derivedNormScaleBiasDesc,
                                cudnnTensorDescriptor_t derivedNormMeanVarDesc,
                                const cudnnTensorDescriptor_t xDesc,
                                cudnnNormMode_t mode,
                                int groupCnt); /* Placeholder for future work; should be set to 1 for now */

typedef enum {
    CUDNN_NORM_OPS_NORM                = 0, /* do normalization only */
    CUDNN_NORM_OPS_NORM_ACTIVATION     = 1, /* do Norm, then activation */
    CUDNN_NORM_OPS_NORM_ADD_ACTIVATION = 2, /* do Norm, then elemWiseAdd, then activation */
} cudnnNormOps_t;

/*
 * Performs Normalization during Inference:
 * y[i] = normScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + normBias[k]
 * with normScale, normBias, runningMean, runningInvVariance tensors indexed
 * according to per-channel or per-activation mode. Refer to
 * cudnnNormalizationForwardTraining (declared in cudnn_ops_train.h) for notes
 * on function arguments.
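 *
 * Illustrative sketch (not part of the API) for the function declared just
 * below: handle, the descriptors, the device buffers, and float one/zero
 * blend factors are assumed to exist; normOps/algo are kept at their basic
 * settings, groupCnt is 1 as required above, and NULL is passed for the
 * z/activation arguments that CUDNN_NORM_OPS_NORM does not use (an
 * assumption of this sketch):
 *
 *     cudnnNormalizationForwardInference(handle, CUDNN_NORM_PER_CHANNEL,
 *         CUDNN_NORM_OPS_NORM, CUDNN_NORM_ALGO_STANDARD, &one, &zero,
 *         xDesc, dX, normScaleBiasDesc, dScale, dBias,
 *         normMeanVarDesc, dMean, dVar, NULL, NULL, NULL,
 *         yDesc, dY, 1e-5, 1);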
 */
cudnnStatus_t CUDNNWINAPI
cudnnNormalizationForwardInference(cudnnHandle_t handle,
                                   cudnnNormMode_t mode,
                                   cudnnNormOps_t normOps,
                                   cudnnNormAlgo_t algo,
                                   const void *alpha, /* alpha[0] = result blend factor */
                                   const void *beta,  /* beta[0] = dest layer blend factor */
                                   const cudnnTensorDescriptor_t xDesc,
                                   const void *x, /* NxCxHxW */
                                   const cudnnTensorDescriptor_t normScaleBiasDesc,
                                   const void *normScale,
                                   const void *normBias,
                                   const cudnnTensorDescriptor_t normMeanVarDesc,
                                   const void *estimatedMean,
                                   const void *estimatedVariance,
                                   const cudnnTensorDescriptor_t zDesc,
                                   const void *z,
                                   cudnnActivationDescriptor_t activationDesc,
                                   const cudnnTensorDescriptor_t yDesc,
                                   void *y, /* NxCxHxW */
                                   double epsilon,
                                   int groupCnt); /* Placeholder for future work; should be set to 1 for now */

/* APIs for spatial transformer network */
typedef enum {
    CUDNN_SAMPLER_BILINEAR = 0,
} cudnnSamplerType_t;

cudnnStatus_t CUDNNWINAPI
cudnnCreateSpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t *stDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSetSpatialTransformerNdDescriptor(cudnnSpatialTransformerDescriptor_t stDesc,
                                       cudnnSamplerType_t samplerType,
                                       cudnnDataType_t dataType,
                                       const int nbDims,
                                       const int dimA[]);

cudnnStatus_t CUDNNWINAPI
cudnnDestroySpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t stDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSpatialTfGridGeneratorForward(cudnnHandle_t handle,
                                   const cudnnSpatialTransformerDescriptor_t stDesc,
                                   const void *theta,
                                   void *grid);

cudnnStatus_t CUDNNWINAPI
cudnnSpatialTfSamplerForward(cudnnHandle_t handle,
                             cudnnSpatialTransformerDescriptor_t stDesc,
                             const void *alpha,
                             const cudnnTensorDescriptor_t xDesc,
                             const void *x,
                             const void *grid,
                             const void *beta,
                             cudnnTensorDescriptor_t yDesc,
                             void *y);

typedef struct cudnnDropoutStruct *cudnnDropoutDescriptor_t;

cudnnStatus_t CUDNNWINAPI
cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc);

cudnnStatus_t CUDNNWINAPI
cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc);

/* Helper function to determine size of the states to be passed to cudnnSetDropoutDescriptor */
cudnnStatus_t CUDNNWINAPI
cudnnDropoutGetStatesSize(cudnnHandle_t handle, size_t *sizeInBytes);

/* Helper function to determine size of the reserve space to be passed to dropout forward/backward calls */
cudnnStatus_t CUDNNWINAPI
cudnnDropoutGetReserveSpaceSize(cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes);

cudnnStatus_t CUDNNWINAPI
cudnnSetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
                          cudnnHandle_t handle,
                          float dropout,
                          void *states,
                          size_t stateSizeInBytes,
                          unsigned long long seed);

/* Restores the dropout descriptor to a previously saved-off state */
cudnnStatus_t CUDNNWINAPI
cudnnRestoreDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
                              cudnnHandle_t handle,
                              float dropout,
                              void *states,
                              size_t stateSizeInBytes,
                              unsigned long long seed);

cudnnStatus_t CUDNNWINAPI
cudnnGetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
                          cudnnHandle_t handle,
                          float *dropout,
                          void **states,
                          unsigned long long *seed);

cudnnStatus_t CUDNNWINAPI
cudnnDropoutForward(cudnnHandle_t handle,
                    const cudnnDropoutDescriptor_t dropoutDesc,
                    const cudnnTensorDescriptor_t xdesc,
                    const void *x,
                    const cudnnTensorDescriptor_t ydesc,
                    void *y,
                    void *reserveSpace,
                    size_t reserveSpaceSizeInBytes);

/* TODO: remove */
typedef struct cudnnAlgorithmStruct *cudnnAlgorithmDescriptor_t;
typedef struct cudnnAlgorithmPerformanceStruct *cudnnAlgorithmPerformance_t;

/* TODO: move these enums out to the appropriate submodule */
typedef enum {
    CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM =
0, CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1, CUDNN_CONVOLUTION_FWD_ALGO_GEMM = 2, CUDNN_CONVOLUTION_FWD_ALGO_DIRECT = 3, CUDNN_CONVOLUTION_FWD_ALGO_FFT = 4, CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING = 5, CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD = 6, CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED = 7, CUDNN_CONVOLUTION_FWD_ALGO_COUNT = 8 } cudnnConvolutionFwdAlgo_t; typedef enum { CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 = 0, /* non-deterministic */ CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 = 1, CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT = 2, CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 = 3, /* non-deterministic */ CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD = 4, /* not implemented */ CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED = 5, CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING = 6, CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT = 7 } cudnnConvolutionBwdFilterAlgo_t; typedef enum { CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 = 0, /* non-deterministic */ CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 = 1, CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT = 2, CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING = 3, CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD = 4, CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED = 5, CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT = 6 } cudnnConvolutionBwdDataAlgo_t; typedef enum { CUDNN_RNN_ALGO_STANDARD = 0, CUDNN_RNN_ALGO_PERSIST_STATIC = 1, CUDNN_RNN_ALGO_PERSIST_DYNAMIC = 2, CUDNN_RNN_ALGO_PERSIST_STATIC_SMALL_H = 3, CUDNN_RNN_ALGO_COUNT = 4, } cudnnRNNAlgo_t; typedef enum { CUDNN_CTC_LOSS_ALGO_DETERMINISTIC = 0, CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC = 1 } cudnnCTCLossAlgo_t; /* TODO: remove */ typedef struct cudnnAlgorithmUnionStruct { union Algorithm { cudnnConvolutionFwdAlgo_t convFwdAlgo; cudnnConvolutionBwdFilterAlgo_t convBwdFilterAlgo; cudnnConvolutionBwdDataAlgo_t convBwdDataAlgo; cudnnRNNAlgo_t RNNAlgo; cudnnCTCLossAlgo_t CTCLossAlgo; } algo; } cudnnAlgorithm_t; CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI cudnnCreateAlgorithmDescriptor(cudnnAlgorithmDescriptor_t *algoDesc); CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI cudnnSetAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t algorithm); CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t *algorithm); CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI cudnnCopyAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t src, cudnnAlgorithmDescriptor_t dest); CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI cudnnDestroyAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc); CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI cudnnCreateAlgorithmPerformance(cudnnAlgorithmPerformance_t *algoPerf, int numberToCreate); CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI cudnnSetAlgorithmPerformance(cudnnAlgorithmPerformance_t algoPerf, cudnnAlgorithmDescriptor_t algoDesc, cudnnStatus_t status, float time, size_t memory); CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmPerformance(const cudnnAlgorithmPerformance_t algoPerf, cudnnAlgorithmDescriptor_t *algoDesc, cudnnStatus_t *status, float *time, size_t *memory); CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI cudnnDestroyAlgorithmPerformance(cudnnAlgorithmPerformance_t *algoPerf, int numberToDestroy); CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmSpaceSize(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, size_t *algoSpaceSizeInBytes); CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI cudnnSaveAlgorithm(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, void *algoSpace, size_t algoSpaceSizeInBytes); CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI 
cudnnRestoreAlgorithm(cudnnHandle_t handle,
                      void *algoSpace,
                      size_t algoSpaceSizeInBytes,
                      cudnnAlgorithmDescriptor_t algoDesc);

typedef enum {
    CUDNN_SEV_FATAL   = 0,
    CUDNN_SEV_ERROR   = 1,
    CUDNN_SEV_WARNING = 2,
    CUDNN_SEV_INFO    = 3,
} cudnnSeverity_t;

/* Message masks to be used with cudnnSetCallback() */
#define CUDNN_SEV_ERROR_EN (1U << CUDNN_SEV_ERROR)
#define CUDNN_SEV_WARNING_EN (1U << CUDNN_SEV_WARNING)
#define CUDNN_SEV_INFO_EN (1U << CUDNN_SEV_INFO)

/* struct containing useful information for each API call */
typedef struct cudnnDebugStruct {
    unsigned cudnn_version;
    cudnnStatus_t cudnnStatus;
    unsigned time_sec;      /* epoch time in seconds */
    unsigned time_usec;     /* microseconds part of epoch time */
    unsigned time_delta;    /* time since start in seconds */
    cudnnHandle_t handle;   /* cudnn handle */
    cudaStream_t stream;    /* cuda stream ID */
    unsigned long long pid; /* process ID */
    unsigned long long tid; /* thread ID */
    int cudaDeviceId;       /* CUDA device ID */
    int reserved[15];       /* reserved for future use */
} cudnnDebug_t;

typedef void (*cudnnCallback_t)(cudnnSeverity_t sev, void *udata, const cudnnDebug_t *dbg, const char *msg);

cudnnStatus_t CUDNNWINAPI
cudnnSetCallback(unsigned mask, void *udata, cudnnCallback_t fptr);

cudnnStatus_t CUDNNWINAPI
cudnnGetCallback(unsigned *mask, void **udata, cudnnCallback_t *fptr);

/*
 * \brief Cross-library version checker.
 * This function is implemented differently in each sub-library. Each sublib
 * checks whether its own version matches that of its dependencies.
 * \returns CUDNN_STATUS_SUCCESS if the version check passes,
 *          CUDNN_STATUS_VERSION_MISMATCH if the versions are inconsistent.
 */
cudnnStatus_t CUDNNWINAPI
cudnnOpsInferVersionCheck(void);

#if defined(__cplusplus)
}
#endif

#endif /* CUDNN_OPS_INFER_H_ */
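
/*
 * Illustrative end-to-end sketch (not part of the cuDNN API). The block below
 * is disabled with #if 0; the helper checkCudnn() and the 1x3x4x4 tensor
 * shape are assumptions for the example only. It creates a handle, describes
 * a 4D NCHW float tensor, and runs an accurate softmax over the channel
 * dimension using only functions declared in this header.
 */
#if 0
#include <cudnn.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include <stdlib.h>

static void
checkCudnn(cudnnStatus_t s)
{
    if (s != CUDNN_STATUS_SUCCESS) {
        fprintf(stderr, "cuDNN error: %s\n", cudnnGetErrorString(s));
        exit(EXIT_FAILURE);
    }
}

int
main(void)
{
    cudnnHandle_t handle;
    cudnnTensorDescriptor_t desc;
    const int n = 1, c = 3, h = 4, w = 4;
    const float alpha = 1.0f, beta = 0.0f;
    float *d_x = NULL, *d_y = NULL;

    checkCudnn(cudnnCreate(&handle));
    checkCudnn(cudnnCreateTensorDescriptor(&desc));
    checkCudnn(cudnnSetTensor4dDescriptor(desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, n, c, h, w));

    cudaMalloc((void **)&d_x, sizeof(float) * n * c * h * w);
    cudaMalloc((void **)&d_y, sizeof(float) * n * c * h * w);
    /* ... fill d_x with input data ... */

    checkCudnn(cudnnSoftmaxForward(handle, CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL,
                                   &alpha, desc, d_x, &beta, desc, d_y));

    cudaFree(d_x);
    cudaFree(d_y);
    checkCudnn(cudnnDestroyTensorDescriptor(desc));
    checkCudnn(cudnnDestroy(handle));
    return 0;
}
#endif /* 0 */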