// // NVIDIA_COPYRIGHT_BEGIN // // Copyright (c) 2014-2023, NVIDIA CORPORATION. All rights reserved. // // NVIDIA CORPORATION and its licensors retain all intellectual property // and proprietary rights in and to this software, related documentation // and any modifications thereto. Any use, reproduction, disclosure or // distribution of this software and related documentation without an express // license agreement from NVIDIA CORPORATION is strictly prohibited. // // NVIDIA_COPYRIGHT_END // #ifndef __NVRTC_H__ #define __NVRTC_H__ #ifdef __cplusplus extern "C" { #endif /* __cplusplus */ #include /*************************************************************************//** * * \defgroup error Error Handling * * NVRTC defines the following enumeration type and function for API call * error handling. * ****************************************************************************/ /** * \ingroup error * \brief The enumerated type nvrtcResult defines API call result codes. * NVRTC API functions return nvrtcResult to indicate the call * result. */ typedef enum { NVRTC_SUCCESS = 0, NVRTC_ERROR_OUT_OF_MEMORY = 1, NVRTC_ERROR_PROGRAM_CREATION_FAILURE = 2, NVRTC_ERROR_INVALID_INPUT = 3, NVRTC_ERROR_INVALID_PROGRAM = 4, NVRTC_ERROR_INVALID_OPTION = 5, NVRTC_ERROR_COMPILATION = 6, NVRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7, NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8, NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9, NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10, NVRTC_ERROR_INTERNAL_ERROR = 11, NVRTC_ERROR_TIME_FILE_WRITE_FAILED = 12 } nvrtcResult; /** * \ingroup error * \brief nvrtcGetErrorString is a helper function that returns a string * describing the given nvrtcResult code, e.g., NVRTC_SUCCESS to * \c "NVRTC_SUCCESS". * For unrecognized enumeration values, it returns * \c "NVRTC_ERROR unknown". * * \param [in] result CUDA Runtime Compilation API result code. * \return Message string for the given #nvrtcResult code. */ const char *nvrtcGetErrorString(nvrtcResult result); /*************************************************************************//** * * \defgroup query General Information Query * * NVRTC defines the following function for general information query. * ****************************************************************************/ /** * \ingroup query * \brief nvrtcVersion sets the output parameters \p major and \p minor * with the CUDA Runtime Compilation version number. * * \param [out] major CUDA Runtime Compilation major version number. * \param [out] minor CUDA Runtime Compilation minor version number. * \return * - \link #nvrtcResult NVRTC_SUCCESS \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink * */ nvrtcResult nvrtcVersion(int *major, int *minor); /** * \ingroup query * \brief nvrtcGetNumSupportedArchs sets the output parameter \p numArchs * with the number of architectures supported by NVRTC. This can * then be used to pass an array to ::nvrtcGetSupportedArchs to * get the supported architectures. * * \param [out] numArchs number of supported architectures. * \return * - \link #nvrtcResult NVRTC_SUCCESS \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink * * see ::nvrtcGetSupportedArchs */ nvrtcResult nvrtcGetNumSupportedArchs(int* numArchs); /** * \ingroup query * \brief nvrtcGetSupportedArchs populates the array passed via the output parameter * \p supportedArchs with the architectures supported by NVRTC. The array is * sorted in the ascending order. The size of the array to be passed can be * determined using ::nvrtcGetNumSupportedArchs. * * \param [out] supportedArchs sorted array of supported architectures. * \return * - \link #nvrtcResult NVRTC_SUCCESS \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink * * see ::nvrtcGetNumSupportedArchs */ nvrtcResult nvrtcGetSupportedArchs(int* supportedArchs); /*************************************************************************//** * * \defgroup compilation Compilation * * NVRTC defines the following type and functions for actual compilation. * ****************************************************************************/ /** * \ingroup compilation * \brief nvrtcProgram is the unit of compilation, and an opaque handle for * a program. * * To compile a CUDA program string, an instance of nvrtcProgram must be * created first with ::nvrtcCreateProgram, then compiled with * ::nvrtcCompileProgram. */ typedef struct _nvrtcProgram *nvrtcProgram; /** * \ingroup compilation * \brief nvrtcCreateProgram creates an instance of nvrtcProgram with the * given input parameters, and sets the output parameter \p prog with * it. * * \param [out] prog CUDA Runtime Compilation program. * \param [in] src CUDA program source. * \param [in] name CUDA program name.\n * \p name can be \c NULL; \c "default_program" is * used when \p name is \c NULL or "". * \param [in] numHeaders Number of headers used.\n * \p numHeaders must be greater than or equal to 0. * \param [in] headers Sources of the headers.\n * \p headers can be \c NULL when \p numHeaders is * 0. * \param [in] includeNames Name of each header by which they can be * included in the CUDA program source.\n * \p includeNames can be \c NULL when \p numHeaders * is 0. These headers must be included with the exact * names specified here. * \return * - \link #nvrtcResult NVRTC_SUCCESS \endlink * - \link #nvrtcResult NVRTC_ERROR_OUT_OF_MEMORY \endlink * - \link #nvrtcResult NVRTC_ERROR_PROGRAM_CREATION_FAILURE \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink * * \see ::nvrtcDestroyProgram */ nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames); /** * \ingroup compilation * \brief nvrtcDestroyProgram destroys the given program. * * \param [in] prog CUDA Runtime Compilation program. * \return * - \link #nvrtcResult NVRTC_SUCCESS \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink * * \see ::nvrtcCreateProgram */ nvrtcResult nvrtcDestroyProgram(nvrtcProgram *prog); /** * \ingroup compilation * \brief nvrtcCompileProgram compiles the given program. * * \param [in] prog CUDA Runtime Compilation program. * \param [in] numOptions Number of compiler options passed. * \param [in] options Compiler options in the form of C string array.\n * \p options can be \c NULL when \p numOptions is 0. * * \return * - \link #nvrtcResult NVRTC_SUCCESS \endlink * - \link #nvrtcResult NVRTC_ERROR_OUT_OF_MEMORY \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_OPTION \endlink * - \link #nvrtcResult NVRTC_ERROR_COMPILATION \endlink * - \link #nvrtcResult NVRTC_ERROR_BUILTIN_OPERATION_FAILURE \endlink * - \link #nvrtcResult NVRTC_ERROR_TIME_FILE_WRITE_FAILED \endlink * * It supports compile options listed in \ref options. */ nvrtcResult nvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char * const *options); /** * \ingroup compilation * \brief nvrtcGetPTXSize sets the value of \p ptxSizeRet with the size of the PTX * generated by the previous compilation of \p prog (including the * trailing \c NULL). * * \param [in] prog CUDA Runtime Compilation program. * \param [out] ptxSizeRet Size of the generated PTX (including the trailing * \c NULL). * \return * - \link #nvrtcResult NVRTC_SUCCESS \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink * * \see ::nvrtcGetPTX */ nvrtcResult nvrtcGetPTXSize(nvrtcProgram prog, size_t *ptxSizeRet); /** * \ingroup compilation * \brief nvrtcGetPTX stores the PTX generated by the previous compilation * of \p prog in the memory pointed by \p ptx. * * \param [in] prog CUDA Runtime Compilation program. * \param [out] ptx Compiled result. * \return * - \link #nvrtcResult NVRTC_SUCCESS \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink * * \see ::nvrtcGetPTXSize */ nvrtcResult nvrtcGetPTX(nvrtcProgram prog, char *ptx); /** * \ingroup compilation * \brief nvrtcGetCUBINSize sets the value of \p cubinSizeRet with the size of the cubin * generated by the previous compilation of \p prog. The value of * cubinSizeRet is set to 0 if the value specified to \c -arch is a * virtual architecture instead of an actual architecture. * * \param [in] prog CUDA Runtime Compilation program. * \param [out] cubinSizeRet Size of the generated cubin. * \return * - \link #nvrtcResult NVRTC_SUCCESS \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink * * \see ::nvrtcGetCUBIN */ nvrtcResult nvrtcGetCUBINSize(nvrtcProgram prog, size_t *cubinSizeRet); /** * \ingroup compilation * \brief nvrtcGetCUBIN stores the cubin generated by the previous compilation * of \p prog in the memory pointed by \p cubin. No cubin is available * if the value specified to \c -arch is a virtual architecture instead * of an actual architecture. * * \param [in] prog CUDA Runtime Compilation program. * \param [out] cubin Compiled and assembled result. * \return * - \link #nvrtcResult NVRTC_SUCCESS \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink * * \see ::nvrtcGetCUBINSize */ nvrtcResult nvrtcGetCUBIN(nvrtcProgram prog, char *cubin); #if defined(_WIN32) # define __DEPRECATED__(msg) __declspec(deprecated(msg)) #elif (defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 5 && !defined(__clang__)))) # define __DEPRECATED__(msg) __attribute__((deprecated)) #elif (defined(__GNUC__)) # define __DEPRECATED__(msg) __attribute__((deprecated(msg))) #else # define __DEPRECATED__(msg) #endif /** * \ingroup compilation * \brief * DEPRECATION NOTICE: This function will be removed in a future release. Please use * nvrtcGetLTOIRSize (and nvrtcGetLTOIR) instead. */ __DEPRECATED__("This function will be removed in a future release. Please use nvrtcGetLTOIRSize instead") nvrtcResult nvrtcGetNVVMSize(nvrtcProgram prog, size_t *nvvmSizeRet); /** * \ingroup compilation * \brief * DEPRECATION NOTICE: This function will be removed in a future release. Please use * nvrtcGetLTOIR (and nvrtcGetLTOIRSize) instead. */ __DEPRECATED__("This function will be removed in a future release. Please use nvrtcGetLTOIR instead") nvrtcResult nvrtcGetNVVM(nvrtcProgram prog, char *nvvm); #undef __DEPRECATED__ /** * \ingroup compilation * \brief nvrtcGetLTOIRSize sets the value of \p LTOIRSizeRet with the size of the LTO IR * generated by the previous compilation of \p prog. The value of * LTOIRSizeRet is set to 0 if the program was not compiled with * \c -dlto. * * \param [in] prog CUDA Runtime Compilation program. * \param [out] LTOIRSizeRet Size of the generated LTO IR. * \return * - \link #nvrtcResult NVRTC_SUCCESS \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink * * \see ::nvrtcGetLTOIR */ nvrtcResult nvrtcGetLTOIRSize(nvrtcProgram prog, size_t *LTOIRSizeRet); /** * \ingroup compilation * \brief nvrtcGetLTOIR stores the LTO IR generated by the previous compilation * of \p prog in the memory pointed by \p LTOIR. No LTO IR is available * if the program was compiled without \c -dlto. * * \param [in] prog CUDA Runtime Compilation program. * \param [out] LTOIR Compiled result. * \return * - \link #nvrtcResult NVRTC_SUCCESS \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink * * \see ::nvrtcGetLTOIRSize */ nvrtcResult nvrtcGetLTOIR(nvrtcProgram prog, char *LTOIR); /** * \ingroup compilation * \brief nvrtcGetOptiXIRSize sets the value of \p optixirSizeRet with the size of the OptiX IR * generated by the previous compilation of \p prog. The value of * nvrtcGetOptiXIRSize is set to 0 if the program was compiled with * options incompatible with OptiX IR generation. * * \param [in] prog CUDA Runtime Compilation program. * \param [out] optixirSizeRet Size of the generated LTO IR. * \return * - \link #nvrtcResult NVRTC_SUCCESS \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink * * \see ::nvrtcGetOptiXIR */ nvrtcResult nvrtcGetOptiXIRSize(nvrtcProgram prog, size_t *optixirSizeRet); /** * \ingroup compilation * \brief nvrtcGetOptiXIR stores the OptiX IR generated by the previous compilation * of \p prog in the memory pointed by \p optixir. No OptiX IR is available * if the program was compiled with options incompatible with OptiX IR generation. * * \param [in] prog CUDA Runtime Compilation program. * \param [out] Optix IR Compiled result. * \return * - \link #nvrtcResult NVRTC_SUCCESS \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink * * \see ::nvrtcGetOptiXIRSize */ nvrtcResult nvrtcGetOptiXIR(nvrtcProgram prog, char *optixir); /** * \ingroup compilation * \brief nvrtcGetProgramLogSize sets \p logSizeRet with the size of the * log generated by the previous compilation of \p prog (including the * trailing \c NULL). * * Note that compilation log may be generated with warnings and informative * messages, even when the compilation of \p prog succeeds. * * \param [in] prog CUDA Runtime Compilation program. * \param [out] logSizeRet Size of the compilation log * (including the trailing \c NULL). * \return * - \link #nvrtcResult NVRTC_SUCCESS \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink * * \see ::nvrtcGetProgramLog */ nvrtcResult nvrtcGetProgramLogSize(nvrtcProgram prog, size_t *logSizeRet); /** * \ingroup compilation * \brief nvrtcGetProgramLog stores the log generated by the previous * compilation of \p prog in the memory pointed by \p log. * * \param [in] prog CUDA Runtime Compilation program. * \param [out] log Compilation log. * \return * - \link #nvrtcResult NVRTC_SUCCESS \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink * * \see ::nvrtcGetProgramLogSize */ nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log); /** * \ingroup compilation * \brief nvrtcAddNameExpression notes the given name expression * denoting the address of a __global__ function * or __device__/__constant__ variable. * * The identical name expression string must be provided on a subsequent * call to nvrtcGetLoweredName to extract the lowered name. * \param [in] prog CUDA Runtime Compilation program. * \param [in] name_expression constant expression denoting the address of * a __global__ function or __device__/__constant__ variable. * \return * - \link #nvrtcResult NVRTC_SUCCESS \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink * - \link #nvrtcResult NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION \endlink * * \see ::nvrtcGetLoweredName */ nvrtcResult nvrtcAddNameExpression(nvrtcProgram prog, const char * const name_expression); /** * \ingroup compilation * \brief nvrtcGetLoweredName extracts the lowered (mangled) name * for a __global__ function or __device__/__constant__ variable, * and updates *lowered_name to point to it. The memory containing * the name is released when the NVRTC program is destroyed by * nvrtcDestroyProgram. * The identical name expression must have been previously * provided to nvrtcAddNameExpression. * * \param [in] prog CUDA Runtime Compilation program. * \param [in] name_expression constant expression denoting the address of * a __global__ function or __device__/__constant__ variable. * \param [out] lowered_name initialized by the function to point to a * C string containing the lowered (mangled) * name corresponding to the provided name expression. * \return * - \link #nvrtcResult NVRTC_SUCCESS \endlink * - \link #nvrtcResult NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink * - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink * - \link #nvrtcResult NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID \endlink * * \see ::nvrtcAddNameExpression */ nvrtcResult nvrtcGetLoweredName(nvrtcProgram prog, const char *const name_expression, const char** lowered_name); /** * \defgroup options Supported Compile Options * * NVRTC supports the compile options below. * Option names with two preceding dashs (\c --) are long option names and * option names with one preceding dash (\c -) are short option names. * Short option names can be used instead of long option names. * When a compile option takes an argument, an assignment operator (\c =) * is used to separate the compile option argument from the compile option * name, e.g., \c "--gpu-architecture=compute_60". * Alternatively, the compile option name and the argument can be specified in * separate strings without an assignment operator, .e.g, * \c "--gpu-architecture" \c "compute_60". * Single-character short option names, such as \c -D, \c -U, and \c -I, do * not require an assignment operator, and the compile option name and the * argument can be present in the same string with or without spaces between * them. * For instance, \c "-D=", \c "-D", and \c "-D " are all * supported. * * The valid compiler options are: * * - Compilation targets * - \c --gpu-architecture=\ (\c -arch)\n * Specify the name of the class of GPU architectures for which the * input must be compiled.\n * - Valid \s: * - \c compute_50 * - \c compute_52 * - \c compute_53 * - \c compute_60 * - \c compute_61 * - \c compute_62 * - \c compute_70 * - \c compute_72 * - \c compute_75 * - \c compute_80 * - \c compute_87 * - \c compute_89 * - \c compute_90 * - \c compute_90a * - \c sm_50 * - \c sm_52 * - \c sm_53 * - \c sm_60 * - \c sm_61 * - \c sm_62 * - \c sm_70 * - \c sm_72 * - \c sm_75 * - \c sm_80 * - \c sm_87 * - \c sm_89 * - \c sm_90 * - \c sm_90a * - Default: \c compute_52 * - Separate compilation / whole-program compilation * - \c --device-c (\c -dc)\n * Generate relocatable code that can be linked with other relocatable * device code. It is equivalent to --relocatable-device-code=true. * - \c --device-w (\c -dw)\n * Generate non-relocatable code. It is equivalent to * \c --relocatable-device-code=false. * - \c --relocatable-device-code={true|false} (\c -rdc)\n * Enable (disable) the generation of relocatable device code. * - Default: \c false * - \c --extensible-whole-program (\c -ewp)\n * Do extensible whole program compilation of device code. * - Default: \c false * - Debugging support * - \c --device-debug (\c -G)\n * Generate debug information. If --dopt is not specified, * then turns off all optimizations. * - \c --generate-line-info (\c -lineinfo)\n * Generate line-number information. * - Code generation * - \c --dopt on (\c -dopt)\n * - \c --dopt=on \n * Enable device code optimization. When specified along with '-G', enables * limited debug information generation for optimized device code (currently, * only line number information). * When '-G' is not specified, '-dopt=on' is implicit. * - \c --ptxas-options \ (\c -Xptxas)\n * - \c --ptxas-options=\ \n * Specify options directly to ptxas, the PTX optimizing assembler. * - \c --maxrregcount=\ (\c -maxrregcount)\n * Specify the maximum amount of registers that GPU functions can use. * Until a function-specific limit, a higher value will generally * increase the performance of individual GPU threads that execute this * function. However, because thread registers are allocated from a * global register pool on each GPU, a higher value of this option will * also reduce the maximum thread block size, thereby reducing the amount * of thread parallelism. Hence, a good maxrregcount value is the result * of a trade-off. If this option is not specified, then no maximum is * assumed. Value less than the minimum registers required by ABI will * be bumped up by the compiler to ABI minimum limit. * - \c --ftz={true|false} (\c -ftz)\n * When performing single-precision floating-point operations, flush * denormal values to zero or preserve denormal values. * \c --use_fast_math implies \c --ftz=true. * - Default: \c false * - \c --prec-sqrt={true|false} (\c -prec-sqrt)\n * For single-precision floating-point square root, use IEEE * round-to-nearest mode or use a faster approximation. * \c --use_fast_math implies \c --prec-sqrt=false. * - Default: \c true * - \c --prec-div={true|false} (\c -prec-div)\n * For single-precision floating-point division and reciprocals, use IEEE * round-to-nearest mode or use a faster approximation. * \c --use_fast_math implies \c --prec-div=false. * - Default: \c true * - \c --fmad={true|false} (\c -fmad)\n * Enables (disables) the contraction of floating-point multiplies and * adds/subtracts into floating-point multiply-add operations (FMAD, * FFMA, or DFMA). \c --use_fast_math implies \c --fmad=true. * - Default: \c true * - \c --use_fast_math (\c -use_fast_math)\n * Make use of fast math operations. * \c --use_fast_math implies \c --ftz=true \c --prec-div=false * \c --prec-sqrt=false \c --fmad=true. * - \c --extra-device-vectorization (\c -extra-device-vectorization)\n * Enables more aggressive device code vectorization in the NVVM optimizer. * - \c --modify-stack-limit={true|false} (\c -modify-stack-limit)\n * On Linux, during compilation, use \c setrlimit() to increase stack size * to maximum allowed. The limit is reset to the previous value at the * end of compilation. * Note: \c setrlimit() changes the value for the entire process. * - Default: \c true * - \c --dlink-time-opt (\c -dlto)\n * Generate intermediate code for later link-time optimization. * It implies \c -rdc=true. * Note: when this option is used the nvrtcGetLTOIR API should be used, * as PTX or Cubin will not be generated. * - \c --gen-opt-lto (\c -gen-opt-lto)\n * Run the optimizer passes before generating the LTO IR. * - \c --optix-ir (\c -optix-ir)\n * Generate OptiX IR. The Optix IR is only intended for consumption by OptiX * through appropriate APIs. This feature is not supported with * link-time-optimization (\c -dlto)\n. * Note: when this option is used the nvrtcGetOptiX API should be used, * as PTX or Cubin will not be generated. * - Preprocessing * - \c --define-macro=\ (\c -D)\n * \c \ can be either \c \ or \c \. * - \c \ \n * Predefine \c \ as a macro with definition \c 1. * - \c \=\ \n * The contents of \c \ are tokenized and preprocessed * as if they appeared during translation phase three in a \c \#define * directive. In particular, the definition will be truncated by * embedded new line characters. * - \c --undefine-macro=\ (\c -U)\n * Cancel any previous definition of \c \. * - \c --include-path=\ (\c -I)\n * Add the directory \c \ to the list of directories to be * searched for headers. These paths are searched after the list of * headers given to ::nvrtcCreateProgram. * - \c --pre-include=\ (\c -include)\n * Preinclude \c \ during preprocessing. * - \c --no-source-include (\c -no-source-include) * The preprocessor by default adds the directory of each input sources * to the include path. This option disables this feature and only * considers the path specified explicitly. * - Language Dialect * - \c --std={c++03|c++11|c++14|c++17|c++20} * (\c -std={c++11|c++14|c++17|c++20})\n * Set language dialect to C++03, C++11, C++14, C++17 or C++20 * - Default: \c c++17 * - \c --builtin-move-forward={true|false} (\c -builtin-move-forward)\n * Provide builtin definitions of \c std::move and \c std::forward, * when C++11 or later language dialect is selected. * - Default: \c true * - \c --builtin-initializer-list={true|false} * (\c -builtin-initializer-list)\n * Provide builtin definitions of \c std::initializer_list class and * member functions when C++11 or later language dialect is selected. * - Default: \c true * - Misc. * - \c --disable-warnings (\c -w)\n * Inhibit all warning messages. * - \c --restrict (\c -restrict)\n * Programmer assertion that all kernel pointer parameters are restrict * pointers. * - \c --device-as-default-execution-space * (\c -default-device)\n * Treat entities with no execution space annotation as \c __device__ * entities. * - \c --device-int128 (\c -device-int128)\n * Allow the \c __int128 type in device code. Also causes the macro \c __CUDACC_RTC_INT128__ * to be defined. * - \c --optimization-info=\ (\c -opt-info)\n * Provide optimization reports for the specified kind of optimization. * The following kind tags are supported: * - \c inline : emit a remark when a function is inlined. * - \c --display-error-number (\c -err-no)\n * Display diagnostic number for warning messages. (Default) * - \c --no-display-error-number (\c -no-err-no)\n * Disables the display of a diagnostic number for warning messages. * - \c --diag-error=,... (\c -diag-error)\n * Emit error for specified diagnostic message number(s). Message numbers can be separated by comma. * - \c --diag-suppress=,... (\c -diag-suppress)\n * Suppress specified diagnostic message number(s). Message numbers can be separated by comma. * - \c --diag-warn=,... (\c -diag-warn)\n * Emit warning for specified diagnostic message number(s). Message numbers can be separated by comma. * - \c --brief-diagnostics={true|false} (\c -brief-diag)\n * This option disables or enables showing source line and column info * in a diagnostic. * The --brief-diagnostics=true will not show the source line and column info. * - Default: \c false * - \c --time= (\c -time)\n * Generate a comma separated value table with the time taken by each compilation * phase, and append it at the end of the file given as the option argument. * If the file does not exist, the column headings are generated in the first row * of the table. If the file name is '-', the timing data is written to the compilation log. * - \c --split-compile= (\c -split-compile=)\n * [Experimental] Perform compiler optimizations in parallel. * Split compilation attempts to reduce compile time by enabling the compiler to run certain * optimization passes concurrently. This option accepts a numerical value that specifies the * maximum number of threads the compiler can use. One can also allow the compiler to use the maximum * threads available on the system by setting --split-compile=0. * Setting --split-compile=1 will cause this option to be ignored. * */ #ifdef __cplusplus } #endif /* __cplusplus */ /* The utility function 'nvrtcGetTypeName' is not available by default. Define the macro 'NVRTC_GET_TYPE_NAME' to a non-zero value to make it available. */ #if NVRTC_GET_TYPE_NAME || __DOXYGEN_ONLY__ #if NVRTC_USE_CXXABI || __clang__ || __GNUC__ || __DOXYGEN_ONLY__ #include #include #elif defined(_WIN32) #include #include #endif /* NVRTC_USE_CXXABI || __clang__ || __GNUC__ */ #include #include template struct __nvrtcGetTypeName_helper_t { }; /*************************************************************************//** * * \defgroup hosthelper Host Helper * * NVRTC defines the following functions for easier interaction with host code. * ****************************************************************************/ /** * \ingroup hosthelper * \brief nvrtcGetTypeName stores the source level name of a type in the given * std::string location. * * This function is only provided when the macro NVRTC_GET_TYPE_NAME is * defined with a non-zero value. It uses abi::__cxa_demangle or UnDecorateSymbolName * function calls to extract the type name, when using gcc/clang or cl.exe compilers, * respectively. If the name extraction fails, it will return NVRTC_INTERNAL_ERROR, * otherwise *result is initialized with the extracted name. * * Windows-specific notes: * - nvrtcGetTypeName() is not multi-thread safe because it calls UnDecorateSymbolName(), * which is not multi-thread safe. * - The returned string may contain Microsoft-specific keywords such as __ptr64 and __cdecl. * * \param [in] tinfo: reference to object of type std::type_info for a given type. * \param [in] result: pointer to std::string in which to store the type name. * \return * - \link #nvrtcResult NVRTC_SUCCESS \endlink * - \link #nvrtcResult NVRTC_ERROR_INTERNAL_ERROR \endlink * */ inline nvrtcResult nvrtcGetTypeName(const std::type_info &tinfo, std::string *result) { #if USE_CXXABI || __clang__ || __GNUC__ const char *name = tinfo.name(); int status; char *undecorated_name = abi::__cxa_demangle(name, 0, 0, &status); if (status == 0) { *result = undecorated_name; free(undecorated_name); return NVRTC_SUCCESS; } #elif defined(_WIN32) const char *name = tinfo.raw_name(); if (!name || *name != '.') { return NVRTC_ERROR_INTERNAL_ERROR; } char undecorated_name[4096]; //name+1 skips over the '.' prefix if(UnDecorateSymbolName(name+1, undecorated_name, sizeof(undecorated_name) / sizeof(*undecorated_name), //note: doesn't seem to work correctly without UNDNAME_NO_ARGUMENTS. UNDNAME_NO_ARGUMENTS | UNDNAME_NAME_ONLY ) ) { *result = undecorated_name; return NVRTC_SUCCESS; } #endif /* USE_CXXABI || __clang__ || __GNUC__ */ return NVRTC_ERROR_INTERNAL_ERROR; } /** * \ingroup hosthelper * \brief nvrtcGetTypeName stores the source level name of the template type argument * T in the given std::string location. * * This function is only provided when the macro NVRTC_GET_TYPE_NAME is * defined with a non-zero value. It uses abi::__cxa_demangle or UnDecorateSymbolName * function calls to extract the type name, when using gcc/clang or cl.exe compilers, * respectively. If the name extraction fails, it will return NVRTC_INTERNAL_ERROR, * otherwise *result is initialized with the extracted name. * * Windows-specific notes: * - nvrtcGetTypeName() is not multi-thread safe because it calls UnDecorateSymbolName(), * which is not multi-thread safe. * - The returned string may contain Microsoft-specific keywords such as __ptr64 and __cdecl. * * \param [in] result: pointer to std::string in which to store the type name. * \return * - \link #nvrtcResult NVRTC_SUCCESS \endlink * - \link #nvrtcResult NVRTC_ERROR_INTERNAL_ERROR \endlink * */ template nvrtcResult nvrtcGetTypeName(std::string *result) { nvrtcResult res = nvrtcGetTypeName(typeid(__nvrtcGetTypeName_helper_t), result); if (res != NVRTC_SUCCESS) return res; std::string repr = *result; std::size_t idx = repr.find("__nvrtcGetTypeName_helper_t"); idx = (idx != std::string::npos) ? repr.find("<", idx) : idx; std::size_t last_idx = repr.find_last_of('>'); if (idx == std::string::npos || last_idx == std::string::npos) { return NVRTC_ERROR_INTERNAL_ERROR; } ++idx; *result = repr.substr(idx, last_idx - idx); return NVRTC_SUCCESS; } #endif /* NVRTC_GET_TYPE_NAME */ #endif /* __NVRTC_H__ */