# Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

# Directory added to the private include path of every example executable.
set(CUTLASS_EXAMPLES_COMMON_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/common)

# Umbrella targets:
#   cutlass_examples - depends on every example executable created below.
#   test_examples    - passed as a DEPENDEES entry for every example's tests.
add_custom_target(cutlass_examples)
add_custom_target(test_examples)

#
# cutlass_example_add_executable(<name> [<sources / extra args>...]
#                                [DISABLE_TESTS <ON|OFF>]
#                                [DEPENDS <target>...]
#                                [DEPENDEES <target>...]
#                                [TEST_COMMAND_OPTIONS <var>...])
#
# Creates the example executable <name> and wires it into the example
# infrastructure of this directory.
#
#   <name>                Name of the executable target to create.
#   DISABLE_TESTS         Forwarded to cutlass_add_executable_tests; defaults
#                         to OFF when not supplied.
#   DEPENDS               Forwarded to cutlass_add_executable_tests.
#   DEPENDEES             Forwarded to cutlass_add_executable_tests, after the
#                         test_examples umbrella target.
#   TEST_COMMAND_OPTIONS  Forwarded to cutlass_add_executable_tests.
#
# Every argument that is not one of the keywords above is handed unchanged to
# cutlass_add_executable (defined elsewhere in the project).
#
# Example:
#   cutlass_example_add_executable(00_basic_gemm basic_gemm.cu)
#
function(cutlass_example_add_executable NAME)

  set(options)
  set(oneValueArgs DISABLE_TESTS)
  set(multiValueArgs DEPENDS DEPENDEES TEST_COMMAND_OPTIONS)

  # The prefix is "_", so parsed values land in __DISABLE_TESTS, __DEPENDS,
  # __DEPENDEES, __TEST_COMMAND_OPTIONS and __UNPARSED_ARGUMENTS.
  cmake_parse_arguments(_ "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  if (NOT DEFINED __DISABLE_TESTS)
    set(__DISABLE_TESTS OFF)
  endif()

  cutlass_add_executable(${NAME} ${__UNPARSED_ARGUMENTS} BATCH_SOURCES OFF)

  # Building the cutlass_examples umbrella target builds this example.
  add_dependencies(cutlass_examples ${NAME})

  # NOTE(review): the condition of the cuBLAS generator expression was garbled
  # to "$<$:nvidia::cublas>", which CMake rejects as an invalid expression. It
  # is reconstructed here as a $<BOOL:...> test on CUTLASS_ENABLE_CUBLAS so that
  # nvidia::cublas is linked only when that variable is true - confirm the
  # variable name against the top-level project configuration.
  target_link_libraries(
    ${NAME}
    PRIVATE
    CUTLASS
    cutlass_tools_util_includes
    $<$<BOOL:${CUTLASS_ENABLE_CUBLAS}>:nvidia::cublas>
    cuda
    )

  # CUTLASS_EXAMPLES_UTILS_DIR is not set in this file; it is expected to be
  # provided by an enclosing scope.
  target_include_directories(
    ${NAME}
    PRIVATE
    ${CUTLASS_EXAMPLES_COMMON_SOURCE_DIR}
    ${CUTLASS_EXAMPLES_UTILS_DIR}
    )

  install(
    TARGETS ${NAME}
    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
    )

  # The executable is installed by the install() rule above, so the test
  # helper is asked not to add its own install rule for it.
  cutlass_add_executable_tests(
    test_examples_${NAME} ${NAME}
    DEPENDS ${__DEPENDS}
    DEPENDEES test_examples ${__DEPENDEES}
    TEST_COMMAND_OPTIONS ${__TEST_COMMAND_OPTIONS}
    DISABLE_EXECUTABLE_INSTALL_RULE
    DISABLE_TESTS ${__DISABLE_TESTS}
    )

endfunction()

# Add every example subdirectory, in the order listed.
foreach(EXAMPLE
  00_basic_gemm
  01_cutlass_utilities
  02_dump_reg_shmem
  03_visualize_layout
  04_tile_iterator
  05_batched_gemm
  06_splitK_gemm
  07_volta_tensorop_gemm
  08_turing_tensorop_gemm
  09_turing_tensorop_conv2dfprop
  10_planar_complex
  11_planar_complex_array
  12_gemm_bias_relu
  13_two_tensor_op_fusion
  14_ampere_tf32_tensorop_gemm
  15_ampere_sparse_tensorop_gemm
  16_ampere_tensorop_conv2dfprop
  17_fprop_per_channel_bias
  18_ampere_fp64_tensorop_affine2_gemm
  19_tensorop_canonical
  20_simt_canonical
  21_quaternion_gemm
  22_quaternion_conv
  23_ampere_gemm_operand_reduction_fusion
  24_gemm_grouped
  25_ampere_fprop_mainloop_fusion
  26_ampere_wgrad_mainloop_fusion
  27_ampere_3xtf32_fast_accurate_tensorop_gemm
  28_ampere_3xtf32_fast_accurate_tensorop_fprop
  29_ampere_3xtf32_fast_accurate_tensorop_complex_gemm
  30_wgrad_split_k
  31_basic_syrk
  32_basic_trmm
  33_ampere_3xtf32_tensorop_symm
  34_transposed_conv2d
  35_gemm_softmax
  36_gather_scatter_fusion
  37_gemm_layernorm_gemm_fusion
  38_syr2k_grouped
  cute
  39_gemm_permute
  41_fused_multi_head_attention
  42_ampere_tensorop_group_conv
  43_ell_block_sparse_gemm
  45_dual_gemm
  46_depthwise_simt_conv2dfprop
  47_ampere_gemm_universal_streamk
  48_hopper_warp_specialized_gemm
  49_hopper_gemm_with_collective_builder
  50_hopper_gemm_with_epilogue_swizzle
  51_hopper_gett
  52_hopper_gather_scatter_fusion
  53_hopper_gemm_permute
  54_hopper_fp8_warp_specialized_gemm
  55_hopper_mixed_dtype_gemm
  56_hopper_ptr_array_batched_gemm
  57_hopper_grouped_gemm
  )

  add_subdirectory(${EXAMPLE})

endforeach()