cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories. project("ggml" C CXX) include(CheckIncludeFileCXX) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") endif() if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) set(GGML_STANDALONE ON) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) # configure project version # TODO else() set(GGML_STANDALONE OFF) endif() if (EMSCRIPTEN) set(BUILD_SHARED_LIBS_DEFAULT OFF) option(GGML_WASM_SINGLE_FILE "ggml: embed WASM inside the generated ggml.js" ON) else() if (MINGW) set(BUILD_SHARED_LIBS_DEFAULT OFF) else() set(BUILD_SHARED_LIBS_DEFAULT ON) endif() endif() option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT}) # # option list # # TODO: mark all options as advanced when not GGML_STANDALONE if (APPLE) set(GGML_METAL_DEFAULT ON) set(GGML_BLAS_DEFAULT ON) set(GGML_BLAS_VENDOR_DEFAULT "Apple") else() set(GGML_METAL_DEFAULT OFF) set(GGML_BLAS_DEFAULT OFF) set(GGML_BLAS_VENDOR_DEFAULT "Generic") endif() if (CMAKE_CROSSCOMPILING) set(GGML_NATIVE_DEFAULT OFF) else() set(GGML_NATIVE_DEFAULT ON) endif() # defaults if (NOT GGML_LLAMAFILE_DEFAULT) set(GGML_LLAMAFILE_DEFAULT OFF) endif() if (NOT GGML_CUDA_GRAPHS_DEFAULT) set(GGML_CUDA_GRAPHS_DEFAULT OFF) endif() # general option(GGML_STATIC "ggml: static link libraries" OFF) option(GGML_NATIVE "ggml: enable -march=native flag" ${GGML_NATIVE_DEFAULT}) option(GGML_LTO "ggml: enable link time optimization" OFF) option(GGML_CCACHE "ggml: use ccache if available" ON) # debug option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON) option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF) option(GGML_GPROF "ggml: enable gprof" OFF) # build option(GGML_FATAL_WARNINGS "ggml: enable -Werror flag" OFF) # sanitizers option(GGML_SANITIZE_THREAD "ggml: enable thread sanitizer" OFF) option(GGML_SANITIZE_ADDRESS "ggml: enable address sanitizer" OFF) option(GGML_SANITIZE_UNDEFINED "ggml: enable undefined sanitizer" OFF) # instruction set specific if (GGML_NATIVE OR NOT GGML_NATIVE_DEFAULT) set(INS_ENB OFF) else() set(INS_ENB ON) endif() option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF) option(GGML_AVX "ggml: enable AVX" ${INS_ENB}) option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB}) option(GGML_AVX512 "ggml: enable AVX512" OFF) option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF) option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF) option(GGML_AVX512_BF16 "ggml: enable AVX512-BF16" OFF) option(GGML_FMA "ggml: enable FMA" ${INS_ENB}) if (NOT MSVC) option(GGML_F16C "ggml: enable F16C" ${INS_ENB}) # in MSVC F16C is implied with AVX2/AVX512 endif() option(GGML_LASX "ggml: enable lasx" ON) option(GGML_LSX "ggml: enable lsx" ON) option(GGML_SVE "ggml: enable SVE" OFF) if (WIN32) set(GGML_WIN_VER "0x602" CACHE STRING "ggml: Windows Version") endif() # ggml core set(GGML_SCHED_MAX_COPIES "4" CACHE STRING "ggml: max input copies for pipeline parallelism") # 3rd party libs / backends option(GGML_ACCELERATE "ggml: enable Accelerate framework" ON) option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT}) set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING "ggml: BLAS library vendor") option(GGML_LLAMAFILE "ggml: use LLAMAFILE" ${GGML_LLAMAFILE_DEFAULT}) option(GGML_CUDA "ggml: use CUDA" OFF) option(GGML_MUSA "ggml: use MUSA" OFF) option(GGML_CUDA_FORCE_DMMV "ggml: use dmmv instead of mmvq CUDA kernels" OFF) option(GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cuBLAS" OFF) option(GGML_CUDA_FORCE_CUBLAS "ggml: always use cuBLAS instead of mmq kernels" OFF) set (GGML_CUDA_DMMV_X "32" CACHE STRING "ggml: x stride for dmmv CUDA kernels") set (GGML_CUDA_MMV_Y "1" CACHE STRING "ggml: y block size for mmv CUDA kernels") option(GGML_CUDA_F16 "ggml: use 16 bit floats for some calculations" OFF) set (GGML_CUDA_KQUANTS_ITER "2" CACHE STRING "ggml: iters./thread per block for Q2_K/Q6_K") set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING "ggml: max. batch size for using peer access") option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF) option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF) option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF) option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT}) option(GGML_HIPBLAS "ggml: use hipBLAS" OFF) option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF) option(GGML_VULKAN "ggml: use Vulkan" OFF) option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF) option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF) option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug output" OFF) option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF) option(GGML_VULKAN_PERF "ggml: enable Vulkan perf output" OFF) option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF) option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF) option(GGML_KOMPUTE "ggml: use Kompute" OFF) option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT}) option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF) option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF) option(GGML_METAL_EMBED_LIBRARY "ggml: embed Metal library" ${GGML_METAL}) set (GGML_METAL_MACOSX_VERSION_MIN "" CACHE STRING "ggml: metal minimum macOS version") set (GGML_METAL_STD "" CACHE STRING "ggml: metal standard version (-std flag)") option(GGML_OPENMP "ggml: use OpenMP" ON) option(GGML_RPC "ggml: use RPC" OFF) option(GGML_SYCL "ggml: use SYCL" OFF) option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF) set (GGML_SYCL_TARGET "INTEL" CACHE STRING "ggml: sycl target device") # extra artifacts option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE}) option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE}) # bitnet methods option(GGML_BITNET_ARM_TL1 "ggml: use Bitnet tl1(arm)" OFF) option(GGML_BITNET_X86_TL2 "ggml: use Bitnet tl2(x86)" OFF) # # dependencies # if (GGML_BITNET_ARM_TL1) add_compile_definitions(GGML_BITNET_ARM_TL1) endif() if (GGML_BITNET_X86_TL2) add_compile_definitions(GGML_BITNET_X86_TL2) endif() set(CMAKE_C_STANDARD 11) set(CMAKE_C_STANDARD_REQUIRED true) if (GGML_SYCL) set(CMAKE_CXX_STANDARD 17) else() set(CMAKE_CXX_STANDARD 11) endif() set(CMAKE_CXX_STANDARD_REQUIRED true) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) # # build the library # add_subdirectory(src) # # tests and examples # if (GGML_BUILD_TESTS) enable_testing() add_subdirectory(tests) endif () if (GGML_BUILD_EXAMPLES) add_subdirectory(examples) endif () # # install # include(GNUInstallDirs) include(CMakePackageConfigHelpers) # all public headers set(GGML_PUBLIC_HEADERS include/ggml.h include/ggml-alloc.h include/ggml-backend.h include/ggml-blas.h include/ggml-cann.h include/ggml-cuda.h include/ggml.h include/ggml-kompute.h include/ggml-metal.h include/ggml-rpc.h include/ggml-sycl.h include/ggml-vulkan.h include/ggml-bitnet.h) set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}") #if (GGML_METAL) # set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal") #endif() install(TARGETS ggml PUBLIC_HEADER) if (BUILD_SHARED_LIBS) install(TARGETS ggml LIBRARY) endif() if (GGML_METAL) install( FILES src/ggml-metal.metal PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ DESTINATION ${CMAKE_INSTALL_BINDIR}) if (NOT GGML_METAL_EMBED_LIBRARY) install( FILES ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib DESTINATION ${CMAKE_INSTALL_BINDIR} ) endif() endif() if (GGML_STANDALONE) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ggml.pc.in ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc @ONLY) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc DESTINATION share/pkgconfig) endif()