[package]
name = "llama_cpp"
version = "0.3.2"
description = "High-level bindings to llama.cpp with a focus on just being really, really easy to use"
edition = "2021"
authors = ["Dakota Thompson", "Pedro Valente"]
repository = "https://github.com/edgenai/llama_cpp-rs"
license = "MIT OR Apache-2.0"
readme = "../../README.md"
publish = true

[dependencies]
derive_more = "0.99.17"
futures = { workspace = true }
llama_cpp_sys = { version = "^0.3.2", path = "../llama_cpp_sys", default-features = false }
num_cpus = "1.16.0"
thiserror = "1.0.57"
tokio = { workspace = true, features = ["sync", "rt", "rt-multi-thread"] }
tracing = "0.1.40"

[features]
default = ["compat", "native"]
compat = ["llama_cpp_sys/compat"] # modifies the symbols exposed by the generated libraries to avoid conflicts
native = ["llama_cpp_sys/native", "avx", "avx2", "fma", "f16c", "accel"]
avx = ["llama_cpp_sys/avx"]
avx2 = ["llama_cpp_sys/avx2"]
avx512 = ["llama_cpp_sys/avx512"]
avx512_vbmi = ["llama_cpp_sys/avx512_vbmi"]
avx512_vnni = ["llama_cpp_sys/avx512_vnni"]
fma = ["llama_cpp_sys/fma"]
f16c = ["llama_cpp_sys/f16c"] # implied when compiling with MSVC and avx2/avx512
accel = ["llama_cpp_sys/accel"] # Apple's Accelerate framework
mpi = ["llama_cpp_sys/mpi"]
cuda = ["llama_cpp_sys/cuda"]
cuda_f16 = ["llama_cpp_sys/cuda_f16", "cuda"]
cuda_dmmv = ["llama_cpp_sys/cuda_dmmv", "cuda"] # use dmmv CUDA kernels instead of mmvq
cuda_mmq = ["llama_cpp_sys/cuda_mmq", "cuda"] # use mmq kernels instead of cuBLAS
metal = ["llama_cpp_sys/metal"]
blas = ["llama_cpp_sys/blas"]
hipblas = ["llama_cpp_sys/hipblas"]
clblast = ["llama_cpp_sys/clblast"]
vulkan = ["llama_cpp_sys/vulkan"]
sys_verbosity = [] # increases the sys crate's tracing log verbosity level

[lib]
doctest = false
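
# A minimal sketch of how a downstream crate might select these features in its
# own Cargo.toml (a hypothetical consumer manifest; the crate name `my_app` and
# the chosen feature set are illustrative assumptions, not part of this package):
#
#   [package]
#   name = "my_app"
#   version = "0.1.0"
#   edition = "2021"
#
#   [dependencies]
#   # Drop the host-CPU `native` defaults and opt into CUDA offloading instead.
#   llama_cpp = { version = "0.3.2", default-features = false, features = ["compat", "cuda", "cuda_f16"] }
#
# The same selection can be made ad hoc when building this crate directly:
#
#   cargo build --no-default-features --features "compat,cuda,cuda_f16"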