# Manifest for the `web-rwkv` crate.

[package]
name = "web-rwkv"
version = "0.8.20"
# NOTE(review): the trailing space looks like a stripped e-mail address — confirm.
authors = ["Zhenyuan Zhang "]
categories = ["science", "text-processing"]
edition = "2021"
exclude = ["assets/", "crates/", "screenshots/"]
homepage = "https://github.com/cryscan/web-rwkv"
keywords = ["deep-learning", "language", "model", "rwkv"]
license = "MIT OR Apache-2.0"
repository = "https://github.com/cryscan/web-rwkv"
description = "An implementation of the RWKV language model in pure WebGPU."

# The three `tracing*` crates are optional; they are switched on together by
# the `trace` feature declared further down.
[dependencies]
ahash = "0.8"
anyhow = "1.0"
bytemuck = { version = "1.19", features = ["extern_crate_alloc"] }
derive-getters = "0.5"
document-features = "0.2.8"
flume = "0.11.0"
futures = "0.3"
gpp = "0.6.2"
half = { version = "2.2", features = ["bytemuck", "serde"] }
instant = { version = "0.1", features = ["inaccurate", "wasm-bindgen"] }
itertools = "0.13"
log = "0.4"
regex = "1.11"
rustc-hash = "2.0.0"
safetensors = "0.4"
serde = { version = "1.0.210", features = ["derive", "rc"] }
serde_bytes = "0.11.14"
serde_json = "1.0"
thiserror = "2.0"
tokio = { version = "1.41", default-features = false, features = ["macros", "rt", "sync", "time"] }
tracing = { version = "0.1.40", optional = true }
tracing-subscriber = { version = "0.3.18", optional = true }
tracing-tracy = { version = "0.11.0", optional = true }
trait-variant = "0.1"
uid = "0.1"
wasm-bindgen = "0.2"
web-rwkv-derive = { path = "crates/web-rwkv-derive", version = "0.2.5" }
wgpu = "23.0.0"

# `tokio` is listed again here so that dev builds get its `full` feature set;
# the regular dependency above only enables a minimal subset.
[dev-dependencies]
cbor4ii = { version = "0.3.2", features = ["half-f16", "serde1"] }
clap = { version = "4.5", features = ["derive"] }
crossterm = "0.28"
dialoguer = "0.11.0"
fastrand = "2.2"
memmap2 = "0.9"
pollster = "0.4.0"
ratatui = { version = "0.29", features = ["all-widgets"] }
simple_logger = { version = "5.0.0", features = ["stderr"] }
tokio = { version = "1.37", features = ["full"] }
# wgpu-profiler = "0.14.1"

# `native` is `default` without `vanilla`; `web` enables `vanilla` only.
# Lines starting with `##` are feature doc comments: keep each one directly
# above the feature it describes.
[features]
default = ["runtime", "subgroup-ops", "tokio-multi-thread", "vanilla"]
native = ["runtime", "subgroup-ops", "tokio-multi-thread"]
web = ["vanilla"]
## Enables `runtime` API, which essentially doubles the inference speed compared to the old API.
runtime = []
## Enables subgroup operations in the kernels. Accelerates the inference on some devices.
subgroup-ops = []
## Enables tokio's multi-threaded runtime. Doesn't work on web platforms.
tokio-multi-thread = ["tokio/rt-multi-thread"]
## Enables performance tracing.
trace = ["tracing", "tracing-subscriber", "tracing-tracy"]
## Enables `vanilla` API.
vanilla = []

# Examples are gated on the API they use: the `rt-*` examples require the
# `runtime` feature, all others require `vanilla`.
[[example]]
name = "gen"
required-features = ["vanilla"]

[[example]]
name = "chat"
required-features = ["vanilla"]

[[example]]
name = "batch"
required-features = ["vanilla"]

[[example]]
name = "inspector"
required-features = ["vanilla"]

[[example]]
name = "serialization"
required-features = ["vanilla"]

[[example]]
name = "rt-gen"
required-features = ["runtime"]

[[example]]
name = "rt-chat"
required-features = ["runtime"]

[[example]]
name = "rt-batch"
required-features = ["runtime"]