[package]
name = "web-rwkv"
version = "0.8.20"
authors = ["Zhenyuan Zhang <cryscan@umich.edu>"]
categories = ["science", "text-processing"]
edition = "2021"
# Directories left out of the packaged crate.
exclude = ["assets/", "crates/", "screenshots/"]
homepage = "https://github.com/cryscan/web-rwkv"
keywords = ["deep-learning", "language", "model", "rwkv"]
license = "MIT OR Apache-2.0"
repository = "https://github.com/cryscan/web-rwkv"
description = "An implementation of the RWKV language model in pure WebGPU."

[dependencies]
ahash = "0.8"
anyhow = "1.0"
bytemuck = { version = "1.19", features = ["extern_crate_alloc"] }
derive-getters = "0.5"
document-features = "0.2.8"
flume = "0.11.0"
futures = "0.3"
gpp = "0.6.2"
half = { version = "2.2", features = ["bytemuck", "serde"] }
# NOTE(review): `instant` is reported as unmaintained (RUSTSEC-2024-0384, which
# suggests `web-time` as a replacement) -- confirm before migrating.
instant = { version = "0.1", features = ["inaccurate", "wasm-bindgen"] }
itertools = "0.13"
log = "0.4"
regex = "1.11"
rustc-hash = "2.0.0"
safetensors = "0.4"
serde = { version = "1.0.210", features = ["derive", "rc"] }
serde_bytes = "0.11.14"
serde_json = "1.0"
thiserror = "2.0"
# Default features are off; the multi-threaded runtime is opted into through
# the `tokio-multi-thread` feature of this crate.
tokio = { version = "1.41", default-features = false, features = ["macros", "rt", "sync", "time"] }
# The three tracing crates are optional and are pulled in by the `trace` feature.
tracing = { version = "0.1.40", optional = true }
tracing-subscriber = { version = "0.3.18", optional = true }
tracing-tracy = { version = "0.11.0", optional = true }
trait-variant = "0.1"
uid = "0.1"
wasm-bindgen = "0.2"
# In-repository derive crate, declared with both a local path and a version.
web-rwkv-derive = { version = "0.2.5", path = "crates/web-rwkv-derive" }
wgpu = "23.0.0"

# Crates used only by examples, tests and benchmarks.
[dev-dependencies]
cbor4ii = { version = "0.3.2", features = ["half-f16", "serde1"] }
clap = { version = "4.5", features = ["derive"] }
crossterm = "0.28"
dialoguer = "0.11.0"
fastrand = "2.2"
memmap2 = "0.9"
pollster = "0.4.0"
ratatui = { version = "0.29", features = ["all-widgets"] }
simple_logger = { version = "5.0.0", features = ["stderr"] }
# Same minimum version as the `tokio` entry in [dependencies], so the two
# requirements do not advertise different lower bounds.
tokio = { version = "1.41", features = ["full"] }
# wgpu-profiler = "0.14.1"

[features]
# Feature presets: `default` turns on every feature except `trace`; `native` is
# `default` without `vanilla`; `web` enables only `vanilla` and so leaves out
# `tokio-multi-thread`.
default = ["runtime", "subgroup-ops", "tokio-multi-thread", "vanilla"]
native = ["runtime", "subgroup-ops", "tokio-multi-thread"]
web = ["vanilla"]

## Enables the `runtime` API, which essentially doubles the inference speed compared to the old API.
runtime = []
## Enables subgroup operations in the kernels. Accelerates the inference on some devices.
subgroup-ops = []
## Enables tokio's multi-threaded runtime. Doesn't work on web platforms.
tokio-multi-thread = ["tokio/rt-multi-thread"]
## Enables performance tracing.
trace = ["tracing", "tracing-subscriber", "tracing-tracy"]
## Enables the `vanilla` API.
vanilla = []

# Examples that require the `vanilla` feature.
[[example]]
name = "gen"
required-features = ["vanilla"]

[[example]]
name = "chat"
required-features = ["vanilla"]

[[example]]
name = "batch"
required-features = ["vanilla"]

[[example]]
name = "inspector"
required-features = ["vanilla"]

[[example]]
name = "serialization"
required-features = ["vanilla"]

# Examples that require the `runtime` feature.
[[example]]
name = "rt-gen"
required-features = ["runtime"]

[[example]]
name = "rt-chat"
required-features = ["runtime"]

[[example]]
name = "rt-batch"
required-features = ["runtime"]