# Cargo manifest for the `tokenizers` crate.

[package]
name = "tokenizers"
version = "0.21.0"
# NOTE(review): both author entries end with a trailing space, which suggests
# contact addresses were stripped at some point — confirm against upstream.
authors = ["Anthony MOI ", "Nicolas Patry "]
documentation = "https://docs.rs/tokenizers/"
edition = "2018"
# Paths left out of the packaged crate: build output, lockfile, benchmark
# text/JSON inputs and the data directory.
exclude = [
    "rust-toolchain",
    "target/*",
    "Cargo.lock",
    "benches/*.txt",
    "benches/*.json",
    "data/*",
]
homepage = "https://github.com/huggingface/tokenizers"
keywords = ["tokenizer", "NLP", "huggingface", "BPE", "WordPiece"]
license = "Apache-2.0"
readme = "./README.md"
repository = "https://github.com/huggingface/tokenizers"
description = """ Provides an implementation of today's most used tokenizers, with a focus on performances and versatility. """

[lib]
name = "tokenizers"
path = "src/lib.rs"
# The library target itself is not benchmarked; benchmarks are the dedicated
# [[bench]] targets below.
bench = false

# Every benchmark sets `harness = false`, i.e. it supplies its own entry point
# instead of the built-in bench harness.
[[bench]]
name = "bpe_benchmark"
harness = false

[[bench]]
name = "bert_benchmark"
harness = false

[[bench]]
name = "layout_benchmark"
harness = false

[[bench]]
name = "unigram_benchmark"
harness = false

# Only built when the `http` feature is enabled.
[[bench]]
name = "llama3"
required-features = ["http"]
harness = false

[dependencies]
lazy_static = "1.4"
rand = "0.8"
onig = { version = "6.4", default-features = false, optional = true }
regex = "1.10"
regex-syntax = "0.8"
rayon = "1.10"
rayon-cond = "0.3"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
unicode-normalization-alignments = "0.1"
unicode_categories = "0.1"
unicode-segmentation = "1.11"
indicatif = { version = "0.17", optional = true }
itertools = "0.12"
log = "0.4"
derive_builder = "0.20"
spm_precompiled = "0.1.3"
hf-hub = { version = "0.3.2", optional = true }
aho-corasick = "1.1"
paste = "1.0.14"
macro_rules_attribute = "0.2.0"
thiserror = "1.0.49"
fancy-regex = { version = "0.13", optional = true }
getrandom = { version = "0.2.10" }
# Default features are off here; the `cpp` feature is switched on through the
# `esaxx_fast` feature below.
esaxx-rs = { version = "0.1.10", default-features = false, features = [] }
monostate = "0.1.12"

[features]
default = ["progressbar", "onig", "esaxx_fast"]
esaxx_fast = ["esaxx-rs/cpp"]
# Each of the next two features simply turns on one optional dependency.
progressbar = ["indicatif"]
http = ["hf-hub"]
# Pulls in the optional `fancy-regex` dependency and getrandom's `js` feature.
unstable_wasm = ["fancy-regex", "getrandom/js"]

[dev-dependencies]
criterion = "0.5"
tempfile = "3.10"
assert_approx_eq = "1.1"
tracing = "0.1"
tracing-subscriber = "0.3.18"

[profile.release]
lto = "fat"

# Only built when the `http` feature is enabled.
[[example]]
name = "encode_batch"
required-features = ["http"]