# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

[package]
edition = "2018"
name = "tokenizers"
version = "0.20.3"
# NOTE(review): each entry ends with a trailing space, which suggests an
# `<email>` part was stripped at some point - confirm against Cargo.toml.orig.
authors = [
    "Anthony MOI ",
    "Nicolas Patry ",
]
build = false
exclude = [
    "rust-toolchain",
    "target/*",
    "Cargo.lock",
    "benches/*.txt",
    "benches/*.json",
    "data/*",
]
# Target auto-discovery is switched off; every example, test and bench is
# declared explicitly in the tables below.
autobins = false
autoexamples = false
autotests = false
autobenches = false
# NOTE(review): the whitespace inside this string is kept exactly as found.
# The `"""` delimiters suggest it originally spanned several lines - confirm
# the line breaks against Cargo.toml.orig.
description = """ Provides an implementation of today's most used tokenizers, with a focus on performances and versatility. """
homepage = "https://github.com/huggingface/tokenizers"
documentation = "https://docs.rs/tokenizers/"
readme = "README.md"
keywords = [
    "tokenizer",
    "NLP",
    "huggingface",
    "BPE",
    "WordPiece",
]
license = "Apache-2.0"
repository = "https://github.com/huggingface/tokenizers"

[profile.release]
lto = "fat"

[lib]
name = "tokenizers"
path = "src/lib.rs"
bench = false

# Requires the `http` feature (see [features]).
[[example]]
name = "encode_batch"
path = "examples/encode_batch.rs"
required-features = ["http"]

[[example]]
name = "serialization"
path = "examples/serialization.rs"

[[test]]
name = "added_tokens"
path = "tests/added_tokens.rs"

[[test]]
name = "documentation"
path = "tests/documentation.rs"

[[test]]
name = "from_pretrained"
path = "tests/from_pretrained.rs"

[[test]]
name = "offsets"
path = "tests/offsets.rs"

[[test]]
name = "serialization"
path = "tests/serialization.rs"

[[test]]
name = "training"
path = "tests/training.rs"

[[test]]
name = "unigram"
path = "tests/unigram.rs"

# All benches set `harness = false`, i.e. they opt out of the default libtest
# harness and provide their own entry point.
[[bench]]
name = "bert_benchmark"
path = "benches/bert_benchmark.rs"
harness = false

[[bench]]
name = "bpe_benchmark"
path = "benches/bpe_benchmark.rs"
harness = false

[[bench]]
name = "layout_benchmark"
path = "benches/layout_benchmark.rs"
harness = false

# Requires the `http` feature (see [features]).
[[bench]]
name = "llama3"
path = "benches/llama3.rs"
harness = false
required-features = ["http"]

[[bench]]
name = "unigram_benchmark"
path = "benches/unigram_benchmark.rs"
harness = false

[dependencies.aho-corasick]
version = "1.1"

[dependencies.derive_builder]
version = "0.20"

# Default features are off; the `esaxx_fast` feature below turns on
# `esaxx-rs/cpp`.
[dependencies.esaxx-rs]
version = "0.1.10"
features = []
default-features = false

# Optional: pulled in by the `unstable_wasm` feature.
[dependencies.fancy-regex]
version = "0.13"
optional = true

[dependencies.getrandom]
version = "0.2.10"

# Optional: pulled in by the `http` feature.
[dependencies.hf-hub]
version = "0.3.2"
optional = true

# Optional: pulled in by the `progressbar` feature.
[dependencies.indicatif]
version = "0.17"
optional = true

[dependencies.itertools]
version = "0.12"

[dependencies.lazy_static]
version = "1.4"

[dependencies.log]
version = "0.4"

[dependencies.macro_rules_attribute]
version = "0.2.0"

[dependencies.monostate]
version = "0.1.12"

# Optional: listed in the `default` feature set.
[dependencies.onig]
version = "6.4"
optional = true
default-features = false

[dependencies.paste]
version = "1.0.14"

[dependencies.rand]
version = "0.8"

[dependencies.rayon]
version = "1.10"

[dependencies.rayon-cond]
version = "0.3"

[dependencies.regex]
version = "1.10"

[dependencies.regex-syntax]
version = "0.8"

[dependencies.serde]
version = "1.0"
features = ["derive"]

[dependencies.serde_json]
version = "1.0"

[dependencies.spm_precompiled]
version = "0.1.3"

[dependencies.thiserror]
version = "1.0.49"

[dependencies.unicode-normalization-alignments]
version = "0.1"

[dependencies.unicode-segmentation]
version = "1.11"

[dependencies.unicode_categories]
version = "0.1"

[dev-dependencies.assert_approx_eq]
version = "1.1"

[dev-dependencies.criterion]
version = "0.5"

[dev-dependencies.tempfile]
version = "3.10"

[dev-dependencies.tracing]
version = "0.1"

[dev-dependencies.tracing-subscriber]
version = "0.3.18"

[features]
default = [
    "progressbar",
    "onig",
    "esaxx_fast",
]
esaxx_fast = ["esaxx-rs/cpp"]
http = ["hf-hub"]
progressbar = ["indicatif"]
unstable_wasm = [
    "fancy-regex",
    "getrandom/js",
]