# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

[package]
edition = "2021"
rust-version = "1.75.0"
name = "text-splitter"
version = "0.18.1"
# NOTE(review): the author entry ends with a trailing space; an "<email>"
# part may have been stripped. Confirm against Cargo.toml.orig.
authors = ["Ben Brandt "]
build = false
exclude = [
    ".github/**",
    ".vscode/**",
    "/bindings/**",
    "/benches/output.txt",
    "/docs/**",
    "/tests/snapshots/**",
    "/tests/text_splitter_snapshots.rs",
    "/tests/inputs/**",
    "/tests/tokenizers/**",
    "*.yml",
    "*.yaml",
]
# Target auto-discovery is turned off; the [lib], [[test]] and [[bench]]
# targets are all declared explicitly below.
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "Split text into semantic chunks, up to a desired chunk size. Supports calculating length by characters and tokens, and is callable from Rust and Python."
readme = "README.md"
keywords = [
    "text",
    "split",
    "tokenizer",
    "nlp",
    "ai",
]
categories = ["text-processing"]
license = "MIT"
repository = "https://github.com/benbrandt/text-splitter"

[package.metadata.docs.rs]
all-features = true
rustdoc-args = [
    "--cfg",
    "docsrs",
]

# Sets opt-level = 3 for the packages matched by "*" in the dev profile.
[profile.dev.package."*"]
opt-level = 3

[lib]
name = "text_splitter"
path = "src/lib.rs"

[[test]]
name = "code"
path = "tests/code.rs"

[[test]]
name = "markdown"
path = "tests/markdown.rs"

[[test]]
name = "snapshots"
path = "tests/snapshots.rs"

[[test]]
name = "text_splitter"
path = "tests/text_splitter.rs"

# harness = false: this bench target does not use the default libtest harness.
[[bench]]
name = "chunk_size"
path = "benches/chunk_size.rs"
harness = false

[dependencies.ahash]
version = "0.8.7"

[dependencies.auto_enums]
version = "0.8"

[dependencies.either]
version = "1.6"

[dependencies.itertools]
version = "0.13"

[dependencies.once_cell]
version = "1.20"

[dependencies.pulldown-cmark]
version = "0.12"
optional = true
default-features = false

[dependencies.regex]
version = "1.10.6"

[dependencies.rust_tokenizers]
version = "8"
optional = true

[dependencies.strum]
version = "0.26"
features = ["derive"]

[dependencies.thiserror]
version = "1.0.65"

[dependencies.tiktoken-rs]
version = "0.6"
optional = true

[dependencies.tokenizers]
version = "0.20"
optional = true
default-features = false

[dependencies.tree-sitter]
version = "0.24"
optional = true

[dependencies.unicode-segmentation]
version = "1.12"

[dev-dependencies.cached-path]
version = "0.6"
features = ["rustls-tls"]
default-features = false

[dev-dependencies.dirs]
version = "5.0.1"

[dev-dependencies.divan]
version = "0.1.14"

[dev-dependencies.fake]
version = "2"

[dev-dependencies.insta]
version = "1.40"
features = [
    "glob",
    "yaml",
]

[dev-dependencies.more-asserts]
version = "0.3"

[dev-dependencies.rayon]
version = "1.10"

[dev-dependencies.tokenizers]
version = "0.20"
features = [
    "onig",
    "http",
]
default-features = false

[dev-dependencies.tree-sitter-rust]
version = "0.23"

# Each feature switches on one of the optional dependencies declared above
# through the `dep:` syntax; `tokenizers` additionally enables that crate's
# `onig` feature.
[features]
code = ["dep:tree-sitter"]
markdown = ["dep:pulldown-cmark"]
rust-tokenizers = ["dep:rust_tokenizers"]
tiktoken-rs = ["dep:tiktoken-rs"]
tokenizers = [
    "dep:tokenizers",
    "tokenizers/onig",
]

[lints.clippy]
cargo = "warn"
pedantic = "warn"

[lints.rust]
missing_debug_implementations = "warn"
missing_docs = "warn"

# The rustc lint groups below are all set to warn with priority = -1.
[lints.rust.future_incompatible]
level = "warn"
priority = -1

[lints.rust.nonstandard_style]
level = "warn"
priority = -1

[lints.rust.rust_2018_compatibility]
level = "warn"
priority = -1

[lints.rust.rust_2018_idioms]
level = "warn"
priority = -1

[lints.rust.rust_2021_compatibility]
level = "warn"
priority = -1

[lints.rust.rust_2024_compatibility]
level = "warn"
priority = -1

[lints.rust.unused]
level = "warn"
priority = -1