# Crate metadata published to crates.io. Layout: name and version first,
# the remaining keys in sorted order, description last.
[package]
name = "dom-content-extraction"
version = "0.3.4"
categories = ["data-structures", "web-programming", "text-processing"]
documentation = "https://docs.rs/dom-content-extraction/latest/dom_content_extraction/"
edition = "2021"
# Paths left out of the packaged crate (development-only files).
# NOTE(review): `benches/*` is excluded while this manifest also declares a
# `simple` bench target explicitly; confirm `cargo package` copes with the
# missing target file on the toolchain used for publishing.
exclude = [
    ".github/*",
    ".gitignore",
    ".tmuxp.yaml",
    "benches/*",
    "html/*",
    "notes.org",
    "tmp/*",
]
homepage = "https://github.com/oiwn/dom-content-extraction"
keywords = ["dom-text-density"]
license = "MPL-2.0"
readme = "README.md"
repository = "https://github.com/oiwn/dom-content-extraction"
description = "Rust implementation of Content extraction via text density paper"

# Release profile tuned for the smallest binary rather than peak speed.
# Cargo only honours profile settings from the workspace-root manifest, so
# these apply to local builds of this package and are ignored when the crate
# is consumed as a dependency.
[profile.release]
codegen-units = 1  # single codegen unit: slower compile, better optimization
lto = true         # "fat" link-time optimization across the whole crate graph
opt-level = "z"    # optimize for size
panic = "abort"    # abort on panic instead of unwinding
strip = true       # strip symbols from the final binary

# Runtime dependencies of the library. Bare version strings use Cargo's
# default caret requirement (e.g. "0.9" accepts >=0.9.0, <0.10.0).
[dependencies]
# NOTE(review): presumably kept in step with the ego-tree version that
# scraper itself depends on — confirm when bumping either crate.
ego-tree = "0.9"
scraper = "0.21"
thiserror = "2.0.3"

# Only compiled for tests, examples and benchmarks; downstream users of the
# library never pull these in. Kept in alphabetical order.
[dev-dependencies]
anyhow = "1"
clap = { version = "4.5", features = ["derive"] }
criterion = "0.5"
rayon = "1"
regex = "1"
zip = "2.2"

# Benchmark target named `simple`. With no explicit `path`, Cargo looks for it
# under `benches/` by convention.
[[bench]]
name = "simple"
# Disable the built-in libtest harness so the benchmark supplies its own
# `main` (presumably via criterion, listed in dev-dependencies — confirm).
harness = false

# Library target settings.
[lib]
# Do not run documentation examples as tests during `cargo test`.
doctest = false