# Cargo manifest for the `dom-content-extraction` crate.

[package]
name = "dom-content-extraction"
version = "0.3.4"
categories = ["data-structures", "web-programming", "text-processing"]
documentation = "https://docs.rs/dom-content-extraction/latest/dom_content_extraction/"
edition = "2021"
# Paths left out of the packaged crate.
exclude = [
    "tmp/*",
    "benches/*",
    "html/*",
    ".github/*",
    ".gitignore",
    ".tmuxp.yaml",
    "notes.org",
]
homepage = "https://github.com/oiwn/dom-content-extraction"
keywords = ["dom-text-density"]
license = "MPL-2.0"
readme = "README.md"
repository = "https://github.com/oiwn/dom-content-extraction"
description = "Rust implementation of Content extraction via text density paper"

[lib]
# Documentation examples are not compiled or run as tests.
doctest = false

[[bench]]
name = "simple"
# Opt out of the built-in bench harness; the benchmark supplies its own
# entry point (presumably via Criterion, listed in dev-dependencies).
harness = false

[dependencies]
ego-tree = "0.9"
scraper = "0.21"
thiserror = "2.0.3"

[dev-dependencies]
anyhow = "1"
clap = { version = "4.5", features = ["derive"] }
criterion = "0.5"
rayon = "1"
regex = "1"
zip = "2.2"

# Release builds favour small binaries over build time.
[profile.release]
opt-level = "z"    # optimize for size
lto = true
codegen-units = 1
strip = true
panic = "abort"