# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

[package]
edition = "2021"
name = "ungoliant"
version = "2.0.0"
# NOTE(review): this single entry names two people and has stray spaces where
# e-mail addresses would normally sit; they look stripped. Confirm against
# Cargo.toml.orig before changing the value.
authors = ["Julien Abadji , Pedro J. Ortiz "]
description = "The pipeline for the OSCAR corpus."
homepage = "https://github.com/oscar-project/ungoliant"
readme = "README.md"
license = "Apache-2.0"
repository = "https://github.com/oscar-project/ungoliant"

# Benchmark targets. `harness = false` opts each one out of the built-in
# libtest bench harness (presumably because they use criterion, which is
# listed under dev-dependencies; confirm against the bench sources).
[[bench]]
name = "fasttext_bench"
harness = false

[[bench]]
name = "pipeline_bench_rayon"
harness = false

[[bench]]
name = "annotate_noisy"
harness = false

[dependencies.avro-rs]
version = "0.13.0"
features = ["snappy"]

[dependencies.bytes]
version = "1"

[dependencies.csv]
version = "1.1.6"

# Optional: only compiled in when the `kenlm` feature (see [features]) is on.
[dependencies.ctclib-pp]
version = "0.2.0"
optional = true

[dependencies.env_logger]
version = "0.8.3"

[dependencies.fasttext]
version = "0.7.6"

[dependencies.flate2]
version = "1.0.20"

[dependencies.futures]
version = "0.3"

[dependencies.futures-core]
version = "0.3"

[dependencies.futures-util]
version = "0.3"

[dependencies.glob]
version = "0.3.0"

[dependencies.itertools]
version = "0.10.0"

[dependencies.language-tags]
version = "0.3.2"

[dependencies.lazy_static]
version = "1.4.0"

[dependencies.log]
version = "0.4.14"

[dependencies.oscar-io]
version = "0.2.2"

[dependencies.oxilangtag]
version = "0.1.3"
features = ["serde"]

[dependencies.rand]
version = "0.8.4"

[dependencies.rayon]
version = "1"

# Default features are switched off, so only the features listed here are
# enabled for reqwest.
[dependencies.reqwest]
version = "0.11"
features = [
    "rustls-tls",
    "blocking",
    "stream",
]
default-features = false

[dependencies.runiq-lib]
version = "1.2.2"

[dependencies.schemars]
version = "0.8.3"

[dependencies.serde]
version = "1"
features = ["derive"]

[dependencies.serde_json]
version = "1"

[dependencies.sha2]
version = "0.9.5"

[dependencies.structopt]
version = "0.3.21"

[dependencies.tlsh-fixed]
version = "0.1.1"

[dependencies.tokio]
version = "1"
features = ["full"]

[dependencies.tokio-util]
version = "0.6.6"
features = ["compat"]

[dependencies.twox-hash]
version = "1.6"

[dependencies.unic-ucd]
version = "0.9.0"

[dependencies.unicode-script]
version = "0.5.4"

[dependencies.unicode-segmentation]
version = "1.8.0"

[dependencies.url]
version = "2.2.2"

[dependencies.ut1_blocklist]
version = "0.3.0"

[dependencies.warc]
version = "0.3.0"
features = ["with_serde"]

[dev-dependencies.criterion]
version = "0.3"

[dev-dependencies.rand_distr]
version = "0.4.2"

[dev-dependencies.serial_test]
version = "0.5.1"

[dev-dependencies.sha-1]
version = "0.9"

[dev-dependencies.tempfile]
version = "3.2.0"

[dev-dependencies.test-log]
version = "0.2.11"

[features]
# `kenlm` pulls in the optional `ctclib-pp` dependency declared above.
kenlm = ["dep:ctclib-pp"]