# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Cargo manifest for the `parquet` crate. Most package metadata is inherited
# from the workspace; `rust-version` is pinned in this manifest.
[package]
name = "parquet"
version = { workspace = true }
license = { workspace = true }
description = "Apache Parquet implementation in Rust"
homepage = { workspace = true }
repository = { workspace = true }
authors = { workspace = true }
keywords = ["arrow", "parquet", "hadoop"]
readme = "README.md"
edition = { workspace = true }
rust-version = "1.70.0"

# ahash is configured per target: `compile-time-rng` on wasm32 and
# `runtime-rng` on every other architecture.
[target.'cfg(target_arch = "wasm32")'.dependencies]
ahash = { version = "0.8", default-features = false, features = ["compile-time-rng"] }

[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }

[dependencies]
# The arrow-* crates are optional; they are switched on by the `arrow`
# feature (and `arrow-csv` by the `cli` feature) declared under [features].
arrow-array = { workspace = true, optional = true }
arrow-buffer = { workspace = true, optional = true }
arrow-cast = { workspace = true, optional = true }
arrow-csv = { workspace = true, optional = true }
arrow-data = { workspace = true, optional = true }
arrow-schema = { workspace = true, optional = true }
arrow-select = { workspace = true, optional = true }
arrow-ipc = { workspace = true, optional = true }
# Intentionally not a path dependency as object_store is released separately
object_store = { version = "0.11.0", default-features = false, optional = true }
bytes = { version = "1.1", default-features = false, features = ["std"] }
thrift = { version = "0.17", default-features = false }
snap = { version = "1.0", default-features = false, optional = true }
brotli = { version = "7.0", default-features = false, features = ["std"], optional = true }
flate2 = { version = "1.0", default-features = false, features = ["rust_backend"], optional = true }
lz4_flex = { version = "0.11", default-features = false, features = ["std", "frame"], optional = true }
zstd = { version = "0.13", optional = true, default-features = false }
chrono = { workspace = true }
num = { version = "0.4", default-features = false }
num-bigint = { version = "0.4", default-features = false }
base64 = { version = "0.22", default-features = false, features = ["std"], optional = true }
clap = { version = "4.1", default-features = false, features = ["std", "derive", "env", "help", "error-context", "usage"], optional = true }
serde = { version = "1.0", default-features = false, features = ["derive"], optional = true }
serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true }
seq-macro = { version = "0.3", default-features = false }
futures = { version = "0.3", default-features = false, features = ["std"], optional = true }
tokio = { version = "1.0", optional = true, default-features = false, features = ["macros", "rt", "io-util"] }
hashbrown = { version = "0.14", default-features = false }
twox-hash = { version = "1.6", default-features = false }
paste = { version = "1.0" }
half = { version = "2.1", default-features = false, features = ["num-traits"] }
sysinfo = { version = "0.32.0", optional = true, default-features = false, features = ["system"] }
crc32fast = { version = "1.4.2", optional = true, default-features = false }

[dev-dependencies]
# Several crates that are optional in [dependencies] are listed again here
# without `optional`, so they are always present for dev targets.
base64 = { version = "0.22", default-features = false, features = ["std"] }
criterion = { version = "0.5", default-features = false }
snap = { version = "1.0", default-features = false }
tempfile = { version = "3.0", default-features = false }
brotli = { version = "7.0", default-features = false, features = ["std"] }
flate2 = { version = "1.0", default-features = false, features = ["rust_backend"] }
lz4_flex = { version = "0.11", default-features = false, features = ["std", "frame"] }
zstd = { version = "0.13", default-features = false }
serde_json = { version = "1.0", features = ["std"], default-features = false }
arrow = { workspace = true, features = ["ipc", "test_utils", "prettyprint", "json"] }
tokio = { version = "1.0", default-features = false, features = ["macros", "rt", "io-util", "fs"] }
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
object_store = { version = "0.11.0", default-features = false, features = ["azure"] }

# TODO: temporary to fix parquet wasm build
# upstream issue: https://github.com/gyscos/zstd-rs/issues/269
[target.'cfg(target_family = "wasm")'.dependencies]
zstd-sys = { version = ">=2.0.0, <2.0.14", optional = true, default-features = false }

[target.'cfg(target_family = "wasm")'.dev-dependencies]
zstd-sys = { version = ">=2.0.0, <2.0.14", default-features = false }

[package.metadata.docs.rs]
all-features = true

[features]
default = ["arrow", "snap", "brotli", "flate2", "lz4", "zstd", "base64"]
# Enable lz4
lz4 = ["lz4_flex"]
# Enable arrow reader/writer APIs
arrow = ["base64", "arrow-array", "arrow-buffer", "arrow-cast", "arrow-data", "arrow-schema", "arrow-select", "arrow-ipc"]
# Enable CLI tools
cli = ["json", "base64", "clap", "arrow-csv", "serde"]
# Enable JSON APIs
json = ["serde_json", "base64"]
# Enable internal testing APIs
test_common = ["arrow/test_utils"]
# Experimental, unstable functionality primarily used for testing
experimental = []
# Enable async APIs
async = ["futures", "tokio"]
# Enable object_store integration
object_store = ["dep:object_store", "async"]
# Group Zstd dependencies
zstd = ["dep:zstd", "zstd-sys"]
# Display memory in examples/write_parquet.rs
sysinfo = ["dep:sysinfo"]
# Verify 32-bit CRC checksum when decoding parquet pages
crc = ["dep:crc32fast"]

# Examples. Each one is only built when its `required-features` are enabled.
[[example]]
name = "external_metadata"
required-features = ["arrow", "async"]
path = "./examples/external_metadata.rs"

[[example]]
name = "read_parquet"
required-features = ["arrow"]
path = "./examples/read_parquet.rs"

[[example]]
name = "write_parquet"
required-features = ["cli", "sysinfo"]
path = "./examples/write_parquet.rs"

[[example]]
name = "async_read_parquet"
required-features = ["arrow", "async"]
path = "./examples/async_read_parquet.rs"

[[example]]
name = "read_with_rowgroup"
required-features = ["arrow", "async"]
path = "./examples/read_with_rowgroup.rs"

# Integration tests that need the `arrow` feature.
[[test]]
name = "arrow_writer_layout"
required-features = ["arrow"]

[[test]]
name = "arrow_reader"
required-features = ["arrow"]
path = "./tests/arrow_reader/mod.rs"

# Command-line binaries; all of them are gated on the `cli` feature.
[[bin]]
name = "parquet-read"
required-features = ["cli"]

[[bin]]
name = "parquet-rewrite"
required-features = ["arrow", "cli"]

[[bin]]
name = "parquet-schema"
required-features = ["cli"]

[[bin]]
name = "parquet-rowcount"
required-features = ["cli"]

[[bin]]
name = "parquet-concat"
required-features = ["cli"]

[[bin]]
name = "parquet-fromcsv"
required-features = ["arrow", "cli", "snap", "brotli", "flate2", "lz4", "zstd"]

[[bin]]
name = "parquet-show-bloom-filter"
required-features = ["cli"]

[[bin]]
name = "parquet-layout"
required-features = ["cli"]

[[bin]]
name = "parquet-index"
required-features = ["cli"]

# Benchmarks. `harness = false` opts out of the built-in libtest harness
# (presumably in favour of the `criterion` dev-dependency - confirm against
# the bench sources).
[[bench]]
name = "arrow_writer"
required-features = ["arrow"]
harness = false

[[bench]]
name = "arrow_reader"
required-features = ["arrow", "test_common", "experimental"]
harness = false

[[bench]]
name = "arrow_statistics"
required-features = ["arrow"]
harness = false

[[bench]]
name = "compression"
required-features = ["experimental", "default"]
harness = false

[[bench]]
name = "encoding"
required-features = ["experimental", "default"]
harness = false

[[bench]]
name = "metadata"
harness = false

# The library target itself is excluded from `cargo bench`.
[lib]
bench = false