| Crates.io | tarstream |
| lib.rs | tarstream |
| version | 0.1.0 |
| created_at | 2026-01-03 13:03:00.214268+00 |
| updated_at | 2026-01-03 13:03:00.214268+00 |
| description | tarstream download and extract on the fly |
| homepage | https://1000x.sh |
| repository | https://github.com/ax-x2/tarstream |
| max_upload_size | |
| id | 2020030 |
| size | 80,415 |
WIP with breaking changes.

A Rust library for streaming large tar archives during download and extracting them on the fly, with zero-copy chunk processing.

Add it to `Cargo.toml`:

```toml
[dependencies]
tarstream = "0.1"
tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
```
Download an archive and write each file to disk as it streams in:

```rust
use tarstream::*;
use std::fs::File;
use std::io::Write;

struct Extractor {
    current_file: Option<File>,
}

impl FileCallback for Extractor {
    fn on_file_start(&mut self, metadata: &FileMetadata) -> Result<CallbackAction> {
        // Open an output file for regular entries; directories carry no data.
        if !metadata.is_directory {
            self.current_file = Some(File::create(&metadata.path)?);
        }
        Ok(CallbackAction::Continue)
    }

    fn on_file_chunk(&mut self, chunk: &[u8]) -> Result<CallbackAction> {
        // Write each decompressed chunk as it arrives.
        if let Some(file) = &mut self.current_file {
            file.write_all(chunk)?;
        }
        Ok(CallbackAction::Continue)
    }

    fn on_file_end(&mut self, _: &FileMetadata) -> Result<CallbackAction> {
        self.current_file = None;
        Ok(CallbackAction::Continue)
    }
}

#[tokio::main]
async fn main() -> Result<()> {
    let extractor = TarStreamExtractor::new()
        .with_compression(CompressionType::Auto);
    let mut callback = Extractor { current_file: None };
    extractor.extract_from_url(
        "https://example.com/archive.tar.gz",
        &mut callback,
    ).await?;
    Ok(())
}
```
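The `Extractor` above writes to `metadata.path` verbatim. Before trusting paths from a remote archive, it is worth rejecting absolute paths and `..` components; a minimal check (this helper is illustrative, not part of tarstream):

```rust
use std::path::{Component, Path};

// Accept only relative paths with no `..` components, so a malicious
// archive entry cannot escape the output directory.
fn is_safe_entry(path: &str) -> bool {
    let p = Path::new(path);
    p.is_relative() && p.components().all(|c| !matches!(c, Component::ParentDir))
}
```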
Skip everything except `.jpg` entries by returning `CallbackAction::Skip` from `on_file_start`:

```rust
use tarstream::*;

struct Filter {
    skip_current: bool,
}

impl FileCallback for Filter {
    fn on_file_start(&mut self, metadata: &FileMetadata) -> Result<CallbackAction> {
        if metadata.path.ends_with(".jpg") {
            println!("found: {}", metadata.path);
            self.skip_current = false;
            Ok(CallbackAction::Continue)
        } else {
            self.skip_current = true;
            Ok(CallbackAction::Skip)
        }
    }

    fn on_file_chunk(&mut self, chunk: &[u8]) -> Result<CallbackAction> {
        if !self.skip_current {
            // process chunk
        }
        Ok(CallbackAction::Continue)
    }

    fn on_file_end(&mut self, _: &FileMetadata) -> Result<CallbackAction> {
        Ok(CallbackAction::Continue)
    }
}
```
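A minimal driver for `Filter`, wired up the same way as the full example above (the URL is a placeholder):

```rust
#[tokio::main]
async fn main() -> Result<()> {
    let extractor = TarStreamExtractor::new()
        .with_compression(CompressionType::Auto);
    let mut callback = Filter { skip_current: true };
    extractor
        .extract_from_url("https://example.com/photos.tar.gz", &mut callback)
        .await?;
    Ok(())
}
```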
Run the bundled examples:

```bash
# download and extract the whole archive
cargo run --example basic_download https://example.com/archive.tar.gz ./output

# extract only jpg images
cargo run --example custom_filter https://example.com/data.tar.gz '*.jpg' ./images

# extract json files from the data directory
cargo run --example custom_filter https://example.com/logs.tar.gz 'data/*.json' ./output

# extract all csv files anywhere in the archive
cargo run --example custom_filter https://example.com/archive.tar.gz '**/*.csv' ./csv
```
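The `custom_filter` example presumably turns the shell glob into a path test inside `on_file_start`; a sketch of that matching using the `glob` crate (an assumed extra dependency, not part of tarstream):

```rust
use glob::Pattern;

// Hypothetical helper: true when a tar entry path matches the glob,
// e.g. path_matches("**/*.csv", "data/2024/report.csv") == true.
fn path_matches(pattern: &str, path: &str) -> bool {
    Pattern::new(pattern).map(|p| p.matches(path)).unwrap_or(false)
}
```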
Zero-copy: decompressed data is handed to callbacks as `&[u8]` slices. The extractor is configured through a builder:

```rust
let extractor = TarStreamExtractor::new()
    .with_compression(CompressionType::Auto)     // auto-detect or specify
    .with_buffer_size(64 * 1024)                 // 64 KiB chunks
    .with_max_file_size(1024 * 1024 * 1024);     // 1 GiB limit

let stats = extractor.extract_from_url(url, &mut callback).await?;
```
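When the download URL has no informative extension, the format can be pinned explicitly instead of relying on `Auto` (a sketch using only the builder calls shown above; the supported variants are listed below):

```rust
// Pin the format rather than detecting it from the URL; useful for
// endpoints like /archive?id=42 that carry no file extension.
let extractor = TarStreamExtractor::new()
    .with_compression(CompressionType::Gzip);
```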
Callback actions:

- `CallbackAction::Continue` - process this chunk and continue
- `CallbackAction::Skip` - skip the rest of the current file
- `CallbackAction::Stop` - stop the entire extraction

Compression types:

- `CompressionType::None` - plain tar
- `CompressionType::Gzip` - .tar.gz
- `CompressionType::Bzip2` - .tar.bz2
- `CompressionType::Xz` - .tar.xz
- `CompressionType::Auto` - detect from the URL extension

License: MIT