Crates.io | rust_warc |
lib.rs | rust_warc |
version | 1.1.0 |
source | src |
created_at | 2019-05-13 12:51:56.892716 |
updated_at | 2020-02-17 20:02:51.046688 |
description | A high performance and easy to use Web Archive (WARC) file reader |
homepage | |
repository | https://github.com/orottier/rust-warc |
max_upload_size | |
id | 133987 |
size | 14,921 |
A high-performance, easy-to-use Web Archive (WARC) file reader
use rust_warc::WarcReader;
use std::io;
/// Example driver: reads WARC records from standard input, then prints how
/// many of them have a `WARC-Type` header equal to `response` and the
/// combined length of their content in MiB.
///
/// Panics on the first item that is an error, since each item is unwrapped.
fn main() {
    // Standard input is used for this example, but any BufRead will do.
    let stdin = io::stdin();
    let reader = WarcReader::new(stdin.lock());

    let mut matched_records = 0;
    let mut matched_bytes = 0;

    for entry in reader {
        // An item could be an IO/malformed error; the example simply panics.
        let rec = entry.unwrap();

        // Header names are case insensitive.
        let is_response = rec.header.get(&"WARC-Type".into()) == Some(&"response".into());
        if !is_response {
            continue;
        }

        matched_records += 1;
        matched_bytes += rec.content.len();
    }

    println!("response records: {}", matched_records);
    // Shifting right by 20 bits converts a byte count into whole MiB.
    println!("response size: {} MiB", matched_bytes >> 20);
}