/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /// Compress a set of u32 integers and output the (approximate) size of compressed bytes. /// # Arguments /// /// * `filepath` - The input file containing integers in range [0, max(a_i)]. /// ```txt /// a_1 \n /// ... /// a_n \n /// ``` /// The universe size will be the maximum value of the input file. /// /// # Usage /// /// To run this example, use the following command: /// ```bash /// cargo run --features builder --example uint_no_partition /// ``` /// use clubcard::*; use sha2::{Digest, Sha256}; use std::cmp::max; use std::collections::HashSet; use std::{env, fs, io, process}; use std::io::BufRead; use clubcard::builder::{ApproximateRibbon, ClubcardBuilder, ExactRibbon}; struct Universe { bound: u32, } const W: usize = 4; const BLOCK: [u8; 0] = []; struct Int { val: [u8; 4], // u32 included: bool, // whether it's in the subset } impl Int { fn new(num: u32, included: bool) -> Int { Int { val: num.to_le_bytes(), included, } } } impl ApproximateSizeOf for Universe {} impl AsQuery for Int { fn as_query(&self, m: usize) -> Equation { let mut digest = [0u8; 32]; let mut hasher = Sha256::new(); hasher.update(&self.val); hasher.finalize_into((&mut digest).into()); let mut a = [0u64; W]; // block of columns for (i, x) in digest .chunks_exact(8) .map(|x| TryInto::<[u8; 8]>::try_into(x).unwrap()) .map(u64::from_le_bytes) .enumerate() { a[i] = x; } a[0] |= 1; let s = (a[W - 1] as usize) % max(1, m); // let b = if self.included { 0 } else { 1 }; Equation::inhomogeneous(s, a, b) } fn block(&self) -> &[u8] { &BLOCK } fn discriminant(&self) -> &[u8] { &self.val } } impl Filterable for Int { fn included(&self) -> bool { self.included } } impl Queryable for Int { type UniverseMetadata = Universe; type PartitionMetadata = (); fn in_universe(&self, meta: &Self::UniverseMetadata) -> bool { u32::from_le_bytes(self.val) < meta.bound } } // Parse the file into a set of integers, return the hash set and maximum element + 1 in set fn parse_args() -> (u32, HashSet) { let args: Vec = env::args().collect(); if args.len() < 2 { println!("Please specify the name of input file."); process::exit(1); } let file_path = &args[1]; let file = fs::File::open(file_path).expect("Error opening the file."); let mut lines = io::BufReader::new(file).lines(); let mut numbers = HashSet::new(); let mut max_val = 0; while let Some(Ok(line)) = lines.next() { let number = line.trim().parse::().expect("Failed to parse 32-bit integer"); if number > max_val { max_val = number; } numbers.insert(number); } (max_val + 1, numbers) } fn main() { let (universe_size, numbers) = parse_args(); let mut clubcard_builder = ClubcardBuilder::new(); let mut approx_builder = clubcard_builder.new_approx_builder(&BLOCK); // Build approx filter for num in &numbers { let int = Int::new(num.clone(), true); approx_builder.insert(int) } approx_builder.set_universe_size(universe_size.try_into().unwrap()); clubcard_builder.collect_approx_ribbons(vec![ApproximateRibbon::from(approx_builder)]); // Build exact filter let mut exact_builder = clubcard_builder.new_exact_builder(&BLOCK); for num in 0..universe_size { let int = Int::new(num.clone(), numbers.contains(&num)); exact_builder.insert(int); } clubcard_builder.collect_exact_ribbons(vec![ExactRibbon::from(exact_builder)]); let clubcard = clubcard_builder.build::( Universe { bound: universe_size }, (), ); println!("Generated {}", clubcard); println!("The size of compressed set (of {} elements) with max value {} is {} bytes.", numbers.len(), universe_size - 1, clubcard.approximate_size_of()); }