use num_cpus; use sorer::dataframe::*; use sorer::schema::infer_schema; use std::env; fn main() { // parse the arguments let args: Vec = env::args().collect(); let parsed_args = ProgArgs::from(args); match &parsed_args.option { Options::Help => { println!("{}", HELP_MSG); return; } _ => (), }; let schema = infer_schema(&parsed_args.file.clone()).unwrap(); match &parsed_args.option { Options::PrintColType(col_idx) => { if *col_idx >= schema.len() { println!( "Error: There are only {} fields in the schema", schema.len() ); } else { println!( "{}", format!("{:?}", schema[*col_idx]).to_uppercase() ); } return; } _ => (), }; let num_threads = num_cpus::get(); let dataframe = from_file( &parsed_args.file, schema.clone(), parsed_args.from, parsed_args.len, num_threads, ); // metadata about the parsed file let num_cols = dataframe.len(); let num_lines = if num_cols != 0 { match &dataframe[0] { Column::Bool(b) => b.len(), Column::Int(b) => b.len(), Column::Float(b) => b.len(), Column::String(b) => b.len(), } } else { 0 }; // Retrieve and return the requested data match parsed_args.option { Options::PrintColIdx(col_idx, row_idx) => { if col_idx >= num_cols { println!( "Error: There are only {} fields in the schema", num_cols ); } else if row_idx >= num_lines { println!("Error: Only {} lines were parsed", num_lines); } else { println!("{}", get(&dataframe, col_idx, row_idx)); } } Options::IsMissingIdx(col_idx, row_idx) => { if col_idx >= num_cols { println!( "Error: There are only {} fields in the schema", num_cols ); } else if row_idx >= num_lines { println!("Error: Only {} lines were parsed", num_lines); } else if get(&dataframe, col_idx, row_idx) == Data::Null { println!("1"); } else { println!("0"); } } _ => unreachable!(), } } /// This command line program is a simple example of usage of the `sorer` /// crate. It simply parses a file, answers one of three possible queries, and /// exits. The point of this program is simply to be an example and nothing /// more. #[derive(Debug, Clone)] pub(crate) struct ProgArgs { /// The name of the file to parse pub(crate) file: String, /// The starting byte offset in the file to begin parsing from pub(crate) from: usize, /// The ending byte offset in the file to stop parsing at pub(crate) len: usize, /// The query we will make about the parsed data pub(crate) option: Options, } // Enum to depict all the operations to be done on the binary file #[derive(Debug, Clone, Copy)] pub(crate) enum Options { /// Prints the data type of the column at the given column index PrintColType(usize), /// Prints the value at the given column, row index PrintColIdx(usize, usize), /// Prints "1" if the data at the given column, row index is 'missing' or /// "1" if it is present IsMissingIdx(usize, usize), /// If the user needed help Help, } // Parses command line arguments for this binary impl From> for ProgArgs { fn from(args: Vec) -> Self { let mut file = None; let mut from = None; let mut len = None; let mut opt: Option = None; for mut i in 1..args.len() { if args[i] == "-f" { i += 1; match file { None => file = Some(args[i].clone()), Some(a) => panic!(format!("File was already set to {}", a)), } } if args[i] == "--from" { i += 1; match from { None => from = Some(args[i].parse::().unwrap()), Some(a) => panic!(format!("From was already set to {}", a)), } } if args[i] == "--len" { i += 1; match len { None => len = Some(args[i].parse::().unwrap()), Some(a) => panic!(format!("Len was already set to {}", a)), } } if args[i] == "--print-col-type" { match opt { None => { i += 1; let n = args[i].parse::().unwrap(); opt = Some(Options::PrintColType(n)); } Some(a) => { panic!(format!("Option was already set to {:?}", a)) } } } if args[i] == "--print-col-idx" { match opt { None => { i += 1; let n1 = args[i].parse::().unwrap(); i += 1; let n2 = args[i].parse::().unwrap(); opt = Some(Options::PrintColIdx(n1, n2)); } Some(a) => { panic!(format!("Option was already set to {:?}", a)) } } } if args[i] == "--is_missing_idx" { match opt { None => { i += 1; let n1 = args[i].parse::().unwrap(); i += 1; let n2 = args[i].parse::().unwrap(); opt = Some(Options::IsMissingIdx(n1, n2)); } Some(a) => { panic!(format!("Option was already set to {:?}", a)) } } } if args[i] == "--help" || args[i] == "-h" { match opt { None => { opt = Some(Options::Help); } Some(a) => { panic!(format!("Option was already set to {:?}", a)) } } } } match (&file, &from, &len, &opt) { (Some(file), Some(from), Some(len), Some(option)) => ProgArgs { file: file.to_owned(), from: *from, len: *len, option: option.to_owned(), }, (Some(file), None, Some(len), Some(option)) => ProgArgs { file: file.to_owned(), from: 0, len: *len, option: option.to_owned(), }, (Some(file), None, None, Some(option)) => ProgArgs { file: file.to_owned(), from: 0, len: std::usize::MAX, option: option.to_owned(), }, (Some(file), Some(from), None, Some(option)) => ProgArgs { file: file.to_owned(), from: *from, len: std::usize::MAX, option: option.to_owned(), }, (_, _, _, Some(option @ Options::Help)) => ProgArgs { file: "".to_string(), from: 0, len: std::usize::MAX, option: option.to_owned(), }, _ => panic!("Missing required arguments"), } } } const HELP_MSG: &str = "sorer 1.0 Samedh G. & Thomas H. Different types of queries to show information about what data was parsed USAGE: sorer [OPTIONS] --file FLAGS: -h, --help Prints help information -V, --version Prints version information OPTIONS: -f, --file The name of the file to parse --from The starting byte offset in the file to begin parsing from [default: 0] --len The ending byte offset in the file to stop parsing at [default: 4294967295] SUBCOMMANDS: --is-missing-idx Prints '1' if the data at the given column, row index is 'missing' or '1' if it is present --print-col-idx Prints the value at the given column, row index --print-col-type Prints the data type of the column at the given column index ";