use std::fs::File; mod deserialize; use deserialize::*; use orc_format::{error::Error, read, read::Column}; fn get_column(path: &str, column: u32) -> Result { // open the file, as expected. buffering this is not necessary - we // are very careful about the number of `read`s we perform. let mut f = File::open(path).expect("no file found"); // read the files' metadata let metadata = read::read_metadata(&mut f)?; // the next step is to identify which stripe we want to read. Let's say it is the first one. let stripe = 0; // Each stripe has a footer - we need to read it to extract the location of each column on it. let stripe_footer = read::read_stripe_footer(&mut f, &metadata, stripe, &mut vec![])?; // Finally, we read the column into `Column` read::read_stripe_column(&mut f, &metadata, stripe, stripe_footer, column, vec![]) } #[test] fn read_bool() -> Result<(), Error> { let column = get_column("test.orc", 2)?; let (a, b) = deserialize_bool_array(&column)?; assert_eq!(a, vec![true, true, false, true, true]); assert_eq!(b, vec![true, false, true, false]); let (_footer, _scratch) = column.into_inner(); Ok(()) } #[test] fn read_str_direct() -> Result<(), Error> { let column = get_column("test.orc", 3)?; let (a, b) = deserialize_str_array(&column)?; assert_eq!(a, vec![true, true, false, true, true]); assert_eq!(b, vec!["a", "cccccc", "ddd", "ee"]); Ok(()) } #[test] fn read_str_delta_plus() -> Result<(), Error> { let column = get_column("test.orc", 4)?; let (a, b) = deserialize_str_array(&column)?; assert_eq!(a, vec![true, true, false, true, true]); assert_eq!(b, vec!["a", "bb", "ccc", "ddd"]); Ok(()) } #[test] fn read_str_delta_minus() -> Result<(), Error> { let column = get_column("test.orc", 5)?; let (a, b) = deserialize_str_array(&column)?; assert_eq!(a, vec![true, true, false, true, true]); assert_eq!(b, vec!["ddd", "cc", "bb", "a"]); Ok(()) } #[test] fn read_str_short_repeat() -> Result<(), Error> { let column = get_column("test.orc", 6)?; let (a, b) = deserialize_str_array(&column)?; assert_eq!(a, vec![true, true, false, true, true]); assert_eq!(b, vec!["aaaaa", "bbbbb", "ccccc", "ddddd"]); Ok(()) } #[test] fn read_f32() -> Result<(), Error> { let column = get_column("test.orc", 1)?; let (a, b) = deserialize_f32_array(&column)?; assert_eq!(a, vec![true, true, false, true, true]); assert_eq!(b, vec![1.0, 2.0, 4.0, 5.0]); Ok(()) } #[test] fn read_int_short_repeated() -> Result<(), Error> { let column = get_column("test.orc", 7)?; let (a, b) = deserialize_int_array(&column)?; assert_eq!(a, vec![true, true, false, true, true]); assert_eq!(b, vec![5, 5, 5, 5]); Ok(()) } #[test] fn read_int_neg_short_repeated() -> Result<(), Error> { let column = get_column("test.orc", 8)?; let (a, b) = deserialize_int_array(&column)?; assert_eq!(a, vec![true, true, false, true, true]); assert_eq!(b, vec![-5, -5, -5, -5]); Ok(()) } #[test] fn read_int_delta() -> Result<(), Error> { let column = get_column("test.orc", 9)?; let (a, b) = deserialize_int_array(&column)?; assert_eq!(a, vec![true, true, false, true, true]); assert_eq!(b, vec![1, 2, 4, 5]); Ok(()) } #[test] fn read_int_neg_delta() -> Result<(), Error> { let column = get_column("test.orc", 10)?; let (a, b) = deserialize_int_array(&column)?; assert_eq!(a, vec![true, true, false, true, true]); assert_eq!(b, vec![5, 4, 2, 1]); Ok(()) } #[test] fn read_int_direct() -> Result<(), Error> { let column = get_column("test.orc", 11)?; let (a, b) = deserialize_int_array(&column)?; assert_eq!(a, vec![true, true, false, true, true]); assert_eq!(b, vec![1, 6, 3, 2]); Ok(()) } #[test] fn read_int_neg_direct() -> Result<(), Error> { let column = get_column("test.orc", 12)?; let (a, b) = deserialize_int_array(&column)?; assert_eq!(a, vec![true, true, false, true, true]); assert_eq!(b, vec![-1, -6, -3, -2]); Ok(()) } #[test] fn read_bigint_direct() -> Result<(), Error> { let column = get_column("test.orc", 13)?; let (a, b) = deserialize_int_array(&column)?; assert_eq!(a, vec![true, true, false, true, true]); assert_eq!(b, vec![1, 6, 3, 2]); Ok(()) } #[test] fn read_bigint_neg_direct() -> Result<(), Error> { let column = get_column("test.orc", 14)?; let (a, b) = deserialize_int_array(&column)?; assert_eq!(a, vec![true, true, false, true, true]); assert_eq!(b, vec![-1, -6, -3, -2]); Ok(()) } #[test] fn read_bigint_other() -> Result<(), Error> { let column = get_column("test.orc", 15)?; let (a, b) = deserialize_int_array(&column)?; assert_eq!(a, vec![true, true, true, true, true]); assert_eq!(b, vec![5, -5, 1, 5, 5]); Ok(()) } #[test] fn read_boolean_long() -> Result<(), Error> { let column = get_column("long_bool.orc", 1)?; let (a, b) = deserialize_bool_array(&column)?; assert_eq!(a, vec![true; 32]); assert_eq!(b, vec![true; 32]); Ok(()) } #[test] fn read_bool_compressed() -> Result<(), Error> { let column = get_column("long_bool_gzip.orc", 1)?; let (a, b) = deserialize_bool_array(&column)?; assert_eq!(a, vec![true; 32]); assert_eq!(b, vec![true; 32]); Ok(()) } #[test] fn read_string_long() -> Result<(), Error> { let column = get_column("string_long.orc", 1)?; let (a, b) = deserialize_str_array(&column)?; assert_eq!(a, vec![true; 64]); assert_eq!( b, vec!["abcd", "efgh"] .into_iter() .cycle() .take(64) .collect::>() ); Ok(()) } #[test] fn read_string_dict() -> Result<(), Error> { let column = get_column("string_dict.orc", 1)?; let (a, b) = deserialize_str_array(&column)?; assert_eq!(a, vec![true; 64]); assert_eq!( b, vec!["abc", "efgh"] .into_iter() .cycle() .take(64) .collect::>() ); Ok(()) } #[test] fn read_string_dict_gzip() -> Result<(), Error> { let column = get_column("string_dict_gzip.orc", 1)?; let (a, b) = deserialize_str_array(&column)?; assert_eq!(a, vec![true; 64]); assert_eq!( b, vec!["abc", "efgh"] .into_iter() .cycle() .take(64) .collect::>() ); Ok(()) } #[test] fn read_string_long_long() -> Result<(), Error> { let column = get_column("string_long_long.orc", 1)?; let (a, b) = deserialize_str_array(&column)?; assert_eq!(a.len(), 10_000); assert_eq!(a, vec![true; 10_000]); assert_eq!(b.len(), 10_000); assert_eq!( b, vec!["abcd", "efgh"] .into_iter() .cycle() .take(10_000) .collect::>() ); Ok(()) } #[test] fn read_string_long_long_gzip() -> Result<(), Error> { let column = get_column("string_long_long_gzip.orc", 1)?; let (a, b) = deserialize_str_array(&column)?; assert_eq!(a.len(), 10_000); assert_eq!(a, vec![true; 10_000]); assert_eq!(b.len(), 10_000); assert_eq!( b, vec!["abcd", "efgh"] .into_iter() .cycle() .take(10_000) .collect::>() ); Ok(()) } #[test] fn read_f32_long_long_gzip() -> Result<(), Error> { let column = get_column("f32_long_long_gzip.orc", 1)?; let (a, b) = deserialize_f32_array(&column)?; assert_eq!(a.len(), 1_000_000); assert_eq!(a, vec![true; 1_000_000]); assert_eq!(b.len(), 1_000_000); Ok(()) } #[test] fn read_string_increase() -> Result<(), Error> { let column = get_column("test.orc", 16)?; let (a, b) = deserialize_str_array(&column)?; assert_eq!(a, vec![true; 5]); assert_eq!(b, vec!["a", "bb", "ccc", "dddd", "eeeee"]); Ok(()) } #[test] fn read_string_decrease() -> Result<(), Error> { let column = get_column("test.orc", 17)?; let (a, b) = deserialize_str_array(&column)?; assert_eq!(a, vec![true; 5]); assert_eq!(b, vec!["eeeee", "dddd", "ccc", "bb", "a"]); Ok(()) }