use parquet2::encoding::get_length; use parquet2::error::Error; #[derive(Debug)] pub struct BinaryPageDict { values: Vec>, } impl BinaryPageDict { pub fn new(values: Vec>) -> Self { Self { values } } #[inline] pub fn value(&self, index: usize) -> Result<&[u8], Error> { self.values .get(index) .map(|x| x.as_ref()) .ok_or_else(|| Error::OutOfSpec("invalid index".to_string())) } } fn read_plain(bytes: &[u8], length: usize) -> Result>, Error> { let mut bytes = bytes; let mut values = Vec::new(); for _ in 0..length { let slot_length = get_length(bytes).unwrap(); bytes = &bytes[4..]; if slot_length > bytes.len() { return Err(Error::OutOfSpec( "The string on a dictionary page has a length that is out of bounds".to_string(), )); } let (result, remaining) = bytes.split_at(slot_length); values.push(result.to_vec()); bytes = remaining; } Ok(values) } pub fn read(buf: &[u8], num_values: usize) -> Result { let values = read_plain(buf, num_values)?; Ok(BinaryPageDict::new(values)) }