//! A module exposing utilities for encoding and decoding Huffman-coded octet //! strings, under the Huffman code defined by HPACK. //! (HPACK-draft-10, Appendix B) use std::collections::HashMap; /// Represents a symbol that can be inserted into a Huffman-encoded octet /// string. enum HuffmanCodeSymbol { /// Any octet is a valid symbol Symbol(u8), /// A special symbol represents the end of the string EndOfString, } impl HuffmanCodeSymbol { pub fn new(symbol: usize) -> HuffmanCodeSymbol { if symbol == 256 { HuffmanCodeSymbol::EndOfString } else { // It is safe to downcast since now we know that the value // is in the half-open interval [0, 256) HuffmanCodeSymbol::Symbol(symbol as u8) } } } #[derive(thiserror::Error, PartialEq, Copy, Clone, Debug)] #[non_exhaustive] pub enum HuffmanDecoderError { /// Any padding strictly larger than 7 bits MUST be interpreted as an error #[error("Padding too large")] PaddingTooLarge, /// Any padding that does not correspond to the most significant bits of /// EOS MUST be interpreted as an error. #[error("Invalid padding")] InvalidPadding, /// If EOS is ever found in the string, it causes an error. #[error("EOS in string")] EOSInString, } /// The type that represents the result of the `decode` method of the /// `HuffmanDecoder`. pub type HuffmanDecoderResult = Result, HuffmanDecoderError>; /// A simple implementation of a Huffman code decoder. pub struct HuffmanDecoder { table: HashMap>, // The representation of the EOS: the left-aligned code representation and // the actual length of the codepoint, as a tuple. eos_codepoint: (u32, u8), } impl HuffmanDecoder { /// Constructs a new `HuffmanDecoder` using the given table of /// (code point, code length) tuples to represent the Huffman code. fn from_table(table: &[(u32, u8)]) -> HuffmanDecoder { if table.len() != 257 { panic!("Invalid Huffman code table. It must define exactly 257 symbols."); } let mut decoder_table: HashMap> = HashMap::new(); let mut eos_codepoint: Option<(u32, u8)> = None; for (symbol, &(code, code_len)) in table.iter().enumerate() { decoder_table.entry(code_len).or_default(); let subtable = decoder_table.get_mut(&code_len).unwrap(); let huff_symbol = HuffmanCodeSymbol::new(symbol); if let HuffmanCodeSymbol::EndOfString = huff_symbol { // We also remember the code point of the EOS for easier // reference later on. eos_codepoint = Some((code, code_len)); }; subtable.insert(code, huff_symbol); } HuffmanDecoder { table: decoder_table, eos_codepoint: eos_codepoint.unwrap(), } } /// Constructs a new HuffmanDecoder with the default Huffman code table, as /// defined in the HPACK-draft-10, Appendix B. pub fn new() -> HuffmanDecoder { HuffmanDecoder::from_table(HUFFMAN_CODE_TABLE) } /// Decodes the buffer `buf` into a newly allocated `Vec`. /// /// It assumes that the entire buffer should be considered as the Huffman /// encoding of an octet string and handles the padding rules /// accordingly. pub fn decode(&mut self, buf: &[u8]) -> HuffmanDecoderResult { let mut current: u32 = 0; let mut current_len: u8 = 0; let mut result: Vec = Vec::new(); for b in BitIterator::new(buf.iter()) { current_len += 1; current <<= 1; if b { current |= 1; } if self.table.contains_key(¤t_len) { let length_table = self.table.get(¤t_len).unwrap(); if length_table.contains_key(¤t) { let decoded_symbol = match length_table.get(¤t).unwrap() { HuffmanCodeSymbol::Symbol(symbol) => *symbol, HuffmanCodeSymbol::EndOfString => { // If the EOS symbol is detected within the stream, // we need to consider it an error. return Err(HuffmanDecoderError::EOSInString); } }; result.push(decoded_symbol); current = 0; current_len = 0; } } } // Now we need to verify that the padding is correct. // The spec mandates that the padding must not be strictly longer than // 7 bits and that it must represent the most significant bits of the // EOS symbol's code. // First: the check for the length of the padding if current_len > 7 { return Err(HuffmanDecoderError::PaddingTooLarge); } // Second: the padding corresponds to the most-significant bits of the // EOS symbol. // Align both of them to have their most significant bit as the most // significant bit of a u32. let right_align_current = if current_len == 0 { 0 } else { current << (32 - current_len) }; let right_align_eos = self.eos_codepoint.0 << (32 - self.eos_codepoint.1); // Now take only the necessary amount of most significant bit of EOS. // The mask defines a bit pattern of `current_len` leading set bits, // followed by the rest of the bits 0. let mask = if current_len == 0 { 0 } else { ((1 << current_len) - 1) << (32 - current_len) }; // The mask is now used to strip the unwanted bits of the EOS let eos_mask = right_align_eos & mask; if eos_mask != right_align_current { return Err(HuffmanDecoderError::InvalidPadding); } Ok(result) } } /// A helper struct that represents an iterator over individual bits of all /// bytes found in a wrapped Iterator over bytes. /// Bits are represented as `bool`s, where `true` corresponds to a set bit and /// `false` to a 0 bit. /// /// Bits are yielded in order of significance, starting from the /// most-significant bit. struct BitIterator<'a, I: Iterator> { buffer_iterator: I, current_byte: Option<&'a u8>, /// The bit-position within the current byte pos: u8, } impl<'a, I: Iterator> BitIterator<'a, I> where I: Iterator, { pub fn new(iterator: I) -> BitIterator<'a, I> { BitIterator::<'a, I> { buffer_iterator: iterator, current_byte: None, pos: 7, } } } impl<'a, I> Iterator for BitIterator<'a, I> where I: Iterator, { type Item = bool; fn next(&mut self) -> Option { if self.current_byte.is_none() { self.current_byte = self.buffer_iterator.next(); self.pos = 7; } // If we still have `None`, it means the buffer has been exhausted self.current_byte?; let b = *self.current_byte.unwrap(); let is_set = (b & (1 << self.pos)) == (1 << self.pos); if self.pos == 0 { // We have exhausted all bits from the current byte -- try to get // a new one on the next pass. self.current_byte = None; } else { // Still more bits left here... self.pos -= 1; } Some(is_set) } } static HUFFMAN_CODE_TABLE: &[(u32, u8)] = &[ (0x1ff8, 13), (0x7fffd8, 23), (0xfffffe2, 28), (0xfffffe3, 28), (0xfffffe4, 28), (0xfffffe5, 28), (0xfffffe6, 28), (0xfffffe7, 28), (0xfffffe8, 28), (0xffffea, 24), (0x3ffffffc, 30), (0xfffffe9, 28), (0xfffffea, 28), (0x3ffffffd, 30), (0xfffffeb, 28), (0xfffffec, 28), (0xfffffed, 28), (0xfffffee, 28), (0xfffffef, 28), (0xffffff0, 28), (0xffffff1, 28), (0xffffff2, 28), (0x3ffffffe, 30), (0xffffff3, 28), (0xffffff4, 28), (0xffffff5, 28), (0xffffff6, 28), (0xffffff7, 28), (0xffffff8, 28), (0xffffff9, 28), (0xffffffa, 28), (0xffffffb, 28), (0x14, 6), (0x3f8, 10), (0x3f9, 10), (0xffa, 12), (0x1ff9, 13), (0x15, 6), (0xf8, 8), (0x7fa, 11), (0x3fa, 10), (0x3fb, 10), (0xf9, 8), (0x7fb, 11), (0xfa, 8), (0x16, 6), (0x17, 6), (0x18, 6), (0x0, 5), (0x1, 5), (0x2, 5), (0x19, 6), (0x1a, 6), (0x1b, 6), (0x1c, 6), (0x1d, 6), (0x1e, 6), (0x1f, 6), (0x5c, 7), (0xfb, 8), (0x7ffc, 15), (0x20, 6), (0xffb, 12), (0x3fc, 10), (0x1ffa, 13), (0x21, 6), (0x5d, 7), (0x5e, 7), (0x5f, 7), (0x60, 7), (0x61, 7), (0x62, 7), (0x63, 7), (0x64, 7), (0x65, 7), (0x66, 7), (0x67, 7), (0x68, 7), (0x69, 7), (0x6a, 7), (0x6b, 7), (0x6c, 7), (0x6d, 7), (0x6e, 7), (0x6f, 7), (0x70, 7), (0x71, 7), (0x72, 7), (0xfc, 8), (0x73, 7), (0xfd, 8), (0x1ffb, 13), (0x7fff0, 19), (0x1ffc, 13), (0x3ffc, 14), (0x22, 6), (0x7ffd, 15), (0x3, 5), (0x23, 6), (0x4, 5), (0x24, 6), (0x5, 5), (0x25, 6), (0x26, 6), (0x27, 6), (0x6, 5), (0x74, 7), (0x75, 7), (0x28, 6), (0x29, 6), (0x2a, 6), (0x7, 5), (0x2b, 6), (0x76, 7), (0x2c, 6), (0x8, 5), (0x9, 5), (0x2d, 6), (0x77, 7), (0x78, 7), (0x79, 7), (0x7a, 7), (0x7b, 7), (0x7ffe, 15), (0x7fc, 11), (0x3ffd, 14), (0x1ffd, 13), (0xffffffc, 28), (0xfffe6, 20), (0x3fffd2, 22), (0xfffe7, 20), (0xfffe8, 20), (0x3fffd3, 22), (0x3fffd4, 22), (0x3fffd5, 22), (0x7fffd9, 23), (0x3fffd6, 22), (0x7fffda, 23), (0x7fffdb, 23), (0x7fffdc, 23), (0x7fffdd, 23), (0x7fffde, 23), (0xffffeb, 24), (0x7fffdf, 23), (0xffffec, 24), (0xffffed, 24), (0x3fffd7, 22), (0x7fffe0, 23), (0xffffee, 24), (0x7fffe1, 23), (0x7fffe2, 23), (0x7fffe3, 23), (0x7fffe4, 23), (0x1fffdc, 21), (0x3fffd8, 22), (0x7fffe5, 23), (0x3fffd9, 22), (0x7fffe6, 23), (0x7fffe7, 23), (0xffffef, 24), (0x3fffda, 22), (0x1fffdd, 21), (0xfffe9, 20), (0x3fffdb, 22), (0x3fffdc, 22), (0x7fffe8, 23), (0x7fffe9, 23), (0x1fffde, 21), (0x7fffea, 23), (0x3fffdd, 22), (0x3fffde, 22), (0xfffff0, 24), (0x1fffdf, 21), (0x3fffdf, 22), (0x7fffeb, 23), (0x7fffec, 23), (0x1fffe0, 21), (0x1fffe1, 21), (0x3fffe0, 22), (0x1fffe2, 21), (0x7fffed, 23), (0x3fffe1, 22), (0x7fffee, 23), (0x7fffef, 23), (0xfffea, 20), (0x3fffe2, 22), (0x3fffe3, 22), (0x3fffe4, 22), (0x7ffff0, 23), (0x3fffe5, 22), (0x3fffe6, 22), (0x7ffff1, 23), (0x3ffffe0, 26), (0x3ffffe1, 26), (0xfffeb, 20), (0x7fff1, 19), (0x3fffe7, 22), (0x7ffff2, 23), (0x3fffe8, 22), (0x1ffffec, 25), (0x3ffffe2, 26), (0x3ffffe3, 26), (0x3ffffe4, 26), (0x7ffffde, 27), (0x7ffffdf, 27), (0x3ffffe5, 26), (0xfffff1, 24), (0x1ffffed, 25), (0x7fff2, 19), (0x1fffe3, 21), (0x3ffffe6, 26), (0x7ffffe0, 27), (0x7ffffe1, 27), (0x3ffffe7, 26), (0x7ffffe2, 27), (0xfffff2, 24), (0x1fffe4, 21), (0x1fffe5, 21), (0x3ffffe8, 26), (0x3ffffe9, 26), (0xffffffd, 28), (0x7ffffe3, 27), (0x7ffffe4, 27), (0x7ffffe5, 27), (0xfffec, 20), (0xfffff3, 24), (0xfffed, 20), (0x1fffe6, 21), (0x3fffe9, 22), (0x1fffe7, 21), (0x1fffe8, 21), (0x7ffff3, 23), (0x3fffea, 22), (0x3fffeb, 22), (0x1ffffee, 25), (0x1ffffef, 25), (0xfffff4, 24), (0xfffff5, 24), (0x3ffffea, 26), (0x7ffff4, 23), (0x3ffffeb, 26), (0x7ffffe6, 27), (0x3ffffec, 26), (0x3ffffed, 26), (0x7ffffe7, 27), (0x7ffffe8, 27), (0x7ffffe9, 27), (0x7ffffea, 27), (0x7ffffeb, 27), (0xffffffe, 28), (0x7ffffec, 27), (0x7ffffed, 27), (0x7ffffee, 27), (0x7ffffef, 27), (0x7fffff0, 27), (0x3ffffee, 26), (0x3fffffff, 30), ]; #[cfg(test)] mod tests { use super::BitIterator; use super::HuffmanDecoder; use super::HuffmanDecoderError; /// A helper function that converts the given slice containing values `1` /// and `0` to a `Vec` of `bool`s, according to the number. fn to_expected_bit_result(numbers: &[u8]) -> Vec { numbers.iter().map(|b| -> bool { *b == 1 }).collect() } #[test] fn test_bit_iterator_single_byte() { let expected_result = to_expected_bit_result(&[0, 0, 0, 0, 1, 0, 1, 0]); let mut res: Vec = Vec::new(); for b in BitIterator::new([10u8].iter()) { res.push(b); } assert_eq!(res, expected_result); } #[test] fn test_bit_iterator_multiple_bytes() { let expected_result = to_expected_bit_result(&[ 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, ]); let mut res: Vec = Vec::new(); for b in BitIterator::new([10u8, 255, 128, 1, 0, 170].iter()) { res.push(b); } assert_eq!(res, expected_result); } /// Simple tests for the Huffman decoder -- whether it can decode a single /// character code represented additionally with only a single byte. #[test] fn test_huffman_code_single_byte() { let mut decoder = HuffmanDecoder::new(); // (The + (2^n - 1) at the final byte is to add the correct expected // padding: 1s) { // We need to shift it by 3, since we need the top-order bytes to // start the code point. let hex_buffer = [(0x7 << 3) + 7]; let expected_result = vec![b'o']; let result = decoder.decode(&hex_buffer).ok().unwrap(); assert_eq!(result, expected_result); } { #[allow(clippy::identity_op)] // for communicating intent let hex_buffer = [0x0 + 7]; let expected_result = vec![b'0']; let result = decoder.decode(&hex_buffer).ok().unwrap(); assert_eq!(result, expected_result); } { // The length of the codepoint is 6, so we shift by two let hex_buffer = [(0x21 << 2) + 3]; let expected_result = vec![b'A']; let result = decoder.decode(&hex_buffer).ok().unwrap(); assert_eq!(result, expected_result); } } /// Tests that the Huffman decoder can decode a single character made of /// multiple bytes. #[test] fn test_huffman_code_single_char_multiple_byte() { let mut decoder = HuffmanDecoder::new(); // (The + (2^n - 1) at the final byte is to add the correct expected // padding: 1s) { let hex_buffer = [255, 160 + 15]; let expected_result = vec![b'#']; let result = decoder.decode(&hex_buffer).ok().unwrap(); assert_eq!(result, expected_result); } { let hex_buffer = [255, 200 + 7]; let expected_result = vec![b'$']; let result = decoder.decode(&hex_buffer).ok().unwrap(); assert_eq!(result, expected_result); } { let hex_buffer = [255, 255, 255, 240 + 3]; let expected_result = vec![10]; let result = decoder.decode(&hex_buffer).ok().unwrap(); assert_eq!(result, expected_result); } } #[test] fn test_huffman_code_multiple_chars() { let mut decoder = HuffmanDecoder::new(); { let hex_buffer = [254, 1]; let expected_result = vec![b'!', b'0']; let result = decoder.decode(&hex_buffer).ok().unwrap(); assert_eq!(result, expected_result); } { let hex_buffer = [(0x14 << 2) | 0x3, 248]; let expected_result = vec![b' ', b'!']; let result = decoder.decode(&hex_buffer).ok().unwrap(); assert_eq!(result, expected_result); } } /// Tests that if we find the EOS symbol in the stream, we consider it an /// error. #[test] fn test_eos_is_error() { let mut decoder = HuffmanDecoder::new(); { // The EOS is an all-ones pattern of length 30 let hex_buffer = [0xFF, 0xFF, 0xFF, 0xFF]; let result = decoder.decode(&hex_buffer); assert_eq!( HuffmanDecoderError::EOSInString, match result { Err(e) => e, _ => panic!("Expected error due to EOS symbol in string"), } ); } { // Full EOS after a valid character. let hex_buffer = [0x3F, 0xFF, 0xFF, 0xFF, 0xFF]; let result = decoder.decode(&hex_buffer); assert_eq!( HuffmanDecoderError::EOSInString, match result { Err(e) => e, _ => panic!("Expected error due to EOS symbol in string"), } ); } } /// Tests that when there are 7 or less padding bits, the string is /// correctly decoded. #[test] fn test_short_padding_okay() { let mut decoder = HuffmanDecoder::new(); let hex_buffer = [0x3F]; let result = decoder.decode(&hex_buffer); assert_eq!(b"o".to_vec(), result.ok().unwrap()); } /// Tests that when there are more than 7 padding bits, we get an error. #[test] fn test_padding_too_long() { let mut decoder = HuffmanDecoder::new(); let hex_buffer = [0x3F, 0xFF]; let result = decoder.decode(&hex_buffer); assert_eq!( HuffmanDecoderError::PaddingTooLarge, match result { Err(e) => e, _ => panic!("Expected `PaddingTooLarge` error"), } ); } /// Tests that when there is a certain number of padding bits that deviate /// from the most significant bits of the EOS symbol, we get an error. #[test] fn test_padding_invalid() { let mut decoder = HuffmanDecoder::new(); { let hex_buffer = [0x3E]; let result = decoder.decode(&hex_buffer); assert_eq!( HuffmanDecoderError::InvalidPadding, match result { Err(e) => e, _ => panic!("Expected `InvalidPadding` error"), } ); } { let hex_buffer = [254, 0]; let result = decoder.decode(&hex_buffer); assert_eq!( HuffmanDecoderError::InvalidPadding, match result { Err(e) => e, _ => panic!("Expected `InvalidPadding` error"), } ); } } }