use std::io::{BufReader, Cursor, ErrorKind}; use std::string::FromUtf8Error; use utf8_bufread::BufRead; #[test] fn invalid_across_buffer() { let input = "foo💖bär€" .bytes() .enumerate() // Replacing the last byte of the heart with an invalid byte (not a continuation byte) .map(|(i, c)| if i == 6 { 21 } else { c }) .collect::>(); let mut r = BufReader::<&[u8]>::with_capacity(4, input.as_ref()); // Read first valid half of the input let s = r.read_str(); assert!(s.is_ok()); let s = s.unwrap(); // We encountered the first byte of the heart, but the remaining is not in the buffer, // so we'll only returning all the valid bytes in the buffer, ie "foo" assert_eq!(s, "foo"); // Attempt reading further in the string. As the first next byte to read is not complete // (4 bytes are expected but only one remains in the buffer), we'll try to read a codepoint // across the buffer let e = r.read_str(); // We will fail, since we replaced a bytes of the heart by an invalid one assert!(e.is_err()); let e = e.unwrap_err(); assert_eq!(e.kind(), ErrorKind::InvalidData); assert_eq!(e.leftovers(), [240, 159, 146, 21]); let e = e.into_inner_lossy().unwrap(); assert!(e.is::()); // While we have altered a codepoint, the string should still have valid bytes following. // We know this because we know our input, it is generally not know if subsequent read will // succeed // Read second valid half of the input let s = r.read_str(); assert!(s.is_ok()); let s = s.unwrap(); assert_eq!(s, "b"); let s = r.read_str(); assert!(s.is_ok()); let s = s.unwrap(); assert_eq!(s, "är"); let s = r.read_str(); assert!(s.is_ok()); let s = s.unwrap(); assert_eq!(s, "€"); } #[test] fn incomplete() { let mut r = Cursor::new(&"💖".as_bytes()[..3]); let e = r.read_str(); // We will fail, since we lack one byte for the codepoint to be complete assert!(e.is_err()); let e = e.unwrap_err(); assert_eq!(e.kind(), ErrorKind::UnexpectedEof); assert_eq!(e.leftovers(), [240, 159, 146]); assert!(e.into_inner_lossy().is_none()); }