use bytesbuf::BytesBuf; use std::error; use std::fmt; use std::iter::FromIterator; use std::io; use std::mem; use std::ops::{Deref, DerefMut}; use std::str; use utf8_decoder::{LossyUtf8Decoder, StrictUtf8Decoder, Utf8DecoderError}; /// A “zero copy” string buffer. /// /// See [crate documentation](index.html) for an overview. #[derive(Clone, Default, Hash, Eq, Ord)] pub struct StrBuf(BytesBuf); impl StrBuf { /// Return a new, empty, inline buffer. #[inline] pub fn new() -> Self { StrBuf(BytesBuf::new()) } /// Return a new buffer with capacity for at least (typically more than) /// the given number of bytes. /// /// ## Panics /// /// Panics if the requested capacity is greater than `std::u32::MAX` (4 gigabytes). /// /// ## Examples /// /// ``` /// # use zbuf::StrBuf; /// assert!(StrBuf::with_capacity(17).capacity() >= 17); /// ``` #[inline] pub fn with_capacity(capacity: usize) -> Self { StrBuf(BytesBuf::with_capacity(capacity)) } /// Converts a bytes buffer into a string buffer. /// /// This takes `O(length)` time to check that the input is well-formed in UTF-8, /// and returns `Err(_)` if it is not. /// No heap memory is allocated or data copied, since this takes ownership of the bytes buffer. /// /// If you already know for sure that a bytes buffer is well-formed in UTF-8, /// consider the `unsafe` [`from_utf8_unchecked`](#method.from_utf8_unchecked) method, /// which takes `O(1)` time, instead. /// /// ## Examples /// /// ``` /// # use zbuf::{StrBuf, BytesBuf}; /// assert!(StrBuf::from_utf8(BytesBuf::from(&b"abc"[..])).is_ok()); /// assert!(StrBuf::from_utf8(BytesBuf::from(&b"ab\x80"[..])).is_err()); /// ``` #[inline] pub fn from_utf8(bytes: BytesBuf) -> Result { match str::from_utf8(&bytes) { Ok(_) => Ok(StrBuf(bytes)), Err(error) => Err(FromUtf8Error { bytes_buf: bytes, utf8_error: error, }) } } /// Converts a bytes buffer into a string buffer without checking UTF-8 well-formedness. /// /// This takes `O(1)` time. /// No heap memory is allocated or data copied, since this takes ownership of the bytes buffer. /// /// ## Safety /// /// The given bytes buffer must be well-formed in UTF-8. /// /// ## Examples /// /// ``` /// # use zbuf::{StrBuf, BytesBuf}; /// let bytes_buf = BytesBuf::from(b"abc".as_ref()); /// let str_buf = unsafe { /// StrBuf::from_utf8_unchecked(bytes_buf) /// }; /// assert_eq!(str_buf, "abc"); /// ``` #[inline] pub unsafe fn from_utf8_unchecked(bytes: BytesBuf) -> Self { StrBuf(bytes) } /// Converts a bytes buffer into a string buffer. /// /// This takes `O(length)` time to check that the input is well-formed in UTF-8, /// and replaces invalid byte sequences (decoding errors) with the replacement character U+FFFD. /// No heap memory is allocated or data copied, since this takes ownership of the bytes buffer. /// /// If you want to handle decoding errors differently, /// consider the [`from_utf8`](#method.from_utf8) method which returns a `Result`. /// /// ## Examples /// /// ``` /// # use zbuf::{StrBuf, BytesBuf}; /// assert_eq!(StrBuf::from_utf8_lossy(BytesBuf::from(&b"abc"[..])), "abc"); /// assert_eq!(StrBuf::from_utf8_lossy(BytesBuf::from(&b"ab\x80"[..])), "ab�"); /// ``` pub fn from_utf8_lossy(bytes: BytesBuf) -> Self { let mut decoder = LossyUtf8Decoder::new(); let mut buf: StrBuf = decoder.feed(bytes).collect(); buf.extend(decoder.end()); buf } /// Converts an iterator of bytes buffers into a string buffer. /// /// This takes `O(total length)` time to check that the input is well-formed in UTF-8, /// and returns an error at the first invalid byte sequence (decoding error). /// No heap memory is allocated or data copied, since this takes ownership of the bytes buffer. /// /// ## Examples /// /// ``` /// # use zbuf::StrBuf; /// let chunks = [ /// &[0xF0, 0x9F][..], /// &[0x8E], /// &[0x89], /// ]; /// assert_eq!(StrBuf::from_utf8_iter(&chunks).unwrap(), "🎉"); /// ``` pub fn from_utf8_iter(iter: I) -> Result where I: IntoIterator, I::Item: Into { let mut decoder = StrictUtf8Decoder::new(); let mut buf = StrBuf::new(); for item in iter { for result in decoder.feed(item.into()) { buf.push_buf(&result?) } } decoder.end()?; Ok(buf) } /// Converts an iterator of bytes buffers into a string buffer. /// /// This takes `O(total length)` time to check that the input is well-formed in UTF-8, /// and replaces invalid byte sequences (decoding errors) with the replacement character U+FFFD. /// No heap memory is allocated or data copied, since this takes ownership of the bytes buffer. /// /// ## Examples /// /// ``` /// # use zbuf::StrBuf; /// let chunks = [ /// &[0xF0, 0x9F][..], /// &[0x8E], /// &[0x89, 0xF0, 0x9F], /// ]; /// assert_eq!(StrBuf::from_utf8_iter_lossy(&chunks), "🎉�"); /// ``` pub fn from_utf8_iter_lossy(iter: I) -> Self where I: IntoIterator, I::Item: Into { let mut decoder = LossyUtf8Decoder::new(); let mut buf = StrBuf::new(); for item in iter { buf.extend(decoder.feed(item.into())) } buf.extend(decoder.end()); buf } /// Return a shared (immutable) reference to the bytes buffer representation /// of this string buffer. /// /// ## Examples /// /// ``` /// # use zbuf::StrBuf; /// let buf = StrBuf::from("🎉").as_bytes_buf().clone(); /// assert_eq!(buf, [0xF0, 0x9F, 0x8E, 0x89]); /// ``` #[inline] pub fn as_bytes_buf(&self) -> &BytesBuf { // This return value can be cloned to obtain a bytes buffer that shares // the same heap allocation as this string buffer. // Since that clone is shared, any mutation will cause it to re-allocate. // Therefore this can not be use to make a `StrBuf` not UTF-8. &self.0 } /// Return the length of this buffer, in bytes. /// /// ## Examples /// /// ``` /// # use zbuf::StrBuf; /// assert_eq!(StrBuf::from("🎉").len(), 4); /// ``` #[inline] pub fn len(&self) -> usize { self.0.len() } /// Return whether this buffer is empty. /// /// ## Examples /// /// ``` /// # use zbuf::BytesBuf; /// assert_eq!(BytesBuf::new().is_empty(), true); /// assert_eq!(BytesBuf::from(b"abc".as_ref()).is_empty(), false); /// ``` #[inline] pub fn is_empty(&self) -> bool { self.0.is_empty() } /// Return the capacity of this buffer: the length to which it can grow /// without re-allocating. /// /// ## Examples /// /// ``` /// # use zbuf::StrBuf; /// assert!(StrBuf::with_capacity(17).capacity() >= 17); /// ``` #[inline] pub fn capacity(&self) -> usize { self.0.capacity() } /// Remove the given number of bytes from the front (the start) of the buffer. /// /// This takes `O(1)` time and does not copy any heap-allocated data. /// /// ## Panics /// /// Panics if `bytes` is out of bounds or not at a `char` boundary. /// /// ## Examples /// /// ``` /// # use zbuf::StrBuf; /// let mut buf = StrBuf::from("hello"); /// buf.pop_front(2); /// assert_eq!(buf, "llo"); /// ``` pub fn pop_front(&mut self, bytes: usize) { let _: &str = &self[bytes..]; // Check char boundary with a nice panic message self.0.pop_front(bytes) } /// Remove the given number of bytes from the back (the end) of the buffer. /// /// This takes `O(1)` time and does not copy any heap-allocated data. /// /// ## Panics /// /// Panics if `bytes` is out of bounds or not at a `char` boundary. /// /// ## Examples /// /// ``` /// # use zbuf::StrBuf; /// let mut buf = StrBuf::from("hello"); /// buf.pop_back(2); /// assert_eq!(buf, "hel"); /// ``` pub fn pop_back(&mut self, bytes: usize) { let len = self.len(); match len.checked_sub(bytes) { None => panic!("tried to pop {} bytes, only {} are available", bytes, len), Some(new_len) => self.truncate(new_len) } } /// Split the buffer into two at the given index. /// /// Return a new buffer that contains bytes `[at, len)`, /// while `self` contains bytes `[0, at)`. /// /// # Panics /// /// Panics if `at` is out of bounds or not at a `char` boundary. /// /// ## Examples /// /// ``` /// # use zbuf::StrBuf; /// let mut buf = StrBuf::from("hello"); /// let tail = buf.split_off(2); /// assert_eq!(buf, "he"); /// assert_eq!(tail, "llo"); /// ``` pub fn split_off(&mut self, at: usize) -> StrBuf { let _: &str = &self[..at]; // Check char boundary with a nice panic message StrBuf(self.0.split_off(at)) } /// This makes the buffer empty but, unless it is shared, does not change its capacity /// /// If potentially freeing memory is preferable, consider `buf = StrBuf::empty()` instead. /// /// ## Examples /// /// ``` /// # use zbuf::StrBuf; /// let mut buf = StrBuf::from("hello"); /// assert_eq!(buf, "hello"); /// buf.clear(); /// assert_eq!(buf, ""); /// assert!(buf.capacity() > 0); /// ``` #[inline] pub fn clear(&mut self) { self.0.clear() } /// Shortens the buffer to the specified length. /// /// If `new_len` is greater than the buffer’s current length, this has no effect. /// /// ## Panics /// /// Panics if `new_len` is not at a `char` boundary. /// /// ## Examples /// /// ``` /// # use zbuf::StrBuf; /// let mut buf = StrBuf::from("hello"); /// buf.truncate(10); /// assert_eq!(buf, "hello"); /// buf.truncate(2); /// assert_eq!(buf, "he"); /// ``` pub fn truncate(&mut self, new_len: usize) { if new_len < self.len() { let _: &str = &self[..new_len]; // Check char boundary with a nice panic message self.0.truncate(new_len) } } /// Ensures that the buffer has capacity for at least (typically more than) /// `additional` bytes beyond its current length. /// /// This copies the data if this buffer is shared or if the existing capacity is insufficient. /// /// ## Examples /// /// ``` /// # use zbuf::StrBuf; /// let mut buf = StrBuf::from(&*"abc".repeat(10)); /// assert!(buf.capacity() < 100); /// buf.reserve(100); /// assert!(buf.capacity() >= 130); /// ``` #[inline] pub fn reserve(&mut self, additional: usize) { self.0.reserve(additional) } /// Extend this buffer by writing to its existing capacity. /// /// The closure is given a potentially-uninitialized mutable string slice, /// and returns the number of consecutive bytes written from the start of the slice. /// The buffer’s length is increased by that much. /// /// If `self.reserve(additional)` is called immediately before this method, /// the slice is at least `additional` bytes long. /// Without a `reserve` call the slice can be any length, including zero. /// /// This copies the existing data if there are other references to this buffer. /// /// ## Safety /// /// The closure must not *read* from the given slice, which may be uninitialized. /// It must initialize the `0..written` range and make it well-formed in UTF-8, /// where `written` is the return value. /// /// ## Panics /// /// Panics if the value returned by the closure is larger than the given closure’s length. /// /// ## Examples /// /// ``` /// # use zbuf::StrBuf; /// let mut buf = StrBuf::from("hello"); /// buf.reserve(10); /// unsafe { /// buf.write_to_uninitialized_tail(|uninitialized_str| { /// let uninitialized_bytes = as_bytes_mut(uninitialized_str); /// for byte in &mut uninitialized_bytes[..3] { /// *byte = b'!' /// } /// 3 /// }) /// } /// assert_eq!(buf, "hello!!!"); /// /// /// https://github.com/rust-lang/rust/issues/41119 /// unsafe fn as_bytes_mut(s: &mut str) -> &mut [u8] { /// ::std::mem::transmute(s) /// } /// ``` pub unsafe fn write_to_uninitialized_tail(&mut self, f: F) where F: FnOnce(&mut str) -> usize { self.0.write_to_uninitialized_tail(|uninitialized| { // Safety: the BytesBuf inside StrBuf is private, // and this module mantains UTF-8 well-formedness. let uninitialized_str = str_from_utf8_unchecked_mut(uninitialized); f(uninitialized_str) }) } /// Extend this buffer by writing to its existing capacity. /// /// The closure is given a mutable string slice /// that has been overwritten with zeros (which takes `O(n)` extra time). /// The buffer’s length is increased by the closure’s return value. /// /// If `self.reserve(additional)` is called immediately before this method, /// the slice is at least `additional` bytes long. /// Without a `reserve` call the slice can be any length, including zero. /// /// This copies the existing data if there are other references to this buffer. /// /// ## Panics /// /// Panics if the value returned by the closure is larger than the given closure’s length, /// or if it is not at a `char` boundary. /// /// ## Examples /// /// ``` /// # use zbuf::StrBuf; /// let mut buf = StrBuf::from("hello"); /// buf.reserve(10); /// buf.write_to_zeroed_tail(|tail| { /// let tail = unsafe { /// as_bytes_mut(tail) /// }; /// for byte in &mut tail[..3] { /// *byte = b'!' /// } /// 10 /// }); /// assert_eq!(buf, "hello!!!\0\0\0\0\0\0\0"); /// /// /// https://github.com/rust-lang/rust/issues/41119 /// unsafe fn as_bytes_mut(s: &mut str) -> &mut [u8] { /// ::std::mem::transmute(s) /// } /// ``` pub fn write_to_zeroed_tail(&mut self, f: F) where F: FnOnce(&mut str) -> usize { self.0.write_to_zeroed_tail(|tail_bytes| { // Safety: a sequence of zero bytes is well-formed UTF-8. let tail_str = unsafe { str_from_utf8_unchecked_mut(tail_bytes) }; let additional_len = f(tail_str); &tail_str[..additional_len]; // Check char boundary additional_len }) } /// Appends the given string slice onto the end of this buffer. /// /// This copies the existing data if this buffer is shared /// or if the existing capacity is insufficient. /// /// ## Examples /// /// ``` /// # use zbuf::StrBuf; /// let mut buf = StrBuf::from("hello"); /// buf.push_str(" world!"); /// assert_eq!(buf, "hello world!"); /// ``` #[inline] pub fn push_str(&mut self, slice: &str) { self.0.push_slice(slice.as_bytes()) } /// Appends the given character onto the end of this buffer. /// /// This copies the existing data if this buffer is shared /// or if the existing capacity is insufficient. /// /// ## Examples /// /// ``` /// # use zbuf::StrBuf; /// let mut buf = StrBuf::from("hello"); /// buf.push_char('!'); /// assert_eq!(buf, "hello!"); /// ``` #[inline] pub fn push_char(&mut self, c: char) { self.push_str(c.encode_utf8(&mut [0; 4])) } /// Appends the given string buffer onto the end of this buffer. /// /// This is similar to [`push_str`](#method.push_str), but sometimes more efficient. /// /// ## Examples /// /// This allocates only once: /// /// ``` /// # use zbuf::StrBuf; /// let string = "abc".repeat(20); /// let mut buf = StrBuf::from(&*string); /// let tail = buf.split_off(50); /// assert_eq!(buf.len(), 50); /// assert_eq!(tail.len(), 10); /// buf.push_buf(&tail); /// assert_eq!(buf, string); /// ``` #[inline] pub fn push_buf(&mut self, other: &StrBuf) { self.0.push_buf(&other.0) } } // FIXME https://github.com/rust-lang/rust/issues/41119 #[inline] unsafe fn str_from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str { mem::transmute(v) } impl Deref for StrBuf { type Target = str; #[inline] fn deref(&self) -> &str { // Safety: the BytesBuf inside StrBuf is private, // and this module mantains UTF-8 well-formedness. unsafe { str::from_utf8_unchecked(&self.0) } } } /// This copies the existing data if there are other references to this buffer. impl DerefMut for StrBuf { #[inline] fn deref_mut(&mut self) -> &mut str { // Safety: the BytesBuf inside StrBuf is private, // and this module mantains UTF-8 well-formedness. unsafe { str_from_utf8_unchecked_mut(&mut self.0) } } } impl AsRef for StrBuf { #[inline] fn as_ref(&self) -> &str { self } } impl AsMut for StrBuf { #[inline] fn as_mut(&mut self) -> &mut str { self } } impl<'a> From<&'a str> for StrBuf { #[inline] fn from(slice: &'a str) -> Self { StrBuf(BytesBuf::from(slice.as_bytes())) } } impl From for BytesBuf { #[inline] fn from(buf: StrBuf) -> Self { buf.0 } } impl fmt::Debug for StrBuf { #[inline] fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { str::fmt(self, formatter) } } impl fmt::Display for StrBuf { #[inline] fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { str::fmt(self, formatter) } } impl> PartialEq for StrBuf { #[inline] fn eq(&self, other: &T) -> bool { str::eq(self, other.as_ref()) } } impl> PartialOrd for StrBuf { #[inline] fn partial_cmp(&self, other: &T) -> Option<::std::cmp::Ordering> { str::partial_cmp(self, other.as_ref()) } } impl Extend for StrBuf { #[inline] fn extend(&mut self, iter: I) where I: IntoIterator { for item in iter { self.push_char(item) } } } impl FromIterator for StrBuf { #[inline] fn from_iter(iter: I) -> Self where I: IntoIterator { let mut buf = Self::new(); buf.extend(iter); buf } } impl<'a> Extend<&'a char> for StrBuf { #[inline] fn extend(&mut self, iter: I) where I: IntoIterator { for &item in iter { self.push_char(item) } } } impl<'a> FromIterator<&'a char> for StrBuf { #[inline] fn from_iter(iter: I) -> Self where I: IntoIterator { let mut buf = Self::new(); buf.extend(iter); buf } } impl<'a> Extend<&'a str> for StrBuf { #[inline] fn extend(&mut self, iter: I) where I: IntoIterator { for item in iter { self.push_str(item) } } } impl<'a> FromIterator<&'a str> for StrBuf { #[inline] fn from_iter(iter: I) -> Self where I: IntoIterator { let mut buf = Self::new(); buf.extend(iter); buf } } impl<'a> Extend<&'a StrBuf> for StrBuf { #[inline] fn extend(&mut self, iter: I) where I: IntoIterator { for item in iter { self.push_buf(item) } } } impl<'a> FromIterator<&'a StrBuf> for StrBuf { #[inline] fn from_iter(iter: I) -> Self where I: IntoIterator { let mut buf = Self::new(); buf.extend(iter); buf } } impl Extend for StrBuf { #[inline] fn extend(&mut self, iter: I) where I: IntoIterator { for item in iter { self.push_buf(&item) } } } impl FromIterator for StrBuf { #[inline] fn from_iter(iter: I) -> Self where I: IntoIterator { let mut buf = Self::new(); buf.extend(iter); buf } } impl fmt::Write for StrBuf { #[inline] fn write_str(&mut self, s: &str) -> fmt::Result { self.push_str(s); Ok(()) } fn write_char(&mut self, c: char) -> fmt::Result { self.push_char(c); Ok(()) } } /// The error type for [`StrBuf::from_utf8`](struct.StrBuf.html#method.from_utf8). #[derive(Debug)] pub struct FromUtf8Error { bytes_buf: BytesBuf, utf8_error: str::Utf8Error, } impl FromUtf8Error { pub fn as_bytes_buf(&self) -> &BytesBuf { &self.bytes_buf } pub fn into_bytes_buf(self) -> BytesBuf { self.bytes_buf } pub fn utf8_error(&self) -> str::Utf8Error { self.utf8_error } } impl fmt::Display for FromUtf8Error { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { self.utf8_error.fmt(formatter) } } impl error::Error for FromUtf8Error { fn description(&self) -> &str { "invalid utf-8" } } impl From for io::Error { fn from(error: FromUtf8Error) -> Self { Self::new(io::ErrorKind::InvalidData, error.utf8_error()) } }