//! A crate for quick and easy format structure definitions for use in binary file parsing. //! //! # Usage //! //! This crate should be used by invoking the provided [`format_struct`] macro like this: //! //! ```rust //! use format_struct::{format_struct, ReprByteSlice}; //! //! // Here we define a small structure. //! format_struct! { //! struct little Test { //! foo: u8, //! bar: u32, //! baz: [u8; 2], //! } //! } //! //! # pub fn main() { //! // This is the data we want to parse: //! let data = &[ //! 0x42u8, // this goes into foo //! 0x39, 0x05, 0x00, 0x00, // this goes into bar //! 0xaa, 0x55, // this goes into baz //! ][..]; //! //! // This is completely zero-cost since the implementation is just a transmute. //! let s = Test::from_byte_slice(data).unwrap(); //! //! // Each integer field access compiles to a single unaligned memory access instruction. //! assert_eq!(s.foo, 0x42); //! assert_eq!(s.bar.get(), 1337); //! assert_eq!(&s.baz, &[0xaa, 0x55]); //! # } //! ``` #![no_std] #![deny(missing_docs)] #![deny(missing_debug_implementations)] #![deny(rust_2018_idioms)] #![deny(unreachable_pub)] #[cfg(feature = "std")] extern crate std; pub mod endian; use core::mem::MaybeUninit; use endian::FixedEndian; pub use endian::{BigEndian, Endian, LittleEndian}; /// The error type returned when a byte slice of size that is either not equal to or not a multiple /// of the target type's size is transmuted into that type. #[derive(Copy, Clone, Debug)] pub struct UnalignedSizeError; impl core::fmt::Display for UnalignedSizeError { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.write_str("byte slice is not aligned to the structure's size") } } #[cfg(feature = "std")] impl std::error::Error for UnalignedSizeError {} /// The error type returned when a type is transmuted into a byte slice and the multiple of the /// slice's length and the type's size overflows `isize`. #[derive(Copy, Clone, Debug)] pub struct SliceSizeOverflowError; impl core::fmt::Display for SliceSizeOverflowError { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.write_str("multiple of slice count and type size overflows isize") } } #[cfg(feature = "std")] impl std::error::Error for SliceSizeOverflowError {} /// Multiplies a size of a type with a count and returns an error in case the resulting value /// overflows [`isize`]. pub const fn safe_count_to_size(count: usize) -> Result { const MAX_SIZE: usize = isize::MAX as usize; if let Some(size) = core::mem::size_of::().checked_mul(count) { if size <= MAX_SIZE { Ok(size) } else { Err(SliceSizeOverflowError) } } else { Err(SliceSizeOverflowError) } } /// Checks that the specified size if a multiple of a type's size and returns the size divided by /// the type's size or an error if that is not the case. pub const fn safe_size_to_count(size: usize) -> Result { if size % core::mem::size_of::() == 0 { if let Some(count) = size.checked_div(core::mem::size_of::()) { Ok(count) } else { Err(UnalignedSizeError) } } else { Err(UnalignedSizeError) } } /// An **unsafe** trait for types that may be safely transmuted from and to byte slices. /// /// This trait is usually automatically implemented by the [`format_struct`] macro so there is no /// need to implement it manually. /// /// All the trait's methods could be implemented automatically but are not due to limitations of the /// Rust's generics: using `Self` in a const context (array size on our case) isn't possible in /// traits. Since the trait isn't meant to be implemented manually that is considered a non-issue. /// /// # Safety /// /// Types implementing the trait must be safe to transmute from an arbitrary byte slice of the same /// size as the type itself. The alignment for the type must be 1. pub unsafe trait ReprByteSlice: Sized { /// Transmutes an immutable byte slice reference into an immutable `Self` reference. /// /// # Errors /// /// Returns an error in case the size doesn't match the type's size. fn from_byte_slice(s: &[u8]) -> Result<&Self, UnalignedSizeError>; /// Transmutes a mutable byte slice reference into a mutable `Self` reference. /// /// # Errors /// /// Returns an error in case the size doesn't match the type's size. fn from_byte_slice_mut(s: &mut [u8]) -> Result<&mut Self, UnalignedSizeError>; /// Transmutes an immutable reference to `self` into an immutable reference to a byte slice. fn as_byte_slice(&self) -> &[u8]; /// Transmutes a mutable reference to `self` into a mutable reference to a byte slice. fn as_byte_slice_mut(&mut self) -> &mut [u8]; /// Transmutes an immutable byte slice reference into an immutable to a slice of `Self`. /// /// # Errors /// /// Returns an error in case the size isn't a multiple of the type's size. fn slice_from_byte_slice(s: &[u8]) -> Result<&[Self], UnalignedSizeError>; /// Transmutes a mutable byte slice reference into a mutable to a slice of `Self`. /// /// # Errors /// /// Returns an error in case the size isn't a multiple of the type's size. fn slice_from_byte_slice_mut(s: &mut [u8]) -> Result<&mut [Self], UnalignedSizeError>; /// Transmutes an immutable reference to a slice of `Self` into an immutable reference to a byte /// slice. /// /// # Errors /// /// Returns [`SliceSizeOverflowError`] in case the product of the slice length and the type's /// size would be larger than [`isize::MAX`]. fn slice_as_byte_slice(slice: &[Self]) -> Result<&[u8], SliceSizeOverflowError>; /// Transmutes a mutable reference to a slice of `Self` into a mutable reference to a byte /// slice. /// /// # Errors /// /// Returns [`SliceSizeOverflowError`] in case the product of the slice length and the type's /// size would be larger than [`isize::MAX`]. fn slice_as_byte_slice_mut(s: &mut [Self]) -> Result<&mut [u8], SliceSizeOverflowError>; /// Transmutes an immutable reference to a slice of [`MaybeUninit`] into an immutable /// reference to a slice of [`MaybeUninit`]. /// /// # Errors /// /// Returns an error in case the size isn't a multiple of the type's size. fn uninit_slice_from_byte_slice( bytes: &[MaybeUninit], ) -> Result<&[MaybeUninit], UnalignedSizeError>; /// Transmutes a mutable reference to a slice of [`MaybeUninit`] into a mutable reference /// to a slice of [`MaybeUninit`]. /// /// # Errors /// /// Returns an error in case the size isn't a multiple of the type's size. fn uninit_slice_from_byte_slice_mut( bytes: &mut [MaybeUninit], ) -> Result<&mut [MaybeUninit], UnalignedSizeError>; /// Transmutes an immutable reference to a slice of [`MaybeUninit`] into an immutable /// reference to a slice of [`MaybeUninit`]. /// /// # Errors /// /// Returns [`SliceSizeOverflowError`] in case the product of the slice length and the type's /// size would be larger than [`isize::MAX`]. fn uninit_slice_as_byte_slice( slice: &[MaybeUninit], ) -> Result<&[MaybeUninit], SliceSizeOverflowError>; /// Transmutes a mutable reference to a slice of [`MaybeUninit`] into a mutable reference /// to a slice of [`MaybeUninit`]. /// /// # Errors /// /// Returns [`SliceSizeOverflowError`] in case the product of the slice length and the type's /// size would be larger than [`isize::MAX`]. fn uninit_slice_as_byte_slice_mut( s: &mut [MaybeUninit], ) -> Result<&mut [MaybeUninit], SliceSizeOverflowError>; } macro_rules! define_int_wrapper { ($ty:ident, $name:ident) => { #[doc = concat!( "A type that wraps a byte array to be decoded into a `", stringify!($ty), "`.\n\n" )] /// The generic parameter represents the endianness used to decode the wrapped value. In /// case the value is expected to have fixed endianness, either [`BigEndian`] or /// [`LittleEndian`] types should be used, otherwise the [`Endian`] type. #[derive(Copy, Clone, Eq, PartialEq, Hash)] #[repr(C)] pub struct $name([u8; ($ty::BITS as usize) / 8], ::core::marker::PhantomData); impl $name { #[doc = concat!("Converts a byte array into a [`", stringify!($name), "`].")] pub const fn from_bytes(bytes: [u8; ($ty::BITS as usize) / 8]) -> Self { Self(bytes, ::core::marker::PhantomData) } #[doc = concat!("Converts a [`", stringify!($name), "`] into a byte array.")] pub const fn into_bytes(self) -> [u8; ($ty::BITS as usize) / 8] { self.0 } } $crate::format_struct!(@impl_conv $name size (($ty::BITS as usize) / 8)); impl $name { #[doc = concat!( "Constructs a [`", stringify!($name), "`] wrapper type from a `", stringify!($ty), "` value using the specified endianness." )] #[inline] pub const fn new_with_endian(value: $ty, endian: Endian) -> Self { let bytes = match endian { Endian::Little => value.to_le_bytes(), Endian::Big => value.to_be_bytes(), }; Self(bytes, ::core::marker::PhantomData) } #[doc = concat!( "Extracts a `", stringify!($ty), "` value from a [`", stringify!($name), "`] wrapper using the specified endianness." )] #[inline] pub const fn get_with_endian(self, endian: Endian) -> $ty { match endian { Endian::Little => $ty::from_le_bytes(self.0), Endian::Big => $ty::from_be_bytes(self.0), } } } impl $name { #[doc = concat!( "Constructs a [`", stringify!($name), "`] wrapper type from a `", stringify!($ty), "` value using the type's fixed endianness." )] #[inline] pub const fn new(value: $ty) -> Self { let bytes = match E::ENDIAN { Endian::Little => value.to_le_bytes(), Endian::Big => value.to_be_bytes(), }; Self(bytes, ::core::marker::PhantomData) } #[doc = concat!( "Extracts a `", stringify!($ty), "` value from a [`", stringify!($name), "`] wrapper using the type's fixed endianness." )] #[inline] pub const fn get(self) -> $ty { match E::ENDIAN { Endian::Little => $ty::from_le_bytes(self.0), Endian::Big => $ty::from_be_bytes(self.0), } } } impl ::core::default::Default for $name { fn default() -> Self { Self(Default::default(), ::core::marker::PhantomData) } } impl From<$ty> for $name { fn from(value: $ty) -> Self { Self::new(value) } } impl core::fmt::Debug for $name { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { core::fmt::Debug::fmt(&self.get(), f) } } impl core::fmt::Debug for $name { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { core::fmt::Debug::fmt(&self.get(), f) } } impl core::fmt::Debug for $name { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.debug_tuple(stringify!($name)) .field(&self.0) .finish() } } }; } define_int_wrapper!(u16, U16); define_int_wrapper!(i16, I16); define_int_wrapper!(u32, U32); define_int_wrapper!(i32, I32); define_int_wrapper!(u64, U64); define_int_wrapper!(i64, I64); define_int_wrapper!(u128, U128); define_int_wrapper!(i128, I128); /// Defines a structure that can be transmuted from/into a byte slice for parsing/constructing binary formats in a /// zero-copy way. /// /// The macro achieves this by replacing all multibyte integers with wrapper types that are byte /// arrays internally and only allowing integer and fixed size array fields in a structure. /// /// Accepted syntax is similar to a standard structure definition in Rust with some differences: /// /// * The `struct` keyword is followed by either `little` or `big` keywords if you want fixed /// endianness or `dynamic` keyword if you want dynamic endianness. /// * Fields of the generated structure may only have documentation meta, other meta types are /// disallowed. /// /// # Examples /// /// ```rust /// # use format_struct::format_struct; /// format_struct! { /// /// A little-endian test structure. /// #[derive(Default, Clone)] /// pub struct little Test { /// /// this byte is public /// pub byte: u8, /// short: u16, /// word: i32, /// dword: i64, /// qword: u128, /// byte_arr: [u8; 16], /// } /// } /// ``` /// /// It is also possible to define multiple structures in one macro invocation: /// /// ```rust /// # use format_struct::format_struct; /// format_struct! { /// struct little Foo { /// byte: u8, /// } /// /// struct big Bar { /// a: u64, /// } /// /// pub struct little Baz { /// z: [u8; 33], /// } /// } /// ``` /// /// # Allowed field types /// /// Currently only integer types (`u8`, `u16`, `u32`, `u64`, `u128` and their signed counterparts) are allowed and /// statically sized integer arrays (`[u8; N]`). /// /// # Layout /// /// The fields in the structure are laid out in declaration order without any padding. That means that the following /// structure will take 7 bytes instead of 16 you might expect: /// /// ```rust /// # use format_struct::format_struct; /// format_struct! { /// struct little SmallStruct { /// byte: u8, /// dword: u64, /// } /// } /// ``` #[macro_export] macro_rules! format_struct { ($($(#[$m:meta])* $vis:vis struct $endian:tt $name:ident { $($(#[doc = $field_doc:literal])* $field_vis:vis $field_name:ident: $ty:tt),*, })+) => { $( #[repr(C)] $(#[$m])* $vis struct $name { $($(#[doc = $field_doc])* $field_vis $field_name: format_struct!(@wrapper_type $ty $endian)),* } impl $name { #[doc = concat!("Converts a byte array into a [`", stringify!($name), "`].")] pub const fn from_bytes(bytes: [u8; ::core::mem::size_of::()]) -> Self { unsafe { ::core::mem::transmute(bytes) } } #[doc = concat!("Converts a [`", stringify!($name), "`] into a byte array.")] pub const fn into_bytes(self) -> [u8; ::core::mem::size_of::()] { unsafe { ::core::mem::transmute(self) } } } $crate::format_struct!(@impl_conv $name size ::core::mem::size_of::<$name>()); impl ::core::fmt::Debug for $name { fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result { f.debug_struct(stringify!($name)) $(.field(stringify!($field_name), &self.$field_name))* .finish() } } )+ }; (@impl_conv $name:ident$(<$gen:ident>)? size $size_expr:expr) => { unsafe impl$(<$gen>)? $crate::ReprByteSlice for $name$(<$gen>)? { fn from_byte_slice(s: &[u8]) -> ::core::result::Result<&Self, $crate::UnalignedSizeError> { let bytes: &[u8; $size_expr] = ::core::convert::TryInto::try_into(s).map_err(|_| $crate::UnalignedSizeError)?; let ptr = bytes.as_ptr() as *const Self; ::core::result::Result::Ok(unsafe { &*ptr }) } fn from_byte_slice_mut(s: &mut [u8]) -> ::core::result::Result<&mut Self, $crate::UnalignedSizeError> { let bytes: &mut [u8; $size_expr] = ::core::convert::TryInto::try_into(s).map_err(|_| $crate::UnalignedSizeError)?; let ptr = bytes.as_ptr() as *mut Self; ::core::result::Result::Ok(unsafe { &mut *ptr }) } fn as_byte_slice(&self) -> &[u8] { let data = self as *const Self as *const u8; let len = ::core::mem::size_of::(); unsafe { ::core::slice::from_raw_parts(data, len) } } fn as_byte_slice_mut(&mut self) -> &mut [u8] { let data = self as *mut Self as *mut u8; let len = ::core::mem::size_of::(); unsafe { ::core::slice::from_raw_parts_mut(data, len) } } fn slice_from_byte_slice(s: &[u8]) -> ::core::result::Result<&[Self], $crate::UnalignedSizeError> { if s.is_empty() { ::core::result::Result::Ok(&[]) } else { let size = $crate::safe_size_to_count::(s.len())?; let ptr = s.as_ptr() as *const Self; ::core::result::Result::Ok(unsafe { ::core::slice::from_raw_parts(ptr, size) }) } } fn slice_from_byte_slice_mut(s: &mut [u8]) -> ::core::result::Result<&mut [Self], $crate::UnalignedSizeError> { if s.is_empty() { ::core::result::Result::Ok(&mut []) } else { let size = $crate::safe_size_to_count::(s.len())?; let ptr = s.as_mut_ptr() as *mut Self; ::core::result::Result::Ok(unsafe { ::core::slice::from_raw_parts_mut(ptr, size) }) } } fn slice_as_byte_slice(slice: &[Self]) -> ::core::result::Result<&[u8], $crate::SliceSizeOverflowError> { let data = slice.as_ptr() as *const u8; let len = $crate::safe_count_to_size::(slice.len())?; ::core::result::Result::Ok(unsafe { ::core::slice::from_raw_parts(data, len) }) } fn slice_as_byte_slice_mut(slice: &mut [Self]) -> ::core::result::Result<&mut [u8], $crate::SliceSizeOverflowError> { let data = slice.as_ptr() as *mut u8; let len = $crate::safe_count_to_size::(slice.len())?; ::core::result::Result::Ok(unsafe { ::core::slice::from_raw_parts_mut(data, len) }) } fn uninit_slice_from_byte_slice( s: &[::core::mem::MaybeUninit] ) -> ::core::result::Result<&[::core::mem::MaybeUninit], $crate::UnalignedSizeError> { if s.is_empty() { ::core::result::Result::Ok(&[]) } else { let size = $crate::safe_size_to_count::(s.len())?; let ptr = s.as_ptr() as *const ::core::mem::MaybeUninit; ::core::result::Result::Ok(unsafe { ::core::slice::from_raw_parts(ptr, size) }) } } fn uninit_slice_from_byte_slice_mut( s: &mut [::core::mem::MaybeUninit] ) -> ::core::result::Result<&mut [::core::mem::MaybeUninit], $crate::UnalignedSizeError> { if s.is_empty() { ::core::result::Result::Ok(&mut []) } else { let size = $crate::safe_size_to_count::(s.len())?; let ptr = s.as_mut_ptr() as *mut ::core::mem::MaybeUninit; ::core::result::Result::Ok(unsafe { ::core::slice::from_raw_parts_mut(ptr, size) }) } } fn uninit_slice_as_byte_slice( slice: &[::core::mem::MaybeUninit] ) -> ::core::result::Result<&[::core::mem::MaybeUninit], $crate::SliceSizeOverflowError> { let data = slice.as_ptr() as *const ::core::mem::MaybeUninit; let len = ::core::mem::size_of::().checked_mul(slice.len()).expect(""); ::core::result::Result::Ok(unsafe { ::core::slice::from_raw_parts(data, len) }) } fn uninit_slice_as_byte_slice_mut( slice: &mut [::core::mem::MaybeUninit] ) -> ::core::result::Result<&mut [::core::mem::MaybeUninit], $crate::SliceSizeOverflowError> { let data = slice.as_ptr() as *mut ::core::mem::MaybeUninit; let len = ::core::mem::size_of::().checked_mul(slice.len()).unwrap(); ::core::result::Result::Ok(unsafe { ::core::slice::from_raw_parts_mut(data, len) }) } } }; (@endian_type little) => {$crate::LittleEndian}; (@endian_type big) => {$crate::BigEndian}; (@endian_type dynamic) => {$crate::Endian}; (@wrapper_type [$ty:ident; $n:literal] $endian:tt) => { [$crate::format_struct!(@wrapper_type $ty $endian); $n] }; (@wrapper_type u8 $endian:tt) => {u8}; (@wrapper_type i8 $endian:tt) => {i8}; (@wrapper_type u16 $endian:tt) => {$crate::U16<$crate::format_struct!(@endian_type $endian)>}; (@wrapper_type i16 $endian:tt) => {$crate::I16<$crate::format_struct!(@endian_type $endian)>}; (@wrapper_type u32 $endian:tt) => {$crate::U32<$crate::format_struct!(@endian_type $endian)>}; (@wrapper_type i32 $endian:tt) => {$crate::I32<$crate::format_struct!(@endian_type $endian)>}; (@wrapper_type u64 $endian:tt) => {$crate::U64<$crate::format_struct!(@endian_type $endian)>}; (@wrapper_type i64 $endian:tt) => {$crate::I64<$crate::format_struct!(@endian_type $endian)>}; (@wrapper_type u128 $endian:tt) => {$crate::U128<$crate::format_struct!(@endian_type $endian)>}; (@wrapper_type i128 $endian:tt) => {$crate::I128<$crate::format_struct!(@endian_type $endian)>}; } #[cfg(test)] #[allow(unused, unreachable_pub)] mod tests { use super::*; use core::{marker::PhantomData, mem::MaybeUninit}; format_struct! { #[derive(Default, Clone)] struct little TestLe { /// this is a byte /// this is a multiline comment #[doc = "this is the third line"] byte: u8, short: u16, word: u32, dword: u64, qword: u128, byte_arr: [u8; 16], short_arr: [u16; 16], } #[derive(Default, Clone)] struct big TestBe { pub byte: u8, short: u16, word: u32, dword: u64, qword: u128, byte_arr: [u8; 16], short_arr: [u16; 16], } #[derive(Default, Clone)] struct dynamic TestDyn { byte: u8, short: u16, word: u32, dword: u64, qword: u128, byte_arr: [u8; 16], short_arr: [u16; 16], } } #[test] fn test_access_short_arr() { let mut test_le = TestLe::default(); for (i, s) in test_le.short_arr.iter_mut().enumerate() { *s = U16((i as u16).to_le_bytes(), PhantomData); } assert_eq!(test_le.short_arr[5].get(), 5); } #[test] fn test_access_u8() { let mut test = TestLe::default(); test.byte = 42; assert_eq!(test.byte, 42); } #[test] fn test_access_u16() { let mut test_le = TestLe::default(); test_le.short = U16::new(1337); assert_eq!(test_le.short.get(), 1337); assert_eq!(test_le.short.0, 1337u16.to_le_bytes()); let mut test_be = TestBe::default(); test_be.short = U16::new(1337); assert_eq!(test_be.short.get(), 1337); assert_eq!(test_be.short.0, 1337u16.to_be_bytes()); } #[test] fn test_access_u32() { let mut test_le = TestLe::default(); test_le.word = U32::new(13371337); assert_eq!(test_le.word.get(), 13371337); assert_eq!(test_le.word.0, 13371337u32.to_le_bytes()); let mut test_be = TestBe::default(); test_be.word = U32::new(13371337); assert_eq!(test_be.word.get(), 13371337); assert_eq!(test_be.word.0, 13371337u32.to_be_bytes()); } #[test] fn test_access_u64() { let mut test_le = TestLe::default(); test_le.dword = U64::new(1337133713371337); assert_eq!(test_le.dword.get(), 1337133713371337); assert_eq!(test_le.dword.0, 1337133713371337u64.to_le_bytes()); let mut test_be = TestBe::default(); test_be.dword = U64::new(1337133713371337); assert_eq!(test_be.dword.get(), 1337133713371337); assert_eq!(test_be.dword.0, 1337133713371337u64.to_be_bytes()); } #[test] fn test_access_u128() { let mut test_le = TestLe::default(); test_le.qword = U128::new(13371337133713371337133713371337); assert_eq!(test_le.qword.get(), 13371337133713371337133713371337u128); assert_eq!( test_le.qword.0, 13371337133713371337133713371337u128.to_le_bytes() ); let mut test_be = TestBe::default(); test_be.qword = U128::new(13371337133713371337133713371337u128); assert_eq!(test_be.qword.get(), 13371337133713371337133713371337); assert_eq!( test_be.qword.0, 13371337133713371337133713371337u128.to_be_bytes() ); } #[test] fn test_uninit() { let mut test = [ MaybeUninit::::uninit(), MaybeUninit::::uninit(), ]; TestLe::uninit_slice_as_byte_slice(&test[..]).unwrap()[0]; } }