use std::mem::size_of; use std::fmt::{ Debug, Formatter, Result as FmtResult }; use std::io::Cursor; use std::iter::{ self, FromIterator }; use std::collections::{ HashMap, HashSet, BTreeMap, BTreeSet }; use serde::{ ser::{ Serialize, Serializer }, de::{ Deserialize, Deserializer, Visitor, Error as DeError }, }; use serde_derive::{ Serialize, Deserialize }; use quickcheck::{ Arbitrary, TestResult, Gen }; use quickcheck_macros::quickcheck; use neodyn_xc::{ Value, to_value, to_string, to_bytes, to_writer, from_value, from_value_ref, from_str, from_bytes, from_reader, error::{ Error, Result, ResultExt }, span::Location, }; /// Helper for generating ratios. #[derive(Debug, Clone, Copy)] struct Ratio(f32); impl Ratio { fn as_f32(&self) -> f32 { self.0 } } impl Arbitrary for Ratio { fn arbitrary(g: &mut Gen) -> Self { Ratio(u32::arbitrary(g) as f32 / u32::MAX as f32) } fn shrink(&self) -> Box> { Box::new(self.0.shrink().map(Ratio)) } } /// Generates a not-`NaN` float. It may still be infinite. fn not_nan_float(g: &mut Gen) -> f64 { loop { let x = f64::arbitrary(g); if !x.is_nan() { break x; } } } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] struct NaiveDate { year: i32, // negative for B.C. dates month: u8, day: u16, } impl Arbitrary for NaiveDate { fn arbitrary(g: &mut Gen) -> Self { NaiveDate { year: i32::arbitrary(g), month: u8::arbitrary(g), day: u16::arbitrary(g), } } } #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] enum ContactInfo { /// There is a contact but you're not allowed to see it. Secret, /// Something that can't be categorized as either a phone # or an email. Other(String), /// Country code + Line Number Phone(u32, String), /// Email address with the name of the responsible person. Email { address: String, name: String, }, } impl Arbitrary for ContactInfo { fn arbitrary(g: &mut Gen) -> Self { // I know, I know, bias and stuff. 
// But `Gen::gen_range()` has been made private, // and this is not a crypto application anyway. // So this naive, modulo-based way is good enough. match usize::arbitrary(g) % 4 { 0 => ContactInfo::Secret, 1 => ContactInfo::Other(String::arbitrary(g)), 2 => ContactInfo::Phone(u32::arbitrary(g), String::arbitrary(g)), 3 => ContactInfo::Email { address: String::arbitrary(g), name: String::arbitrary(g), }, _ => unreachable!("remainder mod 4 is outside of 0..4 ?!") } } } /// A mock type for testing de/serialization of various types. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] struct Department { guid: u64, name: Option, established: NaiveDate, employee_ids: Vec, contacts: Vec, geolocation_lng_lat: [f64; 2], } impl Department { /// Returns a (somewhat) realistic instance. fn sensible_example() -> Self { Department { guid: 1337, name: String::from("Department of Redundancy Department").into(), established: NaiveDate { year: 1984, month: 4, day: 2, }, employee_ids: vec![ 256, 628, 142, ], contacts: vec![ ContactInfo::Email { address: String::from("redundancy@redundancy.net"), name: String::from("Sam Secretary"), }, ContactInfo::Secret, ContactInfo::Phone(39, String::from("1234567890")), ContactInfo::Other(String::from("Carrier Pigeon")), ], geolocation_lng_lat: [ 10.9929546, 49.436009, ], } } } impl Arbitrary for Department { fn arbitrary(g: &mut Gen) -> Self { Department { guid: Arbitrary::arbitrary(g), name: Arbitrary::arbitrary(g), established: Arbitrary::arbitrary(g), employee_ids: Arbitrary::arbitrary(g), contacts: Arbitrary::arbitrary(g), geolocation_lng_lat: [ not_nan_float(g), not_nan_float(g), ], } } } #[test] fn serialize_to_value_and_string() -> Result<()> { let department = Department::sensible_example(); let value = to_value(&department)?; let dept_string = to_string(&department, " ")?; let value_string = to_string(&value, " ")?; let expected_dept_string = r#" { "guid": 1337, "name": ?"Department of Redundancy Department", "established": { 
"year": +1984, "month": 4, "day": 2, }, "employee_ids": [ 256, 628, 142, ], "contacts": [ { "email": { "address": "redundancy@redundancy.net", "name": "Sam Secretary", } }, "secret", { "phone": [ 39, "1234567890", ] }, { "other": "Carrier Pigeon", }, ], "geolocation_lng_lat": [ +10.9929546, +49.436009, ], } "#.trim(); let expected_value_string = r#" { "contacts": [ { "email": { "address": "redundancy@redundancy.net", "name": "Sam Secretary", }, }, "secret", { "phone": [ 39, "1234567890", ], }, { "other": "Carrier Pigeon", }, ], "employee_ids": [ 256, 628, 142, ], "established": { "day": 2, "month": 4, "year": +1984, }, "geolocation_lng_lat": [ +10.9929546, +49.436009, ], "guid": 1337, "name": ?"Department of Redundancy Department", } "#.trim(); assert_eq!(dept_string, expected_dept_string); assert_eq!(value_string, expected_value_string); let dept_copy: Department = from_value(value.clone())?; let value_copy: Value = from_value(value.clone())?; assert_eq!(department, dept_copy); assert_eq!(value, value_copy); Ok(()) } #[test] fn tricky_floating_point() -> Result<()> { // Indentation should not affect how floating-point values are printed. let indents = &[None, Some(""), Some(" ")]; let cases = &[ // Ensure that integer-valued floats are still printed // with a decimal point followed by one zero. (0.0, "+0.0"), (-0.0, "-0.0"), (1.0, "+1.0"), (-2.0, "-2.0"), (-4e20, "-400000000000000000000.0"), (7e22, "+70000000000000000000000.0"), // However, non-integers should still keep all decimal digits. (1000000000.0001, "+1000000000.0001"), (-99999.99999999, "-99999.99999999"), (1000080000.0003, "+1000080000.0003"), (-69897.34995361, "-69897.34995361"), // Infinities, which are considered integers by the internal test, // should not be affected (i.e. they should not print any decimals). (f64::INFINITY, "+inf"), (f64::NEG_INFINITY, "-inf"), // `NaN` should be serialized as `null`. 
(f64::NAN, "null"), ]; for &indent in indents { for &(f, s) in cases { assert_eq!(to_string(&f, indent)?, s); } } Ok(()) } /// Serializing and deserializing a `Value` to/from another `Value`, /// respectively, should produce an exact copy, with the intermediate /// value also being equal to both the initial and the resulting value. #[quickcheck] fn qc_value_round_trip_through_value(value: Value) -> Result<()> { let tmp = to_value(&value)?; let copy: Value = from_value(tmp.clone())?; assert_eq!(value, tmp); assert_eq!(tmp, copy); assert_eq!(value, copy); Ok(()) } /// Deserializing from a borrowed value should yield an exact copy as well. #[quickcheck] fn qc_round_trip_through_value_ref(value: Value) -> Result<()> { let copy: Value = from_value_ref(&value)?; assert_eq!(value, copy); Ok(()) } /// Deserializing borrowed strings and bytes from a `&Value` should work. #[test] fn borrowed_str_and_bytes_from_value_ref() -> Result<()> { let orig_str = "hello borrowed"; let orig_bytes = [0xff, 0x00, 0x99, 0xab, 0x13, 0x37]; let val_str = Value::String(orig_str.to_string()); let val_bytes = Value::Blob(orig_bytes.to_vec()); let copy_str: &str = from_value_ref(&val_str)?; let copy_bytes: &[u8] = from_value_ref(&val_bytes)?; assert_eq!(orig_str, copy_str); assert_eq!(orig_bytes, copy_bytes); Ok(()) } #[quickcheck] fn qc_value_round_trip_through_text_compact(value: Value) -> Result<()> { let string = to_string(&value, None)?; let copy: Value = from_str(&string)?; assert_eq!(value, copy); Ok(()) } #[quickcheck] fn qc_value_round_trip_through_text_pretty(value: Value) -> Result<()> { let string = to_string(&value, Some(" "))?; let copy: Value = from_str(&string)?; assert_eq!(value, copy); Ok(()) } #[quickcheck] fn qc_value_round_trip_through_binary(value: Value) -> Result<()> { let bytes = to_bytes(&value)?; let copy_slice: Value = from_bytes(&bytes)?; let copy_stream: Value = from_reader(Cursor::new(&bytes))?; assert_eq!(copy_slice, value); assert_eq!(copy_stream, value); Ok(()) } 
#[quickcheck] fn qc_typed_round_trip_through_value(department: Department) -> Result<()> { let value = to_value(&department)?; let copy_ref: Department = from_value_ref(&value)?; let copy: Department = from_value(value)?; assert_eq!(department, copy_ref); assert_eq!(department, copy); Ok(()) } #[quickcheck] fn qc_typed_round_trip_through_text_compact(department: Department) -> Result<()> { let string = to_string(&department, None)?; let copy: Department = from_str(&string)?; assert_eq!(department, copy); Ok(()) } #[quickcheck] fn qc_typed_round_trip_through_text_pretty(department: Department) -> Result<()> { let string = to_string(&department, Some(" "))?; let copy: Department = from_str(&string)?; assert_eq!(department, copy); Ok(()) } #[quickcheck] fn qc_typed_round_trip_through_text_compact_then_value( department: Department ) -> Result<()> { let string = to_string(&department, None)?; let value: Value = string.parse()?; let copy_ref: Department = from_value_ref(&value)?; let copy: Department = from_value(value)?; assert_eq!(department, copy_ref); assert_eq!(department, copy); Ok(()) } #[quickcheck] fn qc_typed_round_trip_through_text_pretty_then_value( department: Department ) -> Result<()> { let string = to_string(&department, Some(" "))?; let value: Value = string.parse()?; let copy_ref: Department = from_value_ref(&value)?; let copy: Department = from_value(value)?; assert_eq!(department, copy_ref); assert_eq!(department, copy); Ok(()) } #[quickcheck] fn qc_typed_round_trip_through_value_then_text_compact( department: Department ) -> Result<()> { let value = to_value(&department)?; let string = to_string(&value, None)?; let copy: Department = from_str(&string)?; assert_eq!(department, copy); Ok(()) } #[quickcheck] fn qc_typed_round_trip_through_value_then_text_pretty( department: Department ) -> Result<()> { let value = to_value(&department)?; let string = to_string(&value, Some(" "))?; let copy: Department = from_str(&string)?; assert_eq!(department, copy); 
Ok(()) } #[quickcheck] fn qc_typed_round_trip_through_binary(department: Department) -> Result<()> { let bytes = to_bytes(&department)?; let copy_slice: Department = from_bytes(&bytes)?; let copy_stream: Department = from_reader(Cursor::new(&bytes))?; assert_eq!(copy_slice, department); assert_eq!(copy_stream, department); Ok(()) } #[quickcheck] fn qc_typed_round_trip_through_binary_slice_then_value( department: Department ) -> Result<()> { let bytes = to_bytes(&department)?; let value: Value = from_bytes(&bytes)?; let copy_ref: Department = from_value_ref(&value)?; let copy: Department = from_value(value)?; assert_eq!(department, copy_ref); assert_eq!(department, copy); Ok(()) } #[quickcheck] fn qc_typed_round_trip_through_binary_stream_then_value( department: Department ) -> Result<()> { let reader = { let mut buf = Vec::with_capacity(1024 * 1024); to_writer(&mut buf, &department)?; buf.shrink_to_fit(); Cursor::new(buf) }; let value: Value = from_reader(reader)?; let copy_ref: Department = from_value_ref(&value)?; let copy: Department = from_value(value)?; assert_eq!(department, copy_ref); assert_eq!(department, copy); Ok(()) } #[quickcheck] fn qc_typed_round_trip_through_value_then_binary_slice( department: Department ) -> Result<()> { let value = to_value(&department)?; let bytes = to_bytes(&value)?; let copy: Department = from_bytes(&bytes)?; assert_eq!(department, copy); Ok(()) } #[quickcheck] fn qc_typed_round_trip_through_value_then_binary_stream( department: Department ) -> Result<()> { let value = to_value(&department)?; let reader = { let mut buf = Vec::with_capacity(1024 * 1024); to_writer(&mut buf, &value)?; buf.shrink_to_fit(); Cursor::new(buf) }; let copy: Department = from_reader(reader)?; assert_eq!(department, copy); Ok(()) } /// The text representation was in a sense "lossy" with regards to `f32`: it /// wouldn't exactly reproduce the literal representation of an `f32` because /// everything was handled using an `f64` internally. 
However, this only meant /// that the text representation would produce spurious significant digits, and /// converting the resulting string back to an `f32` should still have exactly /// preserved the value. /// /// However, because human-readability is important for the textual format, /// we changed the serializer and now it serializes `f32` without spurious /// significant digits. We still include this test to ensure nothing funny /// or unexpected happened (or will happen in the future). #[quickcheck] fn qc_f32_round_trip_through_text(f: f32) -> Result { // `NaN` becomes `null` so don't expect that to work if f.is_nan() { return Ok(TestResult::discard()); } let string = to_string(&f, None)?; let copy: f32 = from_str(&string)?; Ok(TestResult::from_bool(f == copy)) } /// Test for round-tripping an f32 through `Value`, just for symmetry. #[quickcheck] fn qc_f32_round_trip_through_value(f: f32) -> Result { if f.is_nan() { return Ok(TestResult::discard()); } let value = to_value(&f)?; let copy_ref: f32 = from_value_ref(&value)?; let copy: f32 = from_value(value)?; Ok(TestResult::from_bool(f == copy_ref && f == copy)) } #[quickcheck] fn qc_f32_round_trip_through_binary(f: f32) -> Result { if f.is_nan() { return Ok(TestResult::discard()); } let bytes = to_bytes(&f)?; let copy_slice: f32 = from_bytes(&bytes)?; let copy_stream: f32 = from_reader(Cursor::new(&bytes))?; assert_eq!(copy_slice, f); assert_eq!(copy_stream, f); Ok(TestResult::passed()) } #[test] fn basic_deserialize_text_good() -> Result<()> { let cases = &[ ("null", Value::Null), ("?null", Value::Opt(Box::new(Value::Null))), ("??null", Value::Opt(Value::Opt(Value::Null.into()).into())), ("true", Value::Bool(true)), (" false", Value::Bool(false)), (" ?true ", Value::Opt(Box::new(Value::Bool(true)))), ("#f0e0216500#", Value::Blob(vec![0xf0, 0xe0, 0x21, 0x65, 0x00])), (" ? 
#a0 0a#", Value::Opt(Box::new(Value::Blob(vec![0xa0, 0x0a])))), ]; for (string, expected) in cases { let actual: Value = string.parse()?; assert_eq!(&actual, expected); } Ok(()) } #[test] fn basic_deserialize_text_bad() { let cases = &[ (1, 3, " ❤", "unexpected character `❤`"), (2, 4, " \n ", "unexpected end of input"), (1, 2, "?", "unexpected end of input"), (1, 2, " ' ", "unexpected character `'`"), (1, 1, "foobar", "unexpected word `foobar`"), (3, 4, "\n\n#01w2#", "unexpected character `w`, expected hex digits"), (1, 5, "#012z#", "unexpected character `z`, expected hex digits"), (1, 5, "#012 3#", "unexpected character ` `, expected hex digits"), (1, 4, "#012#", "odd number of hex digits in blob"), (1, 5, "#abc", "unexpected end of input"), (1, 6, "#abab", "unexpected end of input"), (2, 3, "#abadf00f# \n #cafebabe#", "unexpected garbage, expected end of input"), (2, 1, "\r", "unexpected end of input"), (2, 1, "\r\n", "unexpected end of input"), (3, 5, "\r\n\u{85} ", "unexpected end of input"), ]; for &(line, column, string, message) in cases { let result: Result = string.parse(); let err = result.expect_err(string); let err_str = err.to_string(); let location = Location { line, column }; assert_eq!(err.context::(), Some(&location), "{}", err_str); assert!(err_str.contains(message), "actual: {}, expected: {}", err_str, message); assert!(err_str.contains(&location.to_string()), "actual: {}, expected: {}", err_str, location); } } #[test] fn deserialize_unsigned_number_good() -> Result<()> { let int_cases = &[ ("0", 0), ("08", 8), ("00", 0), (" 010", 10), ("010 ", 10), ("999", 999), ]; for &(string, expected) in int_cases { let actual: u32 = from_str(string)?; assert_eq!(actual, expected); } let float_cases: &[(&str, f64)] = &[ ("0.0", 0.0), (".0", 0.0), ("0.", 0.0), (" .33", 0.33), ("1337. 
", 1337.0), ("99.999", 99.999), (" 0.5", 0.5), (" 6.81729", 6.81729), ("3.1415927", 3.1415927), ("2.71828 ", 2.71828), (" 0137.035999 ", 137.035999), (" 0.007297353 ", 0.007297353), ("inf", f64::INFINITY), (" inf ", f64::INFINITY), ]; for &(string, expected_f64) in float_cases { let expected_f32 = expected_f64 as f32; let actual_f32: f32 = from_str(string)?; let actual_f64: f64 = from_str(string)?; // Parsing should preserve the numbers as well as possible, and // by this I mean agreement with the Rust literal form, rounded. assert_eq!(actual_f32, expected_f32); assert_eq!(actual_f64, expected_f64); // Conversion from f64 to f32 loses precision. // However, the numbers should still be close. // We have to store the test cases in f64, though, // because parsing an f32 and converting the // result to f64 need not yield the exact same // value as directly parsing into an f64 would. // We must also check infinities, preventively, // since `inf - inf` is NaN. assert!( ( actual_f32.is_infinite() && actual_f64.is_infinite() ) || ( f64::abs(f64::from(actual_f32) - actual_f64) <= f64::from(f32::EPSILON) * actual_f64 ), "{}_f32 vs {}_f64", actual_f32, actual_f64 ); } Ok(()) } #[test] fn deserialize_unsigned_number_bad() { let cases = &[ (1, 1, "²⁷⁵⁰", "invalid digit found in string"), (1, 1, "൵", "invalid digit found in string"), // U+0D75 MALAYALAM FRACTION THREE QUARTERS (1, 4, "123abc", "expected Unicode word boundary or punctuation"), (1, 1, "99999999999999999999999", "number too large to fit in target type"), // overflows internal `u64` repr ]; for &(line, column, string, message) in cases { let err = from_str::(string).unwrap_err(); let err_str = err.to_string(); let location = Location { line, column }; assert_eq!(err.context::(), Some(&location), "{}", err_str); assert!(err_str.contains(message), "actual: {}, expected: {}", err_str, message); assert!(err_str.contains(&location.to_string()), "actual: {}, expected: {}", err_str, location); } } #[test] fn 
deserialize_signed_number_good() -> Result<()> { let cases = &[ // First, floats ("+0.0", Value::from(0.0_f64)), ("+000.000", Value::from(0.0_f64)), ("-000.00", Value::from(0.0_f64)), (" -inf", Value::from(f64::NEG_INFINITY)), (" +inf ", Value::from(f64::INFINITY)), ("+0099.87654321 ", Value::from(99.87654321_f64)), (" -162456.23962", Value::from(-162456.23962_f64)), (" +.05", Value::from(0.05_f64)), (" -60.", Value::from(-60.0_f64)), ("-8.3", Value::from(-8.3_f64)), ("-8.", Value::from(-8.0_f64)), ("-.3", Value::from(-0.3_f64)), // Then, integers (" +000", Value::from(0_i64)), ("-000", Value::from(0_i64)), ("+0080 ", Value::from(80_i64)), ("-0080", Value::from(-80_i64)), (" -649", Value::from(-649_i64)), (" +1947562 ", Value::from(1947562_i64)), ("+1", Value::from(1_i64)), ]; for &(string, ref expected) in cases { let actual: Value = string.parse()?; assert_eq!(&actual, expected); } Ok(()) } #[test] fn deserialize_signed_number_bad() { let cases = &[ (2, 4, "\n +INF ", "unexpected character `I`"), (2, 4, "\n -INF ", "unexpected character `I`"), (1, 3, " + inf ", "unexpected character ` `"), (1, 3, " - inf ", "unexpected character ` `"), (1, 2, "-infinity", "unexpected character `i`"), (1, 2, "+infinity", "unexpected character `i`"), (1, 2, "+nan", "unexpected character `n`"), (1, 2, "-nan", "unexpected character `n`"), (1, 3, "+1foo", "expected Unicode word boundary or punctuation"), (3, 1, "\r\n\r\n+.", "invalid float literal"), (1, 2, "+", "unexpected end of input, expected decimal digits or 'inf'"), (1, 2, "-", "unexpected end of input, expected decimal digits or 'inf'"), (1, 5, "+1.2.3", "unexpected garbage, expected end of input"), (1, 1, ".", "failed to parse floating-point"), ]; for &(line, column, string, message) in cases { let err = from_str::(string).unwrap_err(); let err_str = err.to_string(); let location = Location { line, column }; assert_eq!(err.context::(), Some(&location), "{}", err_str); assert!(err_str.contains(message), "actual: {}, expected: 
{}", err_str, message); assert!(err_str.contains(&location.to_string()), "actual: {}, expected: {}", err_str, location); } } #[test] fn deserialize_string_good() -> Result<()> { let cases = &[ (r#""""#, ""), (r#""foo bar""#, "foo bar"), (r#""a\nb""#, "a\nb"), (r#""\n\rnewline" "#, "\n\rnewline"), (r#" "tab\t" "#, "tab\t"), (r#"" \u{5b57}""#, " 字"), (r#""more text \u{006F22}\u{a} qux \u{0009}""#, "more text 漢\n qux \t"), (r#""\'\"\'\\""#, r#"'"'\"#), (r#""🦀👍""#, "\u{1F980}\u{1F44D}"), (r#""\u{1F980}\u{1f44D}""#, "🦀👍"), (r#""{""#, "{"), (r#""}{""#, "}{"), (r#""}{ }""#, "}{ }"), ]; for &(escaped, expected) in cases { let actual: String = from_str(escaped) .chain(|| format!("escaped: {}", escaped))?; assert_eq!(actual, expected, "escaped: {}", escaped); } Ok(()) } #[test] fn deserialize_string_bad() { let cases = &[ (1, 2, r#"""#, "unexpected end of input in string"), (1, 3, r#""\"#, "unexpected end of input in escape sequence"), (1, 3, r#""\q""#, "unexpected character `q`"), (1, 3, r#""\{""#, "unexpected character `{`"), (1, 4, r#""\u61""#, "unexpected character `6`, expected '{'"), (1, 4, r#""\u"#, "unexpected end of input in Unicode escape, expected '{'"), (1, 5, r#""\u{"#, "unexpected end of input in Unicode escape, expected '}'"), (1, 5, r#""\u{z}""#, "unexpected character `z`, expected hex digits"), (1, 5, r#""\u{fffffffffff}""#, "invalid Unicode escape"), (1, 5, r#""\u{DCBA}""#, "invalid Unicode code point: U+DCBA") ]; for &(line, column, escaped, message) in cases { let err = from_str::(escaped).unwrap_err(); let err_str = err.to_string(); let location = Location { line, column }; assert_eq!(err.context::(), Some(&location), "{}", err_str); assert!(err_str.contains(message), "actual: {}, expected: {}", err_str, message); assert!(err_str.contains(&location.to_string()), "actual: {}, expected: {}", err_str, location); } } #[test] fn deserialize_option_good_value() -> Result<()> { let opt_1: Option = from_value(Value::Null)?; assert_eq!(opt_1, None); let 
opt_1_ref: Option = from_value_ref(&Value::Null)?; assert_eq!(opt_1_ref, None); let opt_2: Option = from_value( Value::Opt(Box::new(Value::Bool(true))) )?; assert_eq!(opt_2, Some(true)); // Option-of-Option should not confuse the deserializer let opt_3: Option> = from_value(Value::Null)?; assert_eq!(opt_3, None); let opt_3_ref: Option> = from_value_ref(&Value::Null)?; assert_eq!(opt_3_ref, None); // Option-of-Option should *still* not confuse the deserializer let opt_4: Option> = from_value( Value::Opt(Box::new(Value::Null)) )?; assert_eq!(opt_4, Some(None)); let opt_4_ref: Option> = from_value_ref( &Value::Opt(Box::new(Value::Null)) )?; assert_eq!(opt_4_ref, Some(None)); let opt_5: Option> = from_value( Value::Opt(Box::new(Value::Opt(Box::new(Value::Bool(false))))) )?; assert_eq!(opt_5, Some(Some(false))); let opt_5_ref: Option> = from_value_ref( &Value::Opt(Box::new(Value::Opt(Box::new(Value::Int(-23765))))) )?; assert_eq!(opt_5_ref, Some(Some(-23765))); // Optional is also a recursive type, but previously, // it wasn't properly parsed in some cases // (mutable state is *hard!*) let opt_6: Vec>> = from_value( Value::Array(vec![ Value::Opt(Box::new(Value::Array(vec![]))), Value::Opt(Box::new(Value::Array(vec![Value::Null]))), ]) )?; assert_eq!(opt_6, vec![Some(vec![]), Some(vec![()])]); let opt_6_ref: Vec>> = from_value_ref( &Value::Array(vec![ Value::Opt(Box::new(Value::Array(vec![Value::Null]))), Value::Null, Value::Opt(Box::new(Value::Array(Vec::new()))), ]) )?; assert_eq!(opt_6_ref, vec![Some(vec![()]), None, Some(vec![])]); // Try the same with maps too, just for good measure let opt_7: BTreeMap, Option>> = from_value( Value::Map(BTreeMap::from_iter(vec![ ( Value::Null, Value::Opt(Box::new( Value::Map(BTreeMap::from_iter(vec![ (Value::Bool(false), Value::Uint(1337)), ])) )) ), ( Value::Opt(Box::new(Value::Null)), Value::Null ), ])) )?; assert_eq!(opt_7, BTreeMap::from_iter(vec![ ( None, Some(BTreeMap::from_iter(vec![ (false, Value::Uint(1337)), ])) ), ( 
Some(()), None ), ])); let val_7 = Value::Map(BTreeMap::from_iter(vec![ ( Value::Null, Value::Array(vec![Value::Null]) ), ( Value::Opt(Box::new(Value::from("key"))), Value::Array(vec![ Value::Opt(Box::new(Value::Null)) ]) ), ])); let opt_7_ref: HashMap, HashSet>> = from_value_ref(&val_7)?; assert_eq!(opt_7_ref, HashMap::from_iter(vec![ (None, HashSet::from_iter(vec![None])), (Some("key"), HashSet::from_iter(vec![Some(())])), ])); Ok(()) } #[test] fn deserialize_option_good_text() -> Result<()> { let opt_1: Option = from_str("null")?; assert_eq!(opt_1, None); let opt_2: Option = from_str("?true")?; assert_eq!(opt_2, Some(true)); // Option-of-Option should not confuse the deserializer let opt_3: Option> = from_str("null")?; assert_eq!(opt_3, None); // Option-of-Option should *still* not confuse the deserializer let opt_4: Option> = from_str("?null")?; assert_eq!(opt_4, Some(None)); let opt_5: Option> = from_str("??false")?; assert_eq!(opt_5, Some(Some(false))); // Optional is also a recursive type, but previously, // it wasn't properly parsed in some cases // (mutable state is *hard!*) let opt_6: Vec>> = from_str("[?[], ?[null]]")?; assert_eq!(opt_6, vec![Some(vec![]), Some(vec![()])]); // Try the same with maps too, just for good measure let opt_7: BTreeMap, Option>> = from_str("{null: ?{false: 1337}, ?null: null}")?; assert_eq!(opt_7, BTreeMap::from_iter(vec![ ( None, Some(BTreeMap::from_iter(vec![ (false, Value::Uint(1337)), ])) ), ( Some(()), None ), ])); Ok(()) } #[test] fn deserialize_option_good_binary() -> Result<()> { fn test_case(bytes: &[u8], expected: T) -> Result<()> where T: Debug + PartialEq + for<'de> Deserialize<'de> { let value_slice: T = from_bytes(bytes)?; let value_stream: T = from_reader(Cursor::new(bytes))?; assert_eq!(value_slice, expected); assert_eq!(value_stream, expected); Ok(()) } test_case::>(&[0b000_001_00], None)?; test_case::>( &[ 0b000_001_01, 0b000_001_11, ], Some(true) )?; // Option-of-Option should not confuse the deserializer 
test_case::>>( &[0b000_001_00], None )?; // Option-of-Option should *still* not confuse the deserializer test_case::>>( &[ 0b000_001_01, 0b000_001_00, ], Some(None) )?; // One last! test_case::>>( &[ 0b000_001_01, 0b000_001_01, 0b000_001_10, ], Some(Some(false)) )?; // Optional is also a recursive type, but previously, // it wasn't properly parsed in some cases // (mutable state is *hard!*) test_case::>>>( &[ 0b101_000_10, // small array, 2 items 0b000_001_01, // some 0b101_000_00, // small array, 0 items 0b000_001_01, // some 0b101_000_01, // small array, 1 item 0b000_001_00, // null (here: unit) ], vec![Some(vec![]), Some(vec![()])] )?; // Try the same with maps too, just for good measure test_case::, Option>>>( &[ 0b110_000_10, // small map, 2 entries 0b000_001_00, // key #0 = null 0b000_001_01, // val #0 = some 0b110_000_01, // small map, 1 entry 0b000_001_10, // key #0 = false 0b111_010_01, // val #0 = unsigned integer, 2 bytes 0x39, 0x05, // = 1337 0b000_001_01, // key #1 = some 0b000_001_00, // null (here: unit) 0b000_001_00, // val #1 = null (here: unit) ], BTreeMap::from_iter(vec![ ( None, Some(BTreeMap::from_iter(vec![ (false, Value::Uint(1337)), ])) ), ( Some(()), None ), ]) )?; Ok(()) } /// Implicit `Some`-wrapping used to be allowed, but it no longer is. 
#[test] fn deserialize_option_implicit_bad_value() { fn test_case(value: Value, message: &str) where T: Debug + for<'de> Deserialize<'de> { let res_ref: Result> = from_value_ref(&value); let err_ref = res_ref.expect_err(&format!( "deserialization should have failed with message: {}", message )); let err_str_ref = err_ref.to_string(); let res: Result> = from_value(value); let err = res.expect_err(&format!( "deserialization should have failed with message: {}", message )); let err_str = err.to_string(); assert!( err_str_ref.contains( &format!("invalid type: {}, expected option", message) ), "actual: {}", err_str_ref ); assert!( err_str.contains( &format!("invalid type: {}, expected option", message) ), "actual: {}", err_str ); } test_case::( Value::Uint(42), "integer `42`" ); test_case::( Value::String(String::from("foo")), "string \"foo\"" ); // two layers of `Option` should _still_ not confuse the deserializer! test_case::>( Value::Opt(Box::new(Value::Bool(true))), "boolean `true`" ); test_case::>( Value::Array(vec![Value::Int(-1), Value::Int(0), Value::Int(1)]), "sequence" ); test_case::>( Value::Map(BTreeMap::from_iter(vec![ (Value::String(String::from("bar")), Value::from(3.1415927)), (Value::String(String::from("qux")), Value::from(2.7182818)), ])), "map" ); // Implicit optionals should be disallowed recursively, // even inside arrays and maps let val_arr = Value::Array(vec![ Value::Array(vec![ Value::Opt(Box::new(Value::Int(32767))), Value::Opt(Box::new(Value::Int(-32768))), Value::Int(-999), Value::Opt(Box::new(Value::Int(9999))), ]), ]); let res_arr_ref: Result>>> = from_value_ref(&val_arr); let res_arr: Result>>> = from_value(val_arr); let err_arr_ref = res_arr_ref.expect_err( "Vec>> deserialized from array of array of non-optional" ); let err_arr = res_arr.expect_err( "Vec>> deserialized from array of array of non-optional" ); let err_str_arr_ref = err_arr_ref.to_string(); let err_str_arr = err_arr.to_string(); assert!(err_str_arr_ref.contains( "invalid 
type: integer `-999`, expected option" )); assert!(err_str_arr.contains( "invalid type: integer `-999`, expected option" )); let val_map_1 = Value::Map( BTreeMap::from_iter(vec![ (Value::Opt(Box::new(Value::Bool(false))), Value::Uint(137)), (Value::Bool(true), Value::Uint(138)), ]) ); let res_map_1_ref: Result, u64>> = from_value_ref(&val_map_1); let res_map_1: Result, u64>> = from_value(val_map_1); let err_map_1_ref = res_map_1_ref.expect_err( "Map, _> deserialized from map with non-optional key" ); let err_map_1 = res_map_1.expect_err( "Map, _> deserialized from map with non-optional key" ); let err_str_map_1_ref = err_map_1_ref.to_string(); let err_str_map_1 = err_map_1.to_string(); assert!(err_str_map_1_ref.contains( "invalid type: boolean `true`, expected option" )); assert!(err_str_map_1.contains( "invalid type: boolean `true`, expected option" )); let val_map_2 = Value::Map( BTreeMap::from_iter(vec![ (Value::Bool(true), Value::Opt(Box::new(Value::Uint(138)))), (Value::Bool(false), Value::Uint(137)), ]) ); let res_map_2_ref: Result>> = from_value_ref(&val_map_2); let res_map_2: Result>> = from_value(val_map_2); let err_map_2_ref = res_map_2_ref.expect_err( "Map<_, Option<_>> deserialized from map with non-optional value" ); let err_map_2 = res_map_2.expect_err( "Map<_, Option<_>> deserialized from map with non-optional value" ); let err_str_map_2_ref = err_map_2_ref.to_string(); let err_str_map_2 = err_map_2.to_string(); assert!(err_str_map_2_ref.contains( "invalid type: integer `137`, expected option" )); assert!(err_str_map_2.contains( "invalid type: integer `137`, expected option" )); } #[test] fn deserialize_option_implicit_bad_text() { fn test_case(string: &str, message: &str) where T: Debug + for<'de> Deserialize<'de> { let res: Result> = from_str(string); let err = res.expect_err(&format!( "deserialization should have failed with message: {}", message )); let err_str = err.to_string(); assert!( err_str.contains( &format!("unexpected {}, expected 
option", message) ), "actual: {}", err_str ); } test_case::( "42", "integer `42`" ); test_case::( "\"foo\"", "string \"foo\"" ); // two layers of `Option` should _still_ not confuse the deserializer! test_case::>( "?true", "boolean `true`" ); test_case::>( "[-1, +0, +1]", "sequence" ); test_case::>( "{ \"bar\": +3.1415927, \"qux\": +2.7182818 }", "map" ); // Implicit optionals should be disallowed recursively, // even inside arrays and maps let res_arr: Result>>> = from_str( "[[?+32767, ?-32768, -999, ?+9999]]" ); let err_arr = res_arr.expect_err( "Vec>> deserialized from array of array of non-optional" ); let err_str_arr = err_arr.to_string(); assert!(err_str_arr.contains( "unexpected integer `-999`, expected option" )); let res_map_1: Result, u64>> = from_str( "{?false: 137, true: 138}" ); let err_map_1 = res_map_1.expect_err( "Map, _> deserialized from map with non-optional key" ); let err_str_map_1 = err_map_1.to_string(); assert!(err_str_map_1.contains( "unexpected boolean `true`, expected option" )); let res_map_2: Result>> = from_str( "{true: ?138, false: 137}" ); let err_map_2 = res_map_2.expect_err( "Map<_, Option<_>> deserialized from map with non-optional value" ); let err_str_map_2 = err_map_2.to_string(); assert!(err_str_map_2.contains( "unexpected integer `137`, expected option" )); } #[test] fn deserialize_option_implicit_bad_binary() { fn test_case(bytes: &[u8], message: &str) where T: Debug + for<'de> Deserialize<'de> { // First, test the slice deserializer { let res: Result> = from_bytes(bytes); let err = res.expect_err(&format!( "slice deserialization should have failed with message: {}", message )); let err_str = err.to_string(); assert!( err_str.contains( &format!("invalid type: {}, expected option", message) ), "actual: {}", err_str ); } // Next, test the stream deserializer { let res: Result> = from_reader(Cursor::new(bytes)); let err = res.expect_err(&format!( "stream deserialization should have failed with message: {}", message )); let 
err_str = err.to_string(); assert!( err_str.contains( &format!("invalid type: {}, expected option", message), ), "actual: {}", err_str ); } } test_case::( &[ 0b111_010_10, // unsigned integer, 4 bytes 0x2a, 0x00, 0x00, 0x00, // 42 in little-endian ], "integer `42`" ); test_case::( &[ 0b000_000_00, // symbol table start, 1 bytes of length 0b0000_0001, // 1 symbol 0b100_000_11, // short string, single use, 3 bytes b'f', b'o', b'o', // string payload 0b011_000_00, // string #0 = "foo" ], // Not `string "foo"`, because the contents of the string depends on // the symbol table, so `impl From for Unexpected` can // only tell that it's a string, but not its actual value. "string" ); // two layers of `Option` should _still_ not confuse the deserializer! test_case::>( &[ 0b000_001_01, // optional present 0b000_001_11, // Boolean `true` ], "boolean `true`" ); test_case::>( &[ 0b101_000_11, // small array, 3 elements 0b111_001_11, // signed integer, 8 bytes 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // -1 0b111_001_11, // signed integer, 8 bytes 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0 0b111_001_11, // signed integer, 8 bytes 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 1 ], "sequence" ); test_case::>( &[ 0b000_000_00, // symbol table start, 2^00 = 1 bytes of length 0b0000_0010, // 2 symbols 0b100_000_11, // symbol #0: short string, single use, 3 bytes b'b', b'a', b'r', // symbol #0 payload 0b100_000_11, // symbol #1: short string, single use, 3 bytes b'q', b'u', b'x', // symbol #1 payload 0b110_000_10, // small map, 2 entries 0b011_000_00, // key #0 = string #0 0b111_111_10, // val #0 = floating-point number, 4 bytes 0xdb, 0x0f, 0x49, 0x40, // 3.1415927 as IEEE-754 little-endian 0b011_000_00, // key #1 = string #1 0b111_111_10, // val #1 = floating-point number, 4 bytes 0x54, 0xf8, 0x2d, 0x40, // 2.7182818 as IEEE-754 little-endian ], "map" ); // Implicit optionals should be disallowed recursively, // even inside arrays and maps let check_arr = |res: 
Result>>>| { let err = res.expect_err( "Vec>> deserialized from array of array of non-optional" ); let err_str = err.to_string(); assert!(err_str.contains( "invalid type: integer `-999`, expected option" )); }; let arr_bytes: &[u8] = &[ 0b101_000_01, // small array, 1 item 0b101_001_00, // small array, 4 items 0b000_001_01, // item #0: optional 0b111_001_01, // signed integer, 2 bytes 0xff, 0x7f, // +32767 0b000_001_01, // item #1: optional 0b111_001_01, // signed integer, 2 bytes 0x00, 0x80, // -32768 0b111_001_01, // item #2: signed integer, 2 bytes 0x19, 0xfc, // -999 0b000_001_01, // item #3: optional 0b111_001_01, // signed integer, 2 bytes 0x0f, 0x27, // +9999 ]; check_arr(from_bytes(arr_bytes)); check_arr(from_reader(Cursor::new(arr_bytes))); let check_map_1 = |res: Result, u64>>| { let err = res.expect_err( "Map, _> deserialized from map with non-optional key" ); let err_str = err.to_string(); assert!(err_str.contains( "invalid type: boolean `true`, expected option" )); }; let map_1_bytes: &[u8] = &[ 0b110_000_10, // small map, 2 entries 0b000_001_01, // key #0: optional 0b000_001_10, // = false 0b111_010_00, // val #0: unsigned int, 1 byte 0b1000_1001, // = 137 0b000_001_11, // key #1: true 0b111_010_00, // val #1: unsigned int, 1 byte 0b1000_1010, // = 138 ]; check_map_1(from_bytes(map_1_bytes)); check_map_1(from_reader(Cursor::new(map_1_bytes))); let check_map_2 = |res: Result>>| { let err = res.expect_err( "Map<_, Option<_>> deserialized from map with non-optional value" ); let err_str = err.to_string(); assert!(err_str.contains( "invalid type: integer `137`, expected option" )); }; let map_2_bytes: &[u8] = &[ 0b110_000_10, // small map, 2 entries 0b000_001_11, // key #0: true 0b000_001_01, // val #0: optional 0b111_010_00, // = unsigned int, 1 byte 0b1000_1010, // = 138 0b000_001_11, // key #1: false 0b111_010_00, // val #1: unsigned int, 1 byte 0b1000_1001, // = 137 ]; check_map_2(from_bytes(map_2_bytes)); 
check_map_2(from_reader(Cursor::new(map_2_bytes))); } #[test] fn deserialize_type_error_location() { let errors = vec![ from_str::("\r\n #abcdef#").unwrap_err(), from_str::<()>("\n }").unwrap_err(), from_str::(":").unwrap_err(), from_str::>("\r\r\r , ").unwrap_err(), from_str::(" ] ").unwrap_err(), ]; let cases = &[ (2, 3, "unexpected byte array, expected u32"), (2, 6, "unexpected character `}`, expected unit"), (1, 1, "unexpected character `:`, expected a string"), (4, 3, "unexpected character `,`, expected a sequence"), (1, 4, "unexpected character `]`, expected a Neodyn Exchange value"), ]; assert_eq!(errors.len(), cases.len()); for (&(line, column, message), error) in cases.iter().zip(errors) { let err_str = error.to_string(); let location = Location { line, column }; assert_eq!(error.context::(), Some(&location), "{}", err_str); assert!(err_str.contains(message), "actual: {}, expected: {}", err_str, message); assert!(err_str.contains(&location.to_string()), "actual: {}, expected: {}", err_str, location); } } #[test] fn deserialize_array_good() -> Result<()> { let value_1: Vec> = from_str("[?true, ?false, null]")?; assert_eq!(value_1, vec![Some(true), Some(false), None]); let value_2: Vec>> = from_str("[[], [ ?null,] , ]")?; assert_eq!(value_2, vec![vec![], vec![Some(())]]); let value_3: Value = "[[[[],[],],]]".parse()?; assert_eq!(value_3, Value::Array(vec![ Value::Array(vec![ Value::Array(vec![ Value::Array(Vec::new()), Value::Array(Vec::new()), ]) ]) ])); let value_4: Vec = from_str("[+1,-2,+3]")?; assert_eq!(value_4, [1, -2, 3]); let value_5: Vec = from_str("[-876,-349,+05]")?; assert_eq!(value_5, [-876, -349, 5]); let value_6: HashSet = from_str(" [+13, +37] ")?; assert_eq!(value_6, HashSet::from_iter(vec![13, 37])); let value_7: BTreeSet = from_str(" [-50, +19]")?; assert_eq!(value_7, BTreeSet::from_iter(vec![-50, 19])); let value_8: Vec = from_str("[4,6,8]")?; assert_eq!(value_8, [4, 6, 8]); let value_9: Vec = from_str("[45,90,36]")?; assert_eq!(value_9, 
[45, 90, 36]); let value_10: Vec = from_str("[+28.39,-44.226,+65,-.23,+3681.]")?; assert_eq!(value_10, [28.39, -44.226, 65.0, -0.23, 3681.0]); let value_11: Vec = from_str("[ -5.15, +48., ]")?; assert_eq!(value_11, [-5.15, 48.0]); Ok(()) } #[test] fn deserialize_array_bad() { let strings = [ "[", "[,", "[1", "[null,", "[+6.28,,", "[[]", "[#7777# :]", ]; let errors: Vec<_> = strings.iter() .map(|s| s.parse::().unwrap_err()) .collect(); let cases = [ (1, 2, "unexpected end of input"), (1, 2, "unexpected character `,`"), (1, 3, "unexpected end of input"), (1, 7, "unexpected end of input"), (1, 8, "unexpected character `,`"), (1, 4, "unexpected end of input"), (1, 9, "unexpected character `:`, expected ',' or ']' after value in array"), ]; assert_eq!(errors.len(), cases.len()); for (&(line, column, message), error) in cases.iter().zip(errors) { let err_str = error.to_string(); let location = Location { line, column }; assert!( err_str.contains(message), "actual: {}, expected: {}", err_str, message ); assert!( err_str.contains(&location.to_string()), "actual: {}, expected: {}", err_str, location ); assert_eq!(error.context::(), Some(&location), "{}", err_str); } } #[test] fn deserialize_map_good() -> Result<()> { let value_1: HashMap> = from_str("{ null: ?true, []: ?false, {}: null, }")?; assert_eq!(value_1, HashMap::from_iter(vec![ (Value::Null, Some(true)), (Value::Array(Vec::new()), Some(false)), (Value::Map(BTreeMap::new()), None), ])); let value_2: Value = "{#aa# : #bb# , #ccdd# : [true]}".parse()?; assert_eq!(value_2, Value::from_iter(vec![ (Value::Blob(vec![0xaa]), Value::Blob(vec![0xbb])), (Value::Blob(vec![0xcc, 0xdd]), Value::Array(vec![Value::Bool(true)])), ])); let value_3: Value = " {947:+36.42,-83:38817}".parse()?; assert_eq!(value_3, Value::from_iter(vec![ (Value::Uint(947), Value::from(36.42)), (Value::Int(-83), Value::Uint(38817)), ])); let value_4: BTreeMap<&'static str, BTreeMap> = from_str( r#"{"key 
1":{99:true},"second|key":{},"THIRDKEY":{444:false,777:true}}"# )?; assert_eq!(value_4, BTreeMap::from_iter(vec![ ("key 1", BTreeMap::from_iter(vec![ (99, true) ])), ("second|key", BTreeMap::new()), ("THIRDKEY", BTreeMap::from_iter(vec![ (444, false), (777, true), ])), ])); Ok(()) } #[test] fn deserialize_map_bad() { let strings = [ "{", "{,", "{true", "{false,", "{?true:", "\r{1337::", "{null:,}", "{+2.7182}", "{-1.0, -2.0}", "{:4,}", "{\"foo\":\"bar\":}", ]; let errors: Vec<_> = strings.iter() .map(|s| s.parse::().unwrap_err()) .collect(); let cases = [ (1, 2, "unexpected end of input"), (1, 2, "unexpected character `,`"), (1, 6, "unexpected end of input"), (1, 7, "unexpected character `,`"), (1, 8, "unexpected end of input"), (2, 7, "unexpected character `:`"), (1, 7, "unexpected character `,`"), (1, 9, "unexpected character `}`"), (1, 6, "unexpected character `,`"), (1, 2, "unexpected character `:`"), (1, 13, "unexpected character `:`, expected ',' or '}' after value in map"), ]; assert_eq!(errors.len(), cases.len()); for (&(line, column, message), error) in cases.iter().zip(errors) { let err_str = error.to_string(); let location = Location { line, column }; assert!( err_str.contains(message), "actual: {}, expected: {}", err_str, message ); assert!( err_str.contains(&location.to_string()), "actual: {}, expected: {}", err_str, location ); assert_eq!(error.context::(), Some(&location), "{}", err_str); } } #[test] fn serialize_simple_binary_good() -> Result<()> { let cases: &[(Value, &[u8])] = &[ // Simple values with a single unique encoding (Value::Null, &[0b000_001_00]), (Value::from(Some(true)), &[0b000_001_01, 0b000_001_11]), (Value::Bool(false), &[0b000_001_10]), (Value::Bool(true), &[0b000_001_11]), // Small signed integers (Value::Int(-16), &[0b001_100_00]), (Value::Int(-1), &[0b001_111_11]), (Value::Int(0), &[0b001_000_00]), (Value::Int(1), &[0b001_000_01]), (Value::Int(15), &[0b001_011_11]), // Multibyte signed integers (Value::Int(-17), &[0b111_001_00, 
0b1110_1111]), (Value::Int(16), &[0b111_001_00, 0b0001_0000]), (Value::Int(-128), &[0b111_001_00, 0b1000_0000]), (Value::Int(127), &[0b111_001_00, 0b0111_1111]), (Value::Int(-129), &[0b111_001_01, 0b0111_1111, 0b1111_1111]), (Value::Int(128), &[0b111_001_01, 0b1000_0000, 0b0000_0000]), (Value::Int(-32768), &[0b111_001_01, 0b0000_0000, 0b1000_0000]), (Value::Int(32767), &[0b111_001_01, 0b1111_1111, 0b0111_1111]), ( Value::Int(-32769), &[ 0b111_001_10, 0b1111_1111, 0b0111_1111, 0b1111_1111, 0b1111_1111, ], ), ( Value::Int(32768), &[ 0b111_001_10, 0b0000_0000, 0b1000_0000, 0b0000_0000, 0b0000_0000, ], ), ( Value::Int(-2_147_483_648), // -2^31 &[ 0b111_001_10, 0b0000_0000, 0b0000_0000, 0b0000_0000, 0b1000_0000, ], ), ( Value::Int(2_147_483_647), // 2^31 - 1 &[ 0b111_001_10, 0b1111_1111, 0b1111_1111, 0b1111_1111, 0b0111_1111, ], ), ( Value::Int(-2_147_483_649), // -2^31 - 1 &[ 0b111_001_11, 0b1111_1111, 0b1111_1111, 0b1111_1111, 0b0111_1111, 0b1111_1111, 0b1111_1111, 0b1111_1111, 0b1111_1111, ], ), ( Value::Int(2_147_483_648), // 2^31 &[ 0b111_001_11, 0b0000_0000, 0b0000_0000, 0b0000_0000, 0b1000_0000, 0b0000_0000, 0b0000_0000, 0b0000_0000, 0b0000_0000, ], ), ( Value::Int(-9_223_372_036_854_775_808), // -2^63 &[ 0b111_001_11, 0b0000_0000, 0b0000_0000, 0b0000_0000, 0b0000_0000, 0b0000_0000, 0b0000_0000, 0b0000_0000, 0b1000_0000, ], ), ( Value::Int(9_223_372_036_854_775_807), // 2^63 - 1 &[ 0b111_001_11, 0b1111_1111, 0b1111_1111, 0b1111_1111, 0b1111_1111, 0b1111_1111, 0b1111_1111, 0b1111_1111, 0b0111_1111, ], ), // Small unsigned integers (Value::Uint(0), &[0b010_000_00]), (Value::Uint(16), &[0b010_100_00]), (Value::Uint(31), &[0b010_111_11]), // Multibyte unsigned integers (Value::Uint(32), &[0b111_010_00, 0b0010_0000]), (Value::Uint(255), &[0b111_010_00, 0b1111_1111]), (Value::Uint(256), &[0b111_010_01, 0b0000_0000, 0b0000_0001]), (Value::Uint(65535), &[0b111_010_01, 0b1111_1111, 0b1111_1111]), ( Value::Uint(65536), &[ 0b111_010_10, // 4-byte unsigned integer 
0b0000_0000, 0b0000_0000, 0b0000_0001, 0b0000_0000, ], ), ( Value::Uint(4_294_967_295), // 2^32 - 1 &[ 0b111_010_10, // 4-byte unsigned integer 0b1111_1111, 0b1111_1111, 0b1111_1111, 0b1111_1111, ], ), ( Value::Uint(4_294_967_296), // 2^32 &[ 0b111_010_11, // 8-byte unsigned integer 0b0000_0000, 0b0000_0000, 0b0000_0000, 0b0000_0000, 0b0000_0001, 0b0000_0000, 0b0000_0000, 0b0000_0000, ], ), ( Value::Uint(18_446_744_073_709_551_615), // 2^64 - 1 &[ 0b111_010_11, // 8-byte unsigned integer 0b1111_1111, 0b1111_1111, 0b1111_1111, 0b1111_1111, 0b1111_1111, 0b1111_1111, 0b1111_1111, 0b1111_1111, ], ), // Floating-point numbers (Value::from(0.0), &[0b111_111_11, // floating-point, 8-byte 0, 0, 0, 0, 0, 0, 0, 0]), (Value::from(-0.0), &[0b111_111_11, 0, 0, 0, 0, 0, 0, 0, 0x80]), (Value::from(12.5), &[0b111_111_11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x40]), (Value::from(-12.5), &[0b111_111_11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0xc0]), // Check that empty strings and blobs are serialized without interning (Value::String(String::new()), &[0b000_010_00]), (Value::Blob(Vec::new()), &[0b000_010_01]), // Small-index strings. The classification of strings doesn't only // depend on their length; since strings are interned, they are all // lumped together in a symbol table at the beginning of the binary. // Strings which appear at indices that can be described using a // small unsigned integer are encoded as a small-index string. // Exactly the same is true for blobs. ( Value::from("hello bits"), &[ 0b000_000_00, // symtab start, 2^00 = 1 byte of length follows 0b000_000_01, // number of interned symbols = 1 0b100_010_10, // short string, single-use, 10 bytes // data of symbol #0 follows (read with syntax highlighting!) 
b'h', b'e', b'l', b'l', b'o', b' ', b'b', b'i', b't', b's', 0b011_000_00, // small-index string, symbol #0 ] ), ( Value::Blob(b"ASN.1 sucks".to_vec()), &[ 0b000_000_00, // symtab start, 2^00 = 1 byte of length follows 0b000_000_01, // number of interned symbols = 1 0b010_010_11, // short blob, single-use, 11 bytes // blob data b'A', b'S', b'N', b'.', b'1', b' ', b's', b'u', b'c', b'k', b's', 0b100_000_00, // small-index blob, symbol #0 ], ), ]; for &(ref value, expected) in cases { let actual = to_bytes(value)?; assert_eq!(actual, expected, "error serializing `{}`", value); } // Additionally, check that `NaN` is serialized as `null`. let nan64_bytes = to_bytes(&f64::NAN)?; assert_eq!(nan64_bytes, &[0b000_001_00], "f64 NaN serialized as non-null"); let nan32_bytes = to_bytes(&f32::NAN)?; assert_eq!(nan32_bytes, &[0b000_001_00], "f32 NaN serialized as non-null"); // Also check that 32-bit non-`NaN` floats are serialized correctly let f32_bytes = to_bytes(&12.5_f32)?; assert_eq!(f32_bytes, &[0b111_111_10, 0x00, 0x00, 0x48, 0x41]); Ok(()) } #[test] fn serialize_complex_binary_good() -> Result<()> { // similar to the example on the front page of messagepack let map: Value = from_str(r#"{"compact": true, "schema": 0}"#)?; let map_bytes = to_bytes(&map)?; assert_eq!(map_bytes, &[ 0b000_000_00, // symtab start, 2^00 = 1 bytes of length 0b000_000_10, // two symbols in symtab 0b100_001_11, // short string, single-use, 7 bytes 0b011_000_11, // 'c' 0b011_011_11, // 'o' 0b011_011_01, // 'm' 0b011_100_00, // 'p' 0b011_000_01, // 'a' 0b011_000_11, // 'c' 0b011_101_00, // 't' 0b100_001_10, // short string, single-use, 6 bytes 0b011_100_11, // 's' 0b011_000_11, // 'c' 0b011_010_00, // 'h' 0b011_001_01, // 'e' 0b011_011_01, // 'm' 0b011_000_01, // 'a' 0b110_000_10, // 2-element map 0b011_000_00, // string #0 0b000_001_11, // true 0b011_000_01, // string #1 0b010_000_00, // unsigned integer 0 ]); // An array with two elements containing the same data, first as a string, // then as a 
blob too. This should test that for the 2nd time, the blob // is correctly referred to as a blob, even though the corresponding // symbol in the symbol table should be a string. let arr1: Value = from_str(r#"["string", #73 74 72 69 6e 67#]"#)?; let arr1_bytes = to_bytes(&arr1)?; assert_eq!(arr1_bytes, &[ 0b000_000_00, // symtab start, 2^00 = 1 bytes of length 0b000_000_01, // one symbol in symtab 0b101_001_10, // short string, multiple uses, 6 bytes 0b010_000_10, // use count: small unsigned integer = 2 b's', b't', b'r', b'i', b'n', b'g', // symbol data 0b101_000_10, // small array, 2 elements, 0b011_000_00, // small-index string, symbol #0 0b100_000_00, // small-index blob, symbol #0 ]); // The same contents but in the opposite order let arr2: Value = from_str(r#"[#73 74 72 69 6e 67#, "string"]"#)?; let arr2_bytes = to_bytes(&arr2)?; assert_eq!(arr2_bytes, &[ 0b000_000_00, // symtab start, 2^00 = 1 bytes of length 0b000_000_01, // one symbol in symtab 0b101_001_10, // short string, multiple uses, 6 bytes 0b010_000_10, // use count: small unsigned integer = 2 b's', b't', b'r', b'i', b'n', b'g', // symbol data 0b101_000_10, // small array, 2 elements, 0b100_000_00, // small-index blob, symbol #0 0b011_000_00, // small-index string, symbol #0 ]); // 32 identical strings -> array and use count don't fit into a small uint let arr3 = ["a"; 32]; let arr3_bytes = to_bytes(&arr3)?; assert_eq!(arr3_bytes, [ 0b000_000_00, // symtab start, 2^00 = 1 bytes of length 0b000_000_01, // one symbol in symtab 0b101_000_01, // short string, multiple uses, 1 byte 0b111_010_00, // unsigned integer, 2^00 = 1 byte of use count 0b001_000_00, // use count = 32 0b011_000_01, // character 'a' 0b111_101_00, // array, 1 byte of length 0b001_000_00, // array length = 32 elements 0b011_000_00, // array element #00: string #0 0b011_000_00, // array element #01: string #0 0b011_000_00, // array element #02: string #0 0b011_000_00, // array element #03: string #0 0b011_000_00, // array element #04: 
string #0 0b011_000_00, // array element #05: string #0 0b011_000_00, // array element #06: string #0 0b011_000_00, // array element #07: string #0 0b011_000_00, // array element #08: string #0 0b011_000_00, // array element #09: string #0 0b011_000_00, // array element #10: string #0 0b011_000_00, // array element #11: string #0 0b011_000_00, // array element #12: string #0 0b011_000_00, // array element #13: string #0 0b011_000_00, // array element #14: string #0 0b011_000_00, // array element #15: string #0 0b011_000_00, // array element #16: string #0 0b011_000_00, // array element #17: string #0 0b011_000_00, // array element #18: string #0 0b011_000_00, // array element #19: string #0 0b011_000_00, // array element #20: string #0 0b011_000_00, // array element #21: string #0 0b011_000_00, // array element #22: string #0 0b011_000_00, // array element #23: string #0 0b011_000_00, // array element #24: string #0 0b011_000_00, // array element #25: string #0 0b011_000_00, // array element #26: string #0 0b011_000_00, // array element #27: string #0 0b011_000_00, // array element #28: string #0 0b011_000_00, // array element #29: string #0 0b011_000_00, // array element #30: string #0 0b011_000_00, // array element #31: string #0 ].as_ref()); // Strictly more than 255 different strings, // in order to achieve a multi-byte symbol count. 
let arr4: Vec<_> = (0..65535).map(|x| format!("{:04x}", x)).collect(); let arr4_bytes = to_bytes(&arr4)?; assert_eq!(arr4_bytes[..3], [0b000_000_01, 0b1111_1111, 0b1111_1111]); let arr5: Vec<_> = (0..65536).map(|x| format!("{:04x}", x)).collect(); let arr5_bytes = to_bytes(&arr5)?; assert_eq!( arr5_bytes[..5], [0b000_000_10, 0b0000_0000, 0b0000_0000, 0b0000_0001, 0b0000_0000] ); assert!(arr5_bytes[5..].len() >= 65536 * 5); for (i, chunk) in arr5_bytes[5..].chunks(5).enumerate() { if i >= 65536 { break; } // small string, single-use, 4 bytes assert_eq!( chunk[0], 0b100_001_00, "wrong length for chunk #{}", i ); assert_eq!( &chunk[1..], format!("{:04x}", i).as_bytes(), "wrong payload data for chunk #{}", i ); } // One string used strictly more than 255 times, // which is also strictly longer than 255 bytes. // Therefore, both the length and the use count require 2 bytes to store. // Also map the string into its own blob representation, so we can test // that they are interned to the same uniqued symbol. 
let arr6: Vec<_> = { let s: String = iter::repeat( "_").take(258).collect(); let b: Vec<_> = iter::repeat(b'_').take(258).collect(); iter::repeat((s, Value::Blob(b))).take(128).collect() // 128 * 2 = 256 }; let arr6_bytes = to_bytes(&arr6)?; assert_eq!(arr6_bytes[..8], [ 0b000_000_00, // symtab start, 2^00 = 1 byte of symtab count 0b0000_0001, // 1 symbol, 0b111_101_01, // long string, multi-use, 2^01 = 2 bytes of length 0b0000_0010, // length, low byte 0b0000_0001, // length, high byte 0b111_010_01, // use count: unsigned integer, 2^01 = 2 bytes 0b0000_0000, // use count, low byte 0b0000_0001, // use count, high byte ]); assert_eq!(&arr6_bytes[8..264], [b'_'; 256].as_ref()); // payload data let arr_7 = [-1_i8; 65537]; let arr7_bytes = to_bytes(arr_7.as_ref())?; assert_eq!(&arr7_bytes[..5], &[ 0b111_101_10, // big array, 2^0b10 = 2^2 = 4 bytes of length 0b0000_0001, // byte 0 of length (LSB) 0b0000_0000, // byte 1 of length 0b0000_0001, // byte 2 of length 0b0000_0000, // byte 3 of length (MSB) ]); assert_eq!(arr7_bytes[5..].len(), 65537); assert!(arr7_bytes[5..].iter().all(|&x| x == 0b001_111_11)); let map8: BTreeMap = (0..65538) .map(|i| (65536 + i, i % 2 != 0)) .collect(); let map8_bytes = to_bytes(&map8)?; assert_eq!(&map8_bytes[..5], &[ 0b111_110_10, // big map, 2^0b10 = 2^2 = 4 bytes of length 0b0000_0010, // byte 0 of length (LSB) 0b0000_0000, // byte 1 of length 0b0000_0001, // byte 2 of length 0b0000_0000, // byte 3 of length (MSB) ]); assert_eq!(map8_bytes[5..].len(), 6 * 65538); assert!(map8_bytes[5..] .chunks(6) .enumerate() .all(|(i, chunk)| { chunk[0] == 0b111_010_10 // type tag: u32 && chunk[1..5].iter().enumerate().all(|(j, &b)| { let key = i + 65536; b == (key >> (j * 8)) as u8 // little endian }) && // value: boolean "is odd?" chunk[5] == if i % 2 == 0 { 0b000_001_10 } else { 0b000_001_11 } })); Ok(()) } /// Tests for sequences that lie or are forgetful about their size. 
#[test] fn serialize_seq_binary_liar_and_forgetful() -> Result<()> { use std::result::Result; use serde::ser::{ Serialize, Serializer, SerializeSeq }; struct LiarSeq { size_hint: Option, seq: T, } impl<'a, T> Serialize for LiarSeq where T: IntoIterator + Copy, ::Item: Serialize, { fn serialize(&self, ser: S) -> Result { let mut seq = ser.serialize_seq(self.size_hint)?; for item in self.seq { seq.serialize_element(&item)?; } seq.end() } } // The actual number of elements is lower than the size hint. // In particular, the actual count can be encoded as a small integer, // whereas the size hint requires the type tag plus 2 bytes. let liar_seq_less = LiarSeq { size_hint: Some(1024), seq: &[8, 9, 10, 11], }; let liar_seq_less_bytes = to_bytes(&liar_seq_less)?; assert_eq!(liar_seq_less_bytes, &[ 0b111_101_01, // big array, 2^0b01 = 2^1 = 2 bytes of count 0b0000_0100, // array count, low byte = 4 (actual count) 0b0000_0000, // array count, high byte = 0 0b001_010_00, // small int, 8 0b001_010_01, // small int, 9 0b001_010_10, // small int, 10 0b001_010_11, // small int, 11 ]); // The actual number of elements is higher than the size hint. // However, they can be encoded using the same number of bytes. let liar_seq_more_fit = LiarSeq { size_hint: Some(0), seq: &[true, false, true, true, false], }; let liar_seq_more_fit_bytes = to_bytes(&liar_seq_more_fit)?; assert_eq!(liar_seq_more_fit_bytes, &[ 0b101_001_01, // small array, 5 elements 0b000_001_11, // true 0b000_001_10, // false 0b000_001_11, // true 0b000_001_11, // true 0b000_001_10, // false ]); // The actual number of elements is higher than the size hint, // so much that it doesn't fit into the space allocated based // on the size hint. 
let liar_seq_more_exceed = LiarSeq { size_hint: Some(255), seq: [None::<&str>; 256].as_ref(), }; let liar_seq_more_exceed_err = to_bytes(&liar_seq_more_exceed).unwrap_err(); assert!(liar_seq_more_exceed_err.to_string().contains( "can't encode count 256 into buffer of size 2; incorrect size hint")); // There is no size hint, so `usize::MAX` is assumed. let forgetful_seq = LiarSeq { size_hint: None, seq: [true; 16].as_ref(), }; let forgetful_seq_bytes = to_bytes(&forgetful_seq)?; assert_eq!(forgetful_seq_bytes, { let log_len_len = size_of::().trailing_zeros() as u8; let mut bytes = vec![0b111_101_00 | log_len_len]; // big array marker // length is encoded as little-endian bytes.push(16); bytes.extend(iter::repeat(0x00).take(size_of::() - 1)); // data: 16 repetitions of `true` bytes.extend(iter::repeat(0b000_001_11).take(16)); bytes }); Ok(()) } /// Tests for maps that lie or are forgetful about their size. #[test] fn serialize_map_binary_liar_and_forgetful() -> Result<()> { use std::result::Result; use serde::ser::{ Serialize, Serializer, SerializeMap }; struct LiarMap { size_hint: Option, map: T, } impl<'a, T, K, V> Serialize for LiarMap where T: IntoIterator + Copy, K: Serialize + 'a, V: Serialize + 'a, { fn serialize(&self, ser: S) -> Result { let mut map = ser.serialize_map(self.size_hint)?; for &(ref key, ref value) in self.map { map.serialize_entry(key, value)?; } map.end() } } // The actual number of elements is lower than the size hint. // In particular, the actual count can be encoded as a small integer, // whereas the size hint requires the type tag plus 2 bytes. 
let liar_map_less = LiarMap { size_hint: Some(1024), map: &[(12, true), (13, false), (14, true), (15, false)], }; let liar_map_less_bytes = to_bytes(&liar_map_less)?; assert_eq!(liar_map_less_bytes, &[ 0b111_110_01, // big map, 2^0b01 = 2^1 = 2 bytes of count 0b0000_0100, // map count, low byte = 4 (actual count) 0b0000_0000, // map count, high byte = 0 0b001_011_00, // small int, 12 0b000_001_11, // true 0b001_011_01, // small int, 13 0b000_001_10, // false 0b001_011_10, // small int, 14 0b000_001_11, // true 0b001_011_11, // small int, 15 0b000_001_10, // false ]); // The actual number of elements is higher than the size hint. // However, they can be encoded using the same number of bytes. let liar_map_more_fit_data: Vec<(i16, u16)> = (256..768).map(|k| (k, k as u16)).collect(); let liar_map_more_fit = LiarMap { size_hint: Some(256), map: &liar_map_more_fit_data, }; let liar_map_more_fit_bytes = to_bytes(&liar_map_more_fit)?; assert_eq!(liar_map_more_fit_bytes[..3], [ 0b111_110_01, // map, 2 bytes of length 0b0000_0000, // length, low byte 0b0000_0010, // length, high byte ]); assert_eq!(liar_map_more_fit_bytes[3..].len(), 512 * 2 * 3); assert!(liar_map_more_fit_bytes[3..] .chunks(6) .enumerate() .all(|(i, chunk)| { let n = 256 + i; let lo_byte = (n >> 0) as u8; let hi_byte = (n >> 8) as u8; let (key, val) = chunk.split_at(3); // marker (int/uint), low byte, high byte (little-endian) key == &[0b111_001_01, lo_byte, hi_byte] && val == &[0b111_010_01, lo_byte, hi_byte] })); // The actual number of elements is higher than the size hint, // so much that it doesn't fit into the space allocated based // on the size hint. 
let liar_map_more_exceed = LiarMap { size_hint: Some(255), map: [(None::<&str>, Some(false)); 256].as_ref(), }; let liar_map_more_exceed_err = to_bytes(&liar_map_more_exceed).unwrap_err(); assert!(liar_map_more_exceed_err.to_string().contains( "can't encode count 256 into buffer of size 2; incorrect size hint")); // There is no size hint, so `usize::MAX` is assumed. let forgetful_map = LiarMap { size_hint: None, map: &[(Some(17), None::)], }; let forgetful_map_bytes = to_bytes(&forgetful_map)?; assert_eq!(forgetful_map_bytes, { let log_len_len = size_of::().trailing_zeros() as u8; let mut bytes = vec![0b111_110_00 | log_len_len]; // big map marker // length is encoded as little-endian bytes.push(1); bytes.extend(iter::repeat(0x00).take(size_of::() - 1)); // key and value bytes.extend(&[ 0b000_001_01, // optional of... 0b111_001_00, // ...1-byte int 0b0001_0001, // 17 0b000_001_00, // null ]); bytes }); Ok(()) } #[test] fn deserialize_simple_binary_good() -> Result<()> { // Although the serializer never produces a symbol table with an empty // string or blob, we still do want to accept them for consistency, and // for interoperability with simpler serializers that do not implement // the special case optimization for empty strings and blobs. 
let empty_str_bytes = &[ 0b000_000_00, // symtab start, 2^00 = 1 bytes of length 0b000_000_01, // one symbol in symtab 0b100_000_00, // short string, single-use, 0 bytes 0b011_000_00, // small string #0 ]; let s_owned_slice: String = from_bytes(empty_str_bytes)?; let s_owned_stream: String = from_reader(Cursor::new(empty_str_bytes))?; let s_borrowed: &str = from_bytes(empty_str_bytes)?; assert!(s_owned_slice.is_empty()); assert!(s_owned_stream.is_empty()); assert!(s_borrowed.is_empty()); let empty_blob_bytes = &[ 0b000_000_00, // symtab start, 2^00 = 1 bytes of length 0b000_000_01, // one symbol in symtab 0b010_000_00, // short blob, single-use, 0 bytes 0b100_000_00, // small blob #0 ]; let b_owned_slice: Value = from_bytes(empty_blob_bytes)?; let b_owned_stream: Value = from_reader(Cursor::new(empty_blob_bytes))?; let b_borrowed: &[u8] = from_bytes(empty_blob_bytes)?; assert_eq!(b_owned_slice, Value::Blob(Vec::new())); assert_eq!(b_owned_stream, Value::Blob(Vec::new())); assert!(b_borrowed.is_empty()); Ok(()) } #[quickcheck] fn qc_fuzz_binary_deserializer(value: Value, ratio: Ratio) -> TestResult { let mut bytes = match to_bytes(&value) { Ok(bytes) => bytes, Err(error) => return TestResult::error(error.to_string()), }; let num_bytes_to_modify = (bytes.len() as f32 * ratio.as_f32()) as usize; if num_bytes_to_modify == 0 { return TestResult::discard(); } let mut gen = Gen::new(num_bytes_to_modify); let noise: Vec = (0..num_bytes_to_modify) .map(|_| u8::arbitrary(&mut gen)) .collect(); let indices = { let mut set = HashSet::new(); loop { if set.len() == num_bytes_to_modify { break set; } set.insert(usize::arbitrary(&mut gen) % num_bytes_to_modify); } }; assert_eq!(indices.len(), noise.len()); for (i, b) in indices.into_iter().zip(noise) { bytes[i] ^= b; } // Obviously, a messed up binary will probably be invalid, // but the point of fuzzing is that the deserializer should // catch all such errors and report the failure without crashing. 
// The modified value may be valid too (if e.g. only the value // of an integer is changed), so we allow for this case as well. let mut cursor = Cursor::new(&bytes); let result_slice: Result = from_bytes(&bytes); let result_stream: Result = from_reader(&mut cursor); // Slice and stream should behave identically, // except that the stream reader doesn't check for EOF upon // completing the deserialization, because it could then // potentially read something it's not supposed to read. // // However, in the situation where the slice deserializer // returns an error related to excess junk, restricting // the slice to exactly as many leading bytes as read by // the stream deserializer should result in success, and // results identical to those of the stream deserializer. match (result_slice, result_stream) { (Ok(v), Ok(w)) => TestResult::from_bool(v == w), (Err(_), Err(_)) => TestResult::passed(), (Ok(v), Err(f)) => TestResult::error(format!( "slice and stream serializers disagree:\nOk({})\nvs.\nErr({})", v, f )), (Err(e), Ok(w)) => { if e.to_string().contains("junk of length") { // Cursor::position() can't be higher than the length of // the original slice, so the below line will not panic let retried_bytes = &bytes[0..cursor.position() as usize]; let retried_result: Result = from_bytes(retried_bytes); match retried_result { Ok(v) => TestResult::from_bool(v == w), Err(e) => TestResult::error(format!( "restricted slice deserializer failed:\n{}\nvs.{}", e, w )) } } else { TestResult::error(format!( "slice and stream serializers disagree:\nErr({})\nvs.\nOk({})", e, w )) } } } } /// This test case checks for known and interesting types of currupted /// (too short/small) length/count/index data in a serialized binary value. /// /// In the case of the stream deserializer, we don't particularly care /// about the exact error message or it being as informative as possible, /// since we have much less information (e.g. 
we don't know the remaining /// size of the stream) anyway; so the error messages are only checked to /// ensure that they are at least not wrong/incorrect, and deterministic. #[test] fn deserialize_binary_bad() { let cases: &[(&[u8], &str, &str)] = &[ // The buffer length of the variable-width encoded symbol count is // wrong. ( &[ 0b000_000_10, // symbol table, 2^0b10 = 2^2 = 4 bytes of length 0b0000_0000, // Alas, only 3 bytes of length follow! 0b0000_0000, // 0b0000_0000, // ], "expected buffer of length 4, got 3", "failed to fill whole buffer", ), // The symbol count itself is obviously wrong (detected upfront). ( &[ 0b000_000_00, // symbol table, 2^00 = 1 byte of symbol count 0b0000_0001, // 1 symbol promised // but 0 was delivered ], "symtab length is 1 but only 0 bytes of symbol data follow", "failed to fill whole buffer", ), // The symbol count is not obviously wrong, so it's only // detected while attempting to parse a missing symbol. ( &[ 0b000_000_00, // symbol table, 2^00 = 1 byte of symbol count 0b0000_0010, // 2 symbols promised... 0b010_001_00, // symbol #0: short blob, single-use, 4 bytes 0b0001_0001, // (1) payload bytes 0b0010_0010, // (2) 0b0011_0011, // (3) 0b0100_0100, // (4) // ...but only 1 symbol was delivered, here's the premature EOF ], "missing symbol", "failed to fill whole buffer", ), // The buffer length of the actual symbol payload is too small. ( &[ 0b000_000_00, // symbol table, 2^00 = 1 byte of count follows 0b0000_0001, // 1 symbol in symbol table 0b100_010_10, // short string, single-use, 10 bytes promised // But only 9 bytes follow b't', b'o', b'o', b' ', b's', b'h', b'o', b'r', b't', ], "symbol payload buffer too short or missing", "unexpected end of input", ), // Similar to the previous test case, but with some twists. ( &[ 0b000_000_00, // symbol table, 2^00 = 1 byte of count follows 0b0000_0010, // 2 symbols in symbol table 0b010_000_01, // first symbol is correct (just for the gigs)... 
0b1111_1111, // ...short blob, 1 byte, and this is that byte. 0b111_010_00, // long blob, single-use, 2^00 = 1 byte of length 0b1000_0000, // 128 bytes of payload promised... 0b1010_0101, // ... 0b0011_1100, // ...but only 2 bytes delivered. ], "symbol payload buffer too short or missing", "unexpected end of input", ), // The buffer length of the variable-width encoded symbol payload // length is wrong. ( &[ 0b000_000_00, // symbol table, 2^00 = 1 byte of count follows 0b0000_0001, // 1 symbol in symbol table 0b111_010_01, // long blob, single-use, 2 bytes of length 0b0000_0000, // alas, only 1 further byte was provided ], "expected buffer of length 2, got 1", "failed to fill whole buffer", ), // The use count of a multiple-use symbol is missing. // (Symbol was cut between payload length and use count) ( &[ 0b000_000_00, // symbol table, 2^00 = 1 byte of count follows 0b0000_0001, // 1 symbol in symbol table 0b011_000_00, // short blob, multiple uses, 0 bytes of payload // use count should have followed, but it hasn't ], "multi-use symbol missing use count", "failed to fill whole buffer", ), // The use count buffer of a multiple-use symbol is too short. ( &[ 0b000_000_00, // symbol table, 2^00 = 1 byte of count follows 0b0000_0001, // 1 symbol in symbol table 0b011_000_00, // short blob, multiple uses, 0 bytes of payload 0b111_010_10, // payload length: 4 bytes of unsigned integer 0b0000_0000, // but actually only 3 follow 0b0000_0000, 0b0000_0000, ], "symbol use count buffer too short or missing", "failed to fill whole buffer", ), // The payload of a (multiple-use) symbol is missing. 
// (Symbol was cut between use count and payload) ( &[ 0b000_000_00, // symbol table, 2^00 = 1 byte of count follows 0b0000_0001, // 1 symbol in symbol table 0b101_000_01, // short string, multiple uses, 1 byte of payload 0b010_000_10, // use count: small unsigned integer `2` // payload should have followed, but it hasn't ], "symbol payload buffer too short or missing", "unexpected end of input", ), // Big value header buffer length is wrong ( &[ 0b111_001_11, // signed integer, 8 bytes 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, // but only 7 bytes of payload followed ], "unexpected end of input in big value", "failed to fill whole buffer", ), // Out-of-Bounds symbol index ( &[ 0b000_000_00, // symbol table, 2^00 = 1 byte of length 0b0000_0001, // 1 symbol 0b010_000_10, // short blob, single-use, 2 bytes of payload 0b0101_0101, // payload byte #0 0b1010_1010, // payload byte #1 0b100_000_10, // low-index blob, #2 (off by 2, only valid is 0) ], "blob #2 out of bounds for symtab of size 1", "blob #2 out of bounds for symtab of size 1", ), // Sequence item count too high (number of bytes remaining + 1) ( &[ 0b101_001_00, // small array, 4 items, but only 3 bytes follow 0b000_001_00, // 1: null 0b000_001_11, // 2: true 0b000_001_10, // 3: false ], "sequence count is 4 but only 3 bytes are remaining", "failed to fill whole buffer", ), // Map entry count too high, an even number of bytes follow ( &[ 0b111_110_01, // big map, 2^01 = 2 bytes of length follow 0b0000_0010, // low byte = 2 * 256^0 = 2, high byte = 0, so 0b0000_0000, // count = 2, but only 2 * 2 - 2 = 2 bytes left 0b001_111_11, // 1: small signed int = -1 0b010_000_01, // 2: small unsigned int = 1 ], "key-value count is 2 * 2 but only 2 bytes are remaining", "failed to fill whole buffer", ), // Map entry count too high, an odd number of bytes follow ( &[ 0b111_110_01, // big map, 2^01 = 2 bytes of length follow 0b0000_0010, // low byte = 2 * 256^0 = 2, high byte = 0, so 0b0000_0000, // count = 2, but only 2 * 2 
- 1 = 3 bytes left 0b010_000_01, // 1: small unsigned int = 1 0b001_111_11, // 2: small signed int = -1 0b000_001_10, // 3: false ], "key-value count is 2 * 2 but only 3 bytes are remaining", "failed to fill whole buffer", ), // f32 negative NaN encoded as floating-point instead of `null` ( &[ 0b111_111_10, // floating-point, 2^0b10 = 4 bytes (f32) 0b1111_1111, // 4 bytes of all 1's: negative NaN 0b1111_1111, 0b1111_1111, 0b1111_1111, ], "NaN should have been encoded as `null`", "NaN should have been encoded as `null`", ), // f64 positive NaN encoded as floating-point instead of `null` ( &[ 0b111_111_11, // floating-point, 2^0b11 = 8 bytes (f64) 0b1111_1111, // 7 bytes of all 1's 0b1111_1111, 0b1111_1111, 0b1111_1111, 0b1111_1111, 0b1111_1111, 0b1111_1111, 0b0111_1111, // MSB carrying a cleared sign bit -> positive ], "NaN should have been encoded as `null`", "NaN should have been encoded as `null`", ), ]; for &(bytes, slice_message, stream_message) in cases { { let result: Result = from_bytes(bytes); let err = result.expect_err(slice_message); let err_str = err.to_string(); assert!( err_str.contains("serialized binary corrupted"), "actual: {}, expected: {}", err_str, slice_message ); assert!( err_str.contains(slice_message), "actual: {}, expected: {}", err_str, slice_message ); } { let result: Result = from_reader(Cursor::new(bytes)); let err = result.expect_err("parsing corrupted binary succeeded"); let err_str = err.to_string(); assert!( err_str.contains(stream_message), "actual: {}, expected: {}", err_str, stream_message ); } } } /// Test the cases where capacities are very high. For all we know, /// this could be due to a corrupted binary. This is especially tricky /// to handle correctly in a stream deserializer because we don't know /// the length of the stream beforehand. /// /// Points where capacities should be tested: /// 1. Symbol Table - symbol count /// 2. Symbol Table - symbol payload length /// 3. Arrays - item count /// 4. 
Maps - entry count /// /// Points where I know how to test capacities: /// 1. Arrays - item count /// 2. Maps - entry count /// /// The latter cases are implemented in this function. I don't know /// how to test that the symbol table and individual symbol buffers /// are created with a limited initial allocation capacity. #[test] fn deserialize_binary_stream_size_hint() { use std::iter::repeat; use std::io::Read; use std::result::Result; use std::fmt::{ Formatter, Result as FmtResult }; use serde::de::{ Deserializer, Visitor, SeqAccess, MapAccess }; struct SizeHintChecking { actual_size_hint: Option, } impl<'de> Deserialize<'de> for SizeHintChecking { fn deserialize>(de: D) -> Result { de.deserialize_any(SizeHintCheckingVisitor) } } struct SizeHintCheckingVisitor; impl<'de> Visitor<'de> for SizeHintCheckingVisitor { type Value = SizeHintChecking; fn expecting(&self, formatter: &mut Formatter) -> FmtResult { formatter.pad("a sequence or a map") } fn visit_seq>(self, seq: A) -> Result { Ok(SizeHintChecking { actual_size_hint: seq.size_hint() }) } fn visit_map>(self, map: A) -> Result { Ok(SizeHintChecking { actual_size_hint: map.size_hint() }) } } fn test_case(expected_size_hint: usize, message: &str, reader: R) where R: Read, { let check: SizeHintChecking = from_reader(reader).expect(message); assert_eq!( check.actual_size_hint, Some(expected_size_hint), "{}", message ); } test_case(0, "empty array", Cursor::new(&[0b101_000_00])); test_case(0, "empty map", Cursor::new(&[0b110_000_00])); test_case(8, "small array", Cursor::new(&[ 0b101_010_00, 0b000_001_00, // `null` repeated 8 times 0b000_001_00, 0b000_001_00, 0b000_001_00, 0b000_001_00, 0b000_001_00, 0b000_001_00, 0b000_001_00, ])); test_case(8, "small map", Cursor::new(&[ 0b110_010_00, 0b010_000_00, // `i: true` repeated 8 times, for i in 0..=7 0b000_001_11, 0b010_000_01, 0b000_001_11, 0b010_000_10, 0b000_001_11, 0b010_000_11, 0b000_001_11, 0b010_001_00, 0b000_001_11, 0b010_001_01, 0b000_001_11, 0b010_001_10, 
0b000_001_11, 0b010_001_11, 0b000_001_11, ])); // size hint of large sequences and maps should be capped at u16::MAX test_case(usize::from(u16::MAX), "large array", Cursor::new({ let mut vec = Vec::with_capacity((1 << 20) + 5); vec.extend(&[ 0b111_101_10, // array, 2^2 = 4 bytes of length 0b0000_0000, // 2^20 as 32-bit little-endian unsigned int 0b0000_0000, 0b0001_0000, 0b0000_0000, ]); vec.extend(repeat(0b000_001_00).take(1 << 20)); // `null`s vec })); test_case(usize::from(u16::MAX), "large map", Cursor::new({ let mut vec = Vec::with_capacity((1 << 21) + 5); vec.extend(&[ 0b111_110_10, // map, 2^2 = 4 bytes of length 0b0000_0000, // 2^20 as 32-bit little-endian unsigned int 0b0000_0000, 0b0001_0000, 0b0000_0000, ]); vec.extend(repeat(0b000_001_00).take(1 << 21)); // `null` keys & values vec })); } /// This tests for some particular edge cases /// that should be accepted as correct. #[test] fn deserialize_binary_good_edge_cases() -> Result<()> { let cases: &[(&[u8], Value)] = &[ // Tightly fit but correct sequence count: 2 items and 2 bytes // remaining ( &[ 0b111_101_00, // big array, 2^00 = 1 byte of length follows 0b0000_0010, // length = 2 0b001_100_01, // item #0 = small signed int = -15 0b000_001_00, // item #1 = null ], Value::Array(vec![ Value::Int(-15), Value::Null, ]) ), // Tightly fit but correct map count: 2 pairs and 4 bytes remaining ( &[ 0b111_110_01, // big map, 2^01 = 2 bytes of length follow 0b0000_0010, // low byte = 2 * 256^0 = 2, high byte = 0, 0b0000_0000, // so count = 2 and 2 * 2 = 4 bytes follow 0b000_001_10, // key #0: false 0b000_001_11, // val #0: true 0b000_001_11, // key #1: true 0b000_001_10, // val #1: false ], Value::from_iter(vec![ (false, true), (true, false), ]) ), // Not-so-tight fit for map count: 2 pairs and 5 bytes remaining ( &[ 0b101_000_10, // small array, 2 items 0b110_000_10, // item #0: small map, 2 pairs; 5 bytes follow 0b000_001_11, // key #0: true 0b001_111_11, // val #0: small signed int -1 0b000_001_10, // key 
#1: false 0b010_100_00, // val #1: small unsigned int 16 0b000_001_00, // item #1: null ], Value::Array(vec![ Value::from_iter(vec![ (true, Value::Int(-1)), (false, Value::Uint(16)), ]), Value::Null, ]) ), // Empty sequence ( &[ 0b101_000_00, // small array, 0 items ], Value::Array(Vec::new()) ), // Empty sequence with way more bytes than necessary ( &[ 0b111_101_11, // big array, 2^0b11 = 8 bytes of length 0b0000_0000, // 0 items 0b0000_0000, 0b0000_0000, 0b0000_0000, 0b0000_0000, 0b0000_0000, 0b0000_0000, 0b0000_0000, ], Value::Array(Vec::new()) ), // Empty map ( &[ 0b110_000_00, // small map, 0 items ], Value::Map(BTreeMap::new()) ), // Empty map with way more bytes than necessary ( &[ 0b111_110_11, // big map, 2^0b11 = 8 bytes of length 0b0000_0000, // 0 items 0b0000_0000, 0b0000_0000, 0b0000_0000, 0b0000_0000, 0b0000_0000, 0b0000_0000, 0b0000_0000, ], Value::Map(BTreeMap::new()) ), ]; for &(bytes, ref expected) in cases { let actual_slice: Value = from_bytes(bytes)?; let actual_stream: Value = from_reader(Cursor::new(bytes))?; assert_eq!(&actual_slice, expected); assert_eq!(&actual_stream, expected); } Ok(()) } /// Test that identifiers being serialized as strings doesn't confuse the /// string interning mechanism, and in particular symbol use counts match /// up exactly. /// /// It is possible to mess up string use counts by e.g. accidentally trying /// to deserialize an identifier as a string that wasn't in fact serialized /// as a string. /// /// We only test the binary format because it is the only one that supports /// string interning. /// /// Note that currently, deserializing from a slice should never fail for /// reasons of incorrect use counts. 
This is due to the fact that use /// counts are maintained for purposes of optimization: deserializing from /// a stream requires allocating buffers for each symbol, but in order to /// avoid unnecessary copying of buffers, the stream deserializer moves /// the buffer upon the last use of the symbol, then passes it to the impl /// of the deserialized type by value. /// /// Since the slice deserializer never copies the buffers, it disregards /// use counts completely. We're still testing it here in order to ensure /// future-proofness in case its behavior ever changes. #[test] fn identifier_in_binary_stream_interned_correctly() { use std::result::Result; fn test_case(message: &str, value: T) where T: PartialEq + Debug + Serialize + for<'de> Deserialize<'de> { let buf = to_bytes(&value).expect(message); let copy_slice: T = from_bytes(&buf).expect(message); let copy_stream: T = from_reader(Cursor::new(buf)).expect(message); assert_eq!(value, copy_slice, "slice: {}", message); assert_eq!(value, copy_stream, "stream: {}", message); } fn make_map(values: T) -> HashMap where T: AsRef<[(&'static str, &'static str)]> { values .as_ref() .iter() .map(|&(k, v)| (String::from(k), String::from(v))) .collect() } #[derive(Debug, Default, PartialEq, Eq)] struct Bytes(Vec); impl Serialize for Bytes { fn serialize(&self, ser: S) -> Result { ser.serialize_bytes(&self.0) } } impl<'de> Deserialize<'de> for Bytes { fn deserialize>(de: D) -> Result { de.deserialize_any(BytesVisitor) } } struct BytesVisitor; impl<'de> Visitor<'de> for BytesVisitor { type Value = Bytes; fn expecting(&self, f: &mut Formatter) -> FmtResult { f.pad("byte buffer") } fn visit_bytes(self, bytes: &[u8]) -> Result { Ok(Bytes(bytes.to_owned())) } fn visit_byte_buf(self, bytes: Vec) -> Result { Ok(Bytes(bytes)) } } #[derive(Debug, Default, PartialEq, Eq, Serialize, Deserialize)] struct S { field_one: HashMap, field_two: u32, field_three: String, } #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] enum E { 
Newtype(HashMap), #[serde(rename = "field_three")] Stuff(S), Blob(Bytes), } test_case("field name in own value", S { field_three: String::from("field_three"), ..S::default() }); test_case("field name in other field", S { field_three: String::from("field_one"), ..S::default() }); test_case("field name in map key", S { field_one: make_map([ ("field_one", "foo"), ("field_two", "bar"), ]), ..S::default() }); test_case("field name in map value", S { field_one: make_map([ ("qux", "field_three"), ]), ..S::default() }); test_case("variant name same as field name", E::Stuff(S { field_three: String::from("inner"), ..S::default() })); test_case("identifier repeated many times, also as a blob", vec![ E::Newtype(make_map([ ("field_one", "field_two"), ("field_three", "field_one"), ])), E::Stuff(S { field_one: make_map([ ("field_one", "Newtype"), ("E", "S"), ("S", "E"), ("Blob", "field_two"), ]), field_two: 84571, field_three: String::from("field_three"), }), E::Blob(Bytes(b"field_one".to_vec())), E::Blob(Bytes(b"field_two".to_vec())), E::Blob(Bytes(b"field_two".to_vec())), E::Blob(Bytes(b"field_two".to_vec())), ]); } #[test] fn interned_buffer_moved_on_last_use() -> Result<()> { use std::result::Result; use serde::de::{ DeserializeSeed, SeqAccess, MapAccess, IgnoredAny }; use neodyn_xc::de::binary::BinaryStreamDeserializer; #[derive(Debug, Clone, Copy, PartialEq, Eq)] struct OwnedBorrowedCounter { last_borrowed_visit: Option, owned_visit: Option, expected: &'static str, } impl OwnedBorrowedCounter { fn new(expected: &'static str) -> Self { OwnedBorrowedCounter { last_borrowed_visit: None, owned_visit: None, expected: expected, } } fn visit_borrowed( &mut self, ty: &str, value: &[u8], ) -> Result { match self.owned_visit { Some(i) => Err(E::custom(format_args!( "visit borrowed {} after owned buffer stolen at {}", ty, i ))), None => { let index = self.last_borrowed_visit.map_or(0, |i| i + 1); self.last_borrowed_visit.replace(index); let actual = String::from_utf8_lossy(value); 
assert_eq!(actual, self.expected); Ok(IgnoredAny) } } } fn visit_owned( &mut self, ty: &str, value: &[u8], ) -> Result { let index = self.last_borrowed_visit.map_or(0, |i| i + 1); match self.owned_visit.replace(index) { None => { let actual = String::from_utf8_lossy(value); assert_eq!(actual, self.expected); Ok(IgnoredAny) }, Some(i) => Err(E::custom(format_args!( "owned {} already stolen at {}", ty, i ))), } } } impl<'de> DeserializeSeed<'de> for &mut OwnedBorrowedCounter { type Value = IgnoredAny; fn deserialize>(self, de: D) -> Result { de.deserialize_any(self) } } impl<'de> Visitor<'de> for &mut OwnedBorrowedCounter { type Value = IgnoredAny; fn expecting(&self, f: &mut Formatter) -> FmtResult { write!(f, "borrowed or owned string or blob `{}`", self.expected) } /// implemented in order to allow other types as well, /// not only strings and blobs. This is essentially a /// no-op, as deserializing a bool is stateless. fn visit_bool(self, _: bool) -> Result { Ok(IgnoredAny) } /// implemented in order to allow other types as well, /// not only strings and blobs. This is essentially a /// no-op, as deserializing an integer is stateless. fn visit_i64(self, _: i64) -> Result { Ok(IgnoredAny) } /// implemented in order to allow other types as well, /// not only strings and blobs. This is essentially a /// no-op, as deserializing an integer is stateless. fn visit_u64(self, _: u64) -> Result { Ok(IgnoredAny) } fn visit_str(self, v: &str) -> Result { self.visit_borrowed("string", v.as_ref()) } fn visit_string(self, v: String) -> Result { self.visit_owned("string", v.as_ref()) } fn visit_bytes(self, v: &[u8]) -> Result { self.visit_borrowed("bytes", v) } fn visit_byte_buf(self, v: Vec) -> Result { self.visit_owned("bytes", &v) } fn visit_seq>(self, mut seq: A) -> Result { while let Some(IgnoredAny) = seq.next_element_seed(&mut *self)? {} Ok(IgnoredAny) } fn visit_map>(self, mut map: A) -> Result { while let Some(IgnoredAny) = map.next_key_seed(&mut *self)? 
{ let IgnoredAny = map.next_value_seed(&mut *self)?; } Ok(IgnoredAny) } } fn test_case( name: &str, num_borrows: usize, expected: &'static str, bytes: &[u8], ) -> Result<(), Error> { let mut visitor = OwnedBorrowedCounter::new(expected); let mut de = BinaryStreamDeserializer::new(Cursor::new(bytes))?; let IgnoredAny = visitor.deserialize(&mut de)?; assert_eq!( visitor.last_borrowed_visit, num_borrows.checked_sub(1), "{}", name ); assert_eq!( visitor.owned_visit, Some(num_borrows), "{}", name ); Ok(()) } test_case("single-use string, implicit", 0, "I'm a string", &[ 0b000_000_00, // symtab start, 1 byte of length 0b0000_0001, // 1 symbol in symtab 0b100_011_00, // short string, single-use, 12 bytes b'I', b'\'', b'm', b' ', b'a', b' ', b's', b't', b'r', b'i', b'n', b'g', 0b011_000_00, // string symbol #0 ])?; test_case("single-use string, explicit", 0, "explicit", &[ 0b000_000_01, // symtab start, 2 bytes of length 0b0000_0001, // 1 symbol in symtab 0b0000_0000, // (in little-endian) 0b101_010_00, // short string, multiple uses, 8 bytes 0b010_000_01, // explicit use count = 1 b'e', b'x', b'p', b'l', b'i', b'c', b'i', b't', 0b011_000_00, // string symbol #0 ])?; test_case("multi-use string", 1, "foo", &[ 0b000_000_00, // symtab start, 1 byte of length 0b0000_0001, // 1 symbol in symtab 0b101_000_11, // short string, multiple uses, 3 bytes 0b010_000_10, // use count = 2 (small unsigned integer) b'f', b'o', b'o', // payload 0b101_000_10, // small array, 2 items 0b011_000_00, // string #0 0b011_000_00, // string #0 ])?; test_case("multi-use mixed (string & blob)", 1, "MiXeD", &[ 0b000_000_00, // symtab start, 1 byte of length 0b0000_0001, // 1 symbol in symtab 0b101_001_01, // short string, multiple uses, 5 bytes 0b010_000_10, // use count = 2 (small unsigned integer) b'M', b'i', b'X', b'e', b'D', // payload 0b101_000_10, // small array, 2 items 0b011_000_00, // string #0 0b100_000_00, // blob #0 ])?; test_case("multi-use blob in map", 3, "blob stuff", &[ 0b000_000_00, 
// symtab start, 1 byte of length 0b0000_0010, // 2 symbols in symtab 0b010_001_11, // #0: short blob, single-use, 7 bytes b'i', b'g', b'n', b'o', b'r', b'e', b'd', 0b101_010_10, // #1: short string, multiple uses, 10 bytes 0b010_001_00, // use count = 4 (small unsigned integer) b'b', b'l', b'o', b'b', b' ', b's', b't', b'u', b'f', b'f', 0b110_000_11, // small map, 3 entries 0b100_000_01, // key 0: blob #1 0b000_001_11, // val 0: true 0b011_000_01, // key 1: string #1 0b100_000_01, // val 1: blob #1 0b001_111_10, // key 2: small int = -2 0b011_000_01, // val 2: string #1 ])?; Ok(()) } #[test] fn interned_buffer_used_too_many_times_error() { fn test_case(name: &str, is_string: bool, bytes: &[u8]) { use serde::de::IgnoredAny; let r: Result = from_reader(Cursor::new(bytes)); let err = r.expect_err(&format!("{}: d13n should have failed", name)); let act_err_msg = err.to_string(); let exp_err_msg = format!("used {} symbol after last declared use", if is_string { "string" } else { "blob" }); assert!(act_err_msg.contains(&exp_err_msg), "{}: expected error message: '{}'; got: '{}'", name, exp_err_msg, act_err_msg); } test_case("0-use blob used once", false, &[ 0b000_000_00, // symtab start, 1 byte of length 0b0000_0001, // 1 symbol 0b011_000_11, // short blob, multi-use, 3 bytes 0b010_000_00, // 0 uses 0b0000_0001, // 0x01 (first payload byte) 0b0000_0010, // 0x02 0b0000_0011, // 0x03 (last payload byte) 0b100_000_00, // 0th blob symbol ]); test_case("0-use string used once", true, &[ 0b000_000_00, // symtab start, 1 byte of length 0b0000_0001, // 1 symbol 0b101_001_00, // short string, multi-use, 4 bytes 0b010_000_00, // 0 uses 0b0000_0100, // 0x04 (first payload byte) 0b0000_0011, // 0x03 0b0000_0010, // 0x02 0b0000_0001, // 0x01 (last payload byte) 0b011_000_00, // 0th string symbol ]); test_case("single-use string used twice", true, &[ 0b000_000_00, // symtab start, 1 byte of length 0b0000_0001, // 1 symbol 0b100_000_10, // short string, single use, 2 bytes b'X', b'Y', 
// 2-byte payload "XY" 0b101_000_10, // 2-element array 0b011_000_00, // use 0th symbol as string 0b011_000_00, // use 0th symbol as string ]); test_case("single-use string used as blob and as string too", true, &[ 0b000_000_00, // symtab start, 1 byte of length 0b0000_0001, // 1 symbol 0b100_000_10, // short string, single use, 2 bytes b'W', b'Z', // 2-byte payload "WZ" 0b110_000_01, // 1-entry map 0b100_000_00, // key: use 0th symbol as blob 0b011_000_00, // value: use 0th symbol as string ]); test_case("multi-use blob used one too many times", false, &[ 0b000_000_00, // symtab start, 1 byte of length 0b0000_0001, // 1 symbol 0b111_011_00, // long blob, multiple uses, 1 byte of length 0b0000_0110, // 6 bytes of payload 0b010_000_10, // 2 uses, 0xf0, 0xf1, // payload 0xf2, 0xf3, // payload 0xf4, 0xf5, // payload 0b101_000_11, // small array, 3 elements 0b111_100_00, // high-index blob, 1 byte of index 0b0000_0000, // blob #0 0b111_100_01, // high-index blob, 2 bytes of index 0b0000_0000, // blob #0 0b0000_0000, // (little endian u16) 0b100_000_00, // low-index blob, #0 ]); } #[test] fn into_value_agrees_with_serialize_impl() { use uuid::Uuid; use serde_json::json; use chrono::prelude::*; use quickcheck::QuickCheck as QC; fn test_case_inner(value: U) -> Result where U: Debug + Serialize + Into { let serialized = to_value(&value)?; let converted = value.into(); if serialized == converted { Ok(TestResult::passed()) } else { Ok(TestResult::error(format!( "Serialize and Into disagree.\nSerialize: {}\n Into: {}", serialized, converted ))) } } fn test_case_qc(state: &mut QC) where T: Debug + Serialize + Into + Arbitrary { state.quickcheck(test_case_inner:: as fn(T) -> Result) } fn test_case_manual(value: T) where T: Debug + Serialize + Into { match test_case_inner(value) { Ok(ref r) if r.is_failure() => panic!("{:?}", r), Ok(_) => {}, Err(e) => panic!("{}", e), } } let mut state = QC::new(); // Atomic types test_case_qc::<()>(&mut state); test_case_qc::(&mut state); 
test_case_qc::(&mut state); test_case_qc::(&mut state); test_case_qc::(&mut state); test_case_qc::(&mut state); test_case_qc::(&mut state); test_case_qc::(&mut state); test_case_qc::(&mut state); test_case_qc::(&mut state); test_case_qc::(&mut state); test_case_qc::(&mut state); test_case_qc::(&mut state); test_case_qc::(&mut state); // Optionals test_case_qc::>(&mut state); test_case_qc::>(&mut state); test_case_qc::>(&mut state); test_case_qc::>(&mut state); test_case_qc::>(&mut state); test_case_qc::>(&mut state); test_case_qc::>(&mut state); // Sequences test_case_qc::>(&mut state); test_case_qc::>(&mut state); test_case_qc::>(&mut state); test_case_qc::>(&mut state); test_case_qc::>(&mut state); test_case_qc::>(&mut state); test_case_qc::>(&mut state); test_case_qc::>(&mut state); test_case_qc::>>>(&mut state); test_case_qc::>(&mut state); test_case_qc::>(&mut state); test_case_qc::>(&mut state); test_case_qc::>(&mut state); // test_case_qc::>(&mut state); test_case_qc::>(&mut state); test_case_qc::>(&mut state); test_case_qc::>>>(&mut state); test_case_qc::>(&mut state); test_case_qc::>(&mut state); test_case_qc::>(&mut state); test_case_qc::>(&mut state); // test_case_qc::>(&mut state); test_case_qc::>(&mut state); test_case_qc::>(&mut state); test_case_qc::>>>(&mut state); // Maps test_case_qc::>(&mut state); test_case_qc::, String>, Vec>>(&mut state); test_case_qc::, HashSet>>(&mut state); test_case_qc::, Vec<()>>>(&mut state); // Non-`Arbitrary` types test_case_manual(Uuid::new_v4()); test_case_manual(Utc::now()); test_case_manual(Local::now()); test_case_manual(json!(null)); test_case_manual(json!(true)); test_case_manual(json!(-812346)); test_case_manual(json!(0)); test_case_manual(json!(1)); test_case_manual(json!((1_u64 << 63) + 1)); test_case_manual(json!(-91235.88342)); test_case_manual(json!("this is a JSON string")); test_case_manual(json!([-3, null, "a heterogeneous array"])); test_case_manual(json!({"maps": "work too", "1/alpha": 137})); } 
/// Checks that a `char` and the `String` produced by `ch.to_string()` have
/// identical representations in the value, text and binary formats, and
/// that deserializing a `char` from either representation gives the same
/// character in every format.
#[quickcheck]
fn qc_char_is_serialized_deserialized_as_string(ch: char) -> Result<()> {
    let s = ch.to_string();

    // Serialize both the char and its string form in all three formats.
    let val_repr_of_ch = to_value(&ch)?;
    let val_repr_of_s = to_value(&s)?;
    let str_repr_of_ch = to_string(&ch, Some(" "))?;
    let str_repr_of_s = to_string(&s, Some(" "))?;
    let bin_repr_of_ch = to_bytes(&ch)?;
    let bin_repr_of_s = to_bytes(&s)?;

    assert_eq!(val_repr_of_ch, val_repr_of_s);
    assert_eq!(str_repr_of_ch, str_repr_of_s);
    assert_eq!(bin_repr_of_ch, bin_repr_of_s);

    // Deserialize a `char` back out of each representation.
    let ch_from_val_ch_ref: char = from_value_ref(&val_repr_of_ch)?;
    let ch_from_val_str_ref: char = from_value_ref(&val_repr_of_s)?;

    assert_eq!(ch_from_val_ch_ref, ch_from_val_str_ref);

    let ch_from_val_ch: char = from_value(val_repr_of_ch)?;
    let ch_from_val_str: char = from_value(val_repr_of_s)?;

    assert_eq!(ch_from_val_ch, ch_from_val_str);

    let ch_from_str_ch: char = from_str(&str_repr_of_ch)?;
    let ch_from_str_str: char = from_str(&str_repr_of_s)?;

    assert_eq!(ch_from_str_ch, ch_from_str_str);

    let ch_from_bin_ch: char = from_bytes(&bin_repr_of_ch)?;
    let ch_from_bin_str: char = from_bytes(&bin_repr_of_s)?;

    assert_eq!(ch_from_bin_ch, ch_from_bin_str);

    // also assert that all three pairs produce the same character
    assert_eq!(ch_from_val_ch, ch_from_str_ch);
    assert_eq!(ch_from_str_ch, ch_from_bin_ch);

    Ok(())
}

/// Checks that a type whose `Serialize` impl goes through
/// `Serializer::collect_str` is serialized exactly like the `String`
/// obtained from its `Display` impl, in the value, text and binary formats.
#[test]
fn qc_collect_str_same_as_to_string() {
    use std::fmt::Display;
    use std::num::TryFromIntError;
    use std::result::Result;
    use std::convert::TryFrom;
    use quickcheck::QuickCheck as QC;

    /// A handful of `Display`-able payloads of different kinds.
    #[derive(Debug, Clone, PartialEq, Eq)]
    enum Displayable {
        Bool(bool),
        Byte(u8),
        BigInt(i64),
        String(String),
        Error(TryFromIntError),
    }

    impl Display for Displayable {
        /// Forwards to the `Display` impl of the wrapped value.
        fn fmt(&self, f: &mut Formatter) -> FmtResult {
            let d: &dyn Display = match *self {
                Displayable::Bool(ref b) => b,
                Displayable::Byte(ref b) => b,
                Displayable::BigInt(ref i) => i,
                Displayable::String(ref s) => s,
                Displayable::Error(ref err) => err,
            };
            d.fmt(f)
        }
    }

    impl Serialize for Displayable {
        /// Serializes via `collect_str`, i.e. through the `Display` impl.
        fn serialize(&self, ser: S) -> Result {
            ser.collect_str(self)
        }
    }

    impl Arbitrary for Displayable {
        fn arbitrary(g: &mut Gen) -> Self {
            // I know, I know, bias and stuff.
            // But `Gen::gen_range()` has been made private,
            // and this is not a crypto application anyway.
            // So this naive, modulo-based way is good enough.
            match usize::arbitrary(g) % 5 {
                0 => Displayable::Bool(Arbitrary::arbitrary(g)),
                1 => Displayable::Byte(Arbitrary::arbitrary(g)),
                2 => Displayable::BigInt(Arbitrary::arbitrary(g)),
                3 => Displayable::String(Arbitrary::arbitrary(g)),
                // converting -1 to `u64` always fails, which is a cheap
                // way of obtaining a `TryFromIntError` value
                4 => Displayable::Error(u64::try_from(-1).unwrap_err()),
                _ => unreachable!("remainder mod 5 is outside of 0..5 ?!")
            }
        }
    }

    /// Property under test; run by QuickCheck below.
    fn test_case(arg: Displayable) -> Result<(), Error> {
        // First, use the `Display`/`ToString` impl directly
        // in order to create a string representation,
        // and serialize it
        let direct = arg.to_string();
        let val_direct = to_value(&direct)?;
        let str_direct = to_string(&direct, Some(" "))?;
        let bin_direct = to_bytes(&direct)?;

        // Then, use the `Serialize` impl in order to create
        // the same (hopefully) serialized string representation
        let val_ser = to_value(&arg)?;
        let str_ser = to_string(&arg, Some(" "))?;
        let bin_ser = to_bytes(&arg)?;

        // In each format, assert that there's no difference between
        // serializing to a string and serializing directly.
        assert_eq!(val_direct, val_ser);
        assert_eq!(str_direct, str_ser);
        assert_eq!(bin_direct, bin_ser);

        Ok(())
    }

    QC::new().quickcheck(test_case as fn(Displayable) -> Result<(), Error>)
}

/// Checks that structs using `#[serde(flatten)]` and
/// `#[serde(skip_serializing)]` can be serialized to every format and read
/// back from each of them.
#[test]
fn can_serialize_flattened_and_skipped() -> Result<()> {
    // Verify that flattening potentially screwing up the size hints
    // doesn't affect serialization
    #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
    struct Dynamic {
        static_field: bool,
        #[serde(flatten)]
        dynamic_fields: BTreeMap,
    }

    let dynamic = Dynamic {
        static_field: true,
        dynamic_fields: BTreeMap::from_iter(vec![
            (String::from("foo"), -99),
            (String::from("bar_qux"), 8765),
        ]),
    };
    let dynamic_val = to_value(&dynamic)?;
    let dynamic_str = to_string(&dynamic, None)?;
    let dynamic_bin = to_bytes(&dynamic)?;

    // The flattened entries appear next to the static field.
    assert_eq!(
        dynamic_val,
        Value::from_iter(vec![
            (Value::from("static_field"), Value::from(true)),
            (Value::from("foo"), Value::from(-99)),
            (Value::from("bar_qux"), Value::from(8765)),
        ])
    );
    assert_eq!(
        dynamic_str,
        r#"{"static_field":true,"bar_qux":+8765,"foo":-99,}"#
    );

    let dynamic_copy_ref: Dynamic = from_value_ref(&dynamic_val)?;
    let dynamic_copy_val: Dynamic = from_value(dynamic_val)?;
    let dynamic_copy_str: Dynamic = from_str(&dynamic_str)?;
    let dynamic_copy_bin: Dynamic = from_reader(Cursor::new(dynamic_bin))?;

    assert_eq!(dynamic, dynamic_copy_ref);
    assert_eq!(dynamic, dynamic_copy_val);
    assert_eq!(dynamic, dynamic_copy_str);
    assert_eq!(dynamic, dynamic_copy_bin);

    #[derive(Debug, PartialEq, Serialize, Deserialize)]
    struct Gappy {
        always: f64,
        #[serde(skip_serializing, default)]
        sometimes: Option,
    }

    let gappy = Gappy {
        always: 39.476,
        sometimes: Some(256),
    };
    let gappy_val = to_value(&gappy)?;
    let gappy_str = to_string(&gappy, None)?;
    let gappy_bin = to_bytes(&gappy)?;

    // Only the non-skipped field shows up in the output.
    assert_eq!(gappy_val, Value::from_iter(vec![
        (String::from("always"), 39.476),
    ]));
    assert_eq!(gappy_str, r#"{"always":+39.476,}"#);

    // The skipped field was never written, so every copy is expected to
    // come back with it set to `None`.
    let gappy_default = Gappy { sometimes: None, ..gappy };
    let gappy_copy_ref: Gappy = from_value_ref(&gappy_val)?;
    let gappy_copy_val: Gappy = from_value(gappy_val)?;
    let gappy_copy_str: Gappy = from_str(&gappy_str)?;
    let gappy_copy_bin: Gappy = from_bytes(&gappy_bin)?;

    assert_eq!(gappy_copy_ref, gappy_default);
    assert_eq!(gappy_copy_val, gappy_default);
    assert_eq!(gappy_copy_str, gappy_default);
    assert_eq!(gappy_copy_bin, gappy_default);

    Ok(())
}

#[test]
fn simple_enum_tagging() -> Result<()> {
    /// Round-trips `value` through every serializer/deserializer pair and
    /// asserts that each copy equals the original. Errors are wrapped so
    /// that they name the test case and the failing step.
    fn test_case(name: &'static str, value: T) -> Result<()>
        where
            T: Eq + Debug + Serialize + for<'de> Deserialize<'de>
    {
        let augment_err = |ty, cause| {
            Error::with_cause(format!("{} ({})", name, ty), cause)
        };

        let val = to_value(&value)
            .map_err(|c| augment_err("ser, value", c))?;
        let str = to_string(&value, Some(" "))
            .map_err(|c| augment_err("ser, str", c))?;
        let bin = to_bytes(&value)
            .map_err(|c| augment_err("ser, bin", c))?;

        let copy_ref: T = from_value_ref(&val)
            .map_err(|c| augment_err("de, ref", c))?;
        let copy_val: T = from_value(val)
            .map_err(|c| augment_err("de, val", c))?;
        let copy_str: T = from_str(&str)
            .map_err(|c| augment_err("de, str", c))?;
        let copy_bin: T = from_bytes(&bin)
            .map_err(|c| augment_err("de, slice", c))?;
        let copy_rdr: T = from_reader(Cursor::new(bin))
            .map_err(|c| augment_err("de, stream", c))?;

        assert_eq!(copy_ref, value, "{}", name);
        assert_eq!(copy_val, value, "{}", name);
        assert_eq!(copy_str, value, "{}", name);
        assert_eq!(copy_bin, value, "{}", name);
        assert_eq!(copy_rdr, value, "{}", name);

        Ok(())
    }

    #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
    #[serde(untagged)]
    enum Untagged {
        Unit,
        Newtype(T),
        Tuple(T, U),
        Struct {
            field_one: U,
            field_two: T,
        },
        StructWithUnit {
            u: (),
        },
    }

    test_case("untagged unit", Untagged::Unit::<(), Box>)?;
    test_case("untagged newtype",
              Untagged::Newtype::<_, i16>(String::from("test")))?;
    test_case("untagged tuple", Untagged::Tuple(String::new(), 768))?;
    test_case("untagged struct", Untagged::Struct {
        field_one: 55391,
        field_two: String::from("nothing interesting here")
    })?;
    test_case("untagged struct containing unit",
Untagged::StructWithUnit:: { u: () })?; #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(tag = "type")] enum InternallyTagged { Unit, NewtypeStruct(InternallyTaggedHelper), NewtypeMap(HashMap), Struct { the_field: (), }, } #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] struct InternallyTaggedHelper { a_field: String, other_field: String, } test_case("internally-tagged unit", InternallyTagged::Unit)?; test_case("internally-tagged newtype of struct", InternallyTagged::NewtypeStruct(InternallyTaggedHelper { a_field: String::from("value 1"), other_field: String::from("value 2"), }))?; test_case("internally-tagged newtype of map", InternallyTagged::NewtypeMap(HashMap::from_iter(vec![ (true, 42), (false, 24), ])))?; test_case("internally-tagged struct", InternallyTagged::Struct { the_field: () })?; #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(tag = "type", content = "value")] enum AdjacentlyTagged { Unit, Newtype(bool), NewtypeStruct(AdjacentlyTaggedHelper), NewtypeMap(BTreeMap), Struct { a_field: Option, }, StructWithUnit { u: (), }, } #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] struct AdjacentlyTaggedHelper { f: [u32; 2], g: [u8; 8], } test_case("adjacently-tagged unit", AdjacentlyTagged::Unit)?; test_case("adjacently-tagged newtype", AdjacentlyTagged::Newtype(false))?; test_case("adjacently-tagged newtype of struct", AdjacentlyTagged::NewtypeStruct(AdjacentlyTaggedHelper { f: [999999, 765432198], g: [14, 12, 10, 9, 7, 3, 5, 0], }))?; test_case("adjacently-tagged newtype of map", AdjacentlyTagged::NewtypeMap(BTreeMap::from_iter(vec![ (2, (-2, 2)), (-13, (37, 98)), (0, (9, 1111111)), ])))?; test_case("adjacently-tagged struct", AdjacentlyTagged::Struct { a_field: Some(true) })?; test_case("adjacently-tagged struct containing unit", AdjacentlyTagged::StructWithUnit { u: () })?; Ok(()) } #[test] #[ignore] fn serde_bugs() { // 1st variant of https://github.com/serde-rs/serde/issues/1587 #[derive(Debug, Deserialize)] 
struct Person { first_name: String, last_name: String, } let value = Value::Array(vec![ Value::String(String::from("John")), Value::String(String::from("Doe")), ]); let res_value_ref: Result = from_value_ref(&value); let res_value: Result = from_value(value); assert!(res_value_ref.is_err()); assert!(res_value.is_err()); // 2nd variant of https://github.com/serde-rs/serde/issues/1587 #[derive(Debug, Deserialize)] #[serde(untagged)] enum Container { String(Wrapper), StringList(Vec), } #[derive(Debug, Deserialize)] struct Wrapper { value: String } let c: Container = from_str("[\"xyz\"]").expect("Container"); match c { Container::String(_) => panic!("should deserialize to StringList"), Container::StringList(v) => assert_eq!(v, ["xyz"]), } // 1st variant of https://github.com/serde-rs/serde/issues/1183 #[derive(Debug, PartialEq, Eq, Deserialize)] #[serde(tag = "type")] enum E { M(HashMap), } let e: E = from_str("{\"type\": \"M\", 1: null}").expect("enum E::M"); assert_eq!(e, E::M(HashMap::from_iter(vec![(1, ())]))); // 2nd variant of https://github.com/serde-rs/serde/issues/1183 #[derive(Debug, PartialEq, Eq, Deserialize)] #[serde(tag = "type")] enum F { M(HashMap), } let f: F = from_str("{\"type\": \"M\", 1: true}").expect("enum F::M"); assert_eq!(f, F::M(HashMap::from_iter(vec![(1, true)]))); }