// Copyright (c) 2015-2024 Georg Brandl. Licensed under the Apache License, // Version 2.0 // or the MIT license , at // your option. This file may not be copied, modified, or distributed except // according to those terms. mod arby; macro_rules! pyobj { (n=None) => { Value::None }; (b=True) => { Value::Bool(true) }; (b=False) => { Value::Bool(false) }; (i=$i:expr) => { Value::I64($i) }; (ii=$i:expr) => { Value::Int($i.clone()) }; (f=$f:expr) => { Value::F64($f) }; (bb=$b:expr) => { Value::Bytes($b.to_vec()) }; (s=$s:expr) => { Value::String($s.into()) }; (t=($($m:ident=$v:tt),*)) => { Value::Tuple(vec![$(pyobj!($m=$v)),*]) }; (l=[$($m:ident=$v:tt),*]) => { Value::List(vec![$(pyobj!($m=$v)),*]) }; (ss=($($m:ident=$v:tt),*)) => { Value::Set(BTreeSet::from_iter(vec![$(hpyobj!($m=$v)),*])) }; (fs=($($m:ident=$v:tt),*)) => { Value::FrozenSet(BTreeSet::from_iter(vec![$(hpyobj!($m=$v)),*])) }; (d={$($km:ident=$kv:tt => $vm:ident=$vv:tt),*}) => { Value::Dict(BTreeMap::from_iter(vec![$((hpyobj!($km=$kv), pyobj!($vm=$vv))),*])) }; } macro_rules! hpyobj { (n=None) => { HashableValue::None }; (b=True) => { HashableValue::Bool(true) }; (b=False) => { HashableValue::Bool(false) }; (i=$i:expr) => { HashableValue::I64($i) }; (ii=$i:expr) => { HashableValue::Int($i.clone()) }; (f=$f:expr) => { HashableValue::F64($f) }; (bb=$b:expr) => { HashableValue::Bytes($b.to_vec()) }; (s=$s:expr) => { HashableValue::String($s.into()) }; (t=($($m:ident=$v:tt),*)) => { HashableValue::Tuple(vec![$(hpyobj!($m=$v)),*]) }; (fs=($($m:ident=$v:tt),*)) => { HashableValue::FrozenSet(BTreeSet::from_iter(vec![$(hpyobj!($m=$v)),*])) }; } mod struct_tests { use crate::{ from_slice, from_value, to_value, to_vec, value_from_slice, value_to_vec, HashableValue, SerOptions, Value, }; use serde::{de, ser}; use serde_derive::{Deserialize, Serialize}; use std::collections::BTreeMap; use std::fmt; use std::iter::FromIterator; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] struct Inner { a: (), b: usize, c: Vec, } #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] struct Outer { inner: Vec, } #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] struct Unit; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] struct Newtype(i32); #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] struct Tuple(i32, bool); #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] #[serde(deny_unknown_fields)] enum Animal { Dog, AntHive(Vec), Frog(String, Vec), Cat { age: usize, name: String }, } fn test_encode_ok(value: T, target: Value) where T: PartialEq + ser::Serialize, { // Test serialization via pickle. let vec = to_vec(&value, Default::default()).unwrap(); let py_val: Value = value_from_slice(&vec, Default::default()).unwrap(); assert_eq!(py_val, target); // Test direct serialization to Value. let py_val: Value = to_value(&value).unwrap(); assert_eq!(py_val, target); } fn test_encode_ok_with_opt(value: T, target: Value, options: SerOptions) where T: PartialEq + ser::Serialize, { let vec = to_vec(&value, options).unwrap(); let py_val: Value = value_from_slice(&vec, Default::default()).unwrap(); assert_eq!(py_val, target); } fn test_decode_ok<'de, T>(pyvalue: Value, target: T) where T: PartialEq + fmt::Debug + de::Deserialize<'de>, { // Test deserialization from pickle. let vec = value_to_vec(&pyvalue, Default::default()).unwrap(); let val: T = from_slice(&vec, Default::default()).unwrap(); assert_eq!(val, target); // Test direct deserialization from Value. let val: T = from_value(pyvalue).unwrap(); assert_eq!(val, target); } #[test] fn encode_types() { test_encode_ok((), pyobj!(n = None)); test_encode_ok(true, pyobj!(b = True)); test_encode_ok(None::, pyobj!(n = None)); test_encode_ok(Some(false), pyobj!(b = False)); test_encode_ok(10000000000_i64, pyobj!(i = 10000000000)); test_encode_ok(4.5_f64, pyobj!(f = 4.5)); test_encode_ok('ä', pyobj!(s = "ä")); test_encode_ok("string", pyobj!(s = "string")); // serde doesn't encode into bytes... test_encode_ok(&b"\x00\x01"[..], pyobj!(l = [i = 0, i = 1])); test_encode_ok(vec![1, 2, 3], pyobj!(l = [i = 1, i = 2, i = 3])); test_encode_ok((1, 2, 3), pyobj!(t = (i = 1, i = 2, i = 3))); test_encode_ok(&[1, 2, 3][..], pyobj!(l = [i = 1, i = 2, i = 3])); // serde 1.0: fixed-size arrays are now tuples... test_encode_ok([1, 2, 3], pyobj!(t = (i = 1, i = 2, i = 3))); test_encode_ok(BTreeMap::from_iter(vec![(1, 2), (3, 4)]), pyobj!(d={i=1 => i=2, i=3 => i=4})); } #[test] fn encode_struct() { test_encode_ok(Unit, pyobj!(n = None)); test_encode_ok(Newtype(42), pyobj!(i = 42)); test_encode_ok(Tuple(42, false), pyobj!(t = (i = 42, b = False))); test_encode_ok( Inner { a: (), b: 32, c: vec!["doc".into()] }, pyobj!(d={s="a" => n=None, s="b" => i=32, s="c" => l=[s="doc"]}), ); } #[test] fn encode_enum() { test_encode_ok(Animal::Dog, pyobj!(s = "Dog")); test_encode_ok( Animal::AntHive(vec!["ant".into(), "aunt".into()]), pyobj!(d={s="AntHive" => l=[s="ant", s="aunt"]}), ); test_encode_ok( Animal::Frog("Henry".into(), vec![1, 5]), pyobj!(d={s="Frog" => l=[s="Henry", l=[i=1, i=5]]}), ); test_encode_ok( Animal::Cat { age: 5, name: "Molyneux".into() }, pyobj!(d={s="Cat" => d={s="age" => i=5, s="name" => s="Molyneux"}}), ); } #[test] fn encode_enum_compat() { test_encode_ok_with_opt(Animal::Dog, pyobj!(t = (s = "Dog")), SerOptions::new().compat_enum_repr()); test_encode_ok_with_opt( Animal::AntHive(vec!["ant".into(), "aunt".into()]), pyobj!(t = (s = "AntHive", l = [s = "ant", s = "aunt"])), SerOptions::new().compat_enum_repr(), ); test_encode_ok_with_opt( Animal::Frog("Henry".into(), vec![1, 5]), pyobj!(t = (s = "Frog", l = [s = "Henry", l = [i = 1, i = 5]])), SerOptions::new().compat_enum_repr(), ); test_encode_ok_with_opt( Animal::Cat { age: 5, name: "Molyneux".into() }, pyobj!(t=(s="Cat", d={s="age" => i=5, s="name" => s="Molyneux"})), SerOptions::new().compat_enum_repr(), ); } #[test] fn decode_types() { test_decode_ok(pyobj!(n = None), ()); test_decode_ok(pyobj!(b = True), true); test_decode_ok(pyobj!(b = True), Some(true)); test_decode_ok::>(pyobj!(n = None), None); test_decode_ok(pyobj!(i = 10000000000), 10000000000_i64); test_decode_ok(pyobj!(f = 4.5), 4.5_f64); test_decode_ok(pyobj!(s = "ä"), 'ä'); test_decode_ok(pyobj!(s = "string"), String::from("string")); // Vec doesn't decode from serde bytes... test_decode_ok(pyobj!(bb = b"bytes"), String::from("bytes")); test_decode_ok(pyobj!(l = [i = 1, i = 2, i = 3]), vec![1, 2, 3]); test_decode_ok(pyobj!(t = (i = 1, i = 2, i = 3)), (1, 2, 3)); test_decode_ok(pyobj!(l = [i = 1, i = 2, i = 3]), [1, 2, 3]); test_decode_ok(pyobj!(d={i=1 => i=2, i=3 => i=4}), BTreeMap::from_iter(vec![(1, 2), (3, 4)])); } #[test] fn decode_struct() { test_decode_ok(pyobj!(n = None), Unit); test_decode_ok(pyobj!(i = 42), Newtype(42)); test_decode_ok(pyobj!(t = (i = 42, b = False)), Tuple(42, false)); test_decode_ok( pyobj!(d={s="a" => n=None, s="b" => i=32, s="c" => l=[s="doc"]}), Inner { a: (), b: 32, c: vec!["doc".into()] }, ); } #[test] fn decode_enum() { // tuple representation test_decode_ok(pyobj!(t = (s = "Dog")), Animal::Dog); test_decode_ok( pyobj!(t = (s = "AntHive", l = [s = "ant", s = "aunt"])), Animal::AntHive(vec!["ant".into(), "aunt".into()]), ); test_decode_ok( pyobj!(t = (s = "Frog", l = [s = "Henry", l = [i = 1, i = 5]])), Animal::Frog("Henry".into(), vec![1, 5]), ); test_decode_ok( pyobj!(t=(s="Cat", d={s="age" => i=5, s="name" => s="Molyneux"})), Animal::Cat { age: 5, name: "Molyneux".into() }, ); test_decode_ok( pyobj!(l=[t=(s="Dog"), t=(s="Dog"), t=(s="Cat", d={s="age" => i=5, s="name" => s="?"})]), vec![Animal::Dog, Animal::Dog, Animal::Cat { age: 5, name: "?".into() }], ); // string/dict representation test_decode_ok(pyobj!(s = "Dog"), Animal::Dog); test_decode_ok( pyobj!(d={s="AntHive" => l=[s="ant", s="aunt"]}), Animal::AntHive(vec!["ant".into(), "aunt".into()]), ); test_decode_ok( pyobj!(d={s="Frog" => l=[s="Henry", l=[i=1, i=5]]}), Animal::Frog("Henry".into(), vec![1, 5]), ); test_decode_ok( pyobj!(d={s="Cat" => d={s="age" => i=5, s="name" => s="Molyneux"}}), Animal::Cat { age: 5, name: "Molyneux".into() }, ); test_decode_ok( pyobj!(l=[s="Dog", s="Dog", d={s="Cat" => d={s="age" => i=5, s="name" => s="?"}}]), vec![Animal::Dog, Animal::Dog, Animal::Cat { age: 5, name: "?".into() }], ); } } mod value_tests { use crate::error::{Error, ErrorCode}; use crate::Deserializer; use crate::{from_slice, to_vec, value_from_reader, value_from_slice, value_to_vec}; use crate::{DeOptions, HashableValue, SerOptions, Value}; use num_bigint::BigInt; use quickcheck::{QuickCheck, StdGen}; use rand::{thread_rng, RngCore}; use std::collections::{BTreeMap, BTreeSet}; use std::fs::File; use std::iter::FromIterator; // combinations of (python major, pickle proto) to test const TEST_CASES: &[(u32, u32)] = &[ (2, 0), (2, 1), (2, 2), (3, 0), (3, 1), (3, 2), (3, 3), (3, 4), (3, 5), ]; fn get_test_object(pyver: u32) -> Value { // Reproduces the test_object from test/data/generate.py. let longish = BigInt::from(10000000000u64) * BigInt::from(10000000000u64); let mut obj = pyobj!(d={ n=None => n=None, b=False => t=(b=False, b=True), i=1000 => i=100000, ii=longish => ii=longish, f=1.0 => f=1.0, bb=b"bytes" => bb=b"bytes", s="string" => s="string", fs=(i=0, i=42) => fs=(i=0, i=42), t=(i=1, i=2) => t=(i=1, i=2, i=3), t=() => l=[ l=[i=1, i=2, i=3], ss=(i=0, i=42), d={}, bb=b"\x00\x55\xaa\xff" ] }); // Unfortunately, __dict__ keys are strings and so are pickled // differently depending on major version. match &mut obj { Value::Dict(map) => { if pyver == 2 { map.insert(hpyobj!(i = 7), pyobj!(d={bb=b"attr" => i=5})); } else { map.insert(hpyobj!(i = 7), pyobj!(d={s="attr" => i=5})); map.insert(hpyobj!(i = 8), pyobj!(t = (s = "abc", i = 10))); map.insert(hpyobj!(i = 9), pyobj!(d={s="type" => s="abcd", s="quantity" => i=100})); map.insert(hpyobj!(i = 42), pyobj!(t = (i = 30))); map.insert(hpyobj!(i = 43), pyobj!(t = (i = 20))); } } _ => unreachable!(), } obj } #[test] fn unpickle_all() { for &(major, proto) in TEST_CASES { let file = File::open(format!("test/data/tests_py{}_proto{}.pickle", major, proto)).unwrap(); let comparison = get_test_object(major); let unpickled = value_from_reader(file, DeOptions::default().keep_restore_state()).unwrap(); assert_eq!(unpickled, comparison, "py {}, proto {}", major, proto); } } #[test] fn roundtrip() { let dict = get_test_object(2); let vec: Vec<_> = value_to_vec(&dict, Default::default()).unwrap(); let tripped = value_from_slice(&vec, Default::default()).unwrap(); assert_eq!(dict, tripped); } #[test] fn recursive() { for proto in &[0, 1, 2, 3, 4, 5] { let file = File::open(format!("test/data/test_recursive_proto{}.pickle", proto)).unwrap(); match value_from_reader(file, Default::default()) { Err(Error::Syntax(ErrorCode::Recursive)) => {} _ => panic!("wrong/no error returned for recursive structure"), } let replace_opt = DeOptions::default().replace_recursive_structures(); let file = File::open(format!("test/data/test_recursive_proto{}.pickle", proto)).unwrap(); assert_eq!( value_from_reader(file, replace_opt).unwrap(), Value::List(vec![Value::Tuple(vec![Value::List(vec![Value::None])])]) ); } } #[test] fn fuzzing() { // Tries to ensure that we don't panic when encountering strange streams. for _ in 0..1000 { let mut stream = [0u8; 1000]; thread_rng().fill_bytes(&mut stream); if *stream.last().unwrap() == b'.' { continue; } // These must all fail with an error, since we skip the check if the // last byte is a STOP opcode. assert!(value_from_slice(&stream, Default::default()).is_err()); } } #[test] fn qc_roundtrip() { fn roundtrip(original: Value) { let vec: Vec<_> = value_to_vec(&original, Default::default()).unwrap(); let tripped = value_from_slice(&vec, Default::default()).unwrap(); assert_eq!(original, tripped); } QuickCheck::new() .gen(StdGen::new(thread_rng(), 10)) .tests(5000) .quickcheck(roundtrip as fn(_)); } #[test] fn roundtrip_json() { let original: serde_json::Value = serde_json::from_str( r#"[ {"null": null, "false": false, "true": true, "int": -1238571, "float": 1.5e10, "list": [false, 5, "true", 3.8] } ]"#, ) .unwrap(); let vec: Vec<_> = to_vec(&original, Default::default()).unwrap(); let tripped: serde_json::Value = from_slice(&vec, Default::default()).unwrap(); assert_eq!(original, tripped); } #[test] fn bytestring_v2_py3_roundtrip() { let original = Value::Bytes(b"123\xff\xfe".to_vec()); let vec: Vec<_> = value_to_vec(&original, SerOptions::new().proto_v2()).unwrap(); // Python 3 default deserializer attempts to decode strings let mut de = Deserializer::new(vec.as_slice(), DeOptions::new().decode_strings()); let tripped: Value = de.deserialize_value().unwrap(); assert_eq!(original, tripped); de.end().unwrap(); } #[test] fn unresolvable_global() { let data = std::fs::read("test/data/test_unresolvable_global.pickle").unwrap(); assert!(value_from_slice(&data, Default::default()).is_err()); let val = value_from_slice(&data, DeOptions::new().replace_unresolved_globals()).unwrap(); assert_eq!(val, Value::None); let serde_val: serde_json::Value = from_slice(&data, DeOptions::new().replace_unresolved_globals()).unwrap(); assert_eq!(serde_val, serde_json::Value::Null); } }