// Copyright (c) 2015-2021 Georg Brandl. Licensed under the Apache License, // Version 2.0 // or the MIT license , at // your option. This file may not be copied, modified, or distributed except // according to those terms. mod arby; macro_rules! pyobj { (n=None) => { Value::None }; (b=True) => { Value::Bool(true) }; (b=False) => { Value::Bool(false) }; (i=$i:expr) => { Value::I64($i) }; (ii=$i:expr) => { Value::Int($i.clone()) }; (f=$f:expr) => { Value::F64($f) }; (bb=$b:expr) => { Value::Bytes($b.to_vec()) }; (s=$s:expr) => { Value::String($s.into()) }; (t=($($m:ident=$v:tt),*)) => { Value::Tuple(vec![$(pyobj!($m=$v)),*]) }; (l=[$($m:ident=$v:tt),*]) => { Value::List(vec![$(pyobj!($m=$v)),*]) }; (ss=($($m:ident=$v:tt),*)) => { Value::Set(BTreeSet::from_iter(vec![$(hpyobj!($m=$v)),*])) }; (fs=($($m:ident=$v:tt),*)) => { Value::FrozenSet(BTreeSet::from_iter(vec![$(hpyobj!($m=$v)),*])) }; (d={$($km:ident=$kv:tt => $vm:ident=$vv:tt),*}) => { Value::Dict(BTreeMap::from_iter(vec![$((hpyobj!($km=$kv), pyobj!($vm=$vv))),*])) }; } macro_rules! hpyobj { (n=None) => { HashableValue::None }; (b=True) => { HashableValue::Bool(true) }; (b=False) => { HashableValue::Bool(false) }; (i=$i:expr) => { HashableValue::I64($i) }; (ii=$i:expr) => { HashableValue::Int($i.clone()) }; (f=$f:expr) => { HashableValue::F64($f) }; (bb=$b:expr) => { HashableValue::Bytes($b.to_vec()) }; (s=$s:expr) => { HashableValue::String($s.into()) }; (t=($($m:ident=$v:tt),*)) => { HashableValue::Tuple(vec![$(hpyobj!($m=$v)),*]) }; (fs=($($m:ident=$v:tt),*)) => { HashableValue::FrozenSet(BTreeSet::from_iter(vec![$(hpyobj!($m=$v)),*])) }; } mod struct_tests { use std::fmt; use std::iter::FromIterator; use std::collections::BTreeMap; use serde::{ser, de}; use serde_derive::{Serialize, Deserialize}; use crate::{HashableValue, SerOptions, Value, from_slice, from_value, to_value, to_vec, value_from_slice, value_to_vec}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] struct Inner { a: (), b: usize, c: Vec, } #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] struct Outer { inner: Vec, } #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] struct Unit; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] struct Newtype(i32); #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] struct Tuple(i32, bool); #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] #[serde(deny_unknown_fields)] enum Animal { Dog, AntHive(Vec), Frog(String, Vec), Cat { age: usize, name: String }, } fn test_encode_ok(value: T, target: Value) where T: PartialEq + ser::Serialize, { // Test serialization via pickle. let vec = to_vec(&value, Default::default()).unwrap(); let py_val: Value = value_from_slice(&vec, Default::default()).unwrap(); assert_eq!(py_val, target); // Test direct serialization to Value. let py_val: Value = to_value(&value).unwrap(); assert_eq!(py_val, target); } fn test_encode_ok_with_opt(value: T, target: Value, options: SerOptions) where T: PartialEq + ser::Serialize, { let vec = to_vec(&value, options).unwrap(); let py_val: Value = value_from_slice(&vec, Default::default()).unwrap(); assert_eq!(py_val, target); } fn test_decode_ok<'de, T>(pyvalue: Value, target: T) where T: PartialEq + fmt::Debug + de::Deserialize<'de>, { // Test deserialization from pickle. let vec = value_to_vec(&pyvalue, Default::default()).unwrap(); let val: T = from_slice(&vec, Default::default()).unwrap(); assert_eq!(val, target); // Test direct deserialization from Value. let val: T = from_value(pyvalue).unwrap(); assert_eq!(val, target); } #[test] fn encode_types() { test_encode_ok((), pyobj!(n=None)); test_encode_ok(true, pyobj!(b=True)); test_encode_ok(None::, pyobj!(n=None)); test_encode_ok(Some(false), pyobj!(b=False)); test_encode_ok(10000000000_i64, pyobj!(i=10000000000)); test_encode_ok(4.5_f64, pyobj!(f=4.5)); test_encode_ok('ä', pyobj!(s="ä")); test_encode_ok("string", pyobj!(s="string")); // serde doesn't encode into bytes... test_encode_ok(&b"\x00\x01"[..], pyobj!(l=[i=0, i=1])); test_encode_ok(vec![1, 2, 3], pyobj!(l=[i=1, i=2, i=3])); test_encode_ok((1, 2, 3), pyobj!(t=(i=1, i=2, i=3))); test_encode_ok(&[1, 2, 3][..], pyobj!(l=[i=1, i=2, i=3])); // serde 1.0: fixed-size arrays are now tuples... test_encode_ok([1, 2, 3], pyobj!(t=(i=1, i=2, i=3))); test_encode_ok(BTreeMap::from_iter(vec![(1, 2), (3, 4)]), pyobj!(d={i=1 => i=2, i=3 => i=4})); } #[test] fn encode_struct() { test_encode_ok(Unit, pyobj!(n=None)); test_encode_ok(Newtype(42), pyobj!(i=42)); test_encode_ok(Tuple(42, false), pyobj!(t=(i=42, b=False))); test_encode_ok(Inner { a: (), b: 32, c: vec!["doc".into()] }, pyobj!(d={s="a" => n=None, s="b" => i=32, s="c" => l=[s="doc"]})); } #[test] fn encode_enum() { test_encode_ok(Animal::Dog, pyobj!(s="Dog")); test_encode_ok(Animal::AntHive(vec!["ant".into(), "aunt".into()]), pyobj!(d={s="AntHive" => l=[s="ant", s="aunt"]})); test_encode_ok(Animal::Frog("Henry".into(), vec![1, 5]), pyobj!(d={s="Frog" => l=[s="Henry", l=[i=1, i=5]]})); test_encode_ok(Animal::Cat { age: 5, name: "Molyneux".into() }, pyobj!(d={s="Cat" => d={s="age" => i=5, s="name" => s="Molyneux"}})); } #[test] fn encode_enum_compat() { test_encode_ok_with_opt(Animal::Dog, pyobj!(t=(s="Dog")), SerOptions::new().compat_enum_repr()); test_encode_ok_with_opt(Animal::AntHive(vec!["ant".into(), "aunt".into()]), pyobj!(t=(s="AntHive", l=[s="ant", s="aunt"])), SerOptions::new().compat_enum_repr()); test_encode_ok_with_opt(Animal::Frog("Henry".into(), vec![1, 5]), pyobj!(t=(s="Frog", l=[s="Henry", l=[i=1, i=5]])), SerOptions::new().compat_enum_repr()); test_encode_ok_with_opt(Animal::Cat { age: 5, name: "Molyneux".into() }, pyobj!(t=(s="Cat", d={s="age" => i=5, s="name" => s="Molyneux"})), SerOptions::new().compat_enum_repr()); } #[test] fn decode_types() { test_decode_ok(pyobj!(n=None), ()); test_decode_ok(pyobj!(b=True), true); test_decode_ok(pyobj!(b=True), Some(true)); test_decode_ok::>(pyobj!(n=None), None); test_decode_ok(pyobj!(i=10000000000), 10000000000_i64); test_decode_ok(pyobj!(f=4.5), 4.5_f64); test_decode_ok(pyobj!(s="ä"), 'ä'); test_decode_ok(pyobj!(s="string"), String::from("string")); // Vec doesn't decode from serde bytes... test_decode_ok(pyobj!(bb=b"bytes"), String::from("bytes")); test_decode_ok(pyobj!(l=[i=1, i=2, i=3]), vec![1, 2, 3]); test_decode_ok(pyobj!(t=(i=1, i=2, i=3)), (1, 2, 3)); test_decode_ok(pyobj!(l=[i=1, i=2, i=3]), [1, 2, 3]); test_decode_ok(pyobj!(d={i=1 => i=2, i=3 => i=4}), BTreeMap::from_iter(vec![(1, 2), (3, 4)])); } #[test] fn decode_struct() { test_decode_ok(pyobj!(n=None), Unit); test_decode_ok(pyobj!(i=42), Newtype(42)); test_decode_ok(pyobj!(t=(i=42, b=False)), Tuple(42, false)); test_decode_ok(pyobj!(d={s="a" => n=None, s="b" => i=32, s="c" => l=[s="doc"]}), Inner { a: (), b: 32, c: vec!["doc".into()] }); } #[test] fn decode_enum() { // tuple representation test_decode_ok(pyobj!(t=(s="Dog")), Animal::Dog); test_decode_ok(pyobj!(t=(s="AntHive", l=[s="ant", s="aunt"])), Animal::AntHive(vec!["ant".into(), "aunt".into()])); test_decode_ok(pyobj!(t=(s="Frog", l=[s="Henry", l=[i=1, i=5]])), Animal::Frog("Henry".into(), vec![1, 5])); test_decode_ok(pyobj!(t=(s="Cat", d={s="age" => i=5, s="name" => s="Molyneux"})), Animal::Cat { age: 5, name: "Molyneux".into() }); test_decode_ok(pyobj!(l=[t=(s="Dog"), t=(s="Dog"), t=(s="Cat", d={s="age" => i=5, s="name" => s="?"})]), vec![Animal::Dog, Animal::Dog, Animal::Cat { age: 5, name: "?".into() }]); // string/dict representation test_decode_ok(pyobj!(s="Dog"), Animal::Dog); test_decode_ok(pyobj!(d={s="AntHive" => l=[s="ant", s="aunt"]}), Animal::AntHive(vec!["ant".into(), "aunt".into()])); test_decode_ok(pyobj!(d={s="Frog" => l=[s="Henry", l=[i=1, i=5]]}), Animal::Frog("Henry".into(), vec![1, 5])); test_decode_ok(pyobj!(d={s="Cat" => d={s="age" => i=5, s="name" => s="Molyneux"}}), Animal::Cat { age: 5, name: "Molyneux".into() }); test_decode_ok(pyobj!(l=[s="Dog", s="Dog", d={s="Cat" => d={s="age" => i=5, s="name" => s="?"}}]), vec![Animal::Dog, Animal::Dog, Animal::Cat { age: 5, name: "?".into() }]); } } mod value_tests { use std::fs::File; use std::collections::{BTreeMap, BTreeSet}; use std::iter::FromIterator; use num_bigint::BigInt; use rand::{RngCore, thread_rng}; use quickcheck::{QuickCheck, StdGen}; use serde_json; use crate::{value_from_reader, value_to_vec, value_from_slice, to_vec, from_slice}; use crate::{Value, HashableValue, SerOptions, DeOptions}; use crate::Deserializer; use crate::error::{Error, ErrorCode}; // combinations of (python major, pickle proto) to test const TEST_CASES: &[(u32, u32)] = &[ (2, 0), (2, 1), (2, 2), (3, 0), (3, 1), (3, 2), (3, 3), (3, 4), (3, 5), ]; fn get_test_object(pyver: u32) -> Value { // Reproduces the test_object from test/data/generate.py. let longish = BigInt::from(10000000000u64) * BigInt::from(10000000000u64); let mut obj = pyobj!(d={ n=None => n=None, b=False => t=(b=False, b=True), i=10 => i=100000, ii=longish => ii=longish, f=1.0 => f=1.0, bb=b"bytes" => bb=b"bytes", s="string" => s="string", fs=(i=0, i=42) => fs=(i=0, i=42), t=(i=1, i=2) => t=(i=1, i=2, i=3), t=() => l=[ l=[i=1, i=2, i=3], ss=(i=0, i=42), d={}, bb=b"\x00\x55\xaa\xff" ] }); // Unfortunately, __dict__ keys are strings and so are pickled // differently depending on major version. match &mut obj { Value::Dict(map) => if pyver == 2 { map.insert(hpyobj!(i=7), pyobj!(d={bb=b"attr" => i=5})); } else { map.insert(hpyobj!(i=7), pyobj!(d={s="attr" => i=5})); }, _ => unreachable!() } obj } #[test] fn unpickle_all() { for &(major, proto) in TEST_CASES { let file = File::open(format!("test/data/tests_py{}_proto{}.pickle", major, proto)).unwrap(); let comparison = get_test_object(major); let unpickled = value_from_reader(file, Default::default()).unwrap(); assert_eq!(unpickled, comparison, "py {}, proto {}", major, proto); } } #[test] fn roundtrip() { let dict = get_test_object(2); let vec: Vec<_> = value_to_vec(&dict, Default::default()).unwrap(); let tripped = value_from_slice(&vec, Default::default()).unwrap(); assert_eq!(dict, tripped); } #[test] fn recursive() { for proto in &[0, 1, 2, 3, 4, 5] { let file = File::open(format!("test/data/test_recursive_proto{}.pickle", proto)).unwrap(); match value_from_reader(file, Default::default()) { Err(Error::Syntax(ErrorCode::Recursive)) => { } _ => assert!(false, "wrong/no error returned for recursive structure") } } } #[test] fn fuzzing() { // Tries to ensure that we don't panic when encountering strange streams. for _ in 0..1000 { let mut stream = [0u8; 1000]; thread_rng().fill_bytes(&mut stream); if *stream.last().unwrap() == b'.' { continue; } // These must all fail with an error, since we skip the check if the // last byte is a STOP opcode. assert!(value_from_slice(&stream, Default::default()).is_err()); } } #[test] fn qc_roundtrip() { fn roundtrip(original: Value) { let vec: Vec<_> = value_to_vec(&original, Default::default()).unwrap(); let tripped = value_from_slice(&vec, Default::default()).unwrap(); assert_eq!(original, tripped); } QuickCheck::new().gen(StdGen::new(thread_rng(), 10)) .tests(5000) .quickcheck(roundtrip as fn(_)); } #[test] fn roundtrip_json() { let original: serde_json::Value = serde_json::from_str(r#"[ {"null": null, "false": false, "true": true, "int": -1238571, "float": 1.5e10, "list": [false, 5, "true", 3.8] } ]"#).unwrap(); let vec: Vec<_> = to_vec(&original, Default::default()).unwrap(); let tripped: serde_json::Value = from_slice(&vec, Default::default()).unwrap(); assert_eq!(original, tripped); } #[test] fn bytestring_v2_py3_roundtrip() { let original = Value::Bytes(b"123\xff\xfe".to_vec()); let vec: Vec<_> = value_to_vec(&original, SerOptions::new().proto_v2()).unwrap(); // Python 3 default deserializer attempts to decode strings let mut de = Deserializer::new(vec.as_slice(), DeOptions::new().decode_strings()); let tripped: Value = de.deserialize_value().unwrap(); assert_eq!(original, tripped); de.end().unwrap(); } #[test] fn unresolvable_global() { let data = std::fs::read("test/data/test_unresolvable_global.pickle").unwrap(); assert!(value_from_slice(&data, Default::default()).is_err()); let val = value_from_slice(&data, DeOptions::new().replace_unresolved_globals()).unwrap(); assert_eq!(val, Value::None); let serde_val: serde_json::Value = from_slice(&data, DeOptions::new().replace_unresolved_globals()).unwrap(); assert_eq!(serde_val, serde_json::Value::Null); } }