use arrow2::array::*; use arrow2_convert::deserialize::{arrow_array_deserialize_iterator, TryIntoCollection}; use arrow2_convert::serialize::TryIntoArrow; /// Complex example that uses the following features: /// /// - Deeply Nested structs and lists /// - Custom types use arrow2_convert::{ArrowDeserialize, ArrowField, ArrowSerialize}; use std::borrow::Borrow; #[derive(Debug, Clone, PartialEq, ArrowField, ArrowSerialize, ArrowDeserialize)] pub struct Root { name: Option, is_deleted: bool, a1: Option, a2: i64, // binary a3: Option>, // date32 a4: chrono::NaiveDate, // timestamp(ns, None) a5: chrono::NaiveDateTime, // timestamp(ns, None) a6: Option, // array of date times date_time_list: Vec, // optional list array of optional strings nullable_list: Option>>, // optional list array of required strings required_list: Vec>, // custom type custom: CustomType, // custom optional type nullable_custom: Option, // vec custom type custom_list: Vec, // nested struct child: Child, // int 32 array int32_array: Vec, // large binary #[arrow_field(type = "arrow2_convert::field::LargeBinary")] large_binary: Vec, // fixed size binary #[arrow_field(type = "arrow2_convert::field::FixedSizeBinary<3>")] fixed_size_binary: Vec, // large string #[arrow_field(type = "arrow2_convert::field::LargeString")] large_string: String, // large vec #[arrow_field(type = "arrow2_convert::field::LargeVec")] large_vec: Vec, // fixed size vec #[arrow_field(type = "arrow2_convert::field::FixedSizeVec")] fixed_size_vec: Vec, } #[derive(Debug, Clone, PartialEq, Eq, ArrowField, ArrowSerialize, ArrowDeserialize)] pub struct Child { a1: i64, a2: String, // nested struct array child_array: Vec, } #[derive(Debug, Clone, PartialEq, Eq, ArrowField, ArrowSerialize, ArrowDeserialize)] pub struct ChildChild { a1: i32, bool_array: Vec, int64_array: Vec, } #[derive(Debug, Clone, PartialEq, Eq)] /// A newtype around a u64 pub struct CustomType(u64); /// To use with Arrow three traits need to be implemented: /// - ArrowField /// - ArrowSerialize /// - ArrowDeserialize impl arrow2_convert::field::ArrowField for CustomType { type Type = Self; #[inline] fn data_type() -> arrow2::datatypes::DataType { arrow2::datatypes::DataType::Extension( "custom".to_string(), Box::new(arrow2::datatypes::DataType::UInt64), None, ) } } impl arrow2_convert::serialize::ArrowSerialize for CustomType { type MutableArrayType = arrow2::array::MutablePrimitiveArray; #[inline] fn new_array() -> Self::MutableArrayType { Self::MutableArrayType::from(::data_type()) } #[inline] fn arrow_serialize(v: &Self, array: &mut Self::MutableArrayType) -> arrow2::error::Result<()> { array.try_push(Some(v.0)) } } impl arrow2_convert::deserialize::ArrowDeserialize for CustomType { type ArrayType = arrow2::array::PrimitiveArray; #[inline] fn arrow_deserialize(v: Option<&u64>) -> Option { v.map(|t| CustomType(*t)) } } // enable Vec arrow2_convert::arrow_enable_vec_for_type!(CustomType); fn item1() -> Root { use chrono::{NaiveDate, NaiveDateTime}; Root { name: Some("a".to_string()), is_deleted: false, a1: Some(0.1), a2: 1, a3: Some(b"aa".to_vec()), a4: NaiveDate::from_ymd_opt(1970, 1, 2).unwrap(), a5: NaiveDateTime::from_timestamp_opt(10000, 0).unwrap(), a6: Some(NaiveDateTime::from_timestamp_opt(10001, 0)).unwrap(), date_time_list: vec![ NaiveDateTime::from_timestamp_opt(10000, 10).unwrap(), NaiveDateTime::from_timestamp_opt(10000, 11).unwrap(), ], nullable_list: Some(vec![Some("cc".to_string()), Some("dd".to_string())]), required_list: vec![Some("aa".to_string()), Some("bb".to_string())], custom: CustomType(10), nullable_custom: Some(CustomType(11)), custom_list: vec![CustomType(12), CustomType(13)], child: Child { a1: 10, a2: "hello".to_string(), child_array: vec![ ChildChild { a1: 100, bool_array: vec![false], int64_array: vec![45555, 2124214, 224, 24214, 2424], }, ChildChild { a1: 101, bool_array: vec![true, true, true], int64_array: vec![4533, 22222, 2323, 333, 33322], }, ], }, int32_array: vec![0, 1, 3], large_binary: b"aa".to_vec(), fixed_size_binary: b"aaa".to_vec(), large_string: "abcdefg".to_string(), large_vec: vec![1, 2, 3, 4], fixed_size_vec: vec![10, 20, 30], } } fn item2() -> Root { use chrono::{NaiveDate, NaiveDateTime}; Root { name: Some("b".to_string()), is_deleted: true, a1: Some(0.1), a2: 1, a3: Some(b"aa".to_vec()), a4: NaiveDate::from_ymd_opt(1970, 1, 2).unwrap(), a5: NaiveDateTime::from_timestamp_opt(10000, 0).unwrap(), a6: None, date_time_list: vec![ NaiveDateTime::from_timestamp_opt(10000, 10).unwrap(), NaiveDateTime::from_timestamp_opt(10000, 11).unwrap(), ], nullable_list: None, required_list: vec![Some("ee".to_string()), Some("ff".to_string())], custom: CustomType(11), nullable_custom: None, custom_list: vec![CustomType(14), CustomType(13)], child: Child { a1: 11, a2: "hello again".to_string(), child_array: vec![ ChildChild { a1: 100, bool_array: vec![true, false, false, true], int64_array: vec![111111, 2222, 33], }, ChildChild { a1: 102, bool_array: vec![false], int64_array: vec![45555, 2124214, 224, 24214, 2424], }, ], }, int32_array: vec![111, 1], large_binary: b"bb".to_vec(), fixed_size_binary: b"bbb".to_vec(), large_string: "abdefag".to_string(), large_vec: vec![5, 4, 3, 2], fixed_size_vec: vec![11, 21, 32], } } #[test] fn test_round_trip() -> arrow2::error::Result<()> { // serialize to an arrow array let original_array = [item1(), item2()]; let array: Box = original_array.try_into_arrow()?; let struct_array = array .as_any() .downcast_ref::() .unwrap(); assert_eq!(struct_array.len(), 2); let values = struct_array.values(); assert_eq!(values.len(), 21); assert_eq!(struct_array.len(), 2); // can iterate one struct at a time without collecting for _i in arrow_array_deserialize_iterator::(array.borrow())? { // do something } // or can back to our original vector let foo_array: Vec = array.try_into_collection()?; assert_eq!(foo_array, original_array); Ok(()) }