use std::sync::Arc; use re_arrow2::datatypes::IntegerType; use re_arrow2::{ array::*, bitmap::Bitmap, buffer::Buffer, datatypes::{DataType, Field, Metadata, Schema, TimeUnit}, error::Result, }; use super::*; macro_rules! test { ($array:expr, $expected:expr) => {{ let buf = write_batch(Box::new($array))?; assert_eq!(String::from_utf8(buf).unwrap(), $expected); Ok(()) }}; } #[test] fn int32() -> Result<()> { let array = Int32Array::from([Some(1), Some(2), Some(3), None, Some(5)]); let expected = r#"[1,2,3,null,5]"#; test!(array, expected) } #[test] fn null() -> Result<()> { let array = NullArray::new(DataType::Null, 3); let expected = r#"[null,null,null]"#; test!(array, expected) } #[test] fn f32() -> Result<()> { let array = Float32Array::from([ Some(1.5), Some(2.5), Some(f32::NAN), Some(f32::INFINITY), Some(f32::NEG_INFINITY), None, Some(5.5), ]); let expected = r#"[1.5,2.5,null,null,null,null,5.5]"#; test!(array, expected) } #[test] fn f64() -> Result<()> { let array = Float64Array::from([ Some(1.5), Some(2.5), Some(f64::NAN), Some(f64::INFINITY), Some(f64::NEG_INFINITY), None, Some(5.5), ]); let expected = r#"[1.5,2.5,null,null,null,null,5.5]"#; test!(array, expected) } #[test] fn utf8() -> Result<()> { let array = Utf8Array::::from([Some("a"), Some("b"), Some("c"), Some("d"), None]); let expected = r#"["a","b","c","d",null]"#; test!(array, expected) } #[test] fn dictionary_utf8() -> Result<()> { let values = Utf8Array::::from([Some("a"), Some("b"), Some("c"), Some("d")]); let keys = PrimitiveArray::from_slice([0u32, 1, 2, 3, 1]); let array = DictionaryArray::try_new( DataType::Dictionary(IntegerType::UInt32, Arc::new(DataType::LargeUtf8), false), keys, Box::new(values), ) .unwrap(); let expected = r#"["a","b","c","d","b"]"#; test!(array, expected) } #[test] fn struct_() -> Result<()> { let c1 = Int32Array::from([Some(1), Some(2), Some(3), None, Some(5)]); let c2 = Utf8Array::::from([Some("a"), Some("b"), Some("c"), Some("d"), None]); let data_type = DataType::Struct(Arc::new(vec![ Field::new("c1", c1.data_type().clone(), true), Field::new("c2", c2.data_type().clone(), true), ])); let array = StructArray::new(data_type, vec![Box::new(c1) as _, Box::new(c2)], None); let expected = r#"[{"c1":1,"c2":"a"},{"c1":2,"c2":"b"},{"c1":3,"c2":"c"},{"c1":null,"c2":"d"},{"c1":5,"c2":null}]"#; test!(array, expected) } #[test] fn nested_struct_with_validity() -> Result<()> { let inner = Arc::new(vec![ Field::new("c121", DataType::Utf8, false), Field::new("c122", DataType::Int32, false), ]); let fields = Arc::new(vec![ Field::new("c11", DataType::Int32, false), Field::new("c12", DataType::Struct(inner.clone()), false), ]); let c1 = StructArray::new( DataType::Struct(fields), vec![ Int32Array::from(&[Some(1), None, Some(5)]).boxed(), StructArray::new( DataType::Struct(inner), vec![ Utf8Array::::from([None, Some("f"), Some("g")]).boxed(), Int32Array::from(&[Some(20), None, Some(43)]).boxed(), ], Some(Bitmap::from([false, true, true])), ) .boxed(), ], Some(Bitmap::from([true, true, false])), ); let c2 = Utf8Array::::from([Some("a"), Some("b"), Some("c")]); let data_type = DataType::Struct(Arc::new(vec![ Field::new("c1", c1.data_type().clone(), true), Field::new("c2", c2.data_type().clone(), true), ])); let array = StructArray::new(data_type, vec![c1.boxed(), c2.boxed()], None); let expected = r#"[{"c1":{"c11":1,"c12":null},"c2":"a"},{"c1":{"c11":null,"c12":{"c121":"f","c122":null}},"c2":"b"},{"c1":null,"c2":"c"}]"#; test!(array, expected) } #[test] fn nested_struct() -> Result<()> { let c121 = Field::new("c121", DataType::Utf8, false); let fields = Arc::new(vec![ Field::new("c11", DataType::Int32, false), Field::new("c12", DataType::Struct(Arc::new(vec![c121.clone()])), false), ]); let c1 = StructArray::new( DataType::Struct(fields), vec![ Int32Array::from(&[Some(1), None, Some(5)]).boxed(), StructArray::new( DataType::Struct(Arc::new(vec![c121])), vec![Box::new(Utf8Array::::from([ Some("e"), Some("f"), Some("g"), ]))], None, ) .boxed(), ], None, ); let c2 = Utf8Array::::from([Some("a"), Some("b"), Some("c")]); let data_type = DataType::Struct(Arc::new(vec![ Field::new("c1", c1.data_type().clone(), true), Field::new("c2", c2.data_type().clone(), true), ])); let array = StructArray::new(data_type, vec![c1.boxed(), c2.boxed()], None); let expected = r#"[{"c1":{"c11":1,"c12":{"c121":"e"}},"c2":"a"},{"c1":{"c11":null,"c12":{"c121":"f"}},"c2":"b"},{"c1":{"c11":5,"c12":{"c121":"g"}},"c2":"c"}]"#; test!(array, expected) } #[test] fn struct_with_list_field() -> Result<()> { let iter = vec![vec!["a", "a1"], vec!["b"], vec!["c"], vec!["d"], vec!["e"]]; let iter = iter .into_iter() .map(|x| x.into_iter().map(Some).collect::>()) .map(Some); let mut a = MutableListArray::>::new_with_field( MutableUtf8Array::::new(), "c_list", false, ); a.try_extend(iter).unwrap(); let c1: ListArray = a.into(); let c2 = PrimitiveArray::from_slice([1, 2, 3, 4, 5]); let data_type = DataType::Struct(Arc::new(vec![ Field::new("c1", c1.data_type().clone(), true), Field::new("c2", c2.data_type().clone(), true), ])); let array = StructArray::new(data_type, vec![c1.boxed(), c2.boxed()], None); let expected = r#"[{"c1":["a","a1"],"c2":1},{"c1":["b"],"c2":2},{"c1":["c"],"c2":3},{"c1":["d"],"c2":4},{"c1":["e"],"c2":5}]"#; test!(array, expected) } #[test] fn nested_list() -> Result<()> { let iter = vec![ vec![Some(vec![Some(1), Some(2)]), Some(vec![Some(3)])], vec![], vec![Some(vec![Some(4), Some(5), Some(6)])], ]; let iter = iter.into_iter().map(Some); let inner = MutableListArray::>::new_with_field( MutablePrimitiveArray::::new(), "b", false, ); let mut c1 = MutableListArray::>>::new_with_field( inner, "a", false, ); c1.try_extend(iter).unwrap(); let c1: ListArray = c1.into(); let c2 = Utf8Array::::from([Some("foo"), Some("bar"), None]); let data_type = DataType::Struct(Arc::new(vec![ Field::new("c1", c1.data_type().clone(), true), Field::new("c2", c2.data_type().clone(), true), ])); let array = StructArray::new(data_type, vec![c1.boxed(), c2.boxed()], None); let expected = r#"[{"c1":[[1,2],[3]],"c2":"foo"},{"c1":[],"c2":"bar"},{"c1":[[4,5,6]],"c2":null}]"#; test!(array, expected) } #[test] fn nested_list_records() -> Result<()> { let iter = vec![ vec![Some(vec![Some(1), Some(2)]), Some(vec![Some(3)])], vec![], vec![Some(vec![Some(4), Some(5), Some(6)])], ]; let iter = iter.into_iter().map(Some); let inner = MutableListArray::>::new_with_field( MutablePrimitiveArray::::new(), "b", false, ); let mut c1 = MutableListArray::>>::new_with_field( inner, "c1", false, ); c1.try_extend(iter).unwrap(); let c1: ListArray = c1.into(); let c2 = Utf8Array::::from([Some("foo"), Some("bar"), None]); let schema: Schema = vec![ Field::new("c1", c1.data_type().clone(), true), Field::new("c2", c2.data_type().clone(), true), ] .into(); let arrays: Vec> = vec![Box::new(c1), Box::new(c2)]; let chunk = Chunk::new(arrays); let expected = r#"[{"c1":[[1,2],[3]],"c2":"foo"},{"c1":[],"c2":"bar"},{"c1":[[4,5,6]],"c2":null}]"#; let buf = write_record_batch(schema, chunk)?; assert_eq!(String::from_utf8(buf).unwrap(), expected); Ok(()) } #[test] fn fixed_size_list_records() -> Result<()> { let iter = vec![ vec![Some(1), Some(2), Some(3)], vec![Some(4), Some(5), Some(6)], ]; let iter = iter.into_iter().map(Some); let mut inner = MutableFixedSizeListArray::>::new_with_field( MutablePrimitiveArray::new(), "vs", false, 3, ); inner.try_extend(iter).unwrap(); let inner: FixedSizeListArray = inner.into(); let schema = Schema { fields: vec![Field::new("vs", inner.data_type().clone(), true)], metadata: Metadata::default(), }; let arrays: Vec> = vec![Box::new(inner)]; let chunk = Chunk::new(arrays); let expected = r#"[{"vs":[1,2,3]},{"vs":[4,5,6]}]"#; let buf = write_record_batch(schema, chunk)?; assert_eq!(String::from_utf8(buf).unwrap(), expected); Ok(()) } #[test] fn list_of_struct() -> Result<()> { let inner = Arc::new(vec![Field::new("c121", DataType::Utf8, false)]); let fields = Arc::new(vec![ Field::new("c11", DataType::Int32, false), Field::new("c12", DataType::Struct(inner.clone()), false), ]); let c1_datatype = DataType::List(std::sync::Arc::new(Field::new( "s", DataType::Struct(fields.clone()), false, ))); let s = StructArray::new( DataType::Struct(fields), vec![ Int32Array::from(&[Some(1), None, Some(5)]).boxed(), StructArray::new( DataType::Struct(inner), vec![Box::new(Utf8Array::::from([ Some("e"), Some("f"), Some("g"), ]))], Some(Bitmap::from([false, true, true])), ) .boxed(), ], Some(Bitmap::from([true, true, false])), ); // list column rows (c1): // [{"c11": 1, "c12": {"c121": "e"}}, {"c12": {"c121": "f"}}], // null, // [{"c11": 5, "c12": {"c121": "g"}}] let c1 = ListArray::::new( c1_datatype, Buffer::from(vec![0, 2, 2, 3]).try_into().unwrap(), s.boxed(), Some(Bitmap::from([true, false, true])), ); let c2 = Int32Array::from_slice([1, 2, 3]); let data_type = DataType::Struct(Arc::new(vec![ Field::new("c1", c1.data_type().clone(), true), Field::new("c2", c2.data_type().clone(), true), ])); let array = StructArray::new(data_type, vec![c1.boxed(), c2.boxed()], None); let expected = r#"[{"c1":[{"c11":1,"c12":null},{"c11":null,"c12":{"c121":"f"}}],"c2":1},{"c1":null,"c2":2},{"c1":[null],"c2":3}]"#; test!(array, expected) } #[test] fn escaped_end_of_line_in_utf8() -> Result<()> { let array = Utf8Array::::from([Some("a\na"), None]); let expected = r#"["a\na",null]"#; test!(array, expected) } #[test] fn escaped_quotation_marks_in_utf8() -> Result<()> { let array = Utf8Array::::from([Some("a\"a"), None]); let expected = r#"["a\"a",null]"#; test!(array, expected) } #[test] fn write_date32() -> Result<()> { let array = PrimitiveArray::new(DataType::Date32, vec![1000i32, 8000, 10000].into(), None); let expected = r#"["1972-09-27","1991-11-27","1997-05-19"]"#; test!(array, expected) } #[test] fn write_timestamp() -> Result<()> { let array = PrimitiveArray::new( DataType::Timestamp(TimeUnit::Second, None), vec![10i64, 1 << 32, 1 << 33].into(), None, ); let expected = r#"["1970-01-01 00:00:10","2106-02-07 06:28:16","2242-03-16 12:56:32"]"#; test!(array, expected) } #[test] fn write_timestamp_with_tz_secs() -> Result<()> { let array = PrimitiveArray::new( DataType::Timestamp(TimeUnit::Second, Some("UTC".to_owned().into())), vec![10i64, 1 << 32, 1 << 33].into(), None, ); let expected = r#"["1970-01-01T00:00:10+00:00","2106-02-07T06:28:16+00:00","2242-03-16T12:56:32+00:00"]"#; test!(array, expected) } #[test] fn write_timestamp_with_tz_micros() -> Result<()> { let array = PrimitiveArray::new( DataType::Timestamp(TimeUnit::Microsecond, Some("+02:00".to_owned().into())), vec![ 10i64 * 1_000_000, (1 << 32) * 1_000_000, (1 << 33) * 1_000_000, 1_234_567_890_123_450, 1_234_567_890_120_000, ] .into(), None, ); // Note, default chrono DateTime string conversion strips off milli/micro/nanoseconds parts // if they are zero let expected = r#"["1970-01-01T02:00:10+02:00","2106-02-07T08:28:16+02:00","2242-03-16T14:56:32+02:00","2009-02-14T01:31:30.123450+02:00","2009-02-14T01:31:30.120+02:00"]"#; test!(array, expected) } #[cfg(feature = "chrono-tz")] #[test] fn write_timestamp_with_chrono_tz_millis() -> Result<()> { let array = PrimitiveArray::new( DataType::Timestamp(TimeUnit::Millisecond, Some("Europe/Oslo".to_owned().into())), vec![ 10i64 * 1_000, (1 << 32) * 1_000, (1 << 33) * 1_000, 1_234_567_890_123, 1_239_874_560_120, ] .into(), None, ); // Note, default chrono DateTime string conversion strips off milli/micro/nanoseconds parts // if they are zero let expected = r#"["1970-01-01T01:00:10+01:00","2106-02-07T07:28:16+01:00","2242-03-16T13:56:32+01:00","2009-02-14T00:31:30.123+01:00","2009-04-16T11:36:00.120+02:00"]"#; test!(array, expected) }