// Copyright Amazon.com, Inc. or its affiliates.

// Runs the shared Ion Hash test file through an "identity" digest so that the
// exact byte stream produced by Ion Hash can be compared against the expected
// bytes recorded in each test case.

#![cfg(feature = "experimental-ion-hash")]

use digest::consts::U4096;
use digest::{FixedOutput, Reset, Update};
use ion_rs::ion_hash::IonHasher;
use ion_rs::{AnyEncoding, Element, IonResult, Sequence, Struct};

use ion_rs::IonError;
use ion_rs::Reader;
use std::convert::From;
use std::fmt::Debug;
use std::fs::read;
use std::iter::FromIterator;
use thiserror::Error;

/// A "digest" that does no hashing at all: it accumulates every byte it is
/// given and emits those bytes (zero padded) as its output.
#[derive(Default, Clone)]
struct IdentityDigest(Vec<u8>);

impl IdentityDigest {
    /// Size, in bytes, of the fixed output buffer (see `OutputSize` below).
    fn output_size() -> usize {
        4096usize
    }

    /// Copies the accumulated bytes to the front of `out`.
    ///
    /// # Panics
    /// Panics (index out of bounds) if more bytes were accumulated than fit
    /// in `out`.
    fn write_output(&self, out: &mut [u8]) {
        for (i, byte) in self.0.iter().enumerate() {
            out[i] = *byte;
        }
    }
}

impl Update for IdentityDigest {
    fn update(&mut self, data: impl AsRef<[u8]>) {
        // When we get the finalized output of another `IdentityDigest`, it's
        // going to have a ton of trailing zeros. Sometimes Ion Hash actually
        // does call update with trailing zeros (e.g. float negative zero). So
        // we have a problem: when should we use the data verbatim and when
        // should we strip the trailing zeros that are *only there because of
        // the fixed output trait*? The hack is to strip trailing zeroes if we
        // get called with something that looks like it came from the result of
        // another IdentityDigest. Note that `>=` is used here due to escaping
        // (which ironically, means the output isn't fixed size).
        let data = data.as_ref();
        let actual_update = if data.len() >= IdentityDigest::output_size() {
            without_trailing_zeros(data)
        } else {
            data
        };
        self.0.extend_from_slice(actual_update);
    }
}

impl FixedOutput for IdentityDigest {
    type OutputSize = U4096;

    fn finalize_into(self, out: &mut digest::generic_array::GenericArray<u8, Self::OutputSize>) {
        self.write_output(out);
    }

    fn finalize_into_reset(
        &mut self,
        out: &mut digest::generic_array::GenericArray<u8, Self::OutputSize>,
    ) {
        self.write_output(out);
        self.reset();
    }
}

impl Reset for IdentityDigest {
    fn reset(&mut self) {
        self.0.clear();
    }
}

/// Returns the prefix of `data` that ends at its last non-zero byte.
///
/// An empty slice, or a slice consisting solely of zero bytes, yields an empty
/// slice (rather than panicking on the all-zero case).
fn without_trailing_zeros(data: &[u8]) -> &[u8] {
    let end = data
        .iter()
        .rposition(|byte| *byte != 0x00)
        .map_or(0, |index| index + 1);
    &data[..end]
}

/// Names of test cases that are skipped; see `should_ignore`.
const IGNORE_LIST: &[&str] = &[
    // Uses md5 (not identity)
    r#"{Metrics:{'Event.Catchup':[{Value:0,Unit:ms}],'FanoutCache.Time':[{Value:1,Unit:ms}]}}"#,
    // ion-rust doesn't have full support for unknown symbols yet, so we can't properly read these test cases.
    // The ion-hash crate, however, does have full support for symbols with unknown text.
    // See `mod unknown_symbol_text_tests` at the end of this file for equivalent tests.
    "'$0'",
    "{'$0':1}",
    "'$0'::{}",
    // ion-rust reads ints up to 128 bits
    "intLength512",
];

/// Returns `true` if `test_name` matches (after normalization) an entry of
/// `IGNORE_LIST`.
fn should_ignore(test_name: &str) -> bool {
    /// Some tests don't have names (in Ion annotations) and are named after the
    /// Ion text representation of the case itself. We turn that (name) into a
    /// String (see `test_case_name`) in a non-deterministic way (e.g. struct
    /// field ordering is random). This method does some normalization for the
    /// sake of comparing two test names!
    ///
    /// Normalization drops all whitespace and sorts the remaining characters.
    fn normalize(name: &str) -> String {
        let mut chars: Vec<char> = name.chars().filter(|ch| !ch.is_whitespace()).collect();
        chars.sort_unstable();
        chars.into_iter().collect()
    }

    let normalized_test_name = normalize(test_name);
    IGNORE_LIST
        .iter()
        .any(|ignore| normalize(ignore) == normalized_test_name)
}

#[test]
fn ion_hash_tests() -> IonHashTestResult<()> {
    test_file("ion-hash-test/ion_hash_tests.ion")
}

/// Reads every top-level value of `file_name`, skipping annotated cases that
/// are on the ignore list, and runs the remaining values as test cases.
fn test_file(file_name: &str) -> IonHashTestResult<()> {
    let data = read(file_name).map_err(IonError::from)?;
    let mut reader = Reader::new(AnyEncoding, data)?;
    let mut elems = Vec::new();
    while let Some(value) = reader.next()? {
        // Similar logic to skip test cases with a name on the skip list also appears in the
        // `test_case` method, but at that point in the execution it's too late to skip files that
        // cannot be parsed. The logic in `test_case` also handles unnamed test cases.
        if let Some(annotation) = value.annotations().next() {
            let test_case_name = annotation
                .expect("failed reading annotation")
                .text()
                .expect("test name without text");
            if should_ignore(test_case_name) {
                println!("skipping: {}", test_case_name);
                continue;
            }
        }
        let element = Element::try_from(value)?;
        elems.push(element);
    }
    test_all(Sequence::from(elems))
}

/// Runs every test case in `elems`, collecting failures instead of stopping at
/// the first one.
///
/// Returns `Ok(())` when nothing failed, the single error when exactly one case
/// failed, and `MultipleTestsFailed` otherwise. A failure to decode a binary
/// (`10n`) case aborts the run immediately via `?`.
fn test_all(elems: Sequence) -> IonHashTestResult<()> {
    let mut failures: Vec<IonHashTestError> = vec![];
    for case in &elems {
        let annotated_test_name = test_case_name_from_annotation(case);
        let case = case.as_struct().expect("test cases are structs");
        let expect = case
            .get("expect")
            .expect("test cases have an `expect` value");

        // The case example can be text or binary but not both. Text is already
        // decoded at this point.
        //
        // Implementation note: the arms call into `test_case` directly (vs.
        // returning an `Element`) because of ownership. In the text case, we
        // already have a decoded, borrowed Element. In the binary case, we
        // first need to decode one and thus need to store the decoded Element
        // somewhere. Thus, the two cases have different types: `&E` vs `E`.
        // While we could clone the text case, it's just as simple to just
        // inline the call to `test_case` and avoid the clone.
        let outcome = match (case.get("ion"), case.get("10n")) {
            (Some(text), None) => test_case(annotated_test_name, text, expect),
            (None, Some(binary)) => {
                // The sexp contains a binary value without the BVM.
                let value = seq_to_bytes(binary);

                // TODO: the binary IVM is pub(crate) in ion-rust
                let mut bytes = vec![0xE0, 0x01, 0x00, 0xEA];
                bytes.extend(value);

                let loaded = Element::read_all(&bytes)?;
                let elem = loaded
                    .into_iter()
                    .next()
                    .expect("10n test case should have a single element (there were none)");
                test_case(annotated_test_name, &elem, expect)
            }
            _ => {
                unreachable!("test case structs must have either an `ion` field or an `10n` field")
            }
        };
        if let Err(e) = outcome {
            failures.push(e);
        }
    }

    match failures.len() {
        0 => Ok(()),
        1 => Err(failures.pop().unwrap()),
        _ => Err(IonHashTestError::MultipleTestsFailed { causes: failures }),
    }
}

/// A single test case. `input` is the input to ion-hash, while `expect` is a
/// "program" that we use to validate ion-hash computed the right result.
///
/// When `test_case_name` is `None`, the Ion text of `input` is used as the
/// name. Cases whose name is on the ignore list succeed without running.
fn test_case(
    test_case_name: Option<String>,
    input: &Element,
    expect: &Element,
) -> IonHashTestResult<()> {
    let test_case_name = match test_case_name {
        Some(name) => name,
        None => input.to_string(),
    };

    if should_ignore(&test_case_name) {
        println!("skipping: {}", test_case_name);
        return Ok(());
    }

    // Uncomment me if you're trying to debug a panic!
    // eprintln!("running: {}", test_case_name);

    let struct_ = expect.as_struct().expect("`expect` should be a struct");
    let expected = expected_hash(struct_)?;

    let result = IdentityDigest::hash_element(input)?;

    // Ignore trailing empty bytes caused by the identity digest producing a
    // variable sized result. Without this, any test failure will write lots of
    // stuff to your console which can be annoying since it takes forever.
    let expected_string = format!("{:02x?}", without_trailing_zeros(&expected[..]));
    let actual_string = format!("{:02x?}", without_trailing_zeros(&result[..]));

    if expected_string != actual_string {
        Err(IonHashTestError::TestFailed {
            test_case_name,
            message: Some(format!(
                "expected: {}\nwas: {}",
                expected_string, actual_string
            )),
        })
    } else {
        Ok(())
    }
}

/// Extracts the expected bytes from an `expect` struct: the last element of its
/// `identity` sexp, which must be annotated `digest` or `final_digest`.
///
/// # Panics
/// Panics if `identity` is missing (not implemented), is not a sequence, is
/// empty, or if its last element carries an unknown annotation.
fn expected_hash(struct_: &Struct) -> IonResult<Vec<u8>> {
    let identity = if let Some(identity) = struct_.get("identity") {
        identity.as_sequence().expect("`identity` should be a sexp")
    } else {
        todo!("only identity tests are implemented")
    };

    if let Some(expectation) = identity.elements().last() {
        let method = expectation
            .annotations()
            .iter()
            .next()
            .expect("identity sexps have one annotation")
            .text()
            .expect("identity sexps contain elements with text annotations");

        let bytes = seq_to_bytes(expectation);

        match method {
            "digest" | "final_digest" => Ok(bytes),
            _ => panic!("unknown expectation `{}`", method),
        }
    } else {
        panic!("expected at least expectation!")
    }
}

/// Returns the text of the first annotation on `test_case_ion`, if there is
/// one. Panics if that annotation has no text.
fn test_case_name_from_annotation(test_case_ion: &Element) -> Option<String> {
    test_case_ion
        .annotations()
        .iter()
        .next()
        .map(|tok| tok.text().unwrap().into())
}

/// The test file is full of byte arrays represented as SExps of hex-formatted
/// bytes. This method extracts the bytes into a Vec.
fn seq_to_bytes(elem: &Element) -> Vec<u8> {
    elem.as_sequence()
        .expect("expected a sequence")
        .elements()
        .map(|it| {
            // NOTE: `as u8` silently truncates values outside 0..=255.
            it.as_int()
                .and_then(|i| i.as_i64())
                .expect("expected a sequence of bytes") as u8
        })
        .collect()
}

type IonHashTestResult<T> = Result<T, IonHashTestError>;

/// Errors produced while running the Ion Hash test file.
#[derive(Debug, Error)]
pub enum IonHashTestError {
    /// Indicates that an IO error was encountered while reading or writing.
    #[error("{source:?}")]
    IonRsError {
        #[from]
        source: ion_rs::IonError,
    },
    /// A test case ran but produced bytes different from the expected ones.
    #[error("FAIL: {test_case_name}: {message:?}")]
    TestFailed {
        test_case_name: String,
        message: Option<String>,
    },
    /// A test case could not be run.
    #[error("ERROR: {test_case_name:?}: {message:?} {cause:?}")]
    TestError {
        test_case_name: Option<String>,
        message: Option<String>,
        cause: Option<Box<dyn Debug>>,
    },
    /// More than one test case failed; `causes` holds each failure.
    #[error("FAIL: {causes:?}")]
    MultipleTestsFailed { causes: Vec<IonHashTestError> },
}

mod unknown_symbol_text_tests {
    use super::*;
    use ion_rs::Symbol;

    #[test]
    fn test_unknown_symbol_text() {
        let unknown_symbol = Element::from(Symbol::unknown_text());
        let digest = IdentityDigest::hash_element(&unknown_symbol).unwrap();
        let expected_string = format!("{:02x?}", &[0x0b, 0x71, 0x0e]);
        let actual_string = format!("{:02x?}", without_trailing_zeros(&digest[..]));
        assert_eq!(expected_string, actual_string)
    }

    #[test]
    fn test_unknown_field_name_text() {
        let fields_itr = vec![(Symbol::unknown_text(), Element::from(1))];
        let struct_ = Struct::from_iter(fields_itr);
        let element = Element::from(struct_);
        let digest = IdentityDigest::hash_element(&element).unwrap();
        let expected_string = format!(
            "{:02x?}",
            &[0x0b, 0xd0, 0x0c, 0x0b, 0x71, 0x0c, 0x0e, 0x0c, 0x0b, 0x20, 0x01, 0x0c, 0x0e, 0x0e]
        );
        let actual_string = format!("{:02x?}", without_trailing_zeros(&digest[..]));
        assert_eq!(expected_string, actual_string)
    }

    #[test]
    fn test_unknown_annotation_text() {
        let mut element = Element::read_one("{}".as_bytes()).unwrap();
        element = element.with_annotations(vec![Symbol::unknown_text()]);
        let digest = IdentityDigest::hash_element(&element).unwrap();
        let expected_string = format!(
            "{:02x?}",
            &[0x0b, 0xe0, 0x0b, 0x71, 0x0e, 0x0b, 0xd0, 0x0e, 0x0e]
        );
        let actual_string = format!("{:02x?}", without_trailing_zeros(&digest[..]));
        assert_eq!(expected_string, actual_string)
    }
}