// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use icu_normalizer::properties::CanonicalCombiningClassMap; use icu_normalizer::properties::CanonicalCombiningClassMapBorrowed; use icu_normalizer::properties::CanonicalComposition; use icu_normalizer::properties::CanonicalCompositionBorrowed; use icu_normalizer::properties::CanonicalDecomposition; use icu_normalizer::properties::CanonicalDecompositionBorrowed; use icu_normalizer::properties::Decomposed; use icu_normalizer::uts46::Uts46Mapper; use icu_normalizer::uts46::Uts46MapperBorrowed; use icu_normalizer::ComposingNormalizer; use icu_normalizer::ComposingNormalizerBorrowed; use icu_normalizer::DecomposingNormalizer; use icu_normalizer::DecomposingNormalizerBorrowed; #[test] fn test_nfd_basic() { let normalizer = DecomposingNormalizerBorrowed::new_nfd(); assert_eq!(normalizer.normalize("ä"), "a\u{0308}"); assert_eq!(normalizer.normalize("Ä"), "A\u{0308}"); assert_eq!(normalizer.normalize("ệ"), "e\u{0323}\u{0302}"); assert_eq!(normalizer.normalize("Ệ"), "E\u{0323}\u{0302}"); assert_eq!(normalizer.normalize("𝅗𝅥"), "𝅗\u{1D165}"); assert_eq!(normalizer.normalize("\u{2126}"), "Ω"); // ohm sign assert_eq!(normalizer.normalize("ベ"), "ベ"); // half-width unchanged assert_eq!(normalizer.normalize("ペ"), "ペ"); // half-width unchanged assert_eq!(normalizer.normalize("fi"), "fi"); // ligature unchanged assert_eq!(normalizer.normalize("\u{FDFA}"), "\u{FDFA}"); // ligature unchanged assert_eq!(normalizer.normalize("㈎"), "㈎"); // parenthetical unchanged assert_eq!(normalizer.normalize("\u{0345}"), "\u{0345}"); // Iota subscript } #[test] fn test_nfd_owned() { let owned = DecomposingNormalizer::try_new_nfd_unstable(&icu_normalizer::provider::Baked).unwrap(); let normalizer = owned.as_borrowed(); assert_eq!(normalizer.normalize("ä"), "a\u{0308}"); assert_eq!(normalizer.normalize("Ä"), "A\u{0308}"); assert_eq!(normalizer.normalize("ệ"), "e\u{0323}\u{0302}"); assert_eq!(normalizer.normalize("Ệ"), "E\u{0323}\u{0302}"); assert_eq!(normalizer.normalize("𝅗𝅥"), "𝅗\u{1D165}"); assert_eq!(normalizer.normalize("\u{2126}"), "Ω"); // ohm sign assert_eq!(normalizer.normalize("ベ"), "ベ"); // half-width unchanged assert_eq!(normalizer.normalize("ペ"), "ペ"); // half-width unchanged assert_eq!(normalizer.normalize("fi"), "fi"); // ligature unchanged assert_eq!(normalizer.normalize("\u{FDFA}"), "\u{FDFA}"); // ligature unchanged assert_eq!(normalizer.normalize("㈎"), "㈎"); // parenthetical unchanged assert_eq!(normalizer.normalize("\u{0345}"), "\u{0345}"); // Iota subscript } #[test] fn test_nfkd_basic() { let normalizer = DecomposingNormalizerBorrowed::new_nfkd(); assert_eq!(normalizer.normalize("ä"), "a\u{0308}"); assert_eq!(normalizer.normalize("Ä"), "A\u{0308}"); assert_eq!(normalizer.normalize("ệ"), "e\u{0323}\u{0302}"); assert_eq!(normalizer.normalize("Ệ"), "E\u{0323}\u{0302}"); assert_eq!(normalizer.normalize("𝅗𝅥"), "𝅗\u{1D165}"); assert_eq!(normalizer.normalize("\u{2126}"), "Ω"); // ohm sign assert_eq!(normalizer.normalize("ベ"), "ヘ\u{3099}"); // half-width to full-width assert_eq!(normalizer.normalize("ペ"), "ヘ\u{309A}"); // half-width to full-width assert_eq!(normalizer.normalize("fi"), "fi"); // ligature expanded assert_eq!(normalizer.normalize("\u{FDFA}"), "\u{635}\u{644}\u{649} \u{627}\u{644}\u{644}\u{647} \u{639}\u{644}\u{64A}\u{647} \u{648}\u{633}\u{644}\u{645}"); // ligature expanded assert_eq!(normalizer.normalize("㈎"), "(\u{1100}\u{1161})"); // parenthetical expanded assert_eq!(normalizer.normalize("\u{0345}"), "\u{0345}"); // Iota subscript } #[test] fn test_nfkd_owned() { let owned = DecomposingNormalizer::try_new_nfkd_unstable(&icu_normalizer::provider::Baked).unwrap(); let normalizer = owned.as_borrowed(); assert_eq!(normalizer.normalize("ä"), "a\u{0308}"); assert_eq!(normalizer.normalize("Ä"), "A\u{0308}"); assert_eq!(normalizer.normalize("ệ"), "e\u{0323}\u{0302}"); assert_eq!(normalizer.normalize("Ệ"), "E\u{0323}\u{0302}"); assert_eq!(normalizer.normalize("𝅗𝅥"), "𝅗\u{1D165}"); assert_eq!(normalizer.normalize("\u{2126}"), "Ω"); // ohm sign assert_eq!(normalizer.normalize("ベ"), "ヘ\u{3099}"); // half-width to full-width assert_eq!(normalizer.normalize("ペ"), "ヘ\u{309A}"); // half-width to full-width assert_eq!(normalizer.normalize("fi"), "fi"); // ligature expanded assert_eq!(normalizer.normalize("\u{FDFA}"), "\u{635}\u{644}\u{649} \u{627}\u{644}\u{644}\u{647} \u{639}\u{644}\u{64A}\u{647} \u{648}\u{633}\u{644}\u{645}"); // ligature expanded assert_eq!(normalizer.normalize("㈎"), "(\u{1100}\u{1161})"); // parenthetical expanded assert_eq!(normalizer.normalize("\u{0345}"), "\u{0345}"); // Iota subscript } #[test] fn test_nfc_basic() { let normalizer = ComposingNormalizerBorrowed::new_nfc(); assert_eq!(normalizer.normalize("a\u{0308}"), "ä"); assert_eq!(normalizer.normalize("A\u{0308}"), "Ä"); assert_eq!(normalizer.normalize("e\u{0323}\u{0302}"), "ệ"); assert_eq!(normalizer.normalize("E\u{0323}\u{0302}"), "Ệ"); assert_eq!(normalizer.normalize("𝅗𝅥"), "𝅗\u{1D165}"); // Composition exclusion assert_eq!(normalizer.normalize("\u{2126}"), "Ω"); // ohm sign assert_eq!(normalizer.normalize("ベ"), "ベ"); // half-width unchanged assert_eq!(normalizer.normalize("ペ"), "ペ"); // half-width unchanged assert_eq!(normalizer.normalize("fi"), "fi"); // ligature unchanged assert_eq!(normalizer.normalize("\u{FDFA}"), "\u{FDFA}"); // ligature unchanged assert_eq!(normalizer.normalize("㈎"), "㈎"); // parenthetical unchanged assert_eq!(normalizer.normalize("\u{0345}"), "\u{0345}"); // Iota subscript } #[test] fn test_nfc_owned() { let owned = ComposingNormalizer::try_new_nfc_unstable(&icu_normalizer::provider::Baked).unwrap(); let normalizer = owned.as_borrowed(); assert_eq!(normalizer.normalize("a\u{0308}"), "ä"); assert_eq!(normalizer.normalize("A\u{0308}"), "Ä"); assert_eq!(normalizer.normalize("e\u{0323}\u{0302}"), "ệ"); assert_eq!(normalizer.normalize("E\u{0323}\u{0302}"), "Ệ"); assert_eq!(normalizer.normalize("𝅗𝅥"), "𝅗\u{1D165}"); // Composition exclusion assert_eq!(normalizer.normalize("\u{2126}"), "Ω"); // ohm sign assert_eq!(normalizer.normalize("ベ"), "ベ"); // half-width unchanged assert_eq!(normalizer.normalize("ペ"), "ペ"); // half-width unchanged assert_eq!(normalizer.normalize("fi"), "fi"); // ligature unchanged assert_eq!(normalizer.normalize("\u{FDFA}"), "\u{FDFA}"); // ligature unchanged assert_eq!(normalizer.normalize("㈎"), "㈎"); // parenthetical unchanged assert_eq!(normalizer.normalize("\u{0345}"), "\u{0345}"); // Iota subscript } #[test] fn test_nfkc_basic() { let normalizer = ComposingNormalizerBorrowed::new_nfkc(); assert_eq!(normalizer.normalize("a\u{0308}"), "ä"); assert_eq!(normalizer.normalize("A\u{0308}"), "Ä"); assert_eq!(normalizer.normalize("e\u{0323}\u{0302}"), "ệ"); assert_eq!(normalizer.normalize("E\u{0323}\u{0302}"), "Ệ"); assert_eq!(normalizer.normalize("𝅗𝅥"), "𝅗\u{1D165}"); // Composition exclusion assert_eq!(normalizer.normalize("\u{2126}"), "Ω"); // ohm sign assert_eq!(normalizer.normalize("ベ"), "ベ"); // half-width to full-width, the compose assert_eq!(normalizer.normalize("ペ"), "ペ"); // half-width to full-width, the compose assert_eq!(normalizer.normalize("fi"), "fi"); // ligature expanded assert_eq!(normalizer.normalize("\u{FDFA}"), "\u{0635}\u{0644}\u{0649} \u{0627}\u{0644}\u{0644}\u{0647} \u{0639}\u{0644}\u{064A}\u{0647} \u{0648}\u{0633}\u{0644}\u{0645}"); // ligature expanded assert_eq!(normalizer.normalize("㈎"), "(가)"); // parenthetical expanded and partially recomposed assert_eq!(normalizer.normalize("\u{0345}"), "\u{0345}"); // Iota subscript } #[test] fn test_nfkc_owned() { let owned = ComposingNormalizer::try_new_nfkc_unstable(&icu_normalizer::provider::Baked).unwrap(); let normalizer = owned.as_borrowed(); assert_eq!(normalizer.normalize("a\u{0308}"), "ä"); assert_eq!(normalizer.normalize("A\u{0308}"), "Ä"); assert_eq!(normalizer.normalize("e\u{0323}\u{0302}"), "ệ"); assert_eq!(normalizer.normalize("E\u{0323}\u{0302}"), "Ệ"); assert_eq!(normalizer.normalize("𝅗𝅥"), "𝅗\u{1D165}"); // Composition exclusion assert_eq!(normalizer.normalize("\u{2126}"), "Ω"); // ohm sign assert_eq!(normalizer.normalize("ベ"), "ベ"); // half-width to full-width, the compose assert_eq!(normalizer.normalize("ペ"), "ペ"); // half-width to full-width, the compose assert_eq!(normalizer.normalize("fi"), "fi"); // ligature expanded assert_eq!(normalizer.normalize("\u{FDFA}"), "\u{0635}\u{0644}\u{0649} \u{0627}\u{0644}\u{0644}\u{0647} \u{0639}\u{0644}\u{064A}\u{0647} \u{0648}\u{0633}\u{0644}\u{0645}"); // ligature expanded assert_eq!(normalizer.normalize("㈎"), "(가)"); // parenthetical expanded and partially recomposed assert_eq!(normalizer.normalize("\u{0345}"), "\u{0345}"); // Iota subscript } #[test] fn test_uts46_map_normalize() { let mapper = Uts46MapperBorrowed::new(); assert_eq!( mapper .map_normalize("a\u{0308}".chars()) .collect::(), "ä" ); assert_eq!( mapper .map_normalize("A\u{0308}".chars()) .collect::(), "ä" ); assert_eq!( mapper .map_normalize("e\u{0323}\u{0302}".chars()) .collect::(), "ệ" ); assert_eq!( mapper .map_normalize("E\u{0323}\u{0302}".chars()) .collect::(), "ệ" ); assert_eq!( mapper.map_normalize("𝅗𝅥".chars()).collect::(), "𝅗\u{1D165}" ); // Composition exclusion assert_eq!( mapper.map_normalize("\u{2126}".chars()).collect::(), "ω" ); // ohm sign assert_eq!(mapper.map_normalize("ベ".chars()).collect::(), "ベ"); // half-width to full-width, the compose assert_eq!(mapper.map_normalize("ペ".chars()).collect::(), "ペ"); // half-width to full-width, the compose assert_eq!(mapper.map_normalize("fi".chars()).collect::(), "fi"); // ligature expanded assert_eq!(mapper.map_normalize("\u{FDFA}".chars()).collect::(), "\u{0635}\u{0644}\u{0649} \u{0627}\u{0644}\u{0644}\u{0647} \u{0639}\u{0644}\u{064A}\u{0647} \u{0648}\u{0633}\u{0644}\u{0645}"); // ligature expanded assert_eq!( mapper.map_normalize("㈎".chars()).collect::(), "(가)" ); // parenthetical expanded and partially recomposed // Deviations (UTS 46, 6 Mapping Table Derivation, Step 4) assert_eq!( mapper.map_normalize("\u{200C}".chars()).collect::(), "\u{200C}" ); assert_eq!( mapper.map_normalize("\u{200D}".chars()).collect::(), "\u{200D}" ); assert_eq!(mapper.map_normalize("ß".chars()).collect::(), "ß"); assert_eq!(mapper.map_normalize("ς".chars()).collect::(), "ς"); // Iota subscript assert_eq!( mapper.map_normalize("\u{0345}".chars()).collect::(), "ι" ); // Disallowed assert_eq!( mapper.map_normalize("\u{061C}".chars()).collect::(), "\u{FFFD}" ); // Ignored assert_eq!( mapper .map_normalize("a\u{180B}b".chars()) .collect::(), "ab" ); } #[test] fn test_uts46_owned() { let owned = Uts46Mapper::try_new(&icu_normalizer::provider::Baked).unwrap(); let mapper = owned.as_borrowed(); assert_eq!( mapper .map_normalize("a\u{0308}".chars()) .collect::(), "ä" ); assert_eq!( mapper .map_normalize("A\u{0308}".chars()) .collect::(), "ä" ); assert_eq!( mapper .map_normalize("e\u{0323}\u{0302}".chars()) .collect::(), "ệ" ); assert_eq!( mapper .map_normalize("E\u{0323}\u{0302}".chars()) .collect::(), "ệ" ); assert_eq!( mapper.map_normalize("𝅗𝅥".chars()).collect::(), "𝅗\u{1D165}" ); // Composition exclusion assert_eq!( mapper.map_normalize("\u{2126}".chars()).collect::(), "ω" ); // ohm sign assert_eq!(mapper.map_normalize("ベ".chars()).collect::(), "ベ"); // half-width to full-width, the compose assert_eq!(mapper.map_normalize("ペ".chars()).collect::(), "ペ"); // half-width to full-width, the compose assert_eq!(mapper.map_normalize("fi".chars()).collect::(), "fi"); // ligature expanded assert_eq!(mapper.map_normalize("\u{FDFA}".chars()).collect::(), "\u{0635}\u{0644}\u{0649} \u{0627}\u{0644}\u{0644}\u{0647} \u{0639}\u{0644}\u{064A}\u{0647} \u{0648}\u{0633}\u{0644}\u{0645}"); // ligature expanded assert_eq!( mapper.map_normalize("㈎".chars()).collect::(), "(가)" ); // parenthetical expanded and partially recomposed // Deviations (UTS 46, 6 Mapping Table Derivation, Step 4) assert_eq!( mapper.map_normalize("\u{200C}".chars()).collect::(), "\u{200C}" ); assert_eq!( mapper.map_normalize("\u{200D}".chars()).collect::(), "\u{200D}" ); assert_eq!(mapper.map_normalize("ß".chars()).collect::(), "ß"); assert_eq!(mapper.map_normalize("ς".chars()).collect::(), "ς"); // Iota subscript assert_eq!( mapper.map_normalize("\u{0345}".chars()).collect::(), "ι" ); // Disallowed assert_eq!( mapper.map_normalize("\u{061C}".chars()).collect::(), "\u{FFFD}" ); // Ignored assert_eq!( mapper .map_normalize("a\u{180B}b".chars()) .collect::(), "ab" ); } #[test] fn test_uts46_normalize_validate() { let mapper = Uts46MapperBorrowed::new(); assert_eq!( mapper .normalize_validate("a\u{0308}".chars()) .collect::(), "ä" ); assert_eq!( mapper .normalize_validate("A\u{0308}".chars()) .collect::(), "ä" ); assert_eq!( mapper .normalize_validate("e\u{0323}\u{0302}".chars()) .collect::(), "ệ" ); assert_eq!( mapper .normalize_validate("E\u{0323}\u{0302}".chars()) .collect::(), "ệ" ); assert_eq!( mapper.normalize_validate("𝅗𝅥".chars()).collect::(), "𝅗\u{1D165}" ); // Composition exclusion assert_eq!( mapper .normalize_validate("\u{2126}".chars()) .collect::(), "ω" ); // ohm sign assert_eq!( mapper.normalize_validate("ベ".chars()).collect::(), "ベ" ); // half-width to full-width, the compose assert_eq!( mapper.normalize_validate("ペ".chars()).collect::(), "ペ" ); // half-width to full-width, the compose assert_eq!( mapper.normalize_validate("fi".chars()).collect::(), "fi" ); // ligature expanded assert_eq!(mapper.normalize_validate("\u{FDFA}".chars()).collect::(), "\u{0635}\u{0644}\u{0649} \u{0627}\u{0644}\u{0644}\u{0647} \u{0639}\u{0644}\u{064A}\u{0647} \u{0648}\u{0633}\u{0644}\u{0645}"); // ligature expanded assert_eq!( mapper.normalize_validate("㈎".chars()).collect::(), "(가)" ); // parenthetical expanded and partially recomposed // Deviations (UTS 46, 6 Mapping Table Derivation, Step 4) assert_eq!( mapper .normalize_validate("\u{200C}".chars()) .collect::(), "\u{200C}" ); assert_eq!( mapper .normalize_validate("\u{200D}".chars()) .collect::(), "\u{200D}" ); assert_eq!( mapper.normalize_validate("ß".chars()).collect::(), "ß" ); assert_eq!( mapper.normalize_validate("ς".chars()).collect::(), "ς" ); // Iota subscript assert_eq!( mapper .normalize_validate("\u{0345}".chars()) .collect::(), "ι" ); // Disallowed assert_eq!( mapper .normalize_validate("\u{061C}".chars()) .collect::(), "\u{FFFD}" ); // Ignored assert_eq!( mapper .normalize_validate("a\u{180B}b".chars()) .collect::(), "a\u{FFFD}b" ); } type StackString = arraystring::ArrayString; #[test] fn test_nfd_str_to() { let normalizer = DecomposingNormalizerBorrowed::new_nfd(); let mut buf = StackString::new(); assert!(normalizer.normalize_to("ä", &mut buf).is_ok()); assert_eq!(&buf, "a\u{0308}"); buf.clear(); assert!(normalizer.normalize_to("ệ", &mut buf).is_ok()); assert_eq!(&buf, "e\u{0323}\u{0302}"); } #[test] fn test_nfd_utf8_to() { let normalizer = DecomposingNormalizerBorrowed::new_nfd(); let mut buf = StackString::new(); assert!(normalizer .normalize_utf8_to("ä".as_bytes(), &mut buf) .is_ok()); assert_eq!(&buf, "a\u{0308}"); buf.clear(); assert!(normalizer .normalize_utf8_to("ệ".as_bytes(), &mut buf) .is_ok()); assert_eq!(&buf, "e\u{0323}\u{0302}"); } type StackVec = arrayvec::ArrayVec; #[test] fn test_nfd_utf16_to() { let normalizer = DecomposingNormalizerBorrowed::new_nfd(); let mut buf = StackVec::new(); assert!(normalizer .normalize_utf16_to([0x00E4u16].as_slice(), &mut buf) .is_ok()); assert_eq!(&buf, [0x0061u16, 0x0308u16].as_slice()); buf.clear(); assert!(normalizer .normalize_utf16_to([0x1EC7u16].as_slice(), &mut buf) .is_ok()); assert_eq!(&buf, [0x0065u16, 0x0323u16, 0x0302u16].as_slice()); } #[test] fn test_nfc_str_to() { let normalizer = ComposingNormalizerBorrowed::new_nfc(); let mut buf = StackString::new(); assert!(normalizer.normalize_to("a\u{0308}", &mut buf).is_ok()); assert_eq!(&buf, "ä"); buf.clear(); assert!(normalizer .normalize_to("e\u{0323}\u{0302}", &mut buf) .is_ok()); assert_eq!(&buf, "ệ"); } #[test] fn test_nfc_utf8_to() { let normalizer = ComposingNormalizerBorrowed::new_nfc(); let mut buf = StackString::new(); assert!(normalizer .normalize_utf8_to("a\u{0308}".as_bytes(), &mut buf) .is_ok()); assert_eq!(&buf, "ä"); buf.clear(); assert!(normalizer .normalize_utf8_to("e\u{0323}\u{0302}".as_bytes(), &mut buf) .is_ok()); assert_eq!(&buf, "ệ"); } #[test] fn test_nfc_utf16_to() { let normalizer = ComposingNormalizerBorrowed::new_nfc(); let mut buf = StackVec::new(); assert!(normalizer .normalize_utf16_to([0x0061u16, 0x0308u16].as_slice(), &mut buf) .is_ok()); assert_eq!(&buf, [0x00E4u16].as_slice()); buf.clear(); assert!(normalizer .normalize_utf16_to([0x0065u16, 0x0323u16, 0x0302u16].as_slice(), &mut buf) .is_ok()); assert_eq!(&buf, [0x1EC7u16].as_slice()); } #[test] fn test_nfc_utf8_to_errors() { let normalizer = ComposingNormalizerBorrowed::new_nfc(); let mut buf = StackString::new(); assert!(normalizer .normalize_utf8_to(b"\xFFa\xCC\x88\xFF", &mut buf) .is_ok()); assert_eq!(&buf, "\u{FFFD}ä\u{FFFD}"); buf.clear(); assert!(normalizer .normalize_utf8_to(b"\x80e\xCC\xA3\xCC\x82\x80", &mut buf) .is_ok()); assert_eq!(&buf, "\u{FFFD}ệ\u{FFFD}"); buf.clear(); assert!(normalizer .normalize_utf8_to(b"aaa\xFFaaa\xFFaaa", &mut buf) .is_ok()); assert_eq!(&buf, "aaa\u{FFFD}aaa\u{FFFD}aaa"); buf.clear(); assert!(normalizer .normalize_utf8_to(b"aaa\xE2\x98aaa\xE2\x98aaa", &mut buf) .is_ok()); assert_eq!(&buf, "aaa\u{FFFD}aaa\u{FFFD}aaa"); } #[test] fn test_nfd_utf8_to_errors() { let normalizer = DecomposingNormalizerBorrowed::new_nfd(); let mut buf = StackString::new(); assert!(normalizer .normalize_utf8_to(b"\xFF\xC3\xA4\xFF", &mut buf) .is_ok()); assert_eq!(&buf, "\u{FFFD}a\u{0308}\u{FFFD}"); buf.clear(); assert!(normalizer .normalize_utf8_to(b"\x80\xE1\xBB\x87\x80", &mut buf) .is_ok()); assert_eq!(&buf, "\u{FFFD}e\u{0323}\u{0302}\u{FFFD}"); buf.clear(); assert!(normalizer .normalize_utf8_to(b"aaa\xFFaaa\xFFaaa", &mut buf) .is_ok()); assert_eq!(&buf, "aaa\u{FFFD}aaa\u{FFFD}aaa"); buf.clear(); assert!(normalizer .normalize_utf8_to(b"aaa\xE2\x98aaa\xE2\x98aaa", &mut buf) .is_ok()); assert_eq!(&buf, "aaa\u{FFFD}aaa\u{FFFD}aaa"); } #[test] fn test_nfc_utf16_to_errors() { let normalizer = ComposingNormalizerBorrowed::new_nfc(); let mut buf = StackVec::new(); assert!(normalizer .normalize_utf16_to([0xD800u16, 0x0061u16, 0x0308u16].as_slice(), &mut buf) .is_ok()); assert_eq!(&buf, [0xFFFDu16, 0x00E4u16].as_slice()); buf.clear(); assert!(normalizer .normalize_utf16_to([0xDC00u16, 0x0061u16, 0x0308u16].as_slice(), &mut buf) .is_ok()); assert_eq!(&buf, [0xFFFDu16, 0x00E4u16].as_slice()); buf.clear(); assert!(normalizer .normalize_utf16_to( [0x0061u16, 0xD800u16, 0x0061u16, 0x0308u16].as_slice(), &mut buf ) .is_ok()); assert_eq!(&buf, [0x0061u16, 0xFFFDu16, 0x00E4u16].as_slice()); buf.clear(); assert!(normalizer .normalize_utf16_to( [0x0061u16, 0xDC00u16, 0x0061u16, 0x0308u16].as_slice(), &mut buf ) .is_ok()); assert_eq!(&buf, [0x0061u16, 0xFFFDu16, 0x00E4u16].as_slice()); buf.clear(); assert!(normalizer .normalize_utf16_to( [0x0061u16, 0xD800u16, 0x0061u16, 0x0308u16, 0xD800u16].as_slice(), &mut buf ) .is_ok()); assert_eq!( &buf, [0x0061u16, 0xFFFDu16, 0x00E4u16, 0xFFFDu16].as_slice() ); buf.clear(); assert!(normalizer .normalize_utf16_to( [0x0061u16, 0xDC00u16, 0x0061u16, 0x0308u16, 0xDC00u16].as_slice(), &mut buf ) .is_ok()); assert_eq!( &buf, [0x0061u16, 0xFFFDu16, 0x00E4u16, 0xFFFDu16].as_slice() ); buf.clear(); assert!(normalizer .normalize_utf16_to( [0x0061u16, 0xD800u16, 0x0061u16, 0x0061u16, 0xD800u16].as_slice(), &mut buf ) .is_ok()); assert_eq!( &buf, [0x0061u16, 0xFFFDu16, 0x0061u16, 0x0061u16, 0xFFFDu16].as_slice() ); buf.clear(); assert!(normalizer .normalize_utf16_to( [0x0061u16, 0xDC00u16, 0x0061u16, 0x0061u16, 0xDC00u16].as_slice(), &mut buf ) .is_ok()); assert_eq!( &buf, [0x0061u16, 0xFFFDu16, 0x0061u16, 0x0061u16, 0xFFFDu16].as_slice() ); buf.clear(); assert!(normalizer .normalize_utf16_to( [0x0061u16, 0xD800u16, 0x0308u16, 0xD800u16].as_slice(), &mut buf ) .is_ok()); assert_eq!( &buf, [0x0061u16, 0xFFFDu16, 0x0308u16, 0xFFFDu16].as_slice() ); buf.clear(); assert!(normalizer .normalize_utf16_to( [0x0061u16, 0xDC00u16, 0x0308u16, 0xDC00u16].as_slice(), &mut buf ) .is_ok()); assert_eq!( &buf, [0x0061u16, 0xFFFDu16, 0x0308u16, 0xFFFDu16].as_slice() ); } #[test] fn test_nfd_utf16_to_errors() { let normalizer = DecomposingNormalizerBorrowed::new_nfd(); let mut buf = StackVec::new(); assert!(normalizer .normalize_utf16_to([0xD800u16, 0x00E4u16].as_slice(), &mut buf) .is_ok()); assert_eq!(&buf, [0xFFFDu16, 0x0061u16, 0x0308u16].as_slice()); buf.clear(); assert!(normalizer .normalize_utf16_to([0xDC00u16, 0x00E4u16].as_slice(), &mut buf) .is_ok()); assert_eq!(&buf, [0xFFFDu16, 0x0061u16, 0x0308u16].as_slice()); buf.clear(); assert!(normalizer .normalize_utf16_to([0x0061u16, 0xD800u16, 0x00E4u16].as_slice(), &mut buf) .is_ok()); assert_eq!( &buf, [0x0061u16, 0xFFFDu16, 0x0061u16, 0x0308u16].as_slice() ); buf.clear(); assert!(normalizer .normalize_utf16_to([0x0061u16, 0xDC00u16, 0x00E4u16].as_slice(), &mut buf) .is_ok()); assert_eq!( &buf, [0x0061u16, 0xFFFDu16, 0x0061u16, 0x0308u16].as_slice() ); buf.clear(); assert!(normalizer .normalize_utf16_to( [0x0061u16, 0xD800u16, 0x00E4u16, 0xD800u16].as_slice(), &mut buf ) .is_ok()); assert_eq!( &buf, [0x0061u16, 0xFFFDu16, 0x0061u16, 0x0308u16, 0xFFFDu16].as_slice() ); buf.clear(); assert!(normalizer .normalize_utf16_to( [0x0061u16, 0xDC00u16, 0x00E4u16, 0xDC00u16].as_slice(), &mut buf ) .is_ok()); assert_eq!( &buf, [0x0061u16, 0xFFFDu16, 0x0061u16, 0x0308u16, 0xFFFDu16].as_slice() ); buf.clear(); assert!(normalizer .normalize_utf16_to( [0x0061u16, 0xD800u16, 0x0061u16, 0x0061u16, 0xD800u16].as_slice(), &mut buf ) .is_ok()); assert_eq!( &buf, [0x0061u16, 0xFFFDu16, 0x0061u16, 0x0061u16, 0xFFFDu16].as_slice() ); buf.clear(); assert!(normalizer .normalize_utf16_to( [0x0061u16, 0xDC00u16, 0x0061u16, 0x0061u16, 0xDC00u16].as_slice(), &mut buf ) .is_ok()); assert_eq!( &buf, [0x0061u16, 0xFFFDu16, 0x0061u16, 0x0061u16, 0xFFFDu16].as_slice() ); buf.clear(); assert!(normalizer .normalize_utf16_to( [0x0061u16, 0xD800u16, 0x0308u16, 0xD800u16].as_slice(), &mut buf ) .is_ok()); assert_eq!( &buf, [0x0061u16, 0xFFFDu16, 0x0308u16, 0xFFFDu16].as_slice() ); buf.clear(); assert!(normalizer .normalize_utf16_to( [0x0061u16, 0xDC00u16, 0x0308u16, 0xDC00u16].as_slice(), &mut buf ) .is_ok()); assert_eq!( &buf, [0x0061u16, 0xFFFDu16, 0x0308u16, 0xFFFDu16].as_slice() ); } use atoi::FromRadix16; use icu_properties::props::CanonicalCombiningClass; /// Parse five semicolon-terminated strings consisting of space-separated hexadecimal scalar values fn parse_hex(mut hexes: &[u8]) -> [StackString; 5] { let mut strings = [ StackString::new(), StackString::new(), StackString::new(), StackString::new(), StackString::new(), ]; let mut current = 0; loop { let (scalar, mut offset) = u32::from_radix_16(hexes); let c = core::char::from_u32(scalar).unwrap(); strings[current].try_push(c).unwrap(); match hexes[offset] { b';' => { current += 1; if current == strings.len() { return strings; } offset += 1; } b' ' => { offset += 1; } _ => { panic!("Bad format: Garbage"); } } hexes = &hexes[offset..]; } } #[test] fn test_conformance() { let nfd = DecomposingNormalizerBorrowed::new_nfd(); let nfkd = DecomposingNormalizerBorrowed::new_nfkd(); let nfc = ComposingNormalizerBorrowed::new_nfc(); let nfkc = ComposingNormalizerBorrowed::new_nfkc(); let mut prev = 0u32; let mut part = 0u8; let data = include_bytes!("data/NormalizationTest.txt"); let lines = data.split(|b| b == &b'\n'); for line in lines { if line.is_empty() { continue; } if line.starts_with(b"#") { continue; } if line.starts_with(&b"@Part"[..]) { part = line[5] - b'0'; if part == 2 { for u in prev + 1..=0x10FFFF { if let Some(c) = char::from_u32(u) { assert!(nfd .normalize_iter(core::iter::once(c)) .eq(core::iter::once(c))); assert!(nfkd .normalize_iter(core::iter::once(c)) .eq(core::iter::once(c))); assert!(nfc .normalize_iter(core::iter::once(c)) .eq(core::iter::once(c))); assert!(nfkc .normalize_iter(core::iter::once(c)) .eq(core::iter::once(c))); } } } continue; } let strings = parse_hex(line); // 0: source // 1: NFC // 2: NFD // 3: NFKC // 4: NFKD if part == 1 { let mut iter = strings[0].chars(); let current = iter.next().unwrap(); assert_eq!(iter.next(), None); let current_u = u32::from(current); for u in prev + 1..current_u { if let Some(c) = char::from_u32(u) { assert!(nfd .normalize_iter(core::iter::once(c)) .eq(core::iter::once(c))); assert!(nfkd .normalize_iter(core::iter::once(c)) .eq(core::iter::once(c))); assert!(nfc .normalize_iter(core::iter::once(c)) .eq(core::iter::once(c))); assert!(nfkc .normalize_iter(core::iter::once(c)) .eq(core::iter::once(c))); } } prev = current_u; } // NFC assert!(nfc .normalize_iter(strings[0].chars()) .eq(strings[1].chars())); assert!(nfc .normalize_iter(strings[1].chars()) .eq(strings[1].chars())); assert!(nfc .normalize_iter(strings[2].chars()) .eq(strings[1].chars())); assert!(nfc .normalize_iter(strings[3].chars()) .eq(strings[3].chars())); assert!(nfc .normalize_iter(strings[4].chars()) .eq(strings[3].chars())); // NFD assert!(nfd .normalize_iter(strings[0].chars()) .eq(strings[2].chars())); assert!(nfd .normalize_iter(strings[1].chars()) .eq(strings[2].chars())); assert!(nfd .normalize_iter(strings[2].chars()) .eq(strings[2].chars())); assert!(nfd .normalize_iter(strings[3].chars()) .eq(strings[4].chars())); assert!(nfd .normalize_iter(strings[4].chars()) .eq(strings[4].chars())); // NFKC assert!(nfkc .normalize_iter(strings[0].chars()) .eq(strings[3].chars())); assert!(nfkc .normalize_iter(strings[1].chars()) .eq(strings[3].chars())); assert!(nfkc .normalize_iter(strings[2].chars()) .eq(strings[3].chars())); assert!(nfkc .normalize_iter(strings[3].chars()) .eq(strings[3].chars())); assert!(nfkc .normalize_iter(strings[4].chars()) .eq(strings[3].chars())); // NFKD assert!(nfkd .normalize_iter(strings[0].chars()) .eq(strings[4].chars())); assert!(nfkd .normalize_iter(strings[1].chars()) .eq(strings[4].chars())); assert!(nfkd .normalize_iter(strings[2].chars()) .eq(strings[4].chars())); assert!(nfkd .normalize_iter(strings[3].chars()) .eq(strings[4].chars())); assert!(nfkd .normalize_iter(strings[4].chars()) .eq(strings[4].chars())); } } // Commented out, because we don't currently have a way to force a no-op set for testing. // #[test] // fn test_hangul() { // use icu_collections::codepointinvlist::{CodePointSet, CodePointSetBuilder}; // use zerofrom::ZeroFrom; // let builder = CodePointSetBuilder::new(); // let set: CodePointSet = builder.build(); // let normalizer: ComposingNormalizer = ComposingNormalizerBorrowed::new_nfc(); // { // let mut norm_iter = normalizer.normalize_iter("A\u{AC00}\u{11A7}".chars()); // // Pessimize passthrough to avoid hiding bugs. // norm_iter // .decomposition // .potential_passthrough_and_not_backward_combining = Some(ZeroFrom::zero_from(&set)); // assert!(norm_iter.eq("A\u{AC00}\u{11A7}".chars())); // } // { // let mut norm_iter = normalizer.normalize_iter("A\u{AC00}\u{11C2}".chars()); // // Pessimize passthrough to avoid hiding bugs. // norm_iter // .decomposition // .potential_passthrough_and_not_backward_combining = Some(ZeroFrom::zero_from(&set)); // assert!(norm_iter.eq("A\u{AC1B}".chars())); // } // } fn str_to_utf16(s: &str, sink: &mut StackVec) { sink.clear(); let mut buf = [0u16; 2]; for c in s.chars() { sink.try_extend_from_slice(c.encode_utf16(&mut buf)) .unwrap(); } } fn char_to_utf16(c: char, sink: &mut StackVec) { sink.clear(); let mut buf = [0u16; 2]; sink.try_extend_from_slice(c.encode_utf16(&mut buf)) .unwrap(); } fn str_to_str(s: &str, sink: &mut StackString) { sink.clear(); sink.try_push_str(s).unwrap(); } fn char_to_str(c: char, sink: &mut StackString) { sink.clear(); sink.try_push(c).unwrap(); } #[test] fn test_conformance_utf16() { let nfd = DecomposingNormalizerBorrowed::new_nfd(); let nfkd = DecomposingNormalizerBorrowed::new_nfkd(); let nfc = ComposingNormalizerBorrowed::new_nfc(); let nfkc = ComposingNormalizerBorrowed::new_nfkc(); let mut input = StackVec::new(); let mut normalized = StackVec::new(); let mut expected = StackVec::new(); let mut prev = 0u32; let mut part = 0u8; let data = include_bytes!("data/NormalizationTest.txt"); let lines = data.split(|b| b == &b'\n'); for line in lines { if line.is_empty() { continue; } if line.starts_with(b"#") { continue; } if line.starts_with(&b"@Part"[..]) { part = line[5] - b'0'; if part == 2 { for u in prev + 1..=0x10FFFF { if let Some(c) = char::from_u32(u) { normalized.clear(); char_to_utf16(c, &mut input); assert!(nfd.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &input); normalized.clear(); char_to_utf16(c, &mut input); assert!(nfkd.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &input); normalized.clear(); char_to_utf16(c, &mut input); assert!(nfc.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &input); normalized.clear(); char_to_utf16(c, &mut input); assert!(nfkc.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &input); } } } continue; } let strings = parse_hex(line); // 0: source // 1: NFC // 2: NFD // 3: NFKC // 4: NFKD if part == 1 { let mut iter = strings[0].chars(); let current = iter.next().unwrap(); assert_eq!(iter.next(), None); let current_u = u32::from(current); for u in prev + 1..current_u { if let Some(c) = char::from_u32(u) { normalized.clear(); char_to_utf16(c, &mut input); assert!(nfd.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &input); normalized.clear(); char_to_utf16(c, &mut input); assert!(nfkd.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &input); normalized.clear(); char_to_utf16(c, &mut input); assert!(nfc.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &input); normalized.clear(); char_to_utf16(c, &mut input); assert!(nfkc.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &input); } } prev = current_u; } // NFC normalized.clear(); str_to_utf16(&strings[0], &mut input); str_to_utf16(&strings[1], &mut expected); assert!(nfc.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_utf16(&strings[1], &mut input); str_to_utf16(&strings[1], &mut expected); assert!(nfc.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_utf16(&strings[2], &mut input); str_to_utf16(&strings[1], &mut expected); assert!(nfc.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_utf16(&strings[3], &mut input); str_to_utf16(&strings[3], &mut expected); assert!(nfc.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_utf16(&strings[4], &mut input); str_to_utf16(&strings[3], &mut expected); assert!(nfc.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &expected); // NFD normalized.clear(); str_to_utf16(&strings[0], &mut input); str_to_utf16(&strings[2], &mut expected); assert!(nfd.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_utf16(&strings[1], &mut input); str_to_utf16(&strings[2], &mut expected); assert!(nfd.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_utf16(&strings[2], &mut input); str_to_utf16(&strings[2], &mut expected); assert!(nfd.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_utf16(&strings[3], &mut input); str_to_utf16(&strings[4], &mut expected); assert!(nfd.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_utf16(&strings[4], &mut input); str_to_utf16(&strings[4], &mut expected); assert!(nfd.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &expected); // NFKC normalized.clear(); str_to_utf16(&strings[0], &mut input); str_to_utf16(&strings[3], &mut expected); assert!(nfkc.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_utf16(&strings[1], &mut input); str_to_utf16(&strings[3], &mut expected); assert!(nfkc.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_utf16(&strings[2], &mut input); str_to_utf16(&strings[3], &mut expected); assert!(nfkc.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_utf16(&strings[3], &mut input); str_to_utf16(&strings[3], &mut expected); assert!(nfkc.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_utf16(&strings[4], &mut input); str_to_utf16(&strings[3], &mut expected); assert!(nfkc.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &expected); // NFKD normalized.clear(); str_to_utf16(&strings[0], &mut input); str_to_utf16(&strings[4], &mut expected); assert!(nfkd.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_utf16(&strings[1], &mut input); str_to_utf16(&strings[4], &mut expected); assert!(nfkd.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_utf16(&strings[2], &mut input); str_to_utf16(&strings[4], &mut expected); assert!(nfkd.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_utf16(&strings[3], &mut input); str_to_utf16(&strings[4], &mut expected); assert!(nfkd.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_utf16(&strings[4], &mut input); str_to_utf16(&strings[4], &mut expected); assert!(nfkd.normalize_utf16_to(&input, &mut normalized).is_ok()); assert_eq!(&normalized, &expected); } } #[test] fn test_conformance_utf8() { let nfd = DecomposingNormalizerBorrowed::new_nfd(); let nfkd = DecomposingNormalizerBorrowed::new_nfkd(); let nfc = ComposingNormalizerBorrowed::new_nfc(); let nfkc = ComposingNormalizerBorrowed::new_nfkc(); let mut input = StackString::new(); let mut normalized = StackString::new(); let mut expected = StackString::new(); let mut prev = 0u32; let mut part = 0u8; let data = include_bytes!("data/NormalizationTest.txt"); let lines = data.split(|b| b == &b'\n'); for line in lines { if line.is_empty() { continue; } if line.starts_with(b"#") { continue; } if line.starts_with(&b"@Part"[..]) { part = line[5] - b'0'; if part == 2 { for u in prev + 1..=0x10FFFF { if let Some(c) = char::from_u32(u) { normalized.clear(); char_to_str(c, &mut input); assert!(nfd .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &input); normalized.clear(); char_to_str(c, &mut input); assert!(nfkd .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &input); normalized.clear(); char_to_str(c, &mut input); assert!(nfc .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &input); normalized.clear(); char_to_str(c, &mut input); assert!(nfkc .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &input); } } } continue; } let strings = parse_hex(line); // 0: source // 1: NFC // 2: NFD // 3: NFKC // 4: NFKD if part == 1 { let mut iter = strings[0].chars(); let current = iter.next().unwrap(); assert_eq!(iter.next(), None); let current_u = u32::from(current); for u in prev + 1..current_u { if let Some(c) = char::from_u32(u) { normalized.clear(); char_to_str(c, &mut input); assert!(nfd .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &input); normalized.clear(); char_to_str(c, &mut input); assert!(nfkd .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &input); normalized.clear(); char_to_str(c, &mut input); assert!(nfc .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &input); normalized.clear(); char_to_str(c, &mut input); assert!(nfkc .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &input); } } prev = current_u; } // NFC normalized.clear(); str_to_str(&strings[0], &mut input); str_to_str(&strings[1], &mut expected); assert!(nfc .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_str(&strings[1], &mut input); str_to_str(&strings[1], &mut expected); assert!(nfc .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_str(&strings[2], &mut input); str_to_str(&strings[1], &mut expected); assert!(nfc .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_str(&strings[3], &mut input); str_to_str(&strings[3], &mut expected); assert!(nfc .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_str(&strings[4], &mut input); str_to_str(&strings[3], &mut expected); assert!(nfc .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &expected); // NFD normalized.clear(); str_to_str(&strings[0], &mut input); str_to_str(&strings[2], &mut expected); assert!(nfd .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_str(&strings[1], &mut input); str_to_str(&strings[2], &mut expected); assert!(nfd .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_str(&strings[2], &mut input); str_to_str(&strings[2], &mut expected); assert!(nfd .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_str(&strings[3], &mut input); str_to_str(&strings[4], &mut expected); assert!(nfd .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_str(&strings[4], &mut input); str_to_str(&strings[4], &mut expected); assert!(nfd .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &expected); // NFKC normalized.clear(); str_to_str(&strings[0], &mut input); str_to_str(&strings[3], &mut expected); assert!(nfkc .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_str(&strings[1], &mut input); str_to_str(&strings[3], &mut expected); assert!(nfkc .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_str(&strings[2], &mut input); str_to_str(&strings[3], &mut expected); assert!(nfkc .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_str(&strings[3], &mut input); str_to_str(&strings[3], &mut expected); assert!(nfkc .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_str(&strings[4], &mut input); str_to_str(&strings[3], &mut expected); assert!(nfkc .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &expected); // NFKD normalized.clear(); str_to_str(&strings[0], &mut input); str_to_str(&strings[4], &mut expected); assert!(nfkd .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_str(&strings[1], &mut input); str_to_str(&strings[4], &mut expected); assert!(nfkd .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_str(&strings[2], &mut input); str_to_str(&strings[4], &mut expected); assert!(nfkd .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_str(&strings[3], &mut input); str_to_str(&strings[4], &mut expected); assert!(nfkd .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &expected); normalized.clear(); str_to_str(&strings[4], &mut input); str_to_str(&strings[4], &mut expected); assert!(nfkd .normalize_utf8_to(input.as_bytes(), &mut normalized) .is_ok()); assert_eq!(&normalized, &expected); } } #[test] fn test_canonical_composition() { let comp = CanonicalCompositionBorrowed::new(); assert_eq!(comp.compose('a', 'b'), None); // Just two starters assert_eq!(comp.compose('a', '\u{0308}'), Some('ä')); assert_eq!(comp.compose('A', '\u{0308}'), Some('Ä')); assert_eq!(comp.compose('ẹ', '\u{0302}'), Some('ệ')); assert_eq!(comp.compose('Ẹ', '\u{0302}'), Some('Ệ')); assert_eq!(comp.compose('\u{1D157}', '\u{1D165}'), None); // Composition exclusion assert_eq!(comp.compose('ে', 'া'), Some('ো')); // Second is starter; BMP assert_eq!(comp.compose('𑄱', '𑄧'), Some('𑄮')); // Second is starter; non-BMP assert_eq!(comp.compose('ᄀ', 'ᅡ'), Some('가')); // Hangul LV assert_eq!(comp.compose('가', 'ᆨ'), Some('각')); // Hangul LVT } #[test] fn test_canonical_composition_owned() { let owned = CanonicalComposition::try_new_unstable(&icu_normalizer::provider::Baked).unwrap(); let comp = owned.as_borrowed(); assert_eq!(comp.compose('a', 'b'), None); // Just two starters assert_eq!(comp.compose('a', '\u{0308}'), Some('ä')); assert_eq!(comp.compose('A', '\u{0308}'), Some('Ä')); assert_eq!(comp.compose('ẹ', '\u{0302}'), Some('ệ')); assert_eq!(comp.compose('Ẹ', '\u{0302}'), Some('Ệ')); assert_eq!(comp.compose('\u{1D157}', '\u{1D165}'), None); // Composition exclusion assert_eq!(comp.compose('ে', 'া'), Some('ো')); // Second is starter; BMP assert_eq!(comp.compose('𑄱', '𑄧'), Some('𑄮')); // Second is starter; non-BMP assert_eq!(comp.compose('ᄀ', 'ᅡ'), Some('가')); // Hangul LV assert_eq!(comp.compose('가', 'ᆨ'), Some('각')); // Hangul LVT } #[test] fn test_canonical_decomposition() { let decomp = CanonicalDecompositionBorrowed::new(); assert_eq!( decomp.decompose('ä'), Decomposed::Expansion('a', '\u{0308}') ); assert_eq!( decomp.decompose('Ä'), Decomposed::Expansion('A', '\u{0308}') ); assert_eq!( decomp.decompose('ệ'), Decomposed::Expansion('ẹ', '\u{0302}') ); assert_eq!( decomp.decompose('Ệ'), Decomposed::Expansion('Ẹ', '\u{0302}') ); assert_eq!( decomp.decompose('\u{1D15E}'), Decomposed::Expansion('\u{1D157}', '\u{1D165}') ); assert_eq!(decomp.decompose('ো'), Decomposed::Expansion('ে', 'া')); assert_eq!(decomp.decompose('𑄮'), Decomposed::Expansion('𑄱', '𑄧')); assert_eq!(decomp.decompose('가'), Decomposed::Expansion('ᄀ', 'ᅡ')); assert_eq!(decomp.decompose('각'), Decomposed::Expansion('가', 'ᆨ')); assert_eq!(decomp.decompose('\u{212B}'), Decomposed::Singleton('Å')); // ANGSTROM SIGN assert_eq!(decomp.decompose('\u{2126}'), Decomposed::Singleton('Ω')); // OHM SIGN assert_eq!(decomp.decompose('\u{1F71}'), Decomposed::Singleton('ά')); // oxia assert_eq!( decomp.decompose('\u{1F72}'), Decomposed::Expansion('ε', '\u{0300}') ); // not oxia but in the oxia range assert_eq!( decomp.decompose('ά'), Decomposed::Expansion('α', '\u{0301}') ); // tonos } #[test] fn test_canonical_decomposition_owned() { let owned = CanonicalDecomposition::try_new_unstable(&icu_normalizer::provider::Baked).unwrap(); let decomp = owned.as_borrowed(); assert_eq!( decomp.decompose('ä'), Decomposed::Expansion('a', '\u{0308}') ); assert_eq!( decomp.decompose('Ä'), Decomposed::Expansion('A', '\u{0308}') ); assert_eq!( decomp.decompose('ệ'), Decomposed::Expansion('ẹ', '\u{0302}') ); assert_eq!( decomp.decompose('Ệ'), Decomposed::Expansion('Ẹ', '\u{0302}') ); assert_eq!( decomp.decompose('\u{1D15E}'), Decomposed::Expansion('\u{1D157}', '\u{1D165}') ); assert_eq!(decomp.decompose('ো'), Decomposed::Expansion('ে', 'া')); assert_eq!(decomp.decompose('𑄮'), Decomposed::Expansion('𑄱', '𑄧')); assert_eq!(decomp.decompose('가'), Decomposed::Expansion('ᄀ', 'ᅡ')); assert_eq!(decomp.decompose('각'), Decomposed::Expansion('가', 'ᆨ')); assert_eq!(decomp.decompose('\u{212B}'), Decomposed::Singleton('Å')); // ANGSTROM SIGN assert_eq!(decomp.decompose('\u{2126}'), Decomposed::Singleton('Ω')); // OHM SIGN assert_eq!(decomp.decompose('\u{1F71}'), Decomposed::Singleton('ά')); // oxia assert_eq!( decomp.decompose('\u{1F72}'), Decomposed::Expansion('ε', '\u{0300}') ); // not oxia but in the oxia range assert_eq!( decomp.decompose('ά'), Decomposed::Expansion('α', '\u{0301}') ); // tonos } #[test] fn test_ccc() { let map = CanonicalCombiningClassMapBorrowed::new(); for u in 0..=0x10FFFF { assert_eq!( map.get32(u), icu_properties::CodePointMapData::::new().get32(u) ); } } #[test] fn test_ccc_owned() { let owned = CanonicalCombiningClassMap::try_new_unstable(&icu_normalizer::provider::Baked).unwrap(); let map = owned.as_borrowed(); for u in 0..=0x10FFFF { assert_eq!( map.get32(u), icu_properties::CodePointMapData::::new().get32(u) ); } } #[test] fn test_utf16_basic() { let normalizer = ComposingNormalizerBorrowed::new_nfc(); assert_eq!( normalizer.normalize_utf16(&[0x0061]).as_slice(), [0x0061].as_slice() ); assert_eq!( normalizer.normalize_utf16(&[0x0300, 0x0323]).as_slice(), [0x0323, 0x0300].as_slice() ); } #[test] fn test_accented_digraph() { let normalizer = DecomposingNormalizerBorrowed::new_nfkd(); assert_eq!( normalizer.normalize("\u{01C4}\u{0323}"), "DZ\u{0323}\u{030C}" ); assert_eq!( normalizer.normalize("DZ\u{030C}\u{0323}"), "DZ\u{0323}\u{030C}" ); } #[test] fn test_ddd() { let normalizer = DecomposingNormalizerBorrowed::new_nfd(); assert_eq!( normalizer.normalize("\u{0DDD}\u{0334}"), "\u{0DD9}\u{0DCF}\u{0334}\u{0DCA}" ); } #[test] fn test_is_normalized() { let nfd = DecomposingNormalizerBorrowed::new_nfd(); let nfkd = DecomposingNormalizerBorrowed::new_nfkd(); let nfc = ComposingNormalizerBorrowed::new_nfc(); let nfkc = ComposingNormalizerBorrowed::new_nfkc(); let aaa = "aaa"; assert!(nfd.is_normalized(aaa)); assert!(nfkd.is_normalized(aaa)); assert!(nfc.is_normalized(aaa)); assert!(nfkc.is_normalized(aaa)); assert!(nfd.is_normalized_utf8(aaa.as_bytes())); assert!(nfkd.is_normalized_utf8(aaa.as_bytes())); assert!(nfc.is_normalized_utf8(aaa.as_bytes())); assert!(nfkc.is_normalized_utf8(aaa.as_bytes())); let aaa16 = [0x0061u16, 0x0061u16, 0x0061u16].as_slice(); assert!(nfd.is_normalized_utf16(aaa16)); assert!(nfkd.is_normalized_utf16(aaa16)); assert!(nfc.is_normalized_utf16(aaa16)); assert!(nfkc.is_normalized_utf16(aaa16)); let affa = b"a\xFFa"; assert!(nfd.is_normalized_utf8(affa)); assert!(nfkd.is_normalized_utf8(affa)); assert!(nfc.is_normalized_utf8(affa)); assert!(nfkc.is_normalized_utf8(affa)); let a_surrogate_a = [0x0061u16, 0xD800u16, 0x0061u16].as_slice(); assert!(nfd.is_normalized_utf16(a_surrogate_a)); assert!(nfkd.is_normalized_utf16(a_surrogate_a)); assert!(nfc.is_normalized_utf16(a_surrogate_a)); assert!(nfkc.is_normalized_utf16(a_surrogate_a)); let note = "a𝅗\u{1D165}a"; assert!(nfd.is_normalized(note)); assert!(nfkd.is_normalized(note)); assert!(nfc.is_normalized(note)); assert!(nfkc.is_normalized(note)); assert!(nfd.is_normalized_utf8(note.as_bytes())); assert!(nfkd.is_normalized_utf8(note.as_bytes())); assert!(nfc.is_normalized_utf8(note.as_bytes())); assert!(nfkc.is_normalized_utf8(note.as_bytes())); let note16 = [ 0x0061u16, 0xD834u16, 0xDD57u16, 0xD834u16, 0xDD65u16, 0x0061u16, ] .as_slice(); assert!(nfd.is_normalized_utf16(note16)); assert!(nfkd.is_normalized_utf16(note16)); assert!(nfc.is_normalized_utf16(note16)); assert!(nfkc.is_normalized_utf16(note16)); let umlaut = "aäa"; assert!(!nfd.is_normalized(umlaut)); assert!(!nfkd.is_normalized(umlaut)); assert!(nfc.is_normalized(umlaut)); assert!(nfkc.is_normalized(umlaut)); assert!(!nfd.is_normalized_utf8(umlaut.as_bytes())); assert!(!nfkd.is_normalized_utf8(umlaut.as_bytes())); assert!(nfc.is_normalized_utf8(umlaut.as_bytes())); assert!(nfkc.is_normalized_utf8(umlaut.as_bytes())); let umlaut16 = [0x0061u16, 0x00E4u16, 0x0061u16].as_slice(); assert!(!nfd.is_normalized_utf16(umlaut16)); assert!(!nfkd.is_normalized_utf16(umlaut16)); assert!(nfc.is_normalized_utf16(umlaut16)); assert!(nfkc.is_normalized_utf16(umlaut16)); let fraction = "a½a"; assert!(nfd.is_normalized(fraction)); assert!(!nfkd.is_normalized(fraction)); assert!(nfc.is_normalized(fraction)); assert!(!nfkc.is_normalized(fraction)); assert!(nfd.is_normalized_utf8(fraction.as_bytes())); assert!(!nfkd.is_normalized_utf8(fraction.as_bytes())); assert!(nfc.is_normalized_utf8(fraction.as_bytes())); assert!(!nfkc.is_normalized_utf8(fraction.as_bytes())); let fraction16 = [0x0061u16, 0x00BDu16, 0x0061u16].as_slice(); assert!(nfd.is_normalized_utf16(fraction16)); assert!(!nfkd.is_normalized_utf16(fraction16)); assert!(nfc.is_normalized_utf16(fraction16)); assert!(!nfkc.is_normalized_utf16(fraction16)); } #[test] fn test_is_normalized_up_to() { let nfd = DecomposingNormalizerBorrowed::new_nfd(); let nfkd = DecomposingNormalizerBorrowed::new_nfkd(); let nfc = ComposingNormalizerBorrowed::new_nfc(); let nfkc = ComposingNormalizerBorrowed::new_nfkc(); // Check a string slice is normalized up to where is_normalized_up_to reports let check_str = |input: &str| { // Check nfd let up_to = nfd.is_normalized_up_to(input); let (head, tail) = input.split_at(up_to); let mut normalized = String::from(head); let _ = nfd.normalize_to(tail, &mut normalized); assert!(nfd.is_normalized(&normalized)); // Check nfkd let up_to = nfkd.is_normalized_up_to(input); let (head, tail) = input.split_at(up_to); let mut normalized = String::from(head); let _ = nfkd.normalize_to(tail, &mut normalized); assert!(nfkd.is_normalized(&normalized)); // Check nfc let up_to = nfc.is_normalized_up_to(input); let (head, tail) = input.split_at(up_to); let mut normalized = String::from(head); let _ = nfc.normalize_to(tail, &mut normalized); assert!(nfc.is_normalized(&normalized)); // Check nfkc let up_to = nfkc.is_normalized_up_to(input); let (head, tail) = input.split_at(up_to); let mut normalized = String::from(head); let _ = nfkc.normalize_to(tail, &mut normalized); assert!(nfkc.is_normalized(&normalized)); }; // Check a string of UTF8 bytes is normalized up to where is_normalized_up_to reports // note: from_utf8 can panic with invalid UTF8 input let check_utf8 = |input: &[u8]| { // Check nfd let up_to = nfd.is_normalized_utf8_up_to(input); let (head, tail) = input.split_at(up_to); let mut normalized = String::from_utf8(head.to_vec()).unwrap(); let _ = nfd.normalize_utf8_to(tail, &mut normalized); assert!(nfd.is_normalized(&normalized)); // Check nfkd let up_to = nfkd.is_normalized_utf8_up_to(input); let (head, tail) = input.split_at(up_to); let mut normalized = String::from_utf8(head.to_vec()).unwrap(); let _ = nfkd.normalize_utf8_to(tail, &mut normalized); assert!(nfkd.is_normalized(&normalized)); // Check nfc let up_to = nfc.is_normalized_utf8_up_to(input); let (head, tail) = input.split_at(up_to); let mut normalized = String::from_utf8(head.to_vec()).unwrap(); let _ = nfc.normalize_utf8_to(tail, &mut normalized); assert!(nfc.is_normalized(&normalized)); // Check nfkc let up_to = nfkc.is_normalized_utf8_up_to(input); let (head, tail) = input.split_at(up_to); let mut normalized = String::from_utf8(head.to_vec()).unwrap(); let _ = nfkc.normalize_utf8_to(tail, &mut normalized); assert!(nfkc.is_normalized(&normalized)); }; // Check a string of UTF-16 code units is normalized up to where is_normalized_up_to reports let check_utf16 = |input: &[u16]| { // Check nfd let up_to = nfd.is_normalized_utf16_up_to(input); let (head, tail) = input.split_at(up_to); let mut normalized = head.to_vec(); let _ = nfd.normalize_utf16_to(tail, &mut normalized); assert!(nfd.is_normalized_utf16(&normalized)); // Check nfkd let up_to = nfkd.is_normalized_utf16_up_to(input); let (head, tail) = input.split_at(up_to); let mut normalized = head.to_vec(); let _ = nfkd.normalize_utf16_to(tail, &mut normalized); assert!(nfkd.is_normalized_utf16(&normalized)); // Check nfc let up_to = nfc.is_normalized_utf16_up_to(input); let (head, tail) = input.split_at(up_to); let mut normalized = head.to_vec(); let _ = nfc.normalize_utf16_to(tail, &mut normalized); assert!(nfc.is_normalized_utf16(&normalized)); // Check nfkc let up_to = nfkc.is_normalized_utf16_up_to(input); let (head, tail) = input.split_at(up_to); let mut normalized = head.to_vec(); let _ = nfkc.normalize_utf16_to(tail, &mut normalized); assert!(nfkc.is_normalized_utf16(&normalized)); }; let aaa = "aaa"; check_str(aaa); let aaa_utf8 = aaa.as_bytes(); check_utf8(aaa_utf8); let aaa_utf16: Vec = aaa.encode_utf16().collect(); check_utf16(&aaa_utf16); assert!(nfd.is_normalized_up_to(aaa) == aaa.len()); assert!(nfkd.is_normalized_up_to(aaa) == aaa.len()); assert!(nfc.is_normalized_up_to(aaa) == aaa.len()); assert!(nfkc.is_normalized_up_to(aaa) == aaa.len()); assert!(nfd.is_normalized_utf8_up_to(aaa_utf8) == aaa_utf8.len()); assert!(nfkd.is_normalized_utf8_up_to(aaa_utf8) == aaa_utf8.len()); assert!(nfc.is_normalized_utf8_up_to(aaa_utf8) == aaa_utf8.len()); assert!(nfkc.is_normalized_utf8_up_to(aaa_utf8) == aaa_utf8.len()); assert!(nfd.is_normalized_utf16_up_to(&aaa_utf16) == aaa_utf16.len()); assert!(nfkd.is_normalized_utf16_up_to(&aaa_utf16) == aaa_utf16.len()); assert!(nfc.is_normalized_utf16_up_to(&aaa_utf16) == aaa_utf16.len()); assert!(nfkc.is_normalized_utf16_up_to(&aaa_utf16) == aaa_utf16.len()); let note = "a𝅗\u{1D165}a"; check_str(note); let note_utf8 = note.as_bytes(); check_utf8(note_utf8); let note_utf16: Vec = note.encode_utf16().collect(); check_utf16(¬e_utf16); assert!(nfd.is_normalized_up_to(note) == note.len()); assert!(nfkd.is_normalized_up_to(note) == note.len()); assert!(nfc.is_normalized_up_to(note) == note.len()); assert!(nfkc.is_normalized_up_to(note) == note.len()); assert!(nfd.is_normalized_utf8_up_to(note_utf8) == note_utf8.len()); assert!(nfkd.is_normalized_utf8_up_to(note_utf8) == note_utf8.len()); assert!(nfc.is_normalized_utf8_up_to(note_utf8) == note_utf8.len()); assert!(nfkc.is_normalized_utf8_up_to(note_utf8) == note_utf8.len()); assert!(nfd.is_normalized_utf16_up_to(¬e_utf16) == note_utf16.len()); assert!(nfkd.is_normalized_utf16_up_to(¬e_utf16) == note_utf16.len()); assert!(nfc.is_normalized_utf16_up_to(¬e_utf16) == note_utf16.len()); assert!(nfkc.is_normalized_utf16_up_to(¬e_utf16) == note_utf16.len()); let umlaut = "aäa"; check_str(umlaut); let umlaut_utf8 = umlaut.as_bytes(); check_utf8(umlaut_utf8); let umlaut_utf16: Vec = umlaut.encode_utf16().collect(); check_utf16(¨aut_utf16); assert_eq!(nfd.is_normalized_up_to(umlaut), 1); assert_eq!(nfkd.is_normalized_up_to(umlaut), 1); assert_eq!(nfc.is_normalized_up_to(umlaut), 4); assert_eq!(nfkc.is_normalized_up_to(umlaut), 4); assert_eq!(nfd.is_normalized_utf8_up_to(umlaut_utf8), 1); assert_eq!(nfkd.is_normalized_utf8_up_to(umlaut_utf8), 1); assert_eq!(nfc.is_normalized_utf8_up_to(umlaut_utf8), 4); assert_eq!(nfkc.is_normalized_utf8_up_to(umlaut_utf8), 4); assert_eq!(nfd.is_normalized_utf16_up_to(¨aut_utf16), 1); assert_eq!(nfkd.is_normalized_utf16_up_to(¨aut_utf16), 1); assert_eq!(nfc.is_normalized_utf16_up_to(¨aut_utf16), 3); assert_eq!(nfkc.is_normalized_utf16_up_to(¨aut_utf16), 3); let fraction = "a½a"; check_str(fraction); let fraction_utf8 = fraction.as_bytes(); check_utf8(fraction_utf8); let fraction_utf16: Vec = fraction.encode_utf16().collect(); check_utf16(&fraction_utf16); assert_eq!(nfd.is_normalized_up_to(fraction), 4); assert_eq!(nfkd.is_normalized_up_to(fraction), 1); assert_eq!(nfc.is_normalized_up_to(fraction), 4); assert_eq!(nfkc.is_normalized_up_to(fraction), 1); assert_eq!(nfd.is_normalized_utf8_up_to(fraction_utf8), 4); assert_eq!(nfkd.is_normalized_utf8_up_to(fraction_utf8), 1); assert_eq!(nfc.is_normalized_utf8_up_to(fraction_utf8), 4); assert_eq!(nfkc.is_normalized_utf8_up_to(fraction_utf8), 1); assert_eq!(nfd.is_normalized_utf16_up_to(&fraction_utf16), 3); assert_eq!(nfkd.is_normalized_utf16_up_to(&fraction_utf16), 1); assert_eq!(nfc.is_normalized_utf16_up_to(&fraction_utf16), 3); assert_eq!(nfkc.is_normalized_utf16_up_to(&fraction_utf16), 1); let reversed_vietnamese = "e\u{0302}\u{0323}"; check_str(reversed_vietnamese); let reversed_vietnamese_utf8 = reversed_vietnamese.as_bytes(); check_utf8(reversed_vietnamese_utf8); let reversed_vietnamese_utf16: Vec = reversed_vietnamese.encode_utf16().collect(); check_utf16(&reversed_vietnamese_utf16); assert_eq!(nfd.is_normalized_up_to(reversed_vietnamese), 1); assert_eq!(nfkd.is_normalized_up_to(reversed_vietnamese), 1); assert_eq!(nfc.is_normalized_up_to(reversed_vietnamese), 0); assert_eq!(nfkc.is_normalized_up_to(reversed_vietnamese), 0); assert_eq!(nfd.is_normalized_utf8_up_to(reversed_vietnamese_utf8), 1); assert_eq!(nfkd.is_normalized_utf8_up_to(reversed_vietnamese_utf8), 1); assert_eq!(nfc.is_normalized_utf8_up_to(reversed_vietnamese_utf8), 0); assert_eq!(nfkc.is_normalized_utf8_up_to(reversed_vietnamese_utf8), 0); assert_eq!(nfd.is_normalized_utf16_up_to(&reversed_vietnamese_utf16), 1); assert_eq!( nfkd.is_normalized_utf16_up_to(&reversed_vietnamese_utf16), 1 ); assert_eq!(nfc.is_normalized_utf16_up_to(&reversed_vietnamese_utf16), 0); assert_eq!( nfkc.is_normalized_utf16_up_to(&reversed_vietnamese_utf16), 0 ); let truncated_vietnamese = "e\u{0302}"; check_str(truncated_vietnamese); let truncated_vietnamese_utf8 = truncated_vietnamese.as_bytes(); check_utf8(truncated_vietnamese_utf8); let truncated_vietnamese_utf16: Vec = truncated_vietnamese.encode_utf16().collect(); check_utf16(&truncated_vietnamese_utf16); assert_eq!(nfd.is_normalized_up_to(truncated_vietnamese), 3); assert_eq!(nfkd.is_normalized_up_to(truncated_vietnamese), 3); assert_eq!(nfc.is_normalized_up_to(truncated_vietnamese), 0); assert_eq!(nfkc.is_normalized_up_to(truncated_vietnamese), 0); assert_eq!(nfd.is_normalized_utf8_up_to(truncated_vietnamese_utf8), 3); assert_eq!(nfkd.is_normalized_utf8_up_to(truncated_vietnamese_utf8), 3); assert_eq!(nfc.is_normalized_utf8_up_to(truncated_vietnamese_utf8), 0); assert_eq!(nfkc.is_normalized_utf8_up_to(truncated_vietnamese_utf8), 0); assert_eq!( nfd.is_normalized_utf16_up_to(&truncated_vietnamese_utf16), 2 ); assert_eq!( nfkd.is_normalized_utf16_up_to(&truncated_vietnamese_utf16), 2 ); assert_eq!( nfc.is_normalized_utf16_up_to(&truncated_vietnamese_utf16), 0 ); assert_eq!( nfkc.is_normalized_utf16_up_to(&truncated_vietnamese_utf16), 0 ); }