// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. use quickcheck::quickcheck; use unicode_segmentation::UnicodeSegmentation; #[rustfmt::skip] mod testdata; #[test] fn test_graphemes() { use crate::testdata::{TEST_DIFF, TEST_SAME}; pub const EXTRA_DIFF: &[(&str, &[&str], &[&str])] = &[ // Official test suite doesn't include two Prepend chars between two other chars. ( "\u{20}\u{600}\u{600}\u{20}", &["\u{20}", "\u{600}\u{600}\u{20}"], &["\u{20}", "\u{600}", "\u{600}", "\u{20}"], ), // Test for Prepend followed by two Any chars ( "\u{600}\u{20}\u{20}", &["\u{600}\u{20}", "\u{20}"], &["\u{600}", "\u{20}", "\u{20}"], ), ]; pub const EXTRA_SAME: &[(&str, &[&str])] = &[ // family emoji (more than two emoji joined by ZWJ) ( "\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}", &["\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}"], ), // cartwheel emoji followed by two fitzpatrick skin tone modifiers // (test case from issue #19) ( "\u{1F938}\u{1F3FE}\u{1F3FE}", &["\u{1F938}\u{1F3FE}\u{1F3FE}"], ), ]; for &(s, g) in TEST_SAME.iter().chain(EXTRA_SAME) { // test forward iterator let our_extended: Vec<_> = UnicodeSegmentation::graphemes(s, true).collect(); let our_legacy: Vec<_> = UnicodeSegmentation::graphemes(s, false).collect(); assert_eq!(our_extended, g, "{s:?} extended"); assert_eq!(our_legacy, g, "{s:?} legacy"); // test reverse iterator assert!(UnicodeSegmentation::graphemes(s, true) .rev() .eq(g.iter().rev().cloned())); assert!(UnicodeSegmentation::graphemes(s, false) .rev() .eq(g.iter().rev().cloned())); } for &(s, gt, gf) in TEST_DIFF.iter().chain(EXTRA_DIFF) { // test forward iterator assert!(UnicodeSegmentation::graphemes(s, true).eq(gt.iter().cloned())); assert!(UnicodeSegmentation::graphemes(s, false).eq(gf.iter().cloned())); // test reverse iterator assert!(UnicodeSegmentation::graphemes(s, true) .rev() .eq(gt.iter().rev().cloned())); assert!(UnicodeSegmentation::graphemes(s, false) .rev() .eq(gf.iter().rev().cloned())); } // test the indices iterators let s = "a̐éö̲\r\n"; let gr_inds = UnicodeSegmentation::grapheme_indices(s, true).collect::>(); let b: &[_] = &[(0, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")]; assert_eq!(gr_inds, b); let gr_inds = UnicodeSegmentation::grapheme_indices(s, true) .rev() .collect::>(); let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0, "a̐")]; assert_eq!(gr_inds, b); let mut gr_inds_iter = UnicodeSegmentation::grapheme_indices(s, true); { let gr_inds = gr_inds_iter.by_ref(); let e1 = gr_inds.size_hint(); assert_eq!(e1, (1, Some(13))); let c = gr_inds.count(); assert_eq!(c, 4); } let e2 = gr_inds_iter.size_hint(); assert_eq!(e2, (0, Some(0))); // make sure the reverse iterator does the right thing with "\n" at beginning of string let s = "\n\r\n\r"; let gr = UnicodeSegmentation::graphemes(s, true) .rev() .collect::>(); let b: &[_] = &["\r", "\r\n", "\n"]; assert_eq!(gr, b); } #[test] fn test_words() { use crate::testdata::TEST_WORD; // Unicode's official tests don't really test longer chains of flag emoji // TODO This could be improved with more tests like flag emoji with interspersed Extend chars and ZWJ const EXTRA_TESTS: &[(&str, &[&str])] = &[ ( "🇦🇫🇦🇽🇦🇱🇩🇿🇦🇸🇦🇩🇦🇴", &["🇦🇫", "🇦🇽", "🇦🇱", "🇩🇿", "🇦🇸", "🇦🇩", "🇦🇴"], ), ("🇦🇫🇦🇽🇦🇱🇩🇿🇦🇸🇦🇩🇦", &["🇦🇫", "🇦🇽", "🇦🇱", "🇩🇿", "🇦🇸", "🇦🇩", "🇦"]), ( "🇦a🇫🇦🇽a🇦🇱🇩🇿🇦🇸🇦🇩🇦", &["🇦", "a", "🇫🇦", "🇽", "a", "🇦🇱", "🇩🇿", "🇦🇸", "🇦🇩", "🇦"], ), ( "\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}", &["\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}"], ), ("😌👎🏼", &["😌", "👎🏼"]), // perhaps wrong, spaces should not be included? ("hello world", &["hello", " ", "world"]), ("🇨🇦🇨🇭🇿🇲🇿 hi", &["🇨🇦", "🇨🇭", "🇿🇲", "🇿", " ", "hi"]), ]; for &(s, w) in TEST_WORD.iter().chain(EXTRA_TESTS.iter()) { macro_rules! assert_ { ($test:expr, $exp:expr, $name:expr) => { // collect into vector for better diagnostics in failure case let testing = $test.collect::>(); let expected = $exp.collect::>(); assert_eq!( testing, expected, "{} test for testcase ({:?}, {:?}) failed.", $name, s, w ) }; } // test forward iterator assert_!( s.split_word_bounds(), w.iter().cloned(), "Forward word boundaries" ); // test reverse iterator assert_!( s.split_word_bounds().rev(), w.iter().rev().cloned(), "Reverse word boundaries" ); // generate offsets from word string lengths let mut indices = vec![0]; for i in w.iter().cloned().map(|s| s.len()).scan(0, |t, n| { *t += n; Some(*t) }) { indices.push(i); } indices.pop(); let indices = indices; // test forward indices iterator assert_!( s.split_word_bound_indices().map(|(l, _)| l), indices.iter().cloned(), "Forward word indices" ); // test backward indices iterator assert_!( s.split_word_bound_indices().rev().map(|(l, _)| l), indices.iter().rev().cloned(), "Reverse word indices" ); } } #[test] fn test_sentences() { use crate::testdata::TEST_SENTENCE; for &(s, w) in TEST_SENTENCE.iter() { macro_rules! assert_ { ($test:expr, $exp:expr, $name:expr) => { // collect into vector for better diagnostics in failure case let testing = $test.collect::>(); let expected = $exp.collect::>(); assert_eq!( testing, expected, "{} test for testcase ({:?}, {:?}) failed.", $name, s, w ) }; } assert_!( s.split_sentence_bounds(), w.iter().cloned(), "Forward sentence boundaries" ); } } quickcheck! { fn quickcheck_forward_reverse_graphemes_extended(s: String) -> bool { let a = s.graphemes(true).collect::>(); let mut b = s.graphemes(true).rev().collect::>(); b.reverse(); a == b } fn quickcheck_forward_reverse_graphemes_legacy(s: String) -> bool { let a = s.graphemes(false).collect::>(); let mut b = s.graphemes(false).rev().collect::>(); b.reverse(); a == b } fn quickcheck_join_graphemes(s: String) -> bool { let a = s.graphemes(true).collect::(); let b = s.graphemes(false).collect::(); a == s && b == s } fn quickcheck_forward_reverse_words(s: String) -> bool { let a = s.split_word_bounds().collect::>(); let mut b = s.split_word_bounds().rev().collect::>(); b.reverse(); a == b } fn quickcheck_join_words(s: String) -> bool { let a = s.split_word_bounds().collect::(); a == s } }