//! Compares the performance of `UnicodeSegmentation::graphemes` with the standard library's
//! scalar-value-based `std::str::chars`.
//!
//! `std::str::chars` is expected to be faster than `UnicodeSegmentation::graphemes`, since it
//! decodes UTF-8 into scalar values without considering the complexity of grapheme clusters.
//! The question this benchmark answers is how much slower full Unicode handling is.

use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::fs;
use unicode_segmentation::UnicodeSegmentation;

/// Sample texts under `benches/texts/`, one file per language or script.
const FILES: &[&str] = &[
    "arabic",
    "english",
    "hindi",
    "japanese",
    "korean",
    "mandarin",
    "russian",
    "source_code",
];

/// Iterates over extended grapheme clusters, using `black_box` to keep the
/// compiler from optimizing the loop away.
#[inline(always)]
fn grapheme(text: &str) {
    for c in UnicodeSegmentation::graphemes(black_box(text), true) {
        black_box(c);
    }
}

/// Iterates over Unicode scalar values (`char`s) as the baseline.
#[inline(always)]
fn scalar(text: &str) {
    for c in black_box(text).chars() {
        black_box(c);
    }
}

fn bench_all(c: &mut Criterion) {
    let mut group = c.benchmark_group("chars");

    for file in FILES {
        group.bench_with_input(
            BenchmarkId::new("grapheme", file),
            &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
            |b, content| b.iter(|| grapheme(content)),
        );
    }

    for file in FILES {
        group.bench_with_input(
            BenchmarkId::new("scalar", file),
            &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
            |b, content| b.iter(|| scalar(content)),
        );
    }

    group.finish();
}

criterion_group!(benches, bench_all);
criterion_main!(benches);
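
// A minimal sketch (not part of the benchmark itself) illustrating the
// difference being measured: one user-perceived character can span several
// Unicode scalar values, so `graphemes` and `chars` disagree on counts. The
// module and test names below are illustrative additions; note that if this
// bench target sets `harness = false` in Cargo.toml, the test is compiled
// but not collected by `cargo test`, and serves as a compile-checked example.
#[cfg(test)]
mod grapheme_vs_scalar {
    use unicode_segmentation::UnicodeSegmentation;

    #[test]
    fn one_grapheme_two_scalars() {
        // "g" followed by U+0308 COMBINING DIAERESIS renders as a single
        // user-perceived character ("g̈") but occupies two scalar values.
        let s = "g\u{0308}";
        assert_eq!(UnicodeSegmentation::graphemes(s, true).count(), 1);
        assert_eq!(s.chars().count(), 2);
    }
}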