// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use criterion::{black_box, criterion_group, criterion_main, Criterion}; use icu_collections::codepointtrie::CodePointTrie; #[path = "tries/mod.rs"] mod tries; #[cfg(feature = "bench")] mod sample_str_lng { // "eng" is all ASCII pub const ENG: &str = "Universal Declaration of Human Rights"; // "pcd" has ASCII mixed with low-BMP code points pub const PCD: &str = "Dèclaråcion dès dreûts d' l'ome po tos lès payîs dè monde"; // "ukr" uses low-BMP code points (less than U+1000) pub const UKR: &str = "ЗАГАЛЬНА ДЕКЛАРАЦІЯ ПРАВ ЛЮДИНІ"; // "yue" uses high-BMP code points (greater than U+1000), so it benefits from fast mode pub const YUE: &str = "世界人权宣言"; // "ccp" exercises supplementary code points pub const CCP: &str = "𑄟𑄚𑄬𑄭𑄃𑄇𑄴𑄇𑄥𑄧𑄁𑄢𑄴 𑄝𑄬𑄇𑄴𑄅𑄚𑄮𑄢𑄴 𑄟𑄧𑄚𑄳𑄢𑄧𑄧𑄇𑄉𑄮𑄌𑄴"; } const SAMPLE_STRING_MIXED: &str = "Dèclaråcion ЗАГАЛЬНА 世界人权宣言 𑄟𑄚𑄬𑄭𑄃𑄇𑄴𑄇𑄥𑄧𑄁𑄢𑄴"; /// A function that returns 100 code points in the desired language fn one_hundred_code_points(sample_str: &str) -> String { sample_str.chars().cycle().take(100).collect() } fn get_trie_small() -> CodePointTrie<'static, u8> { CodePointTrie::try_new( tries::gc_small::HEADER, tries::gc_small::INDEX, tries::gc_small::DATA, ) .unwrap() } #[cfg(feature = "bench")] fn get_trie_fast() -> CodePointTrie<'static, u8> { CodePointTrie::try_new( tries::gc_fast::HEADER, tries::gc_fast::INDEX, tries::gc_fast::DATA, ) .unwrap() } fn overview_bench(c: &mut Criterion) { let s = one_hundred_code_points(SAMPLE_STRING_MIXED); let cpt_small = get_trie_small(); c.bench_function("cpt/overview", |b| { b.iter(|| { black_box(&s) .chars() .map(|c| black_box(&cpt_small).get32(c as u32)) .reduce(|a, b| a.wrapping_add(b)) }); }); #[cfg(feature = "bench")] c.bench_function("cpt/get_range", |b| { b.iter(|| { black_box(&s) .chars() .map(|c| black_box(&cpt_small).get_range(c as u32).unwrap()) .fold(0u32, |acc, ele| { acc.wrapping_add(ele.range.end() - ele.range.start() + ele.value as u32) }) }); }); #[cfg(feature = "bench")] { let cpt_fast = get_trie_fast(); lang_bench(c, &cpt_small, "small/eng", sample_str_lng::ENG); lang_bench(c, &cpt_small, "small/pcd", sample_str_lng::PCD); lang_bench(c, &cpt_small, "small/ukr", sample_str_lng::UKR); lang_bench(c, &cpt_small, "small/yue", sample_str_lng::YUE); lang_bench(c, &cpt_small, "small/ccp", sample_str_lng::CCP); lang_bench(c, &cpt_fast, "fast/eng", sample_str_lng::ENG); lang_bench(c, &cpt_fast, "fast/pcd", sample_str_lng::PCD); lang_bench(c, &cpt_fast, "fast/ukr", sample_str_lng::UKR); lang_bench(c, &cpt_fast, "fast/yue", sample_str_lng::YUE); lang_bench(c, &cpt_fast, "fast/ccp", sample_str_lng::CCP); } } #[cfg(feature = "bench")] fn lang_bench(c: &mut Criterion, cpt: &CodePointTrie, lid: &str, sample_str: &str) { let bench_name = format!("cpt/get/{lid}"); let s = one_hundred_code_points(sample_str); c.bench_function(&bench_name, |b| { b.iter(|| { black_box(&s) .chars() .map(|c| black_box(&cpt).get32(c as u32)) .reduce(|a, b| a.wrapping_add(b)) }); }); } criterion_group!(benches, overview_bench,); criterion_main!(benches);