use std::fs; use bencher::{benchmark_group, benchmark_main, Bencher}; #[cfg(unix)] const TEXT_PATH: &str = "benches/data/wikipedia-rust.txt"; #[cfg(windows)] const TEXT_PATH: &str = r"benches\data\wikipedia-rust.txt"; static UTF8_CHAR_WIDTH: [usize; 256] = [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1F 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x3F 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x5F 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x7F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x9F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xBF 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xDF 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xEF 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xFF ]; fn retrieve_get_width(bencher: &mut Bencher) { let bytes = fs::read(TEXT_PATH).unwrap(); let length = bytes.len(); bencher.iter(|| { let mut widths = Vec::new(); let mut p = 0; loop { let e = bytes[p]; let width = utf8_width::get_width(e); widths.push(width); p += width; if p == length { break; } } widths }); bencher.bytes = length as u64; } fn retrieve_get_width_assume_valid(bencher: &mut Bencher) { let bytes = fs::read(TEXT_PATH).unwrap(); let length = bytes.len(); bencher.iter(|| { let mut widths = Vec::new(); let mut p = 0; let length = bytes.len(); loop { let e = bytes[p]; let width = unsafe { utf8_width::get_width_assume_valid(e) }; widths.push(width); p += width; if p == length { break; } } widths }); bencher.bytes = length as u64; } fn retrieve_get_width_by_looking_table(bencher: &mut Bencher) { let bytes = fs::read(TEXT_PATH).unwrap(); let length = bytes.len(); bencher.iter(|| { let mut widths = Vec::new(); let mut p = 0; let length = bytes.len(); loop { let e = bytes[p]; let width = UTF8_CHAR_WIDTH[e as usize]; widths.push(width); p += width; if p == length { break; } } widths }); bencher.bytes = length as u64; } fn retrieve_get_width_by_chars(bencher: &mut Bencher) { let text = fs::read_to_string(TEXT_PATH).unwrap(); let length = text.len(); bencher.iter(|| { let mut widths = Vec::new(); for c in text.chars() { widths.push(c.len_utf8()) } widths }); bencher.bytes = length as u64; } benchmark_group!( get_width, retrieve_get_width, retrieve_get_width_assume_valid, retrieve_get_width_by_looking_table, retrieve_get_width_by_chars ); benchmark_main!(get_width);