use core::time::Duration; use criterion::BenchmarkId; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use std::fs; use textdistance::{nstr, str}; fn read_licenses() -> Vec<(String, String)> { let mut licenses: Vec<(String, String)> = Vec::new(); let dir = fs::read_dir("choosealicense.com/_licenses").unwrap(); let mut i = 0; for lfile in dir { let lpath = lfile.unwrap(); let ltext = fs::read_to_string(lpath.path()).unwrap(); let lname = lpath.file_name().to_str().unwrap().to_owned(); // shorten the text to speed up benchmarks run let ltext = ltext[1..200].to_string(); licenses.push((lname, ltext)); // take only a subset of licenses to speed up benchmarks run i += 1; if i == 10 { break; } } licenses } type AlgFn = dyn Fn(&str, &str) -> f64; fn criterion_benchmark(c: &mut Criterion) { benchmark_nstr(c); } fn benchmark_nstr(c: &mut Criterion) { let licenses = read_licenses(); let mut group = c.benchmark_group("nstr"); group.sample_size(10); group.measurement_time(Duration::new(3, 0)); group.warm_up_time(Duration::new(1, 0)); // group.sampling_mode(criterion::SamplingMode::Flat); let algs: Vec<(&str, Box)> = vec![ ("bag", Box::new(nstr::bag)), ("cosine", Box::new(nstr::cosine)), ("damerau_levenshtein", Box::new(nstr::damerau_levenshtein)), ( "damerau_levenshtein_restricted", Box::new(nstr::damerau_levenshtein_restricted), ), ("entropy_ncd", Box::new(nstr::entropy_ncd)), ("hamming", Box::new(nstr::hamming)), ("jaccard", Box::new(nstr::jaccard)), ("jaro_winkler", Box::new(nstr::jaro_winkler)), ("jaro", Box::new(nstr::jaro)), ("lcsseq", Box::new(nstr::lcsseq)), ("lcsstr", Box::new(nstr::lcsstr)), ("length", Box::new(nstr::length)), ("levenshtein", Box::new(nstr::levenshtein)), ("lig3", Box::new(nstr::lig3)), ("mlipns", Box::new(nstr::mlipns)), ("overlap", Box::new(nstr::overlap)), ("prefix", Box::new(nstr::prefix)), ("ratcliff_obershelp", Box::new(nstr::ratcliff_obershelp)), ("roberts", Box::new(nstr::roberts)), ("sift4_common", Box::new(nstr::sift4_common)), ("sift4_simple", Box::new(nstr::sift4_simple)), ("smith_waterman", Box::new(nstr::smith_waterman)), ("sorensen_dice", Box::new(nstr::sorensen_dice)), ("suffix", Box::new(nstr::suffix)), ("tversky", Box::new(nstr::tversky)), ("yujian_bo", Box::new(nstr::yujian_bo)), ]; for (alg_name, alg_fn) in algs { group.bench_with_input( BenchmarkId::from_parameter(alg_name), &licenses, |b, licenses| { b.iter(|| { for (_, l1) in licenses { for (_, l2) in licenses { let s1 = black_box(l1); let s2 = black_box(l2); alg_fn(s1, s2); } } }); }, ); } } criterion_group!(benches, criterion_benchmark); criterion_main!(benches);