use std::convert::Infallible; use std::path::PathBuf; use criterion::measurement::Measurement; use criterion::{ black_box, criterion_group, criterion_main, BenchmarkGroup, BenchmarkId, Criterion, }; use git_repository::object::tree::diff::{Action, Change}; use git_repository::Id; use imara_diff::intern::InternedInput; use imara_diff::sink::Counter; use imara_diff::Algorithm; fn extract_diff(change: &Change) -> Option<(Vec, Vec)> { use git_repository::object::tree::diff::change::Event::Modification; let (previous_id, id) = match change.event { Modification { previous_entry_mode, previous_id, entry_mode, id, } if previous_entry_mode.is_blob() && entry_mode.is_blob() => (previous_id, id), _ => return None, }; let old = previous_id.object().ok()?.detach().data; let new = id.object().ok()?.detach().data; Some((new, old)) } fn git_tree_diff(from: Id, to: Id, diffs: &mut Vec<(Vec, Vec, usize)>) { let from_tree = from.object().unwrap().peel_to_tree().unwrap(); let to_tree = to.object().unwrap().peel_to_tree().unwrap(); from_tree .changes() .track_filename() .for_each_to_obtain_tree(&to_tree, |change| -> Result<_, Infallible> { if let Some((old, new)) = extract_diff(&change) { let input = InternedInput::new(&*old, &*new); let changes = imara_diff::diff(Algorithm::Myers, &input, Counter::default()).total(); let complexity = changes * (old.len() + new.len()); diffs.push((old, new, complexity)); } Ok(Action::Continue) }) .unwrap(); } pub fn project_root() -> PathBuf { let dir = env!("CARGO_MANIFEST_DIR"); let mut res = PathBuf::from(dir); while !res.join("README.md").exists() { res = res .parent() .expect("reached fs root without finding project root") .to_owned() } res } fn bench_repo(c: &mut Criterion, name: &str, tag2: &str, tag1: &str, num_commits: usize) { let path = project_root().join("bench_data").join("repos").join(name); let repo = git_repository::open(path).unwrap(); let tag1 = repo .find_reference(tag1) .unwrap() .into_fully_peeled_id() .unwrap(); let tag2 = repo .find_reference(tag2) .unwrap() .into_fully_peeled_id() .unwrap(); let mut diffs = Vec::new(); git_tree_diff(tag1, tag2, &mut diffs); let mut last_commit = tag2; tag2.object() .unwrap() .into_commit() .ancestors() .all() .unwrap() .take(num_commits as usize) .for_each(|parent| { let parent = parent.unwrap(); git_tree_diff(last_commit, parent, &mut diffs); last_commit = parent; }); diffs.sort_unstable_by_key(|(_, _, complexity)| *complexity); if std::env::var("PLOT").is_ok() { let mut group = c.benchmark_group(format!("{name}_plot")); group.sample_size(15); bench_file_diffs(group, &diffs, 12, true); } else { bench_file_diffs(c.benchmark_group(name), &diffs, 2, false); } } fn bench_file_diffs( mut group: BenchmarkGroup, files: &[(Vec, Vec, usize)], num_chunks: usize, compare_to_similar: bool, ) { let mut run = |name, f: fn(&[u8], &[u8]) -> usize| { let mut i = 0; for chunk in files.chunks((files.len() + num_chunks - 1) / num_chunks) { let mut average_complexity: usize = chunk.iter().map(|(_, _, it)| *it).sum(); average_complexity /= chunk.len(); println!("benchmarking {i}..{}/{}", i + chunk.len(), files.len()); i += chunk.len(); group.bench_function( BenchmarkId::new(name, format!("{average_complexity}::{}", chunk.len())), |b| { b.iter(|| { for (old, new, _) in chunk { // myers algorithm is O(ND) where D is the length of the (minimal) edit script and N the sum of file lengths // we use that as an x axis to find a meaningful way to plot a black_box(f(old, new)); } }); }, ); } }; run("imara_diff-histogram", |file1, file2| { let input = InternedInput::new(file1, file2); imara_diff::diff(Algorithm::Histogram, &input, Counter::default()).total() }); run("imara_diff-myers", |file1, file2| { let input = InternedInput::new(file1, file2); imara_diff::diff(Algorithm::Myers, &input, Counter::default()).total() }); if compare_to_similar { run("similar", |file1, file2| { let diff = similar::utils::diff_lines(similar::Algorithm::Myers, file1, file2); diff.len() }); } group.finish(); } fn rust(c: &mut Criterion) { bench_repo(c, "rust", "1.64.0", "1.50.0", 30); } fn vscode(c: &mut Criterion) { bench_repo(c, "vscode", "1.72.2", "1.41.0", 30); } fn linux(c: &mut Criterion) { bench_repo(c, "linux", "v6.0", "v5.7", 30); } fn helix(c: &mut Criterion) { bench_repo(c, "helix", "22.08.1", "v0.5.0", 30); } criterion_group!(realworld_repos, helix, rust, vscode, linux); criterion_main!(realworld_repos);