use std::{ collections::{BTreeSet, VecDeque}, time::Duration, }; use criterion::{ black_box, criterion_group, criterion_main, BenchmarkId, Criterion, SamplingMode, Throughput, }; mod meshes; mod primitives; use forsyth::Config; use meshes::*; use primitives::*; #[repr(C)] #[derive(Clone, Copy)] pub(crate) struct BenchVertex { pub nx: u16, pub ny: u16, pub nz: u16, pub u: u16, pub v: u16, pub pad: u16, } impl BenchVertex { pub fn new(seed: u16) -> Self { Self { nx: seed.wrapping_shl(1), ny: seed.wrapping_shl(3), nz: seed.wrapping_shl(5), u: seed.wrapping_shl(7), v: seed.wrapping_shl(9), pad: seed.wrapping_shl(11), } } } fn perform_work_on_mesh(vertices: &[BenchVertex], indices: &[u32]) -> Result { let mut accum: u32 = 0; for index in indices { if let Some(vertex) = vertices.get(*index as usize) { accum = accum.wrapping_add(vertex.nx as u32); accum = accum.wrapping_add(vertex.ny as u32); accum = accum.wrapping_add(vertex.nz as u32); accum = accum.wrapping_add(vertex.u as u32); accum = accum.wrapping_add(vertex.v as u32); } else { return Err(format!( "failed to access vertex: index {} out of bounds", index )); } } Ok(accum) } fn encode_indices_to_deltas(indices: &[u32]) -> Result, String> { let mut deltas = Vec::with_capacity(indices.len()); let mut last = 0; for index in indices { let index = *index as i32; deltas.push(index - last); last = index; } Ok(deltas) } fn compress_deltas_lz4(indices: &[i32]) -> Vec { let index_bytes = { let mut index_bytes = Vec::with_capacity(indices.len() * 4); for index in indices { index_bytes.extend(&(*index).to_le_bytes()); } index_bytes }; let mut compressed = Vec::with_capacity(128 + (indices.len() * 4)); let _ = lz_fear::CompressionSettings::default() .compress(&mut index_bytes.as_slice(), &mut compressed); compressed } fn compress_indices_lz4(indices: &[u32]) -> Vec { let index_bytes = { let mut index_bytes = Vec::with_capacity(indices.len() * 4); for index in indices { index_bytes.extend(&(*index).to_le_bytes()); } index_bytes }; let mut compressed = Vec::with_capacity(128 + (indices.len() * 4)); let _ = lz_fear::CompressionSettings::default() .compress(&mut index_bytes.as_slice(), &mut compressed); compressed } fn bench_fan(c: &mut Criterion) { let mut group = c.benchmark_group("fans"); group.warm_up_time(Duration::from_secs(1)); group.sample_size(10); let bench_sizes = [400usize, 600, 800]; let bench_inputs: Vec> = bench_sizes.iter().map(|s| make_fan(*s)).collect(); for input in &bench_inputs { is_permutated_mesh( input.as_slice(), forsyth::order_triangles(input.as_slice()) .unwrap() .as_slice(), ) .unwrap(); group.throughput(Throughput::Elements(input.len() as u64)); group.bench_with_input( BenchmarkId::from_parameter(input.len() / 3), input, |b, input| { b.iter(|| forsyth::order_triangles(input).unwrap().len()); }, ); } group.finish(); } fn bench_multi_fans(c: &mut Criterion) { let mut group = c.benchmark_group("multi_fans"); group.warm_up_time(Duration::from_secs(1)); group.sample_size(10); let bench_sizes = [10_usize, 100, 600, 1000]; let bench_inputs: Vec> = bench_sizes.iter().map(|s| make_multi_fan(8, *s)).collect(); for input in &bench_inputs { is_permutated_mesh( input.as_slice(), forsyth::order_triangles(input.as_slice()) .unwrap() .as_slice(), ) .unwrap(); group.throughput(Throughput::Elements(input.len() as u64)); group.bench_with_input( BenchmarkId::from_parameter(input.len() / 3), input, |b, input| { b.iter(|| forsyth::order_triangles(input).unwrap().len()); }, ); } group.finish(); } fn bench_quad_patch(c: &mut Criterion) { let mut group = c.benchmark_group("quad_patch"); group.warm_up_time(Duration::from_secs(1)); group.sample_size(10); let bench_sizes = [4_usize, 8, 32, 52, 64]; let bench_inputs: Vec> = bench_sizes .iter() .map(|s| make_quad_patch(*s, *s)) .collect(); for input in &bench_inputs { is_permutated_mesh( input.as_slice(), forsyth::order_triangles(input.as_slice()) .unwrap() .as_slice(), ) .unwrap(); group.throughput(Throughput::Elements(input.len() as u64)); group.bench_with_input( BenchmarkId::from_parameter(input.len() / 3), input, |b, input| { b.iter(|| forsyth::order_triangles(input).unwrap().len()); }, ); } group.finish(); } fn feed_cache(indices: &[u32], cache_size: usize) -> usize { let mut cache = VecDeque::with_capacity(cache_size); let mut num_misses = 0; for &idx in indices { if !cache.contains(&idx) { num_misses += 1; cache.push_front(idx); if cache.len() > cache_size { cache.pop_back(); } } } num_misses } fn measure_cache(indices: &[u32], cache_size: usize) -> (f64, f64) { let num_misses = feed_cache(indices, cache_size); let num_tris = indices.len() / 3; let num_verts = { let mut verts = BTreeSet::new(); for i in &indices[..] { verts.insert(*i); } verts.len() }; ( num_misses as f64 / num_tris as f64, num_misses as f64 / num_verts as f64, ) } fn bench_caches(c: &mut Criterion) { let mut group = c.benchmark_group("caches"); group.warm_up_time(Duration::from_secs(1)); group.sample_size(10); group.sampling_mode(SamplingMode::Flat); let bench_sources = [ // ("teapot", "./benches/data/teapot.obj"), // ("cornellbox", "./benches/data/CornellBox-Original.obj"), // ("lpshead", "./benches/data/head.OBJ"), // ("moriknob", "./benches/data/testObj.obj"), ("sphere1k", "./benches/data/sphere-cylcoords-1k.obj"), ("sphere4k", "./benches/data/sphere-cylcoords-4k.obj"), ("sphere16k", "./benches/data/sphere-cylcoords-16k.obj"), ]; let bench_inputs: Vec<(String, Vec)> = { let mut bench_inputs: Vec<(String, Vec)> = bench_sources .iter() .map(|src| { let indices = generate_mesh_indices(src.1); is_permutated_mesh( &indices, forsyth::order_triangles(&indices).unwrap().as_slice(), ) .unwrap(); (src.0.to_owned(), indices) }) .collect(); bench_inputs.sort_by(|a, b| { if a.1.len() <= b.1.len() { std::cmp::Ordering::Less } else { std::cmp::Ordering::Greater } }); bench_inputs }; for cache_size in &[16_u16, 32, 64] { for input in &bench_inputs { let mesh_name = &input.0; let mesh_indices = &input.1; group.throughput(Throughput::Elements(mesh_indices.len() as u64)); group.bench_with_input( BenchmarkId::new(format!("{}", cache_size), mesh_name), mesh_indices, |b, mesh_indices| { b.iter(|| { let mut mesh_indices = mesh_indices.clone(); forsyth::order_triangles_inplace( Config::default(), mesh_indices.as_mut_slice(), black_box(*cache_size), ) .unwrap() .len() }); }, ); } } group.finish(); } fn bench_meshes(c: &mut Criterion) { let bench_sources = [ ("teapot", generate_mesh_indices("./benches/data/teapot.obj")), ( "cornellbox", generate_mesh_indices("./benches/data/CornellBox-Original.obj"), ), ("lpshead", generate_mesh_indices("./benches/data/head.OBJ")), ("bunny", generate_mesh_indices("./benches/data/bunny.obj")), /*( "breakfast", generate_mesh_indices("./benches/data/breakfast_room.obj"), ),*/ ( "mitsuba", generate_mesh_indices("./benches/data/mitsuba.obj"), ), ("sponza", generate_mesh_indices("./benches/data/sponza.obj")), ( "moriknob", generate_mesh_indices("./benches/data/testObj.obj"), ), //("bmw", generate_mesh_indices("./benches/data/bmw.obj")), ( "sibenik", generate_mesh_indices("./benches/data/sibenik.obj"), ), ( "sphere1k", generate_mesh_indices("./benches/data/sphere-cylcoords-1k.obj"), ), ( "sphere4k", generate_mesh_indices("./benches/data/sphere-cylcoords-4k.obj"), ), ( "sphere16k", generate_mesh_indices("./benches/data/sphere-cylcoords-16k.obj"), ), ("multi_fans_64x64", make_multi_fan(64, 64)), // ("multi_fans_8x64", make_multi_fan(8, 64)), // ("multi_fans_8x128", make_multi_fan(8, 128)), //("multi_fans_8x256", make_multi_fan(8, 256)), ("multi_fans_128x32", make_multi_fan(128, 32)), ("multi_fans_32x128", make_multi_fan(32, 128)), // ("multi_fans_8x1024", make_multi_fan(8, 1024)), // ("multi_fans_8x2048", make_multi_fan(8, 2048)), // ("multi_fans_8x4096", make_multi_fan(8, 4096)), ("quad_patch_64x64", make_quad_patch(64, 64)), ("quad_patch_32x32", make_quad_patch(32, 32)), ("quad_patch_16x32", make_quad_patch(16, 32)), ("quad_patch_16x16", make_quad_patch(16, 16)), ("quad_strip_1k", make_quad_patch(1, 1024)), //("quad_strip_8k", make_quad_patch(1, 8192)), ("quad_strip_32k", make_quad_patch(1, 32 * 1024)), // ("quad_strip_64k", make_quad_patch(1, 64 * 1024)), ("quad_strip_128k", make_quad_patch(1, 128 * 1024)), ]; let bench_inputs: Vec<(String, Vec)> = { let mut bench_inputs: Vec<(String, Vec)> = bench_sources .iter() .map(|src| { let indices = src.1.clone(); let ordered = forsyth::order_triangles(&indices).unwrap(); is_permutated_mesh(&indices, ordered.as_slice()).unwrap(); for fifo_size in &[8_usize, 16, 32, 64, 1024] { let (initial_acmr, initial_atvr) = measure_cache(&indices, *fifo_size); let (ordered_acmr, ordered_atvr) = measure_cache(ordered.as_slice(), *fifo_size); println!( " FIFO - INITIAL ACMR {}: {} -> {}", &src.0, fifo_size, initial_acmr ); println!( " - ORDERED ACMR {}: {} -> {}", &src.0, fifo_size, ordered_acmr ); println!( " - INITIAL ATVR {}: {} -> {}", &src.0, fifo_size, initial_atvr ); println!( " - ORDERED ATVR {}: {} -> {}", &src.0, fifo_size, ordered_atvr ); } (src.0.to_owned(), indices) }) .collect(); bench_inputs.sort_by(|a, b| { if a.1.len() <= b.1.len() { std::cmp::Ordering::Less } else { std::cmp::Ordering::Greater } }); bench_inputs }; { let mut group = c.benchmark_group("work_with_mesh"); group.warm_up_time(Duration::from_secs(2)); for input in &bench_inputs { let mesh_name = &input.0; let mesh_indices = input.1.as_slice(); let mesh_vertices = synthesize_vertex_buffer(mesh_indices, |seed| { let seed = (seed & 0xFFFF) as u16; BenchVertex::new(seed) }); let mesh_indices_ordered = forsyth::order_triangles(mesh_indices).unwrap(); let fully_ordered_mesh = forsyth::order_vertices(&mesh_vertices, &mesh_indices_ordered).unwrap(); println!("Compressing {}:", mesh_name); for elem in &[ ("mesh", 4 * mesh_indices.len()), ("mesh_lz4", compress_indices_lz4(mesh_indices).len()), ( "mesh_ordT_lz4", compress_indices_lz4(&mesh_indices_ordered).len(), ), ( "mesh_ordT_ordV_lz4", compress_indices_lz4(&fully_ordered_mesh.1).len(), ), ( "mesh_delta_lz4", compress_deltas_lz4(&encode_indices_to_deltas(mesh_indices).unwrap()).len(), ), ( "mesh_ordT_delta_lz4", compress_deltas_lz4(&encode_indices_to_deltas(&mesh_indices_ordered).unwrap()) .len(), ), ( "mesh_ordT_ordV_delta_lz4", compress_deltas_lz4( &encode_indices_to_deltas(fully_ordered_mesh.1.as_slice()).unwrap(), ) .len(), ), ] { println!("{:>40}:{:>20}", elem.0, elem.1); } // measure time to perform work per vertex with different orderings group.throughput(Throughput::Elements(mesh_indices.len() as u64)); group.bench_with_input( BenchmarkId::new("mesh", mesh_name), &(&mesh_vertices, mesh_indices), |b, (mesh_vertices, mesh_indices)| { b.iter(|| { perform_work_on_mesh(mesh_vertices.as_slice(), mesh_indices).unwrap() }); }, ); group.bench_with_input( BenchmarkId::new("mesh_ordT", mesh_name), &(&mesh_vertices, mesh_indices_ordered), |b, (mesh_vertices, mesh_indices)| { b.iter(|| { perform_work_on_mesh(mesh_vertices.as_slice(), mesh_indices).unwrap() }); }, ); group.bench_with_input( BenchmarkId::new("mesh_ordT_ordV", mesh_name), &(fully_ordered_mesh.0, fully_ordered_mesh.1), |b, (mesh_vertices, mesh_indices)| { b.iter(|| { perform_work_on_mesh(mesh_vertices.as_slice(), mesh_indices.as_slice()) .unwrap() }); }, ); } group.finish(); } { let mut group_order_vertices = c.benchmark_group("order_vertices"); group_order_vertices.warm_up_time(Duration::from_secs(1)); group_order_vertices.sample_size(10); group_order_vertices.sampling_mode(SamplingMode::Flat); for input in &bench_inputs { let mesh_name = &input.0; let mesh_indices = input.1.as_slice(); let mesh_vertices = synthesize_vertex_buffer(mesh_indices, |seed| { let seed = (seed & 0xFFFF) as u16; BenchVertex::new(seed) }); // measure time to order vertices group_order_vertices.throughput(Throughput::Elements(mesh_vertices.len() as u64)); group_order_vertices.bench_with_input( BenchmarkId::from_parameter(mesh_name), &(mesh_vertices, mesh_indices), |b, (mesh_vertices, mesh_indices)| { b.iter(|| { forsyth::order_vertices(mesh_vertices, mesh_indices) .unwrap() .0 .len() }); }, ); } group_order_vertices.finish(); } { let mut group_order_triangles = c.benchmark_group("order_triangles"); group_order_triangles.warm_up_time(Duration::from_secs(1)); group_order_triangles.sample_size(10); group_order_triangles.sampling_mode(SamplingMode::Flat); for input in &bench_inputs { let mesh_name = &input.0; let mesh_indices = input.1.as_slice(); // measure the actual order_triangles time group_order_triangles.throughput(Throughput::Elements(mesh_indices.len() as u64)); group_order_triangles.bench_with_input( BenchmarkId::from_parameter(mesh_name), mesh_indices, |b, mesh_indices| { b.iter(|| forsyth::order_triangles(mesh_indices).unwrap().len()); }, ); } group_order_triangles.finish(); } } criterion_group!( benches, bench_meshes, bench_caches, bench_multi_fans, bench_fan, bench_quad_patch ); criterion_main!(benches);