use anyhow::Result; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use usls::{models::YOLO, DataLoader, Options, Vision, YOLOTask, YOLOVersion}; enum Stage { Pre, Run, Post, Pipeline, } fn yolo_stage_bench( model: &mut YOLO, x: &[image::DynamicImage], stage: Stage, n: u64, ) -> std::time::Duration { let mut t_pre = std::time::Duration::new(0, 0); let mut t_run = std::time::Duration::new(0, 0); let mut t_post = std::time::Duration::new(0, 0); let mut t_pipeline = std::time::Duration::new(0, 0); for _ in 0..n { let t0 = std::time::Instant::now(); let xs = model.preprocess(x).unwrap(); t_pre += t0.elapsed(); let t = std::time::Instant::now(); let xs = model.inference(xs).unwrap(); t_run += t.elapsed(); let t = std::time::Instant::now(); let _ys = black_box(model.postprocess(xs, x).unwrap()); t_post += t.elapsed(); t_pipeline += t0.elapsed(); } match stage { Stage::Pre => t_pre, Stage::Run => t_run, Stage::Post => t_post, Stage::Pipeline => t_pipeline, } } pub fn benchmark_cuda(c: &mut Criterion, h: isize, w: isize) -> Result<()> { let mut group = c.benchmark_group(format!("YOLO ({}-{})", w, h)); group .significance_level(0.05) .sample_size(80) .measurement_time(std::time::Duration::new(20, 0)); let options = Options::default() .with_yolo_version(YOLOVersion::V8) // YOLOVersion: V5, V6, V7, V8, V9, V10, RTDETR .with_yolo_task(YOLOTask::Detect) // YOLOTask: Classify, Detect, Pose, Segment, Obb .with_model("yolo/v8-m-dyn.onnx")? .with_cuda(0) // .with_cpu() .with_dry_run(0) .with_ixx(0, 2, (320, h, 1280).into()) .with_ixx(0, 3, (320, w, 1280).into()) .with_confs(&[0.2, 0.15]); let mut model = YOLO::new(options)?; let xs = [DataLoader::try_read("./assets/bus.jpg")?]; group.bench_function("pre-process", |b| { b.iter_custom(|n| yolo_stage_bench(&mut model, &xs, Stage::Pre, n)) }); group.bench_function("run", |b| { b.iter_custom(|n| yolo_stage_bench(&mut model, &xs, Stage::Run, n)) }); group.bench_function("post-process", |b| { b.iter_custom(|n| yolo_stage_bench(&mut model, &xs, Stage::Post, n)) }); group.bench_function("pipeline", |b| { b.iter_custom(|n| yolo_stage_bench(&mut model, &xs, Stage::Pipeline, n)) }); group.finish(); Ok(()) } pub fn criterion_benchmark(c: &mut Criterion) { // benchmark_cuda(c, 416, 416).unwrap(); benchmark_cuda(c, 640, 640).unwrap(); benchmark_cuda(c, 448, 768).unwrap(); // benchmark_cuda(c, 800, 800).unwrap(); } criterion_group!(benches, criterion_benchmark); criterion_main!(benches);