use std::time::Instant; use dfdx::prelude::*; #[cfg(feature = "cuda")] type Dev = Cuda; #[cfg(not(feature = "cuda"))] type Dev = Cpu; type Model = BatchNorm2D<512>; type Dtype = f32; type InputShape = Rank4<64, 512, 28, 28>; fn main() { println!("Benchmarking `BatchNorm2D`"); println!("Device {}", std::any::type_name::()); println!("Dtype {}", std::any::type_name::()); println!("Input shape {}", std::any::type_name::()); println!(); let dev: Dev = Default::default(); let mut m = dev.build_module::(); let mut grads = m.alloc_grads(); loop { let img: Tensor = dev.sample_normal(); let start = Instant::now(); let _ = m.forward(img.clone()); dev.synchronize(); let infer_dur = start.elapsed(); let start = Instant::now(); let out = m.forward_mut(img.traced(grads)); let loss = out.square().mean(); dev.synchronize(); let fwd_dur = start.elapsed(); let start = Instant::now(); grads = loss.backward(); dev.synchronize(); let bwd_dur = start.elapsed(); println!("infer={infer_dur:?}, fwd={fwd_dur:?} bwd={bwd_dur:?}"); } }