use std::time::Instant; use dfdx::prelude::*; #[cfg(feature = "cuda")] type Dev = Cuda; #[cfg(not(feature = "cuda"))] type Dev = Cpu; type Dtype = f32; type InputShape = Rank4<32, 64, 128, 256>; type Ax = Axis<3>; fn main() { println!("Benchmarking `softmax` {}", std::any::type_name::()); println!("Device {}", std::any::type_name::()); println!("Dtype {}", std::any::type_name::()); println!("Input shape {}", std::any::type_name::()); println!(); let dev: Dev = Default::default(); loop { let img: Tensor = dev.sample_normal(); let start = Instant::now(); let _ = img.softmax::(); dev.synchronize(); let infer_dur = start.elapsed(); let img: Tensor = dev.sample_normal(); let start = Instant::now(); let y = img.leaky_traced().softmax::(); dev.synchronize(); let fwd_dur = start.elapsed(); let start = Instant::now(); let _ = y.sum().backward(); dev.synchronize(); let bwd_dur = start.elapsed(); println!("infer={infer_dur:?}, fwd={fwd_dur:?} bwd={bwd_dur:?}"); } }