use dfdx::{ nn::ZeroGrads, prelude::*, tensor::{AutoDevice, Gradients}, }; fn main() { let dev = AutoDevice::default(); type Model = (Linear<2, 5>, ReLU, Linear<5, 10>, Tanh, Linear<10, 20>); let model = dev.build_module::(); let x: Tensor, f32, _> = dev.sample_normal(); // first we call .alloc_grads, which both pre-allocates gradients // and also marks non-parameter gradients as temporary. // this allows .backward() to drop temporary gradients. let mut grads: Gradients = model.alloc_grads(); grads = model.forward(x.trace(grads)).mean().backward(); // backward will return the same gradients object that we passed // into trace() grads = model.forward(x.trace(grads)).mean().backward(); // you can do this as many times as you want! grads = model.forward(x.trace(grads)).mean().backward(); // finally, we can use ZeroGrads to zero out the accumulated gradients model.zero_grads(&mut grads); assert_eq!(grads.get(&model.0.weight).array(), [[0.0; 2]; 5]); }