//! This package contains an implementation of
//! [BFGS](https://en.wikipedia.org/w/index.php?title=BFGS_method), an algorithm for minimizing
//! convex twice-differentiable functions.
//!
//! BFGS is explained at a high level in
//! [the blog post](https://paulkernfeld.com/2018/08/06/rust-needs-bfgs.html) introducing this
//! package.
//!
//! In this example, we minimize a 2d function:
//!
//! ```rust
//! extern crate my_bfgs as bfgs;
//! extern crate ndarray;
//!
//! use ndarray::{Array, Array1};
//!
//! fn main() {
//!     let x0 = Array::from_vec(vec![8.888, 1.234]); // Chosen arbitrarily
//!     let f = |x: &Array1<f64>| x.dot(x);
//!     let g = |x: &Array1<f64>| 2.0 * x;
//!     let x_min = bfgs::bfgs(x0, f, g);
//!     assert_eq!(x_min, Ok(Array::from_vec(vec![0.0, 0.0])));
//! }
//! ```
//!
//! This project uses [cargo-make](https://sagiegurari.github.io/cargo-make/) for builds; to build,
//! run `cargo make all`.
mod benchmark;

use ndarray::{Array1, Array2};
use std::f64::{EPSILON, INFINITY};

// The f64 unit roundoff, 2^-53 (about 1.1e-16).
const F64_MACHINE_EPSILON: f64 = EPSILON / 2.0;

// From the L-BFGS paper (Zhu et al. 1994), 1e7 is for "moderate accuracy," 1e12 for "low
// accuracy," and 10 for "high accuracy." If FACTR is 0, the algorithm will only stop if the
// value of f stops improving completely.
const FACTR: f64 = 1e7;

// This is FTOL from Zhu et al.
const F_TOLERANCE: f64 = FACTR * F64_MACHINE_EPSILON;

// Dumbly try many values of epsilon (powers of two from 2^-20 through 2^19), taking the best one.
// Return the value of epsilon that minimizes f, or Err(()) if no trial value beat infinity
// (e.g. every evaluation of f was NaN).
fn line_search<F>(f: F) -> Result<f64, ()>
where
    F: Fn(f64) -> f64,
{
    let mut best_epsilon = 0.0;
    let mut best_val_f = INFINITY;
    for i in -20..20 {
        let epsilon = 2.0_f64.powi(i);
        let val_f = f(epsilon);
        if val_f < best_val_f {
            best_epsilon = epsilon;
            best_val_f = val_f;
        }
    }
    if best_epsilon == 0.0 {
        Err(())
    } else {
        Ok(best_epsilon)
    }
}

fn new_identity_matrix(len: usize) -> Array2<f64> {
    let mut result = Array2::zeros((len, len));
    for z in result.diag_mut() {
        *z = 1.0;
    }
    result
}

// If the improvement in f is not too much bigger than the rounding error, then call it a
// success. This is the first stopping criterion from Zhu et al.
fn stop(f_x_old: f64, f_x: f64) -> bool {
    let negative_delta_f = f_x_old - f_x;
    let denom = f_x_old.abs().max(f_x.abs()).max(1.0);
    negative_delta_f / denom <= F_TOLERANCE
}
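// A quick sanity check of the stopping rule above. This is a minimal sketch, not part of the
// original suite: the module name is ours, and the numbers assume FACTR = 1e7 as set above.
#[cfg(test)]
mod stop_tests {
    use super::{stop, F_TOLERANCE};

    #[test]
    fn test_stop_criterion() {
        // A relative improvement of 0.25 is far above F_TOLERANCE, so keep iterating.
        assert!(!stop(4.0, 3.0));
        // Zero improvement always satisfies the criterion.
        assert!(stop(1.0, 1.0));
        // An improvement smaller than the scaled tolerance counts as converged.
        assert!(stop(1.0, 1.0 - 0.5 * F_TOLERANCE));
    }
}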
/// Returns a value of `x` that should minimize `f`. `f` must be convex and twice-differentiable.
///
/// - `x0` is an initial guess for `x`. Often this is chosen randomly.
/// - `f` is the objective function.
/// - `g` is the gradient of `f`.
#[allow(clippy::many_single_char_names)]
pub fn bfgs<F, G>(x0: Array1<f64>, f: F, g: G) -> Result<Array1<f64>, ()>
where
    F: Fn(&Array1<f64>) -> f64,
    G: Fn(&Array1<f64>) -> Array1<f64>,
{
    let mut x = x0;
    let mut f_x = f(&x);
    let mut g_x = g(&x);
    let p = x.len();
    assert_eq!(g_x.dim(), x.dim());

    // Initialize the inverse approximate Hessian to the identity matrix
    let mut b_inv = new_identity_matrix(x.len());

    loop {
        // Find the search direction
        let search_dir = -1.0 * b_inv.dot(&g_x);

        // Find a good step size
        let epsilon = line_search(|epsilon| f(&(&search_dir * epsilon + &x))).map_err(|_| ())?;

        // Save the old values
        let f_x_old = f_x;
        let g_x_old = g_x;

        // Take a step in the search direction
        x.scaled_add(epsilon, &search_dir);
        f_x = f(&x);
        g_x = g(&x);

        // Compute deltas between old and new, as column vectors
        let y: Array2<f64> = (&g_x - &g_x_old)
            .into_shape((p, 1))
            .expect("y into_shape failed");
        let s: Array2<f64> = (epsilon * search_dir)
            .into_shape((p, 1))
            .expect("s into_shape failed");
        let sy: f64 = s.t().dot(&y).into_shape(()).expect("sy into_shape failed")[()];
        let ss: Array2<f64> = s.dot(&s.t());

        if stop(f_x_old, f_x) {
            return Ok(x);
        }

        // Update the inverse Hessian approximation
        let to_add: Array2<f64> = ss * (sy + &y.t().dot(&b_inv.dot(&y))) / sy.powi(2);
        let to_sub: Array2<f64> = (b_inv.dot(&y).dot(&s.t()) + s.dot(&y.t().dot(&b_inv))) / sy;
        b_inv = b_inv + to_add - to_sub;
    }
}

#[cfg(test)]
mod tests {
    use crate::bfgs;
    use ndarray::{array, Array1};
    use spectral::assert_that;
    use spectral::numeric::OrderedAssertions;

    // The squared L2 (Euclidean) distance between two points
    fn squared_l2_distance(xs: &Array1<f64>, ys: &Array1<f64>) -> f64 {
        xs.iter().zip(ys.iter()).map(|(x, y)| (y - x).powi(2)).sum()
    }

    #[test]
    fn test_x_squared_1d() {
        let x0 = array![2.0];
        let f = |x: &Array1<f64>| x.iter().map(|xx| xx * xx).sum();
        let g = |x: &Array1<f64>| 2.0 * x;
        let x_min = bfgs(x0, f, g);
        assert_eq!(x_min, Ok(array![0.0]));
    }

    #[test]
    fn test_begin_at_minimum() {
        let x0 = array![0.0];
        let f = |x: &Array1<f64>| x.iter().map(|xx| xx * xx).sum();
        let g = |x: &Array1<f64>| 2.0 * x;
        let x_min = bfgs(x0, f, g);
        assert_eq!(x_min, Ok(array![0.0]));
    }

    // An error because this function has a maximum instead of a minimum
    #[test]
    fn test_negative_x_squared() {
        let x0 = array![2.0];
        let f = |x: &Array1<f64>| x.iter().map(|xx| -xx * xx).sum();
        let g = |x: &Array1<f64>| -2.0 * x;
        let x_min = bfgs(x0, f, g);
        assert_eq!(x_min, Err(()));
    }

    #[test]
    fn test_rosenbrock() {
        let x0 = array![0.0, 0.0];
        let f = |x: &Array1<f64>| (1.0 - x[0]).powi(2) + 100.0 * (x[1] - x[0].powi(2)).powi(2);
        let g = |x: &Array1<f64>| {
            array![
                -400.0 * (x[1] - x[0].powi(2)) * x[0] - 2.0 * (1.0 - x[0]),
                200.0 * (x[1] - x[0].powi(2)),
            ]
        };
        let x_min = bfgs(x0, f, g).expect("Rosenbrock test failed");
        assert_that(&squared_l2_distance(&x_min, &array![1.0, 1.0])).is_less_than(&0.01);
    }
}
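// An additional test, offered as a sketch rather than part of the original suite: a convex
// quadratic whose minimum lies away from the origin, which the tests above do not cover. The
// exact-equality assertion assumes the power-of-two line search lands on the minimum exactly,
// as it does for the quadratics above (the optimal step from x0 = 2 is epsilon = 0.5 = 2^-1).
#[cfg(test)]
mod shifted_minimum_tests {
    use crate::bfgs;
    use ndarray::{array, Array1};

    #[test]
    fn test_shifted_x_squared_1d() {
        // f(x) = (x - 3)^2 has its minimum at x = 3 rather than at the origin.
        let x0 = array![2.0];
        let f = |x: &Array1<f64>| (x[0] - 3.0).powi(2);
        let g = |x: &Array1<f64>| 2.0 * (x - 3.0);
        let x_min = bfgs(x0, f, g);
        assert_eq!(x_min, Ok(array![3.0]));
    }
}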