extern crate rand;
extern crate rand_chacha;
extern crate textplots;

extern crate easy_ml;

#[cfg(test)]
mod tests {
    use rand::distributions::Standard;
    use rand::{Rng, SeedableRng};

    use textplots::{Chart, Plot, Shape};

    use easy_ml::distributions::{Gaussian, MultivariateGaussian};
    use easy_ml::matrices::slices::{Slice, Slice2D};
    use easy_ml::matrices::Matrix;

    // 3 steps for bayesian regression
    // 0: have data to model
    // 1: create a model from a prior distribution
    // 2: observe data
    // 3: update beliefs

    const LINES_TO_DRAW: usize = 5;
    const SAMPLES_FOR_DISTRIBUTION: usize = 500;

    /**
     * The function of x without any noise
     */
    fn target(x: f64) -> f64 {
        (x * 1.5) + 3.0
    }

    fn generate_data<R: Rng>(
        random_generator: &mut R,
        variance: f64,
        training_size: usize,
    ) -> (
        Matrix<f64>,
        Matrix<f64>,
        Matrix<f64>,
        Matrix<f64>,
        Matrix<f64>,
        Matrix<f64>,
    ) {
        // create data range from -1 to 1
        let x = Matrix::column((0..100).map(|x| ((x as f64 * 0.01) * 2.0) - 1.0).collect());

        // create y values without any noise
        let y = x.map(|x| target(x));

        // create some random observations in the -1 to 1 range to train on
        let observations = Matrix::column(
            n_random_numbers(random_generator, training_size)
                .iter()
                .map(|x| (x * 2.0) - 1.0)
                .collect(),
        );

        // create noisy target values to train on from these observations
        let normal_distribution = Gaussian::new(0.0, variance);
        let mut random_numbers = n_random_numbers(random_generator, training_size);
        let samples = normal_distribution
            .draw(&mut random_numbers.drain(..), training_size)
            .unwrap();
        let targets = observations.map_with_index(|x, row, _| target(x) + samples[row]);

        // create a design matrix of [1, x] for each x in observations
        let design_matrix = {
            let mut design_matrix = observations.clone();
            design_matrix.insert_column(0, 1.0);
            design_matrix
        };

        let test_design_matrix = {
            let mut test_design_matrix = x.clone();
            test_design_matrix.insert_column(0, 1.0);
            test_design_matrix
        };

        (
            x,
            y,
            test_design_matrix,
            design_matrix,
            targets,
            observations,
        )
    }

    fn n_random_numbers<R: Rng>(random_generator: &mut R, n: usize) -> Vec<f64> {
        random_generator.sample_iter(Standard).take(n).collect()
    }

    fn merge_for_plotting(x: &Matrix<f64>, fx: &Matrix<f64>) -> Vec<(f32, f32)> {
        x.column_iter(0)
            .zip(fx.column_iter(0))
            .map(|(x, y)| (x as f32, y as f32))
            .collect()
    }

    fn sort_and_merge_for_plotting(x: &Matrix<f64>, fx: &Matrix<f64>) -> Vec<(f32, f32)> {
        let mut list: Vec<(f32, f32)> = x
            .column_iter(0)
            .zip(fx.column_iter(0))
            .map(|(x, y)| (x as f32, y as f32))
            .collect();
        list.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
        list
    }

    fn split_for_plotting(pairs: &Matrix<f64>) -> Vec<(f32, f32)> {
        pairs
            .column_iter(0)
            .zip(pairs.column_iter(1))
            .map(|(x, y)| (x as f32, y as f32))
            .collect()
    }

    // model is w0 + w1*x = y = f(x, w) where w will be a 2x1 column vector of w0 and w1
    // hence we need basis functions that transform each row of data [x] into [1, x]
    // so when we take the dot product of x and w we compute w0*1 + w1*x = y

    #[test]
    fn test_bayesian_regression() {
        // use a fixed seed random generator from the rand crate
        let mut random_generator = rand_chacha::ChaCha8Rng::seed_from_u64(10);
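        // A quick sanity check of the model comment above (an added sketch, not part
        // of the original example): the design matrix row [1, x] dotted with the
        // weights column [w0, w1] recovers w0*1 + w1*x, which for the true weights
        // w0 = 3.0 and w1 = 1.5 reproduces target(x) exactly.
        {
            let design_row = Matrix::row(vec![1.0, 0.5]); // [1, x] for x = 0.5
            let true_weights = Matrix::column(vec![3.0, 1.5]); // [w0, w1]
            let f = &design_row * &true_weights; // 1x1 matrix holding w0 + w1*x
            assert_eq!(f.get(0, 0), target(0.5));
        }
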
        // Start by defining the precision (β) of the gaussian noise in the data, which
        // is the inverse of its variance (σ^2). In a real world scenario we cannot know
        // this a priori, though we could still approximate it from the available data.
        // In this example, for simplicity, we define β and then compute the noisy line
        // according to the value we chose.
        let noise_precision = 25.0;
        let noise_variance = 1.0 / noise_precision;

        let (x, y, test_design_matrix, design_matrix, targets, observations) =
            generate_data(&mut random_generator, noise_variance, 20);

        // plot x and y to see the true line and the approximate line from
        // the noisy data
        println!("True x and f(x) and noisy version");
        Chart::new(180, 60, -1.0, 1.0)
            .lineplot(&Shape::Lines(&merge_for_plotting(&x, &y)))
            .lineplot(&Shape::Points(&sort_and_merge_for_plotting(
                &observations,
                &targets,
            )))
            .display();

        // Start with a prior distribution which we will update as we see new data.
        // We use a 0 mean and a scaled identity matrix as the covariance prior, which
        // regularises the bayesian regression towards low values for w0 and w1. To get
        // this regularisation in standard linear regression we would have to add a
        // regularisation parameter, but because bayesian regression starts with a prior
        // we can regularise with the prior instead. If we thought the line had an
        // intercept around 100 rather than 3 then we could start the prior there to
        // help with the learning.
        // Note also that the prior and posterior distributions are over the parameters
        // of our line w0 + w1*x. They are not distributions over the values of x or f(x).
        // Both the prior and posterior distributions are generative models from which we
        // can draw samples of the weights w0 and w1, and then estimate f(x) from x.
        let prior_precision = 1.0;
        let prior_variance = 1.0 / prior_precision;
        let prior = MultivariateGaussian::new(
            Matrix::column(vec![0.0, 0.0]),
            Matrix::diagonal(prior_variance, (2, 2)),
        );

        // Draw some lines from the prior before seeing data to see what our
        // prior belief looks like; each sample of the 2 dimensional gaussian
        // consumes 2 random numbers, hence LINES_TO_DRAW * 2
        let mut random_numbers = n_random_numbers(&mut random_generator, LINES_TO_DRAW * 2);
        let weights = prior
            .draw(&mut random_numbers.drain(..), LINES_TO_DRAW)
            .unwrap();
        let predicted_targets = &test_design_matrix * weights.transpose();

        // plot the x and predicted to see the lines drawn from the prior
        // over the whole data range
        println!("True x and f(x) and 5 lines of the parameters drawn from the prior");
        {
            let mut chart = Chart::new(180, 60, -1.0, 1.0);
            let mut chart = &mut chart;
            let lines = merge_for_plotting(&x, &y);
            let lines = Shape::Lines(&lines);
            chart = chart.lineplot(&lines);
            let lines: Vec<_> = (0..LINES_TO_DRAW)
                .map(|i| {
                    // slice into each column of the predicted_targets matrix to
                    // get the predictions for each set of parameters drawn from the prior
                    merge_for_plotting(
                        &x,
                        &Matrix::column(predicted_targets.column_iter(i).collect()),
                    )
                })
                .collect();
            let lines: Vec<_> = lines.iter().map(|lines| Shape::Lines(lines)).collect();
            for l in lines.iter() {
                chart = chart.lineplot(l);
            }
            chart.display();
        }

        // draw more weights to plot the distribution of weights in the prior
        println!("Weights distribution of prior (w1 and w0)");
        let mut random_numbers =
            n_random_numbers(&mut random_generator, SAMPLES_FOR_DISTRIBUTION * 2);
        let weights = prior
            .draw(&mut random_numbers.drain(..), SAMPLES_FOR_DISTRIBUTION)
            .unwrap();
        Chart::new(80, 80, -3.0, 3.0)
            .lineplot(&Shape::Points(&split_for_plotting(&weights)))
            .display();
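
        // Summary of the closed form conjugate update performed for each N in the loop
        // below (an added note; with a zero prior mean the (C_prior)^-1 * u_prior term
        // vanishes):
        //   (C_n)^-1 = (C_prior)^-1 + β * X^T * X    <- `new_precision`
        //   u_n      = C_n * β * X^T * t             <- `new_mean`
        // where X is the design matrix of the first N observations and t holds the
        // corresponding noisy targets.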
        for training_size in vec![1, 2, 5, 20] {
            println!("Training size: {}", training_size);
            // use increasing amounts of training samples to see the effect
            // on the posterior as more evidence is seen
            let design_matrix_n = design_matrix.retain(
                Slice2D::new()
                    .rows(Slice::Range(0..training_size))
                    .columns(Slice::All()),
            );
            let targets_n = targets.retain(
                Slice2D::new()
                    .rows(Slice::Range(0..training_size))
                    .columns(Slice::All()),
            );
            let observations_n = observations.retain(
                Slice2D::new()
                    .rows(Slice::Range(0..training_size))
                    .columns(Slice::All()),
            );

            println!("Observations for N={}", training_size);
            Chart::new(180, 60, -1.0, 1.0)
                .lineplot(&Shape::Points(&sort_and_merge_for_plotting(
                    &observations_n,
                    &targets_n,
                )))
                .display();

            // General case for multivariate regression is
            // Prior is N(u_prior, C_prior)
            // (C_n)^-1 is (X^T * (C_error)^-1 * X) + (C_prior)^-1
            // u_n is C_n * ((X^T * (C_error)^-1 * targets_n) + ((C_prior)^-1 * u_prior))
            let new_precision = Matrix::diagonal(prior_precision, (2, 2))
                + (design_matrix_n.transpose() * &design_matrix_n).map(|x| x * noise_precision);
            let new_covariance = new_precision.inverse().unwrap();
            let new_mean = new_covariance.map(|x| x * noise_precision)
                * design_matrix_n.transpose()
                * &targets_n;
            let posterior = MultivariateGaussian::new(new_mean, new_covariance);

            // then draw a few samples from the new gaussian having seen N data
            // and use the w0 and w1 parameters drawn to create a few lines;
            // each sample of the 2 dimensional gaussian consumes 2 random numbers,
            // hence LINES_TO_DRAW * 2
            let mut random_numbers = n_random_numbers(&mut random_generator, LINES_TO_DRAW * 2);
            let weights = posterior
                .draw(&mut random_numbers.drain(..), LINES_TO_DRAW)
                .unwrap();
            let predicted_targets = &test_design_matrix * weights.transpose();

            // plot the x and predicted to see the lines drawn from the posterior
            // over the whole data range
            println!(
                "True x and f(x) and 5 lines of the parameters drawn from the posterior of N={}",
                training_size
            );
            {
                let mut chart = Chart::new(180, 60, -1.0, 1.0);
                let mut chart = &mut chart;
                let lines = merge_for_plotting(&x, &y);
                let lines = Shape::Lines(&lines);
                chart = chart.lineplot(&lines);
                let lines: Vec<_> = (0..LINES_TO_DRAW)
                    .map(|i| {
                        // slice into each column of the predicted_targets matrix to
                        // get the predictions for each set of parameters drawn from
                        // the posterior
                        merge_for_plotting(
                            &x,
                            &Matrix::column(predicted_targets.column_iter(i).collect()),
                        )
                    })
                    .collect();
                let lines: Vec<_> = lines.iter().map(|lines| Shape::Lines(lines)).collect();
                for l in lines.iter() {
                    chart = chart.lineplot(l);
                }
                chart.display();
            }

            // draw more weights to plot the distribution of weights in the posterior
            println!(
                "Weights distribution of posterior (w1 and w0) of N={}",
                training_size
            );
            let mut random_numbers =
                n_random_numbers(&mut random_generator, SAMPLES_FOR_DISTRIBUTION * 2);
            let weights = posterior
                .draw(&mut random_numbers.drain(..), SAMPLES_FOR_DISTRIBUTION)
                .unwrap();
            Chart::new(80, 80, 2.0, 4.0)
                .lineplot(&Shape::Points(&split_for_plotting(&weights)))
                .display();

            // From inspecting the distributions of the final N=20 posterior we can see
            // that the model quickly becomes very certain of the value of the intercept
            // in the weights, but still retains some uncertainty about the gradient

            // TODO: Predictive distribution
        }
        //assert_eq!(1, 2);
    }

    // TODO: add appendix on formulas on how to obtain the noise precision and the prior
    // precision by search rather than cheating and taking them as known
}
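
// Note on the predictive distribution TODO above: the standard closed form
// (an added sketch, not yet implemented here) for a new input x with features
// phi = [1, x] is a gaussian with mean u_n^T * phi and variance
// 1/β + phi^T * C_n * phi, i.e. the noise variance plus the parameter
// uncertainty projected onto phi.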