use criterion::*; use dyn_stack::{GlobalPodBuffer, PodStack}; use faer_core::{c32, c64, unzipped, zipped, ComplexField, Mat, Parallelism, RealField}; use faer_evd::{ tridiag::{tridiagonalize_in_place, tridiagonalize_in_place_req}, tridiag_real_evd::{compute_tridiag_real_evd, compute_tridiag_real_evd_req}, }; use reborrow::*; use std::any::type_name; fn random() -> E { if coe::is_same::() { coe::coerce_static(rand::random::()) } else if coe::is_same::() { coe::coerce_static(rand::random::()) } else if coe::is_same::() { coe::coerce_static(c32::new(rand::random(), rand::random())) } else if coe::is_same::() { coe::coerce_static(c64::new(rand::random(), rand::random())) } else if coe::is_same::() { coe::coerce_static(num_complex::Complex32::new(rand::random(), rand::random())) } else if coe::is_same::() { coe::coerce_static(num_complex::Complex64::new(rand::random(), rand::random())) } else { panic!() } } fn epsilon() -> E { if coe::is_same::() { coe::coerce_static(f32::EPSILON) } else if coe::is_same::() { coe::coerce_static(f64::EPSILON) } else { panic!() } } fn min_positive() -> E { if coe::is_same::() { coe::coerce_static(f32::MIN_POSITIVE) } else if coe::is_same::() { coe::coerce_static(f64::MIN_POSITIVE) } else { panic!() } } fn tridiagonalization(criterion: &mut Criterion) { for n in [32, 64, 128, 256, 512, 1024, 2000, 4000] { let mut mat = Mat::from_fn(n, n, |_, _| random::()); let adjoint = mat.adjoint().to_owned(); zipped!(mat.as_mut(), adjoint.as_ref()) .for_each(|unzipped!(mut x, y)| x.write(x.read().faer_add(y.read()))); let mut trid = mat.clone(); let mut tau_left = Mat::zeros(n - 1, 1); { let parallelism = Parallelism::None; let mut mem = GlobalPodBuffer::new(tridiagonalize_in_place_req::(n, parallelism).unwrap()); let mut stack = PodStack::new(&mut mem); criterion.bench_function( &format!("tridiag-st-{}-{}", type_name::(), n), |bencher| { bencher.iter(|| { zipped!(trid.as_mut(), mat.as_ref()) .for_each(|unzipped!(mut dst, src)| dst.write(src.read())); tridiagonalize_in_place( trid.as_mut(), tau_left.as_mut().col_mut(0).as_2d_mut(), parallelism, stack.rb_mut(), ); }); }, ); } { let parallelism = Parallelism::Rayon(0); let mut mem = GlobalPodBuffer::new(tridiagonalize_in_place_req::(n, parallelism).unwrap()); let mut stack = PodStack::new(&mut mem); criterion.bench_function( &format!("tridiag-mt-{}-{}", type_name::(), n), |bencher| { bencher.iter(|| { zipped!(trid.as_mut(), mat.as_ref()) .for_each(|unzipped!(mut dst, src)| dst.write(src.read())); tridiagonalize_in_place( trid.as_mut(), tau_left.as_mut().col_mut(0).as_2d_mut(), parallelism, stack.rb_mut(), ); }); }, ); } } } fn tridiagonal_evd(criterion: &mut Criterion) { for n in [32, 64, 128, 256, 512, 1024, 4096] { let diag = (0..n).map(|_| random::()).collect::>(); let offdiag = (0..n - 1).map(|_| random::()).collect::>(); let mut u = Mat::::zeros(n, n); let parallelism = Parallelism::None; let mut mem = GlobalPodBuffer::new(compute_tridiag_real_evd_req::(n, parallelism).unwrap()); let mut stack = PodStack::new(&mut mem); criterion.bench_function( &format!("tridiag-evd-st-{}-{}", type_name::(), n), |bencher| { bencher.iter(|| { let mut diag = diag.clone(); let mut offdiag = offdiag.clone(); compute_tridiag_real_evd( &mut diag, &mut offdiag, u.as_mut(), epsilon(), min_positive(), parallelism, stack.rb_mut(), ); }); }, ); let parallelism = Parallelism::Rayon(0); let mut mem = GlobalPodBuffer::new(compute_tridiag_real_evd_req::(n, parallelism).unwrap()); let mut stack = PodStack::new(&mut mem); criterion.bench_function( &format!("tridiag-evd-mt-{}-{}", type_name::(), n), |bencher| { bencher.iter(|| { let mut diag = diag.clone(); let mut offdiag = offdiag.clone(); compute_tridiag_real_evd( &mut diag, &mut offdiag, u.as_mut(), epsilon(), min_positive(), parallelism, stack.rb_mut(), ); }); }, ); } } fn evd(criterion: &mut Criterion) { for n in [4, 6, 8, 10, 12, 16, 24, 32, 64, 128, 256, 512, 1024, 4096] { let mut mat = Mat::from_fn(n, n, |_, _| random::()); let adjoint = mat.adjoint().to_owned(); zipped!(mat.as_mut(), adjoint.as_ref()) .for_each(|unzipped!(mut x, y)| x.write(x.read().faer_add(y.read()))); let mut s = Mat::zeros(n, n); let mut u = Mat::zeros(n, n); { let parallelism = Parallelism::None; let mut mem = GlobalPodBuffer::new( faer_evd::compute_hermitian_evd_req::( n, faer_evd::ComputeVectors::Yes, parallelism, Default::default(), ) .unwrap(), ); let mut stack = PodStack::new(&mut mem); criterion.bench_function( &format!("sym-evd-st-{}-{}", type_name::(), n), |bencher| { bencher.iter(|| { faer_evd::compute_hermitian_evd( mat.as_ref(), s.as_mut().diagonal_mut().column_vector_mut().as_2d_mut(), Some(u.as_mut()), parallelism, stack.rb_mut(), Default::default(), ); }); }, ); } { let parallelism = Parallelism::Rayon(0); let mut mem = GlobalPodBuffer::new( faer_evd::compute_hermitian_evd_req::( n, faer_evd::ComputeVectors::Yes, parallelism, Default::default(), ) .unwrap(), ); let mut stack = PodStack::new(&mut mem); criterion.bench_function( &format!("sym-evd-mt-{}-{}", type_name::(), n), |bencher| { bencher.iter(|| { faer_evd::compute_hermitian_evd( mat.as_ref(), s.as_mut().diagonal_mut().column_vector_mut().as_2d_mut(), Some(u.as_mut()), parallelism, stack.rb_mut(), Default::default(), ); }); }, ); } } } fn evd_nalgebra(criterion: &mut Criterion) { for n in [4, 6, 8, 10, 12, 16, 24, 32, 64, 128, 256, 512, 1024, 4096] { let mat = nalgebra::DMatrix::::from_fn(n, n, |_, _| random::()); criterion.bench_function( &format!("sym-evd-nalgebra-{}-{}", type_name::(), n), |bencher| { bencher.iter(|| { mat.clone().symmetric_eigen(); }); }, ); } } fn cplx_schur(criterion: &mut Criterion) { for n in [32, 64, 128, 256, 512, 1024, 4096] { let mat = Mat::from_fn(n, n, |_, _| random::()); let mut t = mat.clone(); let mut z = mat.clone(); let mut w = Mat::zeros(n, 1); { let parallelism = Parallelism::None; let mut mem = GlobalPodBuffer::new( faer_evd::hessenberg_cplx_evd::multishift_qr_req::( n, n, true, true, Parallelism::None, Default::default(), ) .unwrap(), ); let mut stack = PodStack::new(&mut mem); criterion.bench_function(&format!("schur-st-{}-{}", type_name::(), n), |bencher| { bencher.iter(|| { zipped!(t.as_mut(), mat.as_ref()) .for_each(|unzipped!(mut dst, src)| dst.write(src.read())); zipped!(z.as_mut()).for_each(|unzipped!(mut x)| x.write(E::faer_zero())); zipped!(z.as_mut().diagonal_mut().column_vector_mut().as_2d_mut()) .for_each(|unzipped!(mut x)| x.write(E::faer_one())); faer_evd::hessenberg_cplx_evd::multishift_qr( true, t.as_mut(), Some(z.as_mut()), w.as_mut(), 0, n, epsilon(), min_positive(), parallelism, stack.rb_mut(), Default::default(), ); }); }); } { let parallelism = Parallelism::Rayon(0); let mut mem = GlobalPodBuffer::new( faer_evd::hessenberg_cplx_evd::multishift_qr_req::( n, n, true, true, Parallelism::None, Default::default(), ) .unwrap(), ); let mut stack = PodStack::new(&mut mem); criterion.bench_function(&format!("schur-mt-{}-{}", type_name::(), n), |bencher| { bencher.iter(|| { zipped!(t.as_mut(), mat.as_ref()) .for_each(|unzipped!(mut dst, src)| dst.write(src.read())); zipped!(z.as_mut()).for_each(|unzipped!(mut x)| x.write(E::faer_zero())); zipped!(z.as_mut().diagonal_mut().column_vector_mut().as_2d_mut()) .for_each(|unzipped!(mut x)| x.write(E::faer_one())); faer_evd::hessenberg_cplx_evd::multishift_qr( true, t.as_mut(), Some(z.as_mut()), w.as_mut(), 0, n, epsilon(), min_positive(), parallelism, stack.rb_mut(), Default::default(), ); }); }); } } } fn real_schur(criterion: &mut Criterion) { for n in [32, 64, 128, 256, 512, 1024, 4096] { let mat = Mat::from_fn(n, n, |_, _| random::()); let mut t = mat.clone(); let mut z = mat.clone(); let mut w_re = Mat::zeros(n, 1); let mut w_im = Mat::zeros(n, 1); { let parallelism = Parallelism::None; let mut mem = GlobalPodBuffer::new( faer_evd::hessenberg_real_evd::multishift_qr_req::( n, n, true, true, Parallelism::None, Default::default(), ) .unwrap(), ); let mut stack = PodStack::new(&mut mem); criterion.bench_function(&format!("schur-st-{}-{}", type_name::(), n), |bencher| { bencher.iter(|| { zipped!(t.as_mut(), mat.as_ref()) .for_each(|unzipped!(mut dst, src)| dst.write(src.read())); zipped!(z.as_mut()).for_each(|unzipped!(mut x)| x.write(E::faer_zero())); zipped!(z.as_mut().diagonal_mut().column_vector_mut().as_2d_mut()) .for_each(|unzipped!(mut x)| x.write(E::faer_one())); faer_evd::hessenberg_real_evd::multishift_qr( true, t.as_mut(), Some(z.as_mut()), w_re.as_mut(), w_im.as_mut(), 0, n, epsilon(), min_positive(), parallelism, stack.rb_mut(), Default::default(), ); }); }); } { let parallelism = Parallelism::Rayon(0); let mut mem = GlobalPodBuffer::new( faer_evd::hessenberg_real_evd::multishift_qr_req::( n, n, true, true, Parallelism::None, Default::default(), ) .unwrap(), ); let mut stack = PodStack::new(&mut mem); criterion.bench_function(&format!("schur-mt-{}-{}", type_name::(), n), |bencher| { bencher.iter(|| { zipped!(t.as_mut(), mat.as_ref()) .for_each(|unzipped!(mut dst, src)| dst.write(src.read())); zipped!(z.as_mut()).for_each(|unzipped!(mut x)| x.write(E::faer_zero())); zipped!(z.as_mut().diagonal_mut().column_vector_mut().as_2d_mut()) .for_each(|unzipped!(mut x)| x.write(E::faer_one())); faer_evd::hessenberg_real_evd::multishift_qr( true, t.as_mut(), Some(z.as_mut()), w_re.as_mut(), w_im.as_mut(), 0, n, epsilon(), min_positive(), parallelism, stack.rb_mut(), Default::default(), ); }); }); } } } fn hessenberg(criterion: &mut Criterion) { for n in [32, 64, 128, 256, 512, 1024, 4096] { let mat = Mat::from_fn(n, n, |_, _| random::()); let mut t = mat.clone(); let bs = faer_qr::no_pivoting::compute::recommended_blocksize::(n - 1, n - 1); let mut householder = Mat::zeros(n - 1, bs); let parallelism = Parallelism::Rayon(0); let mut mem = GlobalPodBuffer::new( faer_evd::hessenberg::make_hessenberg_in_place_req::(n, bs, Parallelism::None) .unwrap(), ); let mut stack = PodStack::new(&mut mem); criterion.bench_function( &format!("hessenberg-mt-{}-{}", type_name::(), n), |bencher| { bencher.iter(|| { zipped!(t.as_mut(), mat.as_ref()) .for_each(|unzipped!(mut dst, src)| dst.write(src.read())); faer_evd::hessenberg::make_hessenberg_in_place( t.as_mut(), householder.as_mut(), parallelism, stack.rb_mut(), ); }); }, ); } } fn unsym_evd(criterion: &mut Criterion) { for n in [32, 64, 128, 256, 512, 1024, 2048, 4096] { let mat = Mat::from_fn(n, n, |_, _| random::()); let mut z = mat.clone(); let mut w_re = Mat::zeros(n, 1); let mut w_im = Mat::zeros(n, 1); { let parallelism = Parallelism::None; let mut mem = GlobalPodBuffer::new( faer_evd::compute_evd_req::( n, faer_evd::ComputeVectors::Yes, Parallelism::None, Default::default(), ) .unwrap(), ); let mut stack = PodStack::new(&mut mem); criterion.bench_function( &format!("unsym-evd-st-{}-{}", type_name::(), n), |bencher| { bencher.iter(|| { faer_evd::compute_evd_real( mat.as_ref(), w_re.as_mut(), w_im.as_mut(), Some(z.as_mut()), parallelism, stack.rb_mut(), Default::default(), ); }); }, ); } { let parallelism = Parallelism::Rayon(0); let mut mem = GlobalPodBuffer::new( faer_evd::compute_evd_req::( n, faer_evd::ComputeVectors::Yes, parallelism, Default::default(), ) .unwrap(), ); let mut stack = PodStack::new(&mut mem); criterion.bench_function( &format!("unsym-evd-mt-{}-{}", type_name::(), n), |bencher| { bencher.iter(|| { let t = std::time::Instant::now(); faer_evd::compute_evd_real( mat.as_ref(), w_re.as_mut(), w_im.as_mut(), Some(z.as_mut()), parallelism, stack.rb_mut(), Default::default(), ); dbg!(t.elapsed()); }); }, ); } } } fn cplx_schur_nalgebra(criterion: &mut Criterion) { type Cplx64 = num_complex::Complex64; for n in [32, 64, 128, 256, 512, 1024, 4096] { let mat = nalgebra::DMatrix::::from_fn(n, n, |_, _| random::()); criterion.bench_function( &format!("schur-nalgebra-{}-{}", type_name::(), n), |bencher| { bencher.iter(|| mat.clone().schur()); }, ); } } criterion_group!( benches, tridiagonalization::, tridiagonalization::, tridiagonalization::, tridiagonal_evd::, tridiagonal_evd::, evd::, evd::, evd::, cplx_schur::, real_schur::, hessenberg::, unsym_evd::, cplx_schur_nalgebra, evd_nalgebra, ); criterion_main!(benches);