use core::ptr::NonNull; use criterion::{criterion_group, criterion_main, Criterion}; use dyn_stack::{PodStack, ReborrowMut, StackReq}; use serde::Serialize; use std::{fs, path::PathBuf}; use tfhe_fft::c64; struct FftwAlloc { bytes: NonNull, } impl Drop for FftwAlloc { fn drop(&mut self) { unsafe { fftw_sys::fftw_free(self.bytes.as_ptr()); } } } impl FftwAlloc { pub fn new(size_bytes: usize) -> FftwAlloc { unsafe { let bytes = fftw_sys::fftw_malloc(size_bytes); if bytes.is_null() { use std::alloc::{handle_alloc_error, Layout}; handle_alloc_error(Layout::from_size_align_unchecked(size_bytes, 1)); } FftwAlloc { bytes: NonNull::new_unchecked(bytes), } } } } pub struct PlanInterleavedC64 { plan: fftw_sys::fftw_plan, n: usize, } impl Drop for PlanInterleavedC64 { fn drop(&mut self) { unsafe { fftw_sys::fftw_destroy_plan(self.plan); } } } pub enum Sign { Forward, Backward, } impl PlanInterleavedC64 { pub fn new(n: usize, sign: Sign) -> Self { let size_bytes = n.checked_mul(core::mem::size_of::()).unwrap(); let src = FftwAlloc::new(size_bytes); let dst = FftwAlloc::new(size_bytes); unsafe { let p = fftw_sys::fftw_plan_dft_1d( n.try_into().unwrap(), src.bytes.as_ptr() as _, dst.bytes.as_ptr() as _, match sign { Sign::Forward => fftw_sys::FFTW_FORWARD as _, Sign::Backward => fftw_sys::FFTW_BACKWARD as _, }, fftw_sys::FFTW_MEASURE, ); PlanInterleavedC64 { plan: p, n } } } pub fn print(&self) { unsafe { fftw_sys::fftw_print_plan(self.plan); } } pub fn execute(&self, src: &mut [c64], dst: &mut [c64]) { assert_eq!(src.len(), self.n); assert_eq!(dst.len(), self.n); let src = src.as_mut_ptr(); let dst = dst.as_mut_ptr(); unsafe { use fftw_sys::{fftw_alignment_of, fftw_execute_dft}; assert_eq!(fftw_alignment_of(src as _), 0); assert_eq!(fftw_alignment_of(dst as _), 0); fftw_execute_dft(self.plan, src as _, dst as _); } } } #[derive(Serialize)] struct BenchmarkParametersRecord { display_name: String, polynomial_size: usize, } /// Writes benchmarks parameters to disk in JSON format. fn write_to_json(bench_id: &str, display_name: impl Into, polynomial_size: usize) { let record = BenchmarkParametersRecord { display_name: display_name.into(), polynomial_size, }; let mut params_directory = ["benchmarks_parameters", bench_id] .iter() .collect::(); fs::create_dir_all(¶ms_directory).unwrap(); params_directory.push("parameters.json"); fs::write(params_directory, serde_json::to_string(&record).unwrap()).unwrap(); } pub fn bench_ffts(c: &mut Criterion) { for n in [ 1 << 8, 1 << 9, 1 << 10, 1 << 11, 1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16, ] { let mut mem = dyn_stack::GlobalPodBuffer::new(StackReq::all_of([ StackReq::new_aligned::(2 * n, 256), // scratch StackReq::new_aligned::(n, 256), // src StackReq::new_aligned::(n, 256), // dst ])); let mut stack = PodStack::new(&mut mem); let z = c64::new(0.0, 0.0); use rustfft::FftPlannerAvx; let mut scratch = []; let bench_duration = std::time::Duration::from_millis(10); let unordered = tfhe_fft::unordered::Plan::new(n, tfhe_fft::unordered::Method::Measure(bench_duration)); let (dst, stack) = stack.rb_mut().make_aligned_with::(n, 64, |_| z); let (src, mut stack) = stack.make_aligned_with::(n, 64, |_| z); let bench_id = format!("rustfft-fwd-{n}"); c.bench_function(&bench_id, |b| { let mut planner = FftPlannerAvx::::new().unwrap(); let fwd_rustfft = planner.plan_fft_forward(n); b.iter(|| fwd_rustfft.process_outofplace_with_scratch(src, dst, &mut scratch)) }); write_to_json(&bench_id, "rustfft-fwd", n); let bench_id = format!("fftw-fwd-{n}"); c.bench_function(&bench_id, |b| { let fwd_fftw = PlanInterleavedC64::new(n, Sign::Forward); b.iter(|| { fwd_fftw.execute(src, dst); }) }); write_to_json(&bench_id, "fftw-fwd", n); if n <= 1024 { let ordered = tfhe_fft::ordered::Plan::new(n, tfhe_fft::ordered::Method::Measure(bench_duration)); let bench_id = format!("concrete-fwd-{n}"); c.bench_function(&bench_id, |b| b.iter(|| ordered.fwd(dst, stack.rb_mut()))); write_to_json(&bench_id, "concrete-fwd", n); } let bench_id = format!("unordered-fwd-{n}"); c.bench_function(&bench_id, |b| { b.iter(|| unordered.fwd(dst, stack.rb_mut())); }); write_to_json(&bench_id, "unordered-fwd", n); let bench_id = format!("unordered-inv-{n}"); c.bench_function(&bench_id, |b| { b.iter(|| unordered.inv(dst, stack.rb_mut())); }); write_to_json(&bench_id, "unordered-inv", n); // memcpy let bench_id = format!("memcpy-{n}"); c.bench_function(&bench_id, |b| { b.iter(|| unsafe { std::ptr::copy_nonoverlapping(src.as_ptr(), dst.as_mut_ptr(), n); }) }); write_to_json(&bench_id, "memcpy", n); let bench_id = format!("fwd-monomial-{n}"); c.bench_function(&bench_id, |b| { let mut degree = 0; b.iter(|| { degree += 1; if degree == n { degree = 0; } unordered.fwd_monomial(degree, dst); }) }); write_to_json(&bench_id, "fwd-monomial", n); } } #[cfg(feature = "fft128")] pub fn bench_fft128(c: &mut Criterion) { for n in [64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384] { use tfhe_fft::fft128::*; let twid_re0 = vec![0.0; n]; let twid_re1 = vec![0.0; n]; let twid_im0 = vec![0.0; n]; let twid_im1 = vec![0.0; n]; let mut data_re0 = vec![0.0; n]; let mut data_re1 = vec![0.0; n]; let mut data_im0 = vec![0.0; n]; let mut data_im1 = vec![0.0; n]; let bench_id = format!("tfhe-fft128-fwd-{n}"); c.bench_function(&bench_id, |bench| { bench.iter(|| { negacyclic_fwd_fft_scalar( &mut data_re0, &mut data_re1, &mut data_im0, &mut data_im1, &twid_re0, &twid_re1, &twid_im0, &twid_im1, ); }); }); write_to_json(&bench_id, "fft128-fwd", n); let bench_id = format!("tfhe-fft128-inv-{n}"); c.bench_function(&bench_id, |bench| { bench.iter(|| { negacyclic_inv_fft_scalar( &mut data_re0, &mut data_re1, &mut data_im0, &mut data_im1, &twid_re0, &twid_re1, &twid_im0, &twid_im1, ); }); }); write_to_json(&bench_id, "fft128-inv", n); #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if let Some(simd) = pulp::x86::V3::try_new() { let bench_id = format!("tfhe-fft128-avx-fwd-{n}"); c.bench_function(&bench_id, |bench| { bench.iter(|| { negacyclic_fwd_fft_avxfma( simd, &mut data_re0, &mut data_re1, &mut data_im0, &mut data_im1, &twid_re0, &twid_re1, &twid_im0, &twid_im1, ); }); }); write_to_json(&bench_id, "fft128-fwd-avx", n); let bench_id = format!("tfhe-fft128-avx-inv-{n}"); c.bench_function(&bench_id, |bench| { bench.iter(|| { negacyclic_inv_fft_avxfma( simd, &mut data_re0, &mut data_re1, &mut data_im0, &mut data_im1, &twid_re0, &twid_re1, &twid_im0, &twid_im1, ); }); }); write_to_json(&bench_id, "fft128-inv-avx", n); } #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[cfg(feature = "nightly")] if let Some(simd) = pulp::x86::V4::try_new() { let bench_id = format!("tfhe-fft128-avx512-fwd-{n}"); c.bench_function(&bench_id, |bench| { bench.iter(|| { negacyclic_fwd_fft_avx512( simd, &mut data_re0, &mut data_re1, &mut data_im0, &mut data_im1, &twid_re0, &twid_re1, &twid_im0, &twid_im1, ); }); }); write_to_json(&bench_id, "fft128-fwd-avx512", n); let bench_id = format!("tfhe-fft128-avx512-inv-{n}"); c.bench_function(&bench_id, |bench| { bench.iter(|| { negacyclic_inv_fft_avx512( simd, &mut data_re0, &mut data_re1, &mut data_im0, &mut data_im1, &twid_re0, &twid_re1, &twid_im0, &twid_im1, ); }); }); write_to_json(&bench_id, "fft128-inv-avx512", n); } } } criterion_group!(fft, bench_ffts); #[cfg(feature = "fft128")] criterion_group!(fft128, bench_fft128); #[cfg(not(feature = "fft128"))] criterion_main!(fft); #[cfg(feature = "fft128")] criterion_main!(fft, fft128);