//! This example essentially generates a big tree and compares three //! implementations of a depth-first traversal to sum up the value at every //! node. There are three implementations: //! //! - a naïvely recursive one (which stack overflows for tall trees) //! - a loop with an explicit stack //! - a recursive one using vuot //! //! It uses a bespoke benchmarking setup where we construct a big tree, and then //! repeatedly run the three functions in a random order (with the shuffling //! kindly provided by [`HashSet`]). Finally, we sum up each individual run //! and take the mean. This probably isn't a statistically rigorous setup, but //! it gives some idea of the relative performance. Notably, //! //! - the naïve implementation and the implementation with the explicit stack //! tend to perform similarly //! - the implementation using vuot tends to be somewhere between 2-10 times //! slower than the other two with `bumpalo`, and around 10-20 times slower //! without `bumpalo` mod dizzy; use std::collections::HashSet; use std::io::{stdout, Write}; use std::time::{Duration, Instant}; use vuot::{run, Stack, StacklessFn}; const RUNS: usize = 25; fn main() { let start = Instant::now(); let tree = dizzy::block_on(Tree::new(25, 5)); println!("(generated a tree with {} nodes)", tree.len); println!("(setup done in {} ms)", start.elapsed().as_millis()); dizzy::block_on(run(Benchmarker(&tree))); } struct Data { minimum: Duration, median: Duration, mean: Duration, maximum: Duration, } impl Data { pub fn new(mut data: Vec) -> Self { assert!(!data.is_empty()); data.sort(); let sum = data.iter().copied().reduce(|a, b| a + b).unwrap(); let mean = sum / data.len() as u32; let minimum = data.first().copied().unwrap(); let maximum = data.last().copied().unwrap(); let half_lo = data.len() / 2; let half_hi = half_lo + data.len() % 2; let median = (data[half_lo] + data[half_hi]) / 2; Self { minimum, median, mean, maximum, } } pub fn print(&self, name: &str) { let min = self.minimum.as_millis(); let max = self.maximum.as_millis(); let med = self.median.as_millis(); let avg = self.mean.as_millis(); println!( "{name: >20} min {min: >4} ms max {max: >4} ms med {med: >4} ms avg {avg: >4} ms" ); } } struct Benchmarker<'local>(&'local Tree); impl<'a> StacklessFn<'a, ()> for Benchmarker<'_> { async fn call(self, stack: Stack<'a>) { let mut results = Results::default(); for _ in 0..RUNS { std::hint::black_box(results.run(stack, self.0).await); } let (naive, explicit, flattened) = results.results(); println!(); naive.print("naive"); explicit.print("explicit"); flattened.print("flattened"); } } #[derive(Default)] struct Results { naive: Vec, explicit: Vec, flattened: Vec, runs: u32, } impl Results { pub async fn run(&mut self, stack: Stack<'_>, tree: &Tree) -> (usize, usize, usize) { let order: Vec = HashSet::from([0, 1, 2]).into_iter().collect(); let mut naive = None; let mut explicit = None; let mut flattened = None; for i in order { match i { 0 => { let start = Instant::now(); naive = Some(sum_naive(tree)); self.naive.push(start.elapsed()); } 1 => { let start = Instant::now(); explicit = Some(sum_explicit(tree)); self.explicit.push(start.elapsed()); } _ => { let start = Instant::now(); flattened = Some(sum_recursive(stack, tree).await); self.flattened.push(start.elapsed()); } } let mut stdout = stdout().lock(); write!(stdout, ".").unwrap(); stdout.flush().unwrap() } self.runs += 1; (naive.unwrap(), explicit.unwrap(), flattened.unwrap()) } pub fn results(self) -> (Data, Data, Data) { let naive = Data::new(self.naive); let explicit = Data::new(self.explicit); let flattened = Data::new(self.flattened); (naive, explicit, flattened) } } /// Naïvely traverses the tree using recursion on the call stack. Since we're /// not creating a very deep tree, this is fine. fn sum_naive(tree: &Tree) -> usize { let mut sum = tree.value; for child in &tree.children { sum = sum_naive(child).wrapping_add(sum); } sum } /// Traverses the tree by pushing to an explicit stack data structure while /// popping from it in a loop. fn sum_explicit(tree: &Tree) -> usize { let mut trees = vec![tree]; let mut sum = 0; while let Some(tree) = trees.pop() { sum = tree.value.wrapping_add(sum); trees.extend(&tree.children); } sum } /// Recursive depth-first traversal using the virtual call stack async fn sum_recursive(stack: Stack<'_>, tree: &Tree) -> usize { let mut sum = tree.value; for child in &tree.children { sum = stack .run(sum_recursive(stack, child)) .await .wrapping_add(sum); } sum } struct Tree { value: usize, children: Vec, len: usize, } impl Tree { pub async fn new(width: usize, height: usize) -> Self { struct Make(usize, usize); impl<'a> StacklessFn<'a, Tree> for Make { async fn call(self, stack: Stack<'a>) -> Tree { make(stack, 1, self.0, self.1).await } } async fn make(stack: Stack<'_>, i: usize, width: usize, height: usize) -> Tree { let value = i; let mut len = 1; let children = if height == 0 { Vec::new() } else { let mut children = Vec::new(); for j in 0..width { let child = stack.run(make(stack, i + j, width, height - 1)).await; len += child.len; children.push(child); } children }; Tree { value, children, len, } } run(Make(width, height)).await } } impl Drop for Tree { fn drop(&mut self) { let mut queue: Vec<_> = self.children.drain(..).collect(); while let Some(mut tree) = queue.pop() { queue.append(&mut tree.children); } } }