//! Benchmark SQLite read performance. //! //! The benchmark setup consists in writing `NEEDLES * NEEDLES_TO_HAYSTACK_RATIO` 32-bytes random //! keys with random values 150 +/- 30 bytes long. With 10 000 keys and a ratio of 100 we get one //! million keys; ideally the db should be deleted for each benchmark run but in practice it has //! little impact on the performance numbers for these small database sizes. //! Allocations (on the Rust side) are counted and printed. //! //! Note that this benchmark is not a good way to measure the performance of the database itself; //! its purpose is to be a tool to gauge the performance of the glue code, or work as a starting point //! for a more elaborate benchmark of a specific workload. const NEEDLES: usize = 10_000; const NEEDLES_TO_HAYSTACK_RATIO: usize = 100; use std::io; use std::time::{Duration, Instant}; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; use alloc_counter::{count_alloc_future, AllocCounterSystem}; use criterion::async_executor::FuturesExecutor; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use ethereum_types::H256; use keyvaluedb::{DBKeyValue, KeyValueDB}; use keyvaluedb_sqlite::{Database, DatabaseConfig}; use rand::{distributions::Uniform, seq::SliceRandom, Rng}; #[global_allocator] static A: AllocCounterSystem = AllocCounterSystem; criterion_group!(benches, get, iter); criterion_main!(benches); /// Opens (or creates) a SQLite database in the `benches/` folder of the crate with one column /// family and default options. Needs manual cleanup. fn open_db() -> Database { let tempdir_str = "./benches/_sqlite_bench_get"; let cfg = DatabaseConfig::with_columns(1); Database::open(tempdir_str, cfg).expect("sqlite works") } /// Generate `n` random bytes +/- 20%. /// The variability in the payload size lets us simulate payload allocation patterns: `DBValue` is /// an `ElasticArray128` so sometimes we save on allocations. fn n_random_bytes(n: usize) -> Vec { let mut rng = rand::thread_rng(); let variability: i64 = rng.gen_range(0..(n / 5) as i64); let plus_or_minus: i64 = if variability % 2 == 0 { 1 } else { -1 }; let range = Uniform::from(0..u8::MAX); rng.sample_iter(&range) .take((n as i64 + plus_or_minus * variability) as usize) .collect() } /// Writes `NEEDLES * NEEDLES_TO_HAYSTACK_RATIO` keys to the DB. Keys are random, 32 bytes long and /// values are random, 120-180 bytes long. Every `NEEDLES_TO_HAYSTACK_RATIO` keys are kept and /// returned in a `Vec` for and used to benchmark point lookup performance. Keys are sorted /// lexicographically in the DB, and the benchmark keys are random bytes making the needles are /// effectively random points in the key set. async fn populate(db: &Database) -> io::Result> { let mut needles = Vec::with_capacity(NEEDLES); let mut batch = db.transaction(); for i in 0..NEEDLES * NEEDLES_TO_HAYSTACK_RATIO { let key = H256::random(); if i % NEEDLES_TO_HAYSTACK_RATIO == 0 { needles.push(key); if i % 100_000 == 0 && i > 0 { println!("[populate] {} keys", i); } } // In ethereum keys are mostly 32 bytes and payloads ~140bytes. batch.put(0, key.as_bytes(), &n_random_bytes(140)); } db.write(batch).await.map_err(|e| e.error)?; Ok(needles) } fn get(c: &mut Criterion) { let db = open_db(); let rt = tokio::runtime::Runtime::new().unwrap(); let needles = rt.block_on(async { populate(&db).await.expect("sqlite works") }); { let total_iterations = Arc::new(AtomicU64::new(0)); let total_allocs = Arc::new(AtomicU64::new(0)); c.bench_function("get key", |b| { b.to_async(FuturesExecutor).iter_custom(|iterations| { let needles = &needles; let db = &db; let total_iterations = total_iterations.clone(); let total_allocs = total_allocs.clone(); async move { total_iterations.fetch_add(iterations, Ordering::Relaxed); let mut elapsed = Duration::new(0, 0); // NOTE: counts allocations on the Rust side only let (alloc_stats, _) = count_alloc_future(async { let start = Instant::now(); for _ in 0..iterations { // This has no measurable impact on performance (~30ns) let needle = needles .choose(&mut rand::thread_rng()) .expect("needles is not empty"); black_box(db.get(0, needle.as_bytes()).await.unwrap()); } elapsed = start.elapsed(); }) .await; total_allocs.fetch_add(alloc_stats.0 as u64, Ordering::Relaxed); elapsed } }); }); let total_iterations = total_iterations.load(Ordering::Relaxed); let total_allocs = total_allocs.load(Ordering::Relaxed); if total_iterations > 0 { println!( "[get key] total: iterations={}, allocations={}; allocations per iter={:.2}\n", total_iterations, total_allocs, total_allocs as f64 / total_iterations as f64 ); } } { let total_iterations = Arc::new(AtomicU64::new(0)); let total_allocs = Arc::new(AtomicU64::new(0)); c.bench_function("get key by prefix", |b| { b.to_async(FuturesExecutor).iter_custom(|iterations| { let needles = &needles; let db = &db; let total_iterations = total_iterations.clone(); let total_allocs = total_allocs.clone(); async move { total_iterations.fetch_add(iterations, Ordering::Relaxed); let mut elapsed = Duration::new(0, 0); // NOTE: counts allocations on the Rust side only let (alloc_stats, _) = count_alloc_future(async { let start = Instant::now(); for _ in 0..iterations { // This has no measurable impact on performance (~30ns) let needle = needles .choose(&mut rand::thread_rng()) .expect("needles is not empty"); black_box( db.first_with_prefix(0, &needle.as_bytes()[..8]) .await .unwrap(), ); } elapsed = start.elapsed(); }) .await; total_allocs.fetch_add(alloc_stats.0 as u64, Ordering::Relaxed); elapsed } }); }); let total_iterations = total_iterations.load(Ordering::Relaxed); let total_allocs = total_allocs.load(Ordering::Relaxed); if total_iterations > 0 { println!( "[get key by prefix] total: iterations={}, allocations={}; allocations per iter={:.2}\n", total_iterations, total_allocs, total_allocs as f64 / total_iterations as f64 ); } } } fn iter(c: &mut Criterion) { let db = open_db(); { let total_iterations = Arc::new(AtomicU64::new(0)); let total_allocs = Arc::new(AtomicU64::new(0)); c.bench_function("iterate over 1k keys", |b| { b.to_async(FuturesExecutor).iter_custom(|iterations| { let db = &db; let total_iterations = total_iterations.clone(); let total_allocs = total_allocs.clone(); async move { total_iterations.fetch_add(iterations, Ordering::Relaxed); let mut elapsed = Duration::new(0, 0); // NOTE: counts allocations on the Rust side only let (alloc_stats, _) = count_alloc_future(async { let start = Instant::now(); for _ in 0..iterations { let mut contents: Vec = Vec::new(); db.iter(0, None, |kv| { contents.push((kv.0.clone(), kv.1.clone())); Ok(if contents.len() != 1000 { None } else { Some(()) }) }) .await .unwrap(); black_box(contents); } elapsed = start.elapsed(); }) .await; total_allocs.fetch_add(alloc_stats.0 as u64, Ordering::Relaxed); elapsed } }); let total_iterations = total_iterations.load(Ordering::Relaxed); let total_allocs = total_allocs.load(Ordering::Relaxed); if total_iterations > 0 { println!( "[iterate over 1k keys] total: iterations={}, allocations={}; allocations per iter={:.2}\n", total_iterations, total_allocs, total_allocs as f64 / total_iterations as f64 ); } }); } { let total_iterations = Arc::new(AtomicU64::new(0)); let total_allocs = Arc::new(AtomicU64::new(0)); c.bench_function("single key from iterator", |b| { b.to_async(FuturesExecutor).iter_custom(|iterations| { let db = &db; let total_iterations = total_iterations.clone(); let total_allocs = total_allocs.clone(); async move { total_iterations.fetch_add(iterations, Ordering::Relaxed); let mut elapsed = Duration::new(0, 0); // NOTE: counts allocations on the Rust side only let (alloc_stats, _) = count_alloc_future(async { let start = Instant::now(); for _ in 0..iterations { let out = db.iter(0, None, |_| Ok(Some(()))).await; let _ = black_box(out); } elapsed = start.elapsed(); }).await; total_allocs.fetch_add(alloc_stats.0 as u64, Ordering::Relaxed); elapsed } }); }); let total_iterations = total_iterations.load(Ordering::Relaxed); let total_allocs = total_allocs.load(Ordering::Relaxed); if total_iterations > 0 { println!( "[single key from iterator] total: iterations={}, allocations={}; allocations per iter={:.2}\n", total_iterations, total_allocs, total_allocs as f64 / total_iterations as f64 ); } } }