use criterion::{criterion_group, criterion_main, BatchSize, Criterion, Throughput}; use integer_encoding::VarInt; use rand::distributions::{Distribution, Standard}; use rand::{thread_rng, Rng}; use varint_simd::{ decode, decode_eight_u8_unsafe, decode_four_unsafe, decode_len, decode_len_unsafe, decode_two_unsafe, //decode_two_wide_unsafe, decode_unsafe, encode, VarIntTarget, }; mod leb128; mod prost_varint; #[inline(always)] fn create_batched_encoded_generator( rng: &mut R, ) -> impl FnMut() -> (Vec, Vec) + '_ where Standard: Distribution, { move || { let mut encoded = Vec::new(); let mut idx = 0; for _ in 0..C { if encoded.len() < idx + 16 { encoded.extend(std::iter::repeat(0).take(idx + 11 - encoded.len())) } let len = rng.gen::().encode_var(&mut encoded[idx..]); idx += len; } (encoded, vec![Default::default(); C]) } } #[inline(always)] fn decode_len_batched_varint_simd(input: &mut (Vec, Vec)) { let data = &input.0; let mut slice = &data[..]; for _ in 0..C { // SAFETY: the input slice should have at least 16 bytes of allocated padding at the end let len = decode_len::(slice).unwrap(); slice = &slice[len..]; } } #[inline(always)] fn decode_len_batched_varint_simd_unsafe( input: &mut (Vec, Vec), ) { let data = &input.0; let mut slice = &data[..]; for _ in 0..C { // SAFETY: the input slice should have at least 16 bytes of allocated padding at the end let len = unsafe { decode_len_unsafe::(slice.as_ptr()) }; slice = &slice[len..]; } } #[inline(always)] fn decode_batched_varint_simd_unsafe( input: &mut (Vec, Vec), ) { let data = &input.0; let out = &mut input.1; let mut slice = &data[..]; for i in 0..C { // SAFETY: the input slice should have at least 16 bytes of allocated padding at the end let (num, len) = unsafe { decode_unsafe::(slice.as_ptr()) }; out[i] = num; slice = &slice[len..]; } } #[inline(always)] fn decode_batched_varint_simd_2x_unsafe( input: &mut (Vec, Vec), ) { let data = &input.0; let out = &mut input.1; let mut slice = &data[..]; for i in 0..(C / 2) { let (num1, num2, len1, len2) = unsafe { decode_two_unsafe::(slice.as_ptr()) }; out[i * 2] = num1; out[i * 2 + 1] = num2; slice = &slice[((len1 + len2) as usize)..]; } } // #[inline(always)] // fn decode_batched_varint_simd_2x_wide_unsafe( // input: &mut (Vec, Vec), // ) { // let data = &input.0; // let out = &mut input.1; // // let mut slice = &data[..]; // for i in 0..(C / 2) { // let (num1, num2, len1, len2) = unsafe { decode_two_wide_unsafe::(slice.as_ptr()) }; // out[i * 2] = num1; // out[i * 2 + 1] = num2; // slice = &slice[((len1 + len2) as usize)..]; // } // } #[inline(always)] fn decode_batched_varint_simd_4x_unsafe( input: &mut (Vec, Vec), ) { let data = &input.0; let out = &mut input.1; let mut slice = &data[..]; for i in 0..(C / 4) { let (num1, num2, num3, num4, len1, len2, len3, len4, _invalid) = unsafe { decode_four_unsafe::(slice.as_ptr()) }; out[i * 4] = num1; out[i * 4 + 1] = num2; out[i * 4 + 2] = num3; out[i * 4 + 3] = num4; slice = &slice[((len1 + len2 + len3 + len4) as usize)..]; } } #[inline(always)] fn decode_batched_varint_simd_8x_u8_unsafe(input: &mut (Vec, Vec)) { let data = &input.0; let out = &mut input.1; let mut slice = &data[..]; for i in 0..(C / 8) { let (nums, total_len) = unsafe { decode_eight_u8_unsafe(slice.as_ptr()) }; out[(i * 8)..(i * 8 + 8)].copy_from_slice(&nums); slice = &slice[(total_len as usize)..]; } } #[inline(always)] fn decode_batched_varint_simd_safe(input: &mut (Vec, Vec)) { let data = &input.0; let out = &mut input.1; let mut slice = &data[..]; for i in 0..C { let (num, len) = decode::(slice).unwrap(); out[i] = num; slice = &slice[len..]; } } #[inline(always)] fn decode_batched_integer_encoding(input: &mut (Vec, Vec)) { let data = &input.0; let out = &mut input.1; let mut slice = &data[..]; for i in 0..C { let (num, len) = T::decode_var(slice).unwrap(); out[i] = num; slice = &slice[len..]; } } #[inline(always)] fn decode_batched_rustc_u8(input: &mut (Vec, Vec)) { let data = &input.0; let out = &mut input.1; let mut slice = &data[..]; for i in 0..C { let (num, len) = leb128::read_u16_leb128(slice); out[i] = num as u8; slice = &slice[len..]; } } #[inline(always)] fn decode_batched_rustc_u16(input: &mut (Vec, Vec)) { let data = &input.0; let out = &mut input.1; let mut slice = &data[..]; for i in 0..C { let (num, len) = leb128::read_u16_leb128(slice); out[i] = num; slice = &slice[len..]; } } #[inline(always)] fn decode_batched_rustc_u32(input: &mut (Vec, Vec)) { let data = &input.0; let out = &mut input.1; let mut slice = &data[..]; for i in 0..C { let (num, len) = leb128::read_u32_leb128(slice); out[i] = num; slice = &slice[len..]; } } #[inline(always)] fn decode_batched_rustc_u64(input: &mut (Vec, Vec)) { let data = &input.0; let out = &mut input.1; let mut slice = &data[..]; for i in 0..C { let (num, len) = leb128::read_u64_leb128(slice); out[i] = num; slice = &slice[len..]; } } #[inline(always)] fn decode_batched_prost(input: &mut (Vec, Vec)) { let data = &input.0; let out = &mut input.1; let mut slice = &data[..]; for i in 0..C { let num = prost_varint::decode_varint(&mut slice).unwrap(); out[i] = T::cast_u64(num); } } pub fn criterion_benchmark(c: &mut Criterion) { let mut rng = thread_rng(); // Must be a multiple of 8 const SEQUENCE_LEN: usize = 256; let mut group = c.benchmark_group("varint-u8/decode"); group.throughput(Throughput::Elements(SEQUENCE_LEN as u64)); group.bench_function("integer-encoding", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_integer_encoding::, BatchSize::SmallInput, ) }); group.bench_function("rustc", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_rustc_u8::, BatchSize::SmallInput, ) }); group.bench_function("prost-varint", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_prost::, BatchSize::SmallInput, ) }); group.bench_function("varint-simd/unsafe", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_varint_simd_unsafe::, BatchSize::SmallInput, ) }); group.bench_function("varint-simd/safe", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_varint_simd_safe::, BatchSize::SmallInput, ) }); group.bench_function("varint-simd/2x/unsafe", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_varint_simd_2x_unsafe::, BatchSize::SmallInput, ) }); group.bench_function("varint-simd/4x/unsafe", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_varint_simd_4x_unsafe::, BatchSize::SmallInput, ) }); group.bench_function("varint-simd/8x/unsafe", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_varint_simd_8x_u8_unsafe::, BatchSize::SmallInput, ) }); group.finish(); let mut group = c.benchmark_group("varint-u8/decode_len"); group.throughput(Throughput::Elements(SEQUENCE_LEN as u64)); group.bench_function("varint-simd/unsafe", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_len_batched_varint_simd_unsafe::, BatchSize::SmallInput, ) }); group.bench_function("varint-simd/safe", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_len_batched_varint_simd::, BatchSize::SmallInput, ) }); group.finish(); let mut group = c.benchmark_group("varint-u8/encode"); group.throughput(Throughput::Elements(1)); group.bench_function("integer-encoding", |b| { b.iter_batched( || rng.gen::(), |num| { let mut target = [0u8; 16]; u8::encode_var(num, &mut target) }, BatchSize::SmallInput, ) }); let mut target = Vec::with_capacity(16); group.bench_function("rustc", |b| { b.iter_batched( || rng.gen::(), |num| { target.clear(); leb128::write_u16_leb128(&mut target, num as u16); }, BatchSize::SmallInput, ) }); let mut target = Vec::with_capacity(16); group.bench_function("prost-varint", |b| { b.iter_batched( || rng.gen::(), |num| { target.clear(); prost_varint::encode_varint(num as u64, &mut target) }, BatchSize::SmallInput, ) }); group.bench_function("varint-simd", |b| { b.iter_batched(|| rng.gen::(), encode, BatchSize::SmallInput) }); group.finish(); let mut group = c.benchmark_group("varint-u16/decode"); group.throughput(Throughput::Elements(SEQUENCE_LEN as u64)); group.bench_function("integer-encoding", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_integer_encoding::, BatchSize::SmallInput, ) }); group.bench_function("rustc", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_rustc_u16::, BatchSize::SmallInput, ) }); group.bench_function("prost-varint", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_prost::, BatchSize::SmallInput, ) }); group.bench_function("varint-simd/unsafe", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_varint_simd_unsafe::, BatchSize::SmallInput, ) }); group.bench_function("varint-simd/safe", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_varint_simd_safe::, BatchSize::SmallInput, ) }); group.bench_function("varint-simd/2x/unsafe", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_varint_simd_2x_unsafe::, BatchSize::SmallInput, ) }); group.bench_function("varint-simd/4x/unsafe", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_varint_simd_4x_unsafe::, BatchSize::SmallInput, ) }); group.finish(); let mut group = c.benchmark_group("varint-u16/encode"); group.throughput(Throughput::Elements(1)); group.bench_function("integer-encoding", |b| { b.iter_batched( || rng.gen::(), |num| { let mut target = [0u8; 16]; u16::encode_var(num, &mut target) }, BatchSize::SmallInput, ) }); let mut target = Vec::with_capacity(16); group.bench_function("rustc", |b| { b.iter_batched( || rng.gen::(), |num| { target.clear(); leb128::write_u16_leb128(&mut target, num); }, BatchSize::SmallInput, ) }); let mut target = Vec::with_capacity(16); group.bench_function("prost-varint", |b| { b.iter_batched( || rng.gen::(), |num| { target.clear(); prost_varint::encode_varint(num as u64, &mut target) }, BatchSize::SmallInput, ) }); group.bench_function("varint-simd", |b| { b.iter_batched(|| rng.gen::(), encode, BatchSize::SmallInput) }); group.finish(); let mut group = c.benchmark_group("varint-u32/decode"); group.throughput(Throughput::Elements(SEQUENCE_LEN as u64)); group.bench_function("integer-encoding", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_integer_encoding::, BatchSize::SmallInput, ) }); group.bench_function("rustc", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_rustc_u32::, BatchSize::SmallInput, ) }); group.bench_function("prost-varint", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_prost::, BatchSize::SmallInput, ) }); group.bench_function("varint-simd/unsafe", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_varint_simd_unsafe::, BatchSize::SmallInput, ) }); group.bench_function("varint-simd/safe", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_varint_simd_safe::, BatchSize::SmallInput, ) }); group.bench_function("varint-simd/2x/unsafe", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_varint_simd_2x_unsafe::, BatchSize::SmallInput, ) }); group.finish(); let mut group = c.benchmark_group("varint-u32/encode"); group.throughput(Throughput::Elements(1)); group.bench_function("integer-encoding", |b| { b.iter_batched( || rng.gen::(), |num| { let mut target = [0u8; 16]; u32::encode_var(num, &mut target) }, BatchSize::SmallInput, ) }); let mut target = Vec::with_capacity(16); group.bench_function("rustc", |b| { b.iter_batched( || rng.gen::(), |num| { target.clear(); leb128::write_u32_leb128(&mut target, num); }, BatchSize::SmallInput, ) }); let mut target = Vec::with_capacity(16); group.bench_function("prost-varint", |b| { b.iter_batched( || rng.gen::(), |num| { target.clear(); prost_varint::encode_varint(num as u64, &mut target) }, BatchSize::SmallInput, ) }); group.bench_function("varint-simd", |b| { b.iter_batched(|| rng.gen::(), encode, BatchSize::SmallInput) }); group.finish(); let mut group = c.benchmark_group("varint-u64/decode"); group.throughput(Throughput::Elements(SEQUENCE_LEN as u64)); group.bench_function("integer-encoding", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_integer_encoding::, BatchSize::SmallInput, ) }); group.bench_function("rustc", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_rustc_u64::, BatchSize::SmallInput, ) }); group.bench_function("prost-varint", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_prost::, BatchSize::SmallInput, ) }); group.bench_function("varint-simd/unsafe", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_varint_simd_unsafe::, BatchSize::SmallInput, ) }); group.bench_function("varint-simd/safe", |b| { b.iter_batched_ref( create_batched_encoded_generator::(&mut rng), decode_batched_varint_simd_safe::, BatchSize::SmallInput, ) }); // group.bench_function("varint-simd/2x_wide/unsafe", |b| { // b.iter_batched_ref( // create_batched_encoded_generator::(&mut rng), // decode_batched_varint_simd_2x_wide_unsafe::, // BatchSize::SmallInput, // ) // }); group.finish(); let mut group = c.benchmark_group("varint-u64/encode"); group.throughput(Throughput::Elements(1)); group.bench_function("integer-encoding", |b| { b.iter_batched( || rng.gen::(), |num| { let mut target = [0u8; 16]; u64::encode_var(num, &mut target) }, BatchSize::SmallInput, ) }); let mut target = Vec::with_capacity(16); group.bench_function("rustc", |b| { b.iter_batched( || rng.gen::(), |num| { target.clear(); leb128::write_u64_leb128(&mut target, num); }, BatchSize::SmallInput, ) }); let mut target = Vec::with_capacity(16); group.bench_function("prost-varint", |b| { b.iter_batched( || rng.gen::(), |num| { target.clear(); prost_varint::encode_varint(num, &mut target) }, BatchSize::SmallInput, ) }); group.bench_function("varint-simd", |b| { b.iter_batched(|| rng.gen::(), encode, BatchSize::SmallInput) }); group.finish(); } criterion_group!(benches, criterion_benchmark); criterion_main!(benches);