#![feature(test)] #![allow(unused_imports)] #![allow(unused_mut)] #![allow(bad_style)] extern crate test; use test::Bencher; #[bench] #[ignore] fn bench_1(b: &mut Bencher) { let mut stateA: u8 = 76; let mut stateB: u8 = 68; let mut stateC: u8 = 94; let mut stateD: u8 = 100; b.iter(|| { for _ in 0..1000 { dmg_inflate1(test::black_box(stateA), test::black_box(stateB)); dmg_inflate1(test::black_box(stateC), test::black_box(stateD)); } }); } #[bench] #[ignore] fn bench_2(b: &mut Bencher) { let mut stateA: u8 = 76; let mut stateB: u8 = 68; b.iter(|| { for _ in 0..1000 { dmg_inflate2(test::black_box(stateA), test::black_box(stateB)); } }); } #[bench] #[ignore] fn bench_3(b: &mut Bencher) { let mut stateA: u8 = 76; let mut stateB: u8 = 68; b.iter(|| { for _ in 0..1000 { dmg_inflate3(test::black_box(stateA), test::black_box(stateB)); } }); } #[bench] #[ignore] fn bench_4(b: &mut Bencher) { let mut stateA: u8 = 76; let mut stateB: u8 = 68; b.iter(|| { for _ in 0..1000 { dmg_inflate4(test::black_box(stateA), test::black_box(stateB)); } }); } #[bench] #[ignore] fn bench_5(b: &mut Bencher) { let mut stateA: u8 = 76; let mut stateB: u8 = 68; b.iter(|| { for _ in 0..1000 { dmg_inflate5(test::black_box(stateA), test::black_box(stateB)); } }); } #[bench] #[ignore] fn bench_6(b: &mut Bencher) { let mut stateA: u8 = 76; let mut stateB: u8 = 68; b.iter(|| { for _ in 0..1000 { dmg_inflate6(test::black_box(stateA), test::black_box(stateB)); } }); } #[bench] #[ignore] fn bench_7(b: &mut Bencher) { let mut stateA: u8 = 76; let mut stateB: u8 = 68; b.iter(|| { for _ in 0..1000 { dmg_inflate7(test::black_box(stateA), test::black_box(stateB)); } }); } #[bench] #[ignore] fn bench_evy(b: &mut Bencher) { let mut stateA: u8 = 76; let mut stateB: u8 = 68; let mut stateC: u8 = 94; let mut stateD: u8 = 100; b.iter(|| { for _ in 0..1000 { dmg_inflate_evy(test::black_box(stateA), test::black_box(stateB)); dmg_inflate_evy(test::black_box(stateC), test::black_box(stateD)); } }); } #[bench] fn bench_evy_2(b: &mut Bencher) { let mut src = [0u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; b.iter(|| { for _ in 0..1000 { dmg_inflate_evy_2(test::black_box(&src)); } }); } #[bench] fn bench_const_tile(b: &mut Bencher) { let mut src = [0u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; b.iter(|| { for _ in 0..1000 { for chunk in test::black_box(src).chunks_exact(2) { unsafe { dmg_inflate1(*chunk.get_unchecked(0), *chunk.get_unchecked(1)); } } } }); } pub fn dmg_inflate_evy_2(block: &[u8; 16]) -> [[u8; 16]; 4] { use std::arch::x86_64::*; // block: [lo, hi, lo, hi, lo, hi, lo, hi,...] let mut res = [[0; 16]; 4]; let input = unsafe { _mm_loadu_si128(block.as_ptr() as *const __m128i) }; let shufmask = unsafe { _mm_setr_epi32(0, 0, 0x02020202, 0x02020202) }; fn expand(x: __m128i) -> __m128i { let all1 = unsafe { _mm_set1_epi8(1) }; let splat = unsafe { _mm_and_si128( x, _mm_setr_epi32( 0x08040201_u32 as i32, 0x80402010_u32 as i32, 0x08040201_u32 as i32, 0x80402010_u32 as i32, ), ) }; unsafe { _mm_and_si128( _mm_xor_si128(_mm_cmpeq_epi8(splat, _mm_setzero_si128()), all1), all1, ) } } for i in 0..4 { let j = i << 2; let mut lo_pair = unsafe { _mm_shuffle_epi8(input, _mm_add_epi32(shufmask, _mm_set1_epi8(j))) }; let mut hi_pair = unsafe { _mm_shuffle_epi8(input, _mm_add_epi32(shufmask, _mm_set1_epi8(j + 1))) }; lo_pair = expand(lo_pair); hi_pair = expand(hi_pair); let comb = unsafe { _mm_or_si128(_mm_slli_epi32(hi_pair, 1), lo_pair) }; let ptr = unsafe { res.as_mut_ptr().add(i as usize) } as *mut __m128i; unsafe { _mm_storeu_si128(ptr, comb) }; } res } pub fn dmg_inflate_evy(lo: u8, hi: u8) -> [u8; 8] { use std::arch::x86_64::*; fn splat_widen(x: u8) -> u64 { let splat = 0x01010101_01010101_u64.wrapping_mul(x as u64) & 0x80402010_08040201_u64; // Vector is now 0x????????????????_8040201008040201_u128 let mut v = unsafe { _mm_cvtsi64x_si128(splat as i64) }; // Vector is now 0x????????????????_0101010101010101_u128 let one = unsafe { _mm_set1_epi8(0b1) }; v = unsafe { _mm_and_si128( _mm_xor_si128(_mm_cmpeq_epi8(v, _mm_setzero_si128()), one), one, ) }; unsafe { _mm_cvtsi128_si64x(v) as u64 } } let splat_lo = splat_widen(lo); let splat_hi = splat_widen(hi); let res = (splat_hi << 1) | splat_lo; res.to_le_bytes() } pub fn dmg_inflate7(low: u8, high: u8) -> [u8; 8] { let mut out = [0; 8]; for (bit_position, out_mut) in out.iter_mut().enumerate() { let bit = 1 << bit_position; let low_flag = bit & low > 0; let high_flag = bit & high > 0; let low_mask = 1; let high_mask = 2; *out_mut ^= (0u8.wrapping_sub(low_flag as u8) ^ *out_mut) & low_mask; *out_mut ^= (0u8.wrapping_sub(high_flag as u8) ^ *out_mut) & high_mask; } out } pub fn dmg_inflate6(low: u8, high: u8) -> [u8; 8] { let mut out = [0; 8]; for (bit_position, out_mut) in out.iter_mut().enumerate() { let bit = 1 << bit_position; if bit & low != 0 { *out_mut |= 1; } if bit & high != 0 { *out_mut |= 2; } } out } pub fn dmg_inflate5(low: u8, high: u8) -> [u8; 8] { let mut out = [0; 8]; for (bit_position, out_mut) in out.iter_mut().enumerate() { let bit = 1 << bit_position; if bit & low > 0 { *out_mut |= 1; } if bit & high > 0 { *out_mut |= 2; } } out } pub fn dmg_inflate4(low: u8, high: u8) -> [u8; 8] { let mut out = [0; 8]; for (bit_position, out_mut) in out.iter_mut().enumerate() { if (1 << bit_position) & low > 0 { *out_mut |= 1; } if (1 << bit_position) & high > 0 { *out_mut |= 2; } } out } pub fn dmg_inflate3(low: u8, high: u8) -> [u8; 8] { let mut out = [0; 8]; for (bit_position, out_mut) in out.iter_mut().enumerate() { *out_mut = match ( (1 << bit_position) & high > 0, (1 << bit_position) & low > 0, ) { (false, false) => 0, (false, true) => 1, (true, false) => 2, (true, true) => 3, }; } out } pub fn dmg_inflate2(low: u8, high: u8) -> [u8; 8] { let mut out = [0; 8]; for (bit_position, out_mut) in out.iter_mut().enumerate() { *out_mut = (((1 << bit_position) & low) > 0) as u8 | (((((1 << bit_position) & high) > 0) as u8) << 1) } out } pub const fn dmg_inflate1(low: u8, high: u8) -> [u8; 8] { let mut out = [0; 8]; out[0] = (((1 << 0) & low) > 0) as u8 | (((((1 << 0) & high) > 0) as u8) << 1); out[1] = (((1 << 1) & low) > 0) as u8 | (((((1 << 1) & high) > 0) as u8) << 1); out[2] = (((1 << 2) & low) > 0) as u8 | (((((1 << 2) & high) > 0) as u8) << 1); out[3] = (((1 << 3) & low) > 0) as u8 | (((((1 << 3) & high) > 0) as u8) << 1); out[4] = (((1 << 4) & low) > 0) as u8 | (((((1 << 4) & high) > 0) as u8) << 1); out[5] = (((1 << 5) & low) > 0) as u8 | (((((1 << 5) & high) > 0) as u8) << 1); out[6] = (((1 << 6) & low) > 0) as u8 | (((((1 << 6) & high) > 0) as u8) << 1); out[7] = (((1 << 7) & low) > 0) as u8 | (((((1 << 7) & high) > 0) as u8) << 1); out }