#![feature(test)] extern crate test; use rand::prelude::*; use rand::rngs::SmallRng; use std::cmp::Ordering; use std::hash::{Hash, Hasher}; #[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash)] #[repr(u8)] enum Nucleobase { Adenine, Cytosine, Guanine, Thymine, } #[derive(Copy, Clone, Debug, Ord, Eq)] struct Primer { length: u8, sequence: [Nucleobase; 31], } impl Primer { fn as_bytes(&self) -> &[u8] { unsafe { let subslice = self.sequence.get_unchecked(..(self.length as usize)); std::mem::transmute(subslice) } } } impl PartialEq for Primer { fn eq(&self, other: &Primer) -> bool { self.as_bytes() == other.as_bytes() } } impl PartialOrd for Primer { fn partial_cmp(&self, other: &Primer) -> Option { Some(self.as_bytes().cmp(other.as_bytes())) } } impl Hash for Primer { fn hash(&self, state: &mut H) { self.as_bytes().hash(state); } } impl std::convert::AsRef<[u8]> for Primer { fn as_ref(&self) -> &[u8] { self.as_bytes() } } struct CustomDist; impl Distribution for CustomDist { fn sample(&self, rng: &mut R) -> Primer { let length = 22 + rng.gen::() % 4; let mut sequence = [Nucleobase::Adenine; 31]; let mut prev = Nucleobase::Adenine; for i in 0..length { let next = match (prev, rng.gen::()) { (Nucleobase::Adenine, 0..=128) => Nucleobase::Adenine, (Nucleobase::Adenine, 129..=170) => Nucleobase::Cytosine, (Nucleobase::Adenine, 171..=240) => Nucleobase::Guanine, (Nucleobase::Adenine, 141..=255) => Nucleobase::Thymine, (Nucleobase::Cytosine, 0..=80) => Nucleobase::Adenine, (Nucleobase::Cytosine, 80..=100) => Nucleobase::Cytosine, (Nucleobase::Cytosine, 101..=200) => Nucleobase::Guanine, (Nucleobase::Cytosine, 201..=255) => Nucleobase::Thymine, (Nucleobase::Guanine, 0..=60) => Nucleobase::Adenine, (Nucleobase::Guanine, 61..=80) => Nucleobase::Cytosine, (Nucleobase::Guanine, 81..=180) => Nucleobase::Guanine, (Nucleobase::Guanine, 181..=255) => Nucleobase::Thymine, (Nucleobase::Thymine, 0..=30) => Nucleobase::Adenine, (Nucleobase::Thymine, 31..=120) => Nucleobase::Cytosine, (Nucleobase::Thymine, 121..=254) => Nucleobase::Guanine, (Nucleobase::Thymine, 255..=255) => Nucleobase::Thymine, }; sequence[i as usize] = next; prev = next; } Primer { length, sequence } } } sortedvec::sortedvec! { struct SortedPrimerVec { fn derive_key(x: &Primer) -> &Primer { x } } } sortedvec::sortedvec_slicekey! { struct SortedSlicePrimerVec { fn derive_key(x: &Primer) -> &[u8] { x.as_bytes() } } } macro_rules! gen_bench { ( $( $i:ident; $sample_size:expr ),* ) => { $(mod $i { use super::*; const SAMPLE_SIZE: usize = $sample_size; const SEED: [u8; 16] = [1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16]; #[bench] fn find_primer_naive_sortedvec(b: &mut test::Bencher) { // create primer set let mut rng = SmallRng::from_seed(SEED); let dataset: SortedPrimerVec = rng.sample_iter(&CustomDist).take(SAMPLE_SIZE).collect(); let test_val = dataset[SAMPLE_SIZE/ 2 - 1]; b.iter(|| { dataset.find(&&test_val); }); } #[bench] fn find_primer_list_sortedvec(b: &mut test::Bencher) { // check that Primer is of right size assert_eq!(std::mem::size_of::(), 32); // create primer set let mut rng = SmallRng::from_seed(SEED); let dataset: SortedSlicePrimerVec = rng.sample_iter(&CustomDist).take(SAMPLE_SIZE).collect(); let test_val = dataset[SAMPLE_SIZE/ 2 - 1]; b.iter(|| { dataset.find(&test_val); }); } #[bench] fn find_primer_hashset(b: &mut test::Bencher) { // create primer set let mut rng = SmallRng::from_seed(SEED); let dataset: std::collections::HashSet<_> = rng.sample_iter(&CustomDist).take(SAMPLE_SIZE).collect(); let test_val = dataset.iter().skip(SAMPLE_SIZE/ 2 - 1).next().unwrap(); b.iter(|| { dataset.contains(test_val) }); } })* } } gen_bench! { dna_e1;10, dna_e2;100, dna_e3;1_000, dna_e4;10_000, dna_e5;100_000, dna_e6;1_000_000 }