use std::sync::{Mutex, Arc}; use rand::{os, Rng}; use time::{Duration, PreciseTime}; use rayon::prelude::*; use gen::{Generator, Generators}; use stats::{PValue, Unif01Statistics, Statistics, two_level_test}; /// Generate a suite over the entire set of available statistics and generators. pub fn full_suite() -> Suite { Suite::new(Statistics::iterator().collect::>(), Generators::nonspecial_generators()) } /// One element of output from running a [`Suite`]. /// /// This provides all the top-level data available for reporting to the user. /// /// [`Suite`]: ./struct.Suite.html #[derive(PartialOrd, Debug, Clone)] pub struct TestOutcome { pub gen: Generators, pub stat: Statistics, pub p_value: PValue, pub duration: Duration, } /// TestOutcome instances are equal if all of the following hold: /// 1. the generators are identical /// 2. the statistics are identical /// 3. the p-values are within 1e-10 of each other impl PartialEq for TestOutcome { fn eq(&self, other: &TestOutcome) -> bool { (self.gen == other.gen) && (self.stat == other.stat) && ((self.p_value.0 - other.p_value.0).abs() < 1e-10) } } /// A [`Suite`] collects a set of generators and a set of statistics with which to test each generator, /// and can orchestrate the parallel running of these tests. /// /// Here is an example of basic intialization and usage with additional type annotations used for /// clarity: /// /// ``` /// use randtest::{ /// create_report, /// Suite, /// Generators, /// Statistics, /// TestOutcome, /// }; /// /// // We select all the statistics and generators available internally (except special stdin /// // and file based generators). /// let all_gens: Vec = Generators::nonspecial_generators(); /// let all_stats: Vec = Statistics::iterator().collect::>(); /// /// let outcomes : Vec = Suite::new(all_stats, all_gens) /// .psamples(10) /// .multiplier(1.0) /// .superseed(0u64) /// .run(); /// /// // We create a String from the raw outcomes information to report the results: /// println!("{}", create_report(&outcomes[..])); /// ``` /// /// The [`TestOutcome`](suite/struct.TestOutcome.html) struct can only be produced by running a /// [`Suite`] struct. They report the p-value results from the [`two_level_test()`] performed /// internally, but also provide timing information for the test and fields for identifying exactly /// which statistic was tested against which generator variant. /// /// Here we run the same suite from above more succinctly and do some basic (but admittedly /// pointless) processing on the information from the results. /// /// ``` /// use randtest::{Suite, Generators, Statistics}; /// /// let outcomes = Suite::new(Statistics::iterator().collect::>(), Generators::nonspecial_generators()) /// .psamples(10) /// .multiplier(1.0) /// .superseed(0u64) /// .run(); /// /// // extract out a vector of the p-values from all `two_level_test()` calls /// let raw_p_values: Vec = outcomes.iter().map(|ref outcome| outcome.p_value.0 as f64).collect::>(); /// /// // find their mean /// let sum_p_values : f64 = raw_p_values.iter().fold(0f64, |sum, val| sum + val); /// let mean_p_value : f64 = sum_p_values / outcomes.len() as f64; /// /// // find their max /// let max_p_value: f64 = raw_p_values.iter().cloned().fold(0./0., f64::max); /// /// println!("Mean p-value: {}", mean_p_value); /// println!("Max p-value: {}", max_p_value); /// ``` /// /// [`Suite`]: ./struct.Suite.html /// [`two_level_test()`]: ../stats/fn.two_level_test.html /// [`TestOutcome`]: ./struct.TestOutcome.html #[derive(Debug)] pub struct Suite { stats: Vec, gens: Vec, /// The number of p-samples collected in the second level test psamples: u64, /// The multiple of the minimum_recommended_samples for a test used on the first level tests. multiplier: f64, /// A single value used to deterministically generate seeds for any given stats/gen pairings. /// /// This means giving the same u64 on the cli AND using the same combination of tests and /// generators should produce the same test results (hopefully) repeatedly. superseed: u64, /// The selection of which test of standard uniformity to be used for the `two_level_test`. unif01: Unif01Statistics, } impl Suite { /// Create a new Suite with the default configuration using the provided set of selections for /// statistics and generators to run. pub fn new(stats: Vec, gens: Vec) -> Self { // Sorting is required to maintain the guarantee of repeatability of runs given the same // generators, statistics, and configuration. This lets us give the same generated samples // to the correct tests in order. let mut inner_stats = stats.clone(); let mut inner_gens = gens.clone(); inner_stats.sort(); inner_gens.sort(); Suite { stats: inner_stats, gens: inner_gens, psamples: 100, multiplier: 2.0, // start out with a superseed generated by the OsRng superseed: os::OsRng::new().unwrap().next_u64(), unif01: Unif01Statistics::KolmogorovSmirnov, } } /// Use the provided statistic on standard uniform data on this suite's runs. pub fn unif01(mut self, unif01: Unif01Statistics) -> Self { self.unif01 = unif01; self } /// Use the provided superseed as the initial seed for generating each individual generator's /// seed. /// /// Because we want RandTest to be repeatable, we need to be able to provide the same exact /// seed values on multiple runs to every generator involved. This superseed is a single u64 /// value so that it can be provided on the command-line as a small integer in text format that /// is more memorable for a user. pub fn superseed(mut self, superseed: u64) -> Self { self.superseed = superseed; self } /// Select how many p-values are to be collected and tested in each two-level test for the /// combinations of statistics and generators. /// /// The p-values have hypothesis testing for standard uniformity applied on them to produce the /// final reported p-value. Increasing `psamples` effectively increases the sample-size of this /// second level of hypothesis testing. pub fn psamples(mut self, psamples: u64) -> Self{ self.psamples = psamples; self } /// A positive multiplicative factor applied to the minimum recommended sample-size for each /// statistic in the individual one-level tests. /// /// For example, use `multiplier` equal to 2 for one-level tests that have 2x the number of /// samples. Try 100 for 100x as many samples, but be prepared to wait on that big of a run. /// /// Panics on non-positive inputs. pub fn multiplier(mut self, multiplier: f64) -> Self { debug_assert!(multiplier > 0.0f64); self.multiplier = multiplier; self } /// Running a [`Suite`] perform a two-level test on each element of the Cartesian product of /// the statistics and the generators in parallel. /// /// TODO: Calculate the needed sample sizes in advance, draw all samples in parallel over each /// generator upfront, and then pass each two_level_test its (multiple concatenated) samples /// and not its generator. pub fn run(&self) -> Vec { self.inner_parallelism_run() } /// Creates instances of the configured generators and pairs them with their matching enum /// variant as a label. fn create_labelled_gens(&self) -> Vec<(Arc>, Generators)> { let gens: Vec>> = self.gens.iter().map(|gen| gen.create().unwrap()).collect(); gens .into_iter() .zip(self.gens.clone().into_iter()) .collect() } /// An implementation of the Suite::run() method where the statistics are looped over /// sequentially and within each iteration each generator is tested against that statistic in /// parallel. #[allow(unused)] fn inner_parallelism_run(&self) -> Vec { let labelled_gens = self.create_labelled_gens(); self.stats .clone() .into_iter() .map(|ref stat| { labelled_gens.par_iter() .map(|gen_tuple| { Suite::run_kernel(&(*gen_tuple).0, &(*gen_tuple).1, &stat, &self.unif01, self.psamples, self.multiplier) }).collect::>() }) .flat_map(|vec_outcomes: Vec| vec_outcomes.into_iter()) .collect::>() } /// An implementation of the Suite::run() method where the creation of generators happens in /// parallel and within each thread of execution each statistic is looped over sequentially. #[allow(unused)] fn outer_parallelism_run(&self) -> Vec { let labelled_gens = self.create_labelled_gens(); labelled_gens .into_par_iter() .map(|ref gen_tuple| { self.stats.iter() .map(|stat| { Suite::run_kernel(&(*gen_tuple).0, &(*gen_tuple).1, &stat, &self.unif01, self.psamples, self.multiplier) }).collect::>() }) .flat_map(|vec_outcomes: Vec| vec_outcomes.into_par_iter()) .collect::>() } /// The inner kernel core computation inherent inside all the run() methods' implementations. fn run_kernel(gen: &Arc>, gen_variant: &Generators, stat: &Statistics, unif01: &Unif01Statistics, psamples: u64, multiplier: f64) -> TestOutcome { let start_time = PreciseTime::now(); let mut generator = gen.lock().unwrap(); let p_value: PValue = two_level_test( &mut *generator, &stat, &unif01, psamples, multiplier ); TestOutcome { gen: gen_variant.clone(), stat: stat.clone(), p_value: p_value, duration: start_time.to(PreciseTime::now()) } } } #[cfg(test)] mod tests { #[cfg(feature = "nightly")] use test::Bencher; use suite; //use gen::Generators; //use stats::{Statistics, Unif01Statistics}; //#[test] //// TODO: implement quickcheck::Arbitrary on Generators, Statistics, and Unif01Statistics for //// this to work //quickcheck! { // fn suite_run_smoke(gens: Vec, stats: Vec, unif01: Unif01Statistics) -> bool { // let gens = gens.iter().filter(|x| match x { // Generators::File(_) => false, // Generators::Stdin => false, // _ => true, // }); // let results = suite::Suite::new(gens, stats) // .psamples(10) // .multiplier(2) // .superseed(10) // .unif01(unif01).run(); // true // } //} #[test] fn outer_and_inner_parallelism_run_implementations_equal() { let fullsuite = suite::full_suite().psamples(10).multiplier(2.).superseed(10); assert_eq!(fullsuite.inner_parallelism_run(), fullsuite.outer_parallelism_run()); } #[cfg(feature = "nightly")] #[bench] fn inner_parallelism_run_full_suite(b: &mut Bencher) { let fullsuite = suite::full_suite().psamples(100).multiplier(5.).superseed(100); b.iter(|| { assert!(fullsuite.inner_parallelism_run().len() > 0); }); } #[cfg(feature = "nightly")] #[bench] fn outer_parallelism_run_full_suite(b: &mut Bencher) { let fullsuite = suite::full_suite().psamples(100).multiplier(5.).superseed(100); b.iter(|| { assert!(fullsuite.outer_parallelism_run().len() > 0); }); } }