#![feature(test)] extern crate test; extern crate collenchyma as co; extern crate collenchyma_blas as co_blas; extern crate rblas; extern crate rand; use test::Bencher; use co::prelude::*; use co_blas::plugin::*; use rand::{thread_rng, Rng}; fn backend() -> Backend { Backend::::default().unwrap() } #[bench] fn bench_1000_dot_100_rblas(b: &mut Bencher) { let mut rng = thread_rng(); let slice_a = rng.gen_iter::().take(100).collect::>(); let slice_b = rng.gen_iter::().take(100).collect::>(); b.iter(|| { for _ in 0..1000 { let res = rblas::Dot::dot(&slice_a, &slice_b); test::black_box(res); } }); } #[bench] fn bench_1000_dot_100_collenchyma(b: &mut Bencher) { let mut rng = thread_rng(); let slice_a = rng.gen_iter::().take(100).collect::>(); let slice_b = rng.gen_iter::().take(100).collect::>(); let backend = backend(); let shared_a = &mut SharedTensor::::new(backend.device(), &100).unwrap(); let shared_b = &mut SharedTensor::::new(backend.device(), &100).unwrap(); let shared_res = &mut SharedTensor::::new(backend.device(), &()).unwrap(); shared_a.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_a); shared_b.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_b); let _ = backend.dot(shared_a, shared_b, shared_res); bench_1000_dot_100_collenchyma_profile(b, &backend, shared_a, shared_b, shared_res); } #[inline(never)] fn bench_1000_dot_100_collenchyma_profile( b: &mut Bencher, backend: &Backend, shared_a: &mut SharedTensor, shared_b: &mut SharedTensor, shared_res: &mut SharedTensor ) { b.iter(|| { for _ in 0..1000 { let _ = backend.dot(shared_a, shared_b, shared_res); } }); } #[bench] fn bench_1000_dot_100_collenchyma_plain(b: &mut Bencher) { let mut rng = thread_rng(); let slice_a = rng.gen_iter::().take(100).collect::>(); let slice_b = rng.gen_iter::().take(100).collect::>(); let backend = backend(); let shared_a = &mut SharedTensor::::new(backend.device(), &100).unwrap(); let shared_b = &mut SharedTensor::::new(backend.device(), &100).unwrap(); let shared_res = &mut SharedTensor::::new(backend.device(), &()).unwrap(); shared_a.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_a); shared_b.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_b); let _ = backend.dot(shared_a, shared_b, shared_res); bench_1000_dot_100_collenchyma_plain_profile(b, &backend, shared_a, shared_b, shared_res); } #[inline(never)] fn bench_1000_dot_100_collenchyma_plain_profile( b: &mut Bencher, backend: &Backend, shared_a: &mut SharedTensor, shared_b: &mut SharedTensor, shared_res: &mut SharedTensor ) { b.iter(|| { for _ in 0..1000 { let _ = backend.dot_plain(shared_a, shared_b, shared_res); } }); } #[bench] fn bench_100_dot_1000_rblas(b: &mut Bencher) { let mut rng = thread_rng(); let slice_a = rng.gen_iter::().take(1000).collect::>(); let slice_b = rng.gen_iter::().take(1000).collect::>(); b.iter(|| { for _ in 0..100 { let res = rblas::Dot::dot(&slice_a, &slice_b); test::black_box(res); } }); } #[bench] fn bench_100_dot_1000_collenchyma(b: &mut Bencher) { let mut rng = thread_rng(); let slice_a = rng.gen_iter::().take(1000).collect::>(); let slice_b = rng.gen_iter::().take(1000).collect::>(); let backend = backend(); let shared_a = &mut SharedTensor::::new(backend.device(), &1000).unwrap(); let shared_b = &mut SharedTensor::::new(backend.device(), &1000).unwrap(); let shared_res = &mut SharedTensor::::new(backend.device(), &()).unwrap(); shared_a.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_a); shared_b.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_b); let _ = backend.dot(shared_a, shared_b, shared_res); bench_100_dot_1000_collenchyma_profile(b, &backend, shared_a, shared_b, shared_res); } #[inline(never)] fn bench_100_dot_1000_collenchyma_profile( b: &mut Bencher, backend: &Backend, shared_a: &mut SharedTensor, shared_b: &mut SharedTensor, shared_res: &mut SharedTensor ) { b.iter(|| { for _ in 0..100 { let _ = backend.dot(shared_a, shared_b, shared_res); } }); } #[bench] fn bench_50_dot_2000_collenchyma(b: &mut Bencher) { let mut rng = thread_rng(); let slice_a = rng.gen_iter::().take(2000).collect::>(); let slice_b = rng.gen_iter::().take(2000).collect::>(); let backend = backend(); let shared_a = &mut SharedTensor::::new(backend.device(), &2000).unwrap(); let shared_b = &mut SharedTensor::::new(backend.device(), &2000).unwrap(); let shared_res = &mut SharedTensor::::new(backend.device(), &()).unwrap(); shared_a.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_a); shared_b.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_b); let _ = backend.dot(shared_a, shared_b, shared_res); bench_50_dot_2000_collenchyma_profile(b, &backend, shared_a, shared_b, shared_res); } #[inline(never)] fn bench_50_dot_2000_collenchyma_profile( b: &mut Bencher, backend: &Backend, shared_a: &mut SharedTensor, shared_b: &mut SharedTensor, shared_res: &mut SharedTensor ) { b.iter(|| { for _ in 0..50 { let _ = backend.dot(shared_a, shared_b, shared_res); } }); } #[bench] fn bench_10_dot_10000_rblas(b: &mut Bencher) { let mut rng = thread_rng(); let slice_a = rng.gen_iter::().take(10000).collect::>(); let slice_b = rng.gen_iter::().take(10000).collect::>(); b.iter(|| { for _ in 0..10 { let res = rblas::Dot::dot(&slice_a, &slice_b); test::black_box(res); } }); } #[bench] fn bench_10_dot_10000_collenchyma(b: &mut Bencher) { let mut rng = thread_rng(); let slice_a = rng.gen_iter::().take(10000).collect::>(); let slice_b = rng.gen_iter::().take(10000).collect::>(); let backend = backend(); let shared_a = &mut SharedTensor::::new(backend.device(), &10000).unwrap(); let shared_b = &mut SharedTensor::::new(backend.device(), &10000).unwrap(); let shared_res = &mut SharedTensor::::new(backend.device(), &()).unwrap(); shared_a.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_a); shared_b.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_b); let _ = backend.dot(shared_a, shared_b, shared_res); bench_10_dot_10000_collenchyma_profile(b, &backend, shared_a, shared_b, shared_res); } #[inline(never)] fn bench_10_dot_10000_collenchyma_profile( b: &mut Bencher, backend: &Backend, shared_a: &mut SharedTensor, shared_b: &mut SharedTensor, shared_res: &mut SharedTensor ) { b.iter(|| { for _ in 0..10 { let _ = backend.dot(shared_a, shared_b, shared_res); } }); } #[bench] fn bench_5_dot_20000_rblas(b: &mut Bencher) { let mut rng = thread_rng(); let slice_a = rng.gen_iter::().take(20000).collect::>(); let slice_b = rng.gen_iter::().take(20000).collect::>(); b.iter(|| { for _ in 0..5 { let res = rblas::Dot::dot(&slice_a, &slice_b); test::black_box(res); } }); } #[bench] fn bench_5_dot_20000_collenchyma(b: &mut Bencher) { let mut rng = thread_rng(); let slice_a = rng.gen_iter::().take(20000).collect::>(); let slice_b = rng.gen_iter::().take(20000).collect::>(); let backend = backend(); let shared_a = &mut SharedTensor::::new(backend.device(), &20000).unwrap(); let shared_b = &mut SharedTensor::::new(backend.device(), &20000).unwrap(); let shared_res = &mut SharedTensor::::new(backend.device(), &()).unwrap(); shared_a.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_a); shared_b.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_b); let _ = backend.dot(shared_a, shared_b, shared_res); bench_5_dot_20000_collenchyma_profile(b, &backend, shared_a, shared_b, shared_res); } #[inline(never)] fn bench_5_dot_20000_collenchyma_profile( b: &mut Bencher, backend: &Backend, shared_a: &mut SharedTensor, shared_b: &mut SharedTensor, shared_res: &mut SharedTensor ) { b.iter(|| { for _ in 0..5 { let _ = backend.dot(shared_a, shared_b, shared_res); } }); }