"""Generate random haystack files and a Rust benchmark source for Teddy.

Running this script writes ``random.rs`` (a Rust file made of the benchmark
preamble, needle tables and ``bench!`` invocations) and one haystack text file
per configuration under ``data/``.  The random generator is seeded with a
fixed value so repeated runs on the same interpreter produce the same output.
"""
import os
import random
import string

random.seed(16092015)

# string.printable without its last three characters ('\r', '\x0b', '\x0c').
PRINTABLE = list(string.printable[:-3])
TEXT_LENGTH = 2**16


def gen_text(needles, rarity, length):
    """Return a random string of exactly ``length`` characters.

    On every step either a whole needle is planted (with probability
    ``1 / rarity``) or a single character from ``PRINTABLE`` is appended.
    A random character that would complete one of the needles at the end of
    the text is discarded and redrawn, so that filler does not create needle
    occurrences by chance.  Planted needles are not checked this way.

    ``needles`` must be a non-empty sequence of strings.
    """
    parts = []
    tail = ''
    cur_len = 0
    n_len = max(len(n) for n in needles)
    needle_suffixes = tuple(needles)  # str.endswith accepts a tuple

    while cur_len < length:
        # Keep the order of random calls (randint, then choice) stable so the
        # seeded output does not change.
        planted = random.randint(1, rarity) == 1
        next_elt = random.choice(needles if planted else PRINTABLE)
        next_tail = tail + next_elt

        # Check to see if we put a needle at the end of the string just by
        # random chance. (If we did, we won't keep it.)
        if not planted and next_tail.endswith(needle_suffixes):
            continue

        cur_len += len(next_elt)
        parts.append(next_elt)
        # Only the last n_len characters can take part in a future match.
        tail = next_tail[-n_len:]

    # A needle planted at the very end may overshoot; trim to the exact size.
    return ''.join(parts)[:length]


def gen_needles(number, length):
    """Return ``number`` random strings, each ``length`` PRINTABLE characters."""
    return [
        ''.join(random.choice(PRINTABLE) for _ in range(length))
        for _ in range(number)
    ]


def count_matches(text, needles):
    """Count the positions in ``text`` where at least one needle starts.

    This mirrors the loop in the generated ``bench!`` macro, which restarts
    the search one byte after the start of the previous match.  Unlike
    summing ``str.count`` per needle, it counts self-overlapping occurrences
    and counts a position once even if the needle list contains duplicates.

    NOTE(review): this assumes ``Teddy::find`` reports the leftmost match --
    confirm against the teddy crate.
    """
    prefixes = tuple(set(needles))
    return sum(1 for i in range(len(text)) if text.startswith(prefixes, i))


def rust_str_literal(s):
    """Return ``s`` as a double-quoted string literal for the Rust output.

    The escaping is taken from Python's ``repr`` with the surrounding quotes
    stripped; double quotes are then backslash-escaped.
    """
    body = repr(s)[1:-1].replace('"', '\\"')
    return '"' + body + '"'


# Rust source emitted at the top of random.rs: crate setup plus the `bench!`
# macro that every generated benchmark line expands through.
preamble = """// This file was generated by text_gen.py.
#![feature(cfg_target_feature, test)]

extern crate simd;
extern crate test;
extern crate teddy;

use test::Bencher;
use teddy::Teddy;

macro_rules! bench {
    ($name:ident, $pats:expr, $haystack:expr, $count:expr) => {
        #[bench]
        fn $name(b: &mut Bencher) {
            let ted = Teddy::new($pats.iter().map(|s| s.as_bytes())).unwrap();
            b.bytes = $haystack.len() as u64;
            b.iter(|| {
                let mut hay = $haystack.as_bytes();
                let mut count = 0;
                while let Some(mat) = ted.find(hay) {
                    count += 1;
                    hay = &hay[(mat.start + 1)..];
                }
                assert_eq!(count, $count);
            });
        }
    }
}

"""

needles_numbers = [2, 8, 16, 32]
needles_lengths = [1, 2, 3]
# NOTE(review): 8196 may have been meant as 8192 (2**13) -- confirm.  It is
# left unchanged because the value is part of the generated names.
needle_rarities = [8196, 1024, 128, 16]

# Output handle for random.rs; bound by main() while the script runs.
rs_file = None


def gen_set(name_prefix, needles_numbers, needles_lengths, needle_rarities,
            text_lengths, out=None):
    """Generate one family of benchmarks.

    For every (needle count, needle length) pair a needle table is written to
    the Rust file.  For every rarity a haystack of ``TEXT_LENGTH`` characters
    is generated and saved under ``data/``, and one ``bench!`` line is emitted
    per entry of ``text_lengths`` with the expected match count for that
    prefix of the haystack.

    ``out`` is the writable text file receiving the Rust source; when omitted
    the module-level ``rs_file`` is used.
    """
    if out is None:
        out = rs_file

    # Make sure the haystack directory exists before writing into it.
    if not os.path.isdir('data'):
        os.makedirs('data')

    for n_num in needles_numbers:
        for n_len in needles_lengths:
            needles = gen_needles(n_num, n_len)
            varname = '%s_%d_%d' % (name_prefix, n_num, n_len)
            needles_rust = '[' + ', '.join(rust_str_literal(n) for n in needles) + ']'
            out.write("static %s: [&'static str; %d] = %s;\n"
                      % (varname.upper(), n_num, needles_rust))

            for rarity in needle_rarities:
                print('%s %d %d %d' % (name_prefix, n_num, n_len, rarity))
                filename = 'data/%s_%d_%d_%d.txt' % (name_prefix, n_num, n_len, rarity)
                text = gen_text(needles, rarity, TEXT_LENGTH)
                with open(filename, 'w') as f:
                    f.write(text)

                hayname = '%s_%d_%d_%d' % (name_prefix, n_num, n_len, rarity)
                out.write("""\nstatic %s: &'static str = include_str!("%s");\n"""
                          % (hayname.upper(), filename))

                for tl in text_lengths:
                    count = count_matches(text[:tl], needles)
                    out.write("bench!({}_{}, {}, {}[0..{}], {});\n".format(
                        hayname, tl, varname.upper(), hayname.upper(), tl, count))


def main():
    """Write random.rs and all haystack files."""
    global rs_file
    # The with-block guarantees random.rs is flushed and closed.
    with open('random.rs', 'w') as rs_file:
        rs_file.write(preamble)
        gen_set('rarity', [8], [1, 2, 3], needle_rarities, [2**16])
        gen_set('needle_num', [8, 16, 32], [3], [1024], [2**16])
        gen_set('text_len', [8], [1, 2, 3], [2**16], [16, 32, 64, 128, 256, 512])


if __name__ == '__main__':
    main()