#[macro_use] extern crate criterion; use criterion::{Criterion, Throughput}; use tokenizers::Tokenizer; pub fn llama3(c: &mut Criterion) { let data = std::fs::read_to_string("data/big.txt").unwrap(); let mut group = c.benchmark_group("llama3-encode"); group.throughput(Throughput::Bytes(data.bytes().len() as u64)); group.bench_function("llama3-offsets", |b| { let tokenizer = Tokenizer::from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct", None).unwrap(); let data: Vec<_> = data.lines().collect(); let add_special_tokens = false; b.iter(|| { tokenizer .encode_batch_char_offsets(criterion::black_box(data.clone()), add_special_tokens) .unwrap() }) }); group.bench_function("llama3-nooffsets", |b| { let tokenizer = Tokenizer::from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct", None).unwrap(); let data: Vec<_> = data.lines().collect(); let add_special_tokens = false; b.iter(|| { tokenizer .encode_batch(criterion::black_box(data.clone()), add_special_tokens) .unwrap() }) }); group.finish(); } criterion_group! { name = bert_benches; config = Criterion::default().sample_size(10); targets = llama3 } criterion_main!(bert_benches);