| Crates.io | bytepiece_rs |
| lib.rs | bytepiece_rs |
| version | 0.2.2 |
| created_at | 2023-09-18 23:55:16.673846+00 |
| updated_at | 2023-11-12 08:47:09.081883+00 |
| description | The Bytepiece Tokenizer Implemented in Rust |
| homepage | |
| repository | https://github.com/hscspring/bytepiece-rs |
| max_upload_size | |
| id | 976379 |
| size | 5,819,268 |
use bytepiece_rs::Tokenizer;
let tokenizer = Tokenizer::new();
// or load a custom model
let tokenizer = Tokenizer::load_from("/path/to/model");
let text = "今天天气不错";
let ids = tokenizer.encode(text, false, false, 0.0); // last argument is `alpha` (Rust has no named arguments)
assert_eq!(ids, vec![40496, 45268, 39432]);
let text2 = tokenizer.decode(ids);
assert_eq!(text2, text);
cargo test
cargo bench -- --plotting-backend gnuplot