| Field | Value |
|-------|-------|
| Crates.io | roketok |
| lib.rs | roketok |
| version | 0.3.1 |
| created_at | 2025-07-11 15:28:34.815254+00 |
| updated_at | 2025-07-18 09:02:05.986466+00 |
| description | This crate provides a simple way to set up and use a tokenizer. It is not recommended for very simple tokenizers, as it adds a fair amount of machinery to support many, if not all, kinds of tokenizers. |
| homepage | |
| repository | https://github.com/rok3tt/roketok.git |
| max_upload_size | |
| id | 1748072 |
| size | 19,232 |
> [!WARNING]
> roketok is still under constant change, though finalisation is near.
A simple tokenization library, focused on ease of use.
If you find an issue, whether a performance problem or a bug in general, please report it on the repository's issue tracker.
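The example below tokenizes a small C snippet, exercising three kinds of token configuration: closure-based `Rule` matchers, fixed-sequence `Boring` matchers, and `Branch` matchers for paired delimiters such as parentheses.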
```rust
use roketok::prelude::*;

// Every kind of token the tokenizer can emit. `Invalid` is the
// `#[default]` fallback variant.
#[derive(Default)]
enum TokenKind {
    Identifier,
    Number,
    Asterisk,
    Ampersand,
    Semicolon,
    Equal,
    AddEqual,
    Parenthesis,
    #[default]
    Invalid,
}

fn main() {
    // A small C snippet to tokenize.
    let contents = r#"
void foo(int *value) {
    *value += 35;
}

int main(void) {
    int value = 34;
    foo(&value);
    return value;
}
"#;

    let config = Configuration::new()
        .add_tokens([
            // Identifier: an alphabetic character followed by any
            // number of alphanumeric characters.
            (TokenConfiguration::Rule(&|iter, _| {
                if let Some(char) = iter.last() {
                    if !char.is_alphabetic() { return false; }
                    while let Some(char) = iter.peek() {
                        if !char.is_alphanumeric() { break; }
                        let _ = iter.next();
                    }
                    return true;
                }
                false
            }), TokenKind::Identifier),
            // Number: a digit followed by any number of alphanumeric
            // characters.
            (TokenConfiguration::Rule(&|iter, _| {
                if let Some(char) = iter.last() {
                    if !char.is_numeric() { return false; }
                    while let Some(char) = iter.peek() {
                        if !char.is_alphanumeric() { break; }
                        let _ = iter.next();
                    }
                    return true;
                }
                false
            }), TokenKind::Number),
            // Fixed character sequences.
            (TokenConfiguration::Boring(&['*']), TokenKind::Asterisk),
            (TokenConfiguration::Boring(&['&']), TokenKind::Ampersand),
            (TokenConfiguration::Boring(&['=']), TokenKind::Equal),
            (TokenConfiguration::Boring(&['+', '=']), TokenKind::AddEqual),
            (TokenConfiguration::Boring(&[';']), TokenKind::Semicolon),
            // Paired delimiters: opening and closing sequences.
            (TokenConfiguration::Branch(&['('], &[')']), TokenKind::Parenthesis),
        ]);

    let mut tokenizer = Tokenizer::new(&config, contents);
    let _tree = tokenizer.build(); // Build the token tree (unused here).
}
```
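If the peek-and-consume pattern used by the `Rule` closures is unfamiliar, here is a minimal standalone sketch of the same identifier logic written against the standard library's `Peekable`. The function and driver code are illustrative only and not part of roketok's API:

```rust
use std::iter::Peekable;
use std::str::Chars;

// Illustrative only: the identifier rule from the example above,
// rewritten against a plain `Peekable<Chars>` instead of roketok's
// own character iterator.
fn match_identifier(iter: &mut Peekable<Chars<'_>>) -> bool {
    // The first character must be alphabetic.
    match iter.next() {
        Some(c) if c.is_alphabetic() => {}
        _ => return false,
    }
    // Consume trailing alphanumeric characters, peeking first so the
    // rule never overshoots past the end of the identifier.
    while let Some(c) = iter.peek() {
        if !c.is_alphanumeric() {
            break;
        }
        let _ = iter.next();
    }
    true
}

fn main() {
    let mut iter = "foo(".chars().peekable();
    assert!(match_identifier(&mut iter));
    // The rule stops at the first non-identifier character.
    assert_eq!(iter.next(), Some('('));
}
```

The `Number` rule in the example has the same shape, with `is_numeric` as the first-character test.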