| Crates.io | leqx |
| lib.rs | leqx |
| version | 0.0.1 |
| created_at | 2025-08-04 17:06:43.715714+00 |
| updated_at | 2025-08-04 17:06:43.715714+00 |
| description | Simple regex-automata based lexer, as a proc macro |
| homepage | |
| repository | https://github.com/thequux/leqx |
| max_upload_size | |
| id | 1780956 |
| size | 7,524 |
A simple lexer generator based on regex_automata
Warning: this is alpha-quality code (as evidenced by the 0.0.1 version); it has not been exhaustively tested or documented. Unless you wish to help with either, I recommend waiting for version 0.2 (though feel free to prod me to get there)
In particular, the API is likely to change to support the following:
- `&[u8]` tokens

use leqx::leqxer;
use regex_automata::{Anchored, Input};
/// A lexical token produced by [`Lexer`].
///
/// `Word` borrows its text directly from the input string, so tokens
/// are tied to the lifetime `'a` of the source being lexed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token<'a> {
    /// A run of lowercase ASCII letters (rule `"[a-z]+"`).
    Word(&'a str),
    /// A run of decimal digits (rule `"[0-9]+"`), parsed as a signed integer.
    Number(isize),
}
leqxer! {
    /// Lexer state threaded through every rule action below: tracks the
    /// current source position so each emitted token carries (line, column).
    #[derive(Default)]
    struct State {
        line: usize,
        column: usize,
    }

    #[leqxer(dfa=sparse, embed=true)]
    mode lex_raw(&mut self, tok) -> Option<(usize, usize, Token)> {
        // Horizontal whitespace: advance the column, emit no token.
        "[ \t]+" => {
            self.column += tok.len();
            None
        },
        // Line break (\r\n, \n, or bare \r): reset column, bump line.
        "\r?\n|\r" => {
            self.column = 0;
            self.line += 1;
            None
        },
        // Lowercase word: record the column where it started, then
        // advance past it.
        "[a-z]+" => {
            let col = self.column;
            self.column += tok.len();
            Some((self.line, col, Token::Word(tok)))
        },
        // FIX: this arm previously emitted `Token::Word(tok)`, leaving the
        // `Number` variant unused; digit runs now parse into `Number`.
        // NOTE(review): assumes `tok` is `&str` here (as the Word arm
        // suggests); a long digit run can still overflow `isize` and
        // panic via unwrap — confirm desired overflow handling.
        "[0-9]+" => {
            let col = self.column;
            self.column += tok.len();
            Some((self.line, col, Token::Number(tok.parse().unwrap())))
        }
    }
}
/// Streaming lexer: couples the `leqxer!`-generated `State` (position
/// tracking) with a `regex_automata` search window over the input text.
pub struct Lexer<'a> {
    // Line/column counters mutated by the rule actions in `lex_raw`.
    state: State,
    // Search position over the source; advanced by each `lex_raw` call.
    // Anchored (see `new`) so every match starts where the last one ended.
    input: regex_automata::Input<'a>,
}
impl<'a> Lexer<'a> {
pub fn new(input: &'a str) -> Self {
Self {
state: State::default(),
input: regex_automata::Input::new(input).anchored(Anchored::Yes),
}
}
}
impl<'a> Iterator for Lexer<'a> {
    type Item = (usize, usize, Token<'a>);

    /// Pulls results from the generated `lex_raw` mode (named after the
    /// `mode` declared in the `leqxer!` block), skipping rules whose
    /// action produced `None` (whitespace and newlines).
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // The outer `?` ends iteration when `lex_raw` itself returns
            // None — presumably end of input; confirm against leqx docs.
            if let Some(item) = self.state.lex_raw(&mut self.input)? {
                return Some(item);
            }
            // Inner None: the rule consumed input without emitting a
            // token, so keep scanning.
        }
    }
}