Crates.io | neotoma |
lib.rs | neotoma |
version | 0.1.1 |
created_at | 2025-08-05 15:59:10.076618+00 |
updated_at | 2025-08-05 16:31:20.286259+00 |
description | A flexible, cached parser combinator framework for Rust. |
homepage | https://git.sr.ht/~djarb/neotoma |
repository | https://git.sr.ht/~djarb/neotoma |
max_upload_size | |
id | 1782169 |
size | 468,008 |
A flexible, cached parser combinator framework for Rust with built-in memoization and backtracking capabilities.
- `Read + Seek` implementation
- `parking_lot` for efficient synchronization primitives

Add Neotoma to your `Cargo.toml`:
[dependencies]
neotoma = "0.1.0"
use neotoma::prelude::*;
use neotoma::{seq, oneof};
use std::io::Cursor;
// Parse "hello world" with optional whitespace
let parser = seq![
Literal::from_str("hello"),
Optional::new(Utf8Class::whitespace()),
Literal::from_str("world")
];
let input = Cursor::new(b"hello world");
let mut source = Source::new(input);
match parse(parser, &mut source) {
Ok(result) => println!("Parsed: {:?}", result),
Err(e) => println!("Parse error: {:?}", e),
}
use neotoma::prelude::*;
// Parse one or more digits
let numbers = Utf8Class::with_min("0123456789", 1);
// Parse alphabetic characters
let letters = Utf8Class::alpha();
// Parse alphanumeric with bounds
let identifier = Utf8Class::alphanumeric().with_bounds(1, 20);
use neotoma::{seq, oneof};
use neotoma::prelude::*;
// Sequential composition
let sequence = seq![
Literal::from_str("if"),
Utf8Class::whitespace(),
Literal::from_str("true")
];
// Alternative composition
let choice = oneof![
Literal::from_str("true"),
Literal::from_str("false"),
Utf8Class::digits()
];
use neotoma::prelude::*;
// Zero or more digits
let numbers = Repeat::new(Utf8Class::digits());
// Comma-separated list
let csv = Repeat::new(Utf8Class::alpha())
.with_joint(Literal::from_str(","));
// Bounded repetition
let bounded = Repeat::new(Utf8Class::alpha())
.with_bounds(2, 5);
Neotoma includes a self-parsing grammar system that allows you to
define complex parsers using a declarative syntax. The GrammarParser
can parse grammar definitions and produce executable parsers.
use neotoma::grammar::GrammarParser;
use neotoma::prelude::*;
use std::io::Cursor;
let grammar_text = r#"
@start expression
expression = (number "+" number)
number = digits
"#;
let grammar_parser = GrammarParser::new();
let mut input = Cursor::new(grammar_text.as_bytes());
let mut source = Source::new(&mut input);
let grammar = parse(grammar_parser, &mut source).unwrap();
// Use the grammar to parse expressions
let mut expr_input = Cursor::new(b"42+24");
let mut expr_source = Source::new(&mut expr_input);
let result = parse(grammar, &mut expr_source).unwrap();
"text"
- matches literal string (use \" for escaped quotes)
eof
- matches end of file (ensures complete input consumption)
digits
- matches one or more decimal digits (0-9)
alpha
- matches one or more alphabetic characters (ASCII)
alphanumeric
- matches one or more alphanumeric characters (ASCII)
whitespace
- matches one or more whitespace characters (ASCII)
hexdigits
- matches one or more hexadecimal digits (0-9, a-f, A-F)
udigits, ualpha, ualphanumeric, uwhitespace
- UTF-8 equivalents
[abc]
- matches any character in the set (custom UTF-8 character class)
[^abc]
- matches any character NOT in the set (negated UTF-8 character class)
(A B C)
- matches A followed by B followed by C (sequence)
(| A B C)
- matches either A or B or C (alternatives)
(* A)
- matches zero or more instances of A
(+ A)
- matches one or more instances of A
(? A)
- matches zero or one instances of A
(* A / B)
- matches zero or more A's separated by B
(+ A / B)
- matches one or more A's separated by B
name = rule
- defines a named parsing rule
name
- references a named rule (enables recursion)
@start name
- sets the starting rule (defaults to last rule if not specified)

let arithmetic_grammar = r#"
@start complete_expression
complete_expression = (expression eof)
expression = additive_expr
additive_expr = (| addition subtraction multiplicative_expr)
addition = (multiplicative_expr (? whitespace) "+" (? whitespace) additive_expr)
subtraction = (multiplicative_expr (? whitespace) "-" (? whitespace) additive_expr)
multiplicative_expr = (| multiplication division primary_expr)
multiplication = (primary_expr (? whitespace) "*" (? whitespace) multiplicative_expr)
division = (primary_expr (? whitespace) "/" (? whitespace) multiplicative_expr)
primary_expr = (| number variable parenthesized_expr)
number = digits
variable = alpha
parenthesized_expr = ("(" (? whitespace) expression (? whitespace) ")")
"#;
This grammar handles:
Neotoma uses a Template Method Pattern for its core `Parser` trait:
- `read()`: Implement your parsing logic here
- `parse()`: Public API that automatically handles caching and backtracking
- `id()`: Override for parameterized parsers to avoid cache conflicts

The `tests/` directory contains comprehensive examples:
Python bindings are available in the `neotoma-py/` directory. See the Python README for details.
Licensed under either of:
at your option.
Contributions are welcome! Please feel free to submit a patch.