Crates.io | yap_streaming |
lib.rs | yap_streaming |
version | 0.2.0 |
source | src |
created_at | 2023-10-24 02:08:38.261514 |
updated_at | 2023-11-21 05:45:41.313695 |
description | Yap extension crate for parsing streams |
homepage | https://github.com/Easyoakland/yap_streaming |
repository | https://github.com/Easyoakland/yap_streaming |
max_upload_size | |
id | 1012016 |
size | 28,834 |
This crate builds on the interfaces from `yap` to allow simple parsing of streams.
There already exist many crates that intend to help with parsing.
Of those, `nom`, `winnow`, `chumsky`, and `combine` support parsing streams of values.
`nom`:
`winnow`:
`chumsky` is not designed for speed.
`combine` is complicated.
This crate allows using an already written `yap` parser by simply changing the initial tokens declaration.
use std::{
fs::File,
io::{self, BufReader, Read},
};
use yap_streaming::{
// Allows you to use `.into_tokens()` on strings and slices
// to get an instance of `Tokens`:
IntoTokens,
// Allows you to get an instance of `Tokens` that supports streams:
StrStreamTokens,
// This trait has all of the parsing methods on it:
Tokens,
};
// Write parser
// =========================================
// The arithmetic operators the example parser understands.
#[derive(Debug, PartialEq)]
enum Op {
    // Addition.
    Plus,
    // Subtraction.
    Minus,
    // Multiplication.
    Multiply,
}
// A single parsed item: either an operator or a numeric value.
#[derive(Debug, PartialEq)]
enum OpOrDigit {
    // An operator.
    Op(Op),
    // A number, already converted to `u32`.
    Digit(u32),
}
// The `Tokens` trait builds on `Iterator`, so we get a `next` method.
// Tries to read one operator character (`+`, `-` or `x`) from `t`.
//
// Returns the matching `Op` on success. If the next token is some other
// character, the token position is restored to where it was on entry and
// `None` is returned. If there is no next token at all, `None` is returned.
fn parse_op(t: &mut impl Tokens<Item = char>) -> Option<Op> {
    // Remember the starting position so a non-operator can be "un-read".
    let start = t.location();
    let parsed = match t.next()? {
        '+' => Some(Op::Plus),
        '-' => Some(Op::Minus),
        'x' => Some(Op::Multiply),
        _ => None,
    };
    if parsed.is_none() {
        t.set_location(start);
    }
    parsed
}
// We also get other useful functions..
// Takes the leading run of decimal digit characters from `t` and parses it
// as a `u32`. Returns `None` when that parse does not succeed.
fn parse_digits(t: &mut impl Tokens<Item = char>) -> Option<u32> {
    match t.take_while(|ch| ch.is_ascii_digit()).parse::<u32, String>() {
        Ok(value) => Some(value),
        Err(_) => None,
    }
}
// Wraps `t` in a token source that yields `OpOrDigit` items: numbers parsed by
// `parse_digits`, separated by operators parsed by `parse_op`.
fn parse_all(t: &mut impl Tokens<Item = char>) -> impl Tokens<Item = OpOrDigit> + '_ {
    t.sep_by_all(
        // Element parser: a number, with ASCII whitespace skipped around it.
        |toks| {
            toks.surrounded_by(
                |inner| Some(OpOrDigit::Digit(parse_digits(inner)?)),
                |ws| {
                    ws.skip_while(char::is_ascii_whitespace);
                },
            )
        },
        // Separator parser: a single operator.
        |toks| Some(OpOrDigit::Op(parse_op(toks)?)),
    )
}
// Now we've parsed our input into OpOrDigits, let's calculate the result..
// Parses `t` with `parse_all` and folds the resulting items into one number.
//
// Evaluation is strictly left to right with no operator precedence: each
// number is combined with the running total using the most recently seen
// operator, starting from `0` with an implicit `+`.
//
// NOTE: the arithmetic is plain `u32` arithmetic, so it is subject to the
// usual integer overflow behaviour (e.g. subtracting below zero).
fn eval(t: &mut impl Tokens<Item = char>) -> u32 {
    let items = parse_all(t);
    let mut pending = Op::Plus;
    let mut total: u32 = 0;
    for item in items.into_iter() {
        match item {
            OpOrDigit::Digit(n) => {
                total = match pending {
                    Op::Plus => total + n,
                    Op::Minus => total - n,
                    Op::Multiply => total * n,
                };
            }
            OpOrDigit::Op(op) => pending = op,
        }
    }
    total
}
// Use parser
// =========================================
// Turn an in-memory string into something implementing `Tokens`.
let mut tokens = "10 + 2 x 12-4,foobar".into_tokens();
// Evaluate it. Operators apply left to right: ((10 + 2) x 12) - 4 = 140.
let in_memory_result = eval(&mut tokens);
assert_eq!(in_memory_result, 140);
// Instead of parsing an in-memory buffer we can use `yap_streaming` to parse a stream.
// While we could [`std::io::Read::read_to_end()`] here, what if the file was too large
// to fit in memory? What if we were parsing from a network socket?
let mut io_err = None;
let file_chars = BufReader::new(File::open("examples/opOrDigit.txt").expect("open file"))
.bytes()
.map_while(|x| {
match x {
Ok(x) => {
if x.is_ascii() {
Some(x as char)
} else {
io_err = Some(io::ErrorKind::InvalidData.into());
// Don't parse any further if non-ascii input.
// This simple example parser only makes sense with ascii values.
None
}
}
Err(e) => {
io_err = Some(e);
// Don't parse any further if io error.
// Alternatively could panic, retry the byte,
// or include as an error variant and parse Result<char, ParseError> instead.
None
}
}
});
// Convert to something implementing `Tokens`.
// If parsing a stream not of `char` use [`yap_streaming::StreamTokens`] instead.
let mut tokens = StrStreamTokens::new(file_chars);
// Parse
assert_eq!(eval(&mut tokens), 140);
// Check that parse encountered no io errors.
assert!(io_err.is_none());