#![doc = include_str!("./README.md")] #![allow(clippy::type_complexity, clippy::new_ret_no_self)] use std::{ collections::VecDeque, fmt::{self, Debug}, usize, }; #[cfg(feature = "buffered")] pub use buffered_token_queue::*; #[cfg(feature = "generator")] pub use generator_token_queue::*; #[cfg(all(not(target_arch = "wasm32"), feature = "parallel"))] pub use parallel_token_queue::*; /// [PartialEq] is required for comparing tokens with [TokenReader::expect_next] pub trait TokenTrait: PartialEq { /// Use this for *nully* tokens. Will be skipped under [TokenReader::expect_next] fn is_skippable(&self) -> bool { false } } /// A structure with a piece of data and some additional data such as a position #[cfg_attr(any(test, doctest), derive(PartialEq, Eq))] pub struct Token(pub T, pub TData); impl Debug for Token { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_tuple("Token") .field(&self.0) .field(&self.1) .finish() } } /// A *reader* over a sequence of tokens pub trait TokenReader { /// Returns a reference to next token but does not advance current position fn peek(&mut self) -> Option<&Token>; /// Returns a reference to nth (zero based) upcoming token without advancing fn peek_n(&mut self, n: usize) -> Option<&Token>; /// Use with caution fn peek_mut(&mut self) -> Option<&mut Token>; /// Returns the next token and advances fn next(&mut self) -> Option>; /// Returns next if `cb` returns true for the upcoming token (the token from [TokenReader::peek]) fn conditional_next(&mut self, cb: impl FnOnce(&T) -> bool) -> Option> { let peek = self.peek()?; if cb(&peek.0) { self.next() } else { None } } /// Runs the closure (cb) over upcoming tokens. Passes the value behind the Token to the closure. /// Will stop and return a reference **to the next Token from when the closure returns true**. /// Returns None if scanning finishes before closure returns true. Does not advance the reader. /// /// Used for lookahead and then branching based on return value during parsing fn scan(&mut self, cb: impl FnMut(&T, &TData) -> bool) -> Option<&Token>; /// Tests that next token matches an expected type. Will return error if does not /// match. The `Ok` value contains the data of the valid token. /// Else it will return the Err with the expected token type and the token that did not match /// /// Is the token is skippable (using [TokenTrait::is_skippable]) fn expect_next(&mut self, expected_type: T) -> Result)>> { match self.next() { Some(token) => { if token.0 == expected_type { Ok(token.1) } else if token.0.is_skippable() { // This will advance to the next, won't cyclically recurse self.expect_next(expected_type) } else { Err(Some((expected_type, token))) } } None => Err(None), } } } /// Trait for a sender that can append a token to a sequence pub trait TokenSender { /// Appends a new [`Token`] /// Will return false if could not push token fn push(&mut self, token: Token) -> bool; } #[cfg(feature = "buffered")] mod buffered_token_queue { use super::*; /// A queue which can be used as a sender and reader. Use this for buffering all the tokens before reading pub struct BufferedTokenQueue { buffer: VecDeque>, } impl Default for BufferedTokenQueue { fn default() -> Self { Self { buffer: Default::default(), } } } impl BufferedTokenQueue { /// Constructs a new [`BufferedTokenQueue`] pub fn new() -> Self { Default::default() } } impl TokenSender for BufferedTokenQueue { fn push(&mut self, token: Token) -> bool { self.buffer.push_back(token); true } } impl TokenReader for BufferedTokenQueue { fn peek(&mut self) -> Option<&Token> { self.buffer.front() } fn peek_n(&mut self, n: usize) -> Option<&Token> { self.buffer.get(n) } fn peek_mut(&mut self) -> Option<&mut Token> { self.buffer.front_mut() } fn next(&mut self) -> Option> { self.buffer.pop_front() } fn scan(&mut self, mut cb: impl FnMut(&T, &TData) -> bool) -> Option<&Token> { let mut iter = self.buffer.iter().peekable(); while let Some(token) = iter.next() { if cb(&token.0, &token.1) { return iter.peek().copied(); } } None } } } #[cfg(all(not(target_arch = "wasm32"), feature = "parallel"))] mod parallel_token_queue { use super::*; use std::sync::mpsc::{sync_channel, Receiver, RecvError, SyncSender}; const DEFAULT_BUFFER_SIZE: usize = 20; /// A token queue used for doing lexing and parsing on different threads. Will send tokens between threads pub struct ParallelTokenQueue; impl ParallelTokenQueue { /// Creates two items, a sender and a receiver. Where the reader is on the parsing thread and the /// sender is on the lexer thread pub fn new( ) -> (ParallelTokenSender, ParallelTokenReader) { Self::new_with_buffer_size(DEFAULT_BUFFER_SIZE) } pub fn new_with_buffer_size( buffer_size: usize, ) -> (ParallelTokenSender, ParallelTokenReader) { let (sender, receiver) = sync_channel::>(buffer_size); ( ParallelTokenSender(sender), ParallelTokenReader { receiver, cache: VecDeque::new(), }, ) } } // Sender and reader structs generate by `ParallelTokenQueue::new`: #[doc(hidden)] pub struct ParallelTokenSender(SyncSender>); #[doc(hidden)] pub struct ParallelTokenReader { receiver: Receiver>, cache: VecDeque>, } impl TokenSender for ParallelTokenSender { fn push(&mut self, token: Token) -> bool { self.0.send(token).is_ok() } } impl TokenReader for ParallelTokenReader { fn peek(&mut self) -> Option<&Token> { if self.cache.is_empty() { match self.receiver.recv() { Ok(token) => self.cache.push_back(token), // Err is reader has dropped e.g. no more tokens Err(RecvError) => { return None; } } } self.cache.front() } fn peek_n(&mut self, n: usize) -> Option<&Token> { while self.cache.len() <= n { match self.receiver.recv() { Ok(token) => self.cache.push_back(token), // Err is reader has dropped e.g. no more tokens Err(RecvError) => { return None; } } } self.cache.get(n) } fn next(&mut self) -> Option> { if !self.cache.is_empty() { return self.cache.pop_front(); } self.receiver.recv().ok() } fn scan(&mut self, mut cb: impl FnMut(&T, &TData) -> bool) -> Option<&Token> { let found = scan_cache(&mut self.cache, &mut cb); let mut return_next = match found { ScanCacheResult::RetrievableInCacheAt(idx) => return self.cache.get(idx), ScanCacheResult::Found => true, ScanCacheResult::NotFound => false, }; loop { match self.receiver.recv() { Ok(val) => { if return_next { self.cache.push_back(val); return self.cache.back(); } if cb(&val.0, &val.1) { return_next = true; } self.cache.push_back(val); } // Err is reader has dropped e.g. no more tokens Err(RecvError) => { return None; } } } } fn peek_mut(&mut self) -> Option<&mut Token> { if self.cache.is_empty() { match self.receiver.recv() { Ok(token) => self.cache.push_back(token), // Err is reader has dropped e.g. no more tokens Err(RecvError) => { return None; } } } self.cache.front_mut() } } } #[cfg(feature = "generator")] mod generator_token_queue { use super::*; /// A token queue which has a backing generator/lexer which is called when needed by parsing logic pub struct GeneratorTokenQueue where T: TokenTrait, for<'a> TGenerator: FnMut(&mut TGeneratorState, &mut GeneratorTokenQueueBuffer<'a, T, TData>), { generator: TGenerator, generator_state: TGeneratorState, cache: VecDeque>, } /// A wrapping struct for the cache around [`GeneratorTokenQueue`]. Use as the second parameter /// in the generator/lexer function pub struct GeneratorTokenQueueBuffer<'a, T: TokenTrait, TData>( &'a mut VecDeque>, ); impl<'a, T: TokenTrait, TData> GeneratorTokenQueueBuffer<'a, T, TData> { pub fn is_empty(&self) -> bool { self.0.is_empty() } pub fn len(&self) -> usize { self.0.len() } } impl<'a, T: TokenTrait, TData> TokenSender for GeneratorTokenQueueBuffer<'a, T, TData> { fn push(&mut self, token: Token) -> bool { self.0.push_back(token); true } } impl GeneratorTokenQueue where T: TokenTrait, for<'a> TGenerator: FnMut(&mut TGeneratorState, &mut GeneratorTokenQueueBuffer<'a, T, TData>), { /// Create a new [`GeneratorTokenQueue`] with a lexer function and initial state pub fn new(generator: TGenerator, generator_state: TGeneratorState) -> Self { GeneratorTokenQueue { generator, generator_state, cache: VecDeque::new(), } } } impl TokenReader for GeneratorTokenQueue where T: TokenTrait, TData: Debug, for<'a> TGenerator: FnMut(&mut TGeneratorState, &mut GeneratorTokenQueueBuffer<'a, T, TData>), { fn peek(&mut self) -> Option<&Token> { if self.cache.is_empty() { (self.generator)( &mut self.generator_state, &mut GeneratorTokenQueueBuffer(&mut self.cache), ); } self.cache.front() } fn peek_n(&mut self, n: usize) -> Option<&Token> { while self.cache.len() <= n { (self.generator)( &mut self.generator_state, &mut GeneratorTokenQueueBuffer(&mut self.cache), ); } self.cache.get(n) } fn next(&mut self) -> Option> { if !self.cache.is_empty() { return self.cache.pop_front(); } (self.generator)( &mut self.generator_state, &mut GeneratorTokenQueueBuffer(&mut self.cache), ); self.cache.pop_front() } fn scan(&mut self, mut cb: impl FnMut(&T, &TData) -> bool) -> Option<&Token> { let cb = &mut cb; let found = scan_cache(&mut self.cache, cb); let mut return_next = match found { ScanCacheResult::RetrievableInCacheAt(idx) => return self.cache.get(idx), ScanCacheResult::Found => true, ScanCacheResult::NotFound => false, }; let mut found = None::; while found.is_none() { let start = self.cache.len(); (self.generator)( &mut self.generator_state, &mut GeneratorTokenQueueBuffer(&mut self.cache), ); if self.cache.is_empty() { return None; } for (idx, token) in self.cache.iter().enumerate().skip(start) { if return_next { found = Some(idx); break; } if cb(&token.0, &token.1) { return_next = true; } } } self.cache.get(found.unwrap()) } fn peek_mut(&mut self) -> Option<&mut Token> { if self.cache.is_empty() { (self.generator)( &mut self.generator_state, &mut GeneratorTokenQueueBuffer(&mut self.cache), ); } self.cache.front_mut() } } } enum ScanCacheResult { RetrievableInCacheAt(usize), NotFound, // Aka pull out next one Found, } /// Returns the idx of the **next item** after cb returns true /// This returns the idx instead of the item for lifetime reasons fn scan_cache( cache: &mut VecDeque>, cb: &mut impl FnMut(&T, &TData) -> bool, ) -> ScanCacheResult { let mut cb_returned_true_at_idx = None::; // Try to find in idx. Returns the idx when found for (idx, token) in cache.iter().enumerate() { if cb(&token.0, &token.1) { cb_returned_true_at_idx = Some(idx); break; } } if let Some(idx) = cb_returned_true_at_idx { if idx + 1 < cache.len() { ScanCacheResult::RetrievableInCacheAt(idx + 1) } else { ScanCacheResult::Found } } else { ScanCacheResult::NotFound } } #[cfg(feature = "sized-tokens")] pub mod sized_tokens { use crate::{Token, TokenReader, TokenTrait}; /// Tokens with a known length (in bytes) pub trait SizedToken: TokenTrait { fn length(&self) -> u32; } pub type TokenStart = source_map::Start; pub type TokenEnd = source_map::End; impl Token { pub fn get_span(&self) -> source_map::Span { let start = self.1 .0; source_map::Span { start, end: self.1 .0 + self.0.length(), source: (), } } pub fn get_end(&self) -> TokenEnd { source_map::End(self.1 .0 + self.0.length()) } } pub trait TokenReaderWithTokenEnds: TokenReader { fn expect_next_get_end( &mut self, expected_type: T, ) -> Result)>> { match self.next() { Some(token) => { if token.0 == expected_type { Ok(token.get_end()) } else if token.0.is_skippable() { // This will advance to the next, won't cyclically recurse self.expect_next_get_end(expected_type) } else { Err(Some((expected_type, token))) } } None => Err(None), } } } impl TokenReaderWithTokenEnds for TR where T: SizedToken, TR: TokenReader, { } } #[cfg(test)] mod tests { use super::*; impl TokenTrait for u32 {} mod buffered_token_queue { use super::{BufferedTokenQueue, Token, TokenReader, TokenSender}; #[test] fn next() { let mut btq = BufferedTokenQueue::new(); btq.push(Token(12, ())); btq.push(Token(32, ())); btq.push(Token(52, ())); assert_eq!(btq.next().unwrap(), Token(12, ())); assert_eq!(btq.next().unwrap(), Token(32, ())); assert_eq!(btq.next().unwrap(), Token(52, ())); assert!(btq.next().is_none()); } #[test] fn peek() { let mut btq = BufferedTokenQueue::new(); btq.push(Token(12, ())); assert_eq!(btq.peek().unwrap(), &Token(12, ())); assert_eq!(btq.next().unwrap(), Token(12, ())); assert!(btq.next().is_none()); } #[test] fn peek_n() { let mut btq = BufferedTokenQueue::new(); btq.push(Token(12, ())); btq.push(Token(32, ())); btq.push(Token(52, ())); assert_eq!(btq.peek_n(2).unwrap(), &Token(52, ())); assert_eq!(btq.next().unwrap(), Token(12, ())); assert_eq!(btq.next().unwrap(), Token(32, ())); assert_eq!(btq.next().unwrap(), Token(52, ())); assert!(btq.next().is_none()); } #[test] fn expect_next() { let mut btq = BufferedTokenQueue::new(); btq.push(Token(12, ())); btq.push(Token(24, ())); assert_eq!(btq.expect_next(12).unwrap(), ()); assert!(btq.expect_next(10).is_err()); assert!(btq.next().is_none()); } #[test] fn scan() { let mut btq = BufferedTokenQueue::new(); for val in vec![4, 10, 100, 200] { btq.push(Token(val, ())); } let mut count = 0; let x = btq.scan(move |token_val, _| { count += token_val; count > 100 }); assert_eq!(x.unwrap().0, 200); let mut count = 0; let y = btq.scan(move |token_val, _| { count += token_val; count > 1000 }); assert_eq!(y, None); assert_eq!(btq.next().unwrap().0, 4); assert_eq!(btq.next().unwrap().0, 10); assert_eq!(btq.next().unwrap().0, 100); assert_eq!(btq.next().unwrap().0, 200); assert!(btq.next().is_none()); } } mod parallel_token_queue { use super::{ParallelTokenQueue, Token, TokenReader, TokenSender}; #[test] fn next() { let (mut sender, mut reader) = ParallelTokenQueue::new(); std::thread::spawn(move || { sender.push(Token(12, ())); sender.push(Token(32, ())); sender.push(Token(52, ())); }); assert_eq!(reader.next().unwrap(), Token(12, ())); assert_eq!(reader.next().unwrap(), Token(32, ())); assert_eq!(reader.next().unwrap(), Token(52, ())); assert!(reader.next().is_none()); } #[test] fn peek() { let (mut sender, mut reader) = ParallelTokenQueue::new(); std::thread::spawn(move || { sender.push(Token(12, ())); }); assert_eq!(reader.peek().unwrap(), &Token(12, ())); assert_eq!(reader.next().unwrap(), Token(12, ())); assert!(reader.next().is_none()); } #[test] fn next_n() { let (mut sender, mut reader) = ParallelTokenQueue::new(); std::thread::spawn(move || { sender.push(Token(12, ())); sender.push(Token(32, ())); sender.push(Token(52, ())); }); assert_eq!(reader.peek_n(2).unwrap(), &Token(52, ())); assert_eq!(reader.next().unwrap(), Token(12, ())); assert_eq!(reader.next().unwrap(), Token(32, ())); assert_eq!(reader.next().unwrap(), Token(52, ())); assert!(reader.next().is_none()); } #[test] fn expect_next() { let (mut sender, mut reader) = ParallelTokenQueue::new(); std::thread::spawn(move || { sender.push(Token(12, ())); sender.push(Token(24, ())); }); assert_eq!(reader.expect_next(12).unwrap(), ()); assert!(reader.expect_next(10).is_err()); assert!(reader.next().is_none()); } #[test] fn scan() { let (mut sender, mut reader) = ParallelTokenQueue::new(); std::thread::spawn(move || { for val in vec![4, 10, 100, 200] { sender.push(Token(val, ())); } }); let mut count = 0; let x = reader.scan(move |token_val, _| { count += token_val; count > 100 }); assert_eq!(x.unwrap().0, 200); let mut count = 0; let y = reader.scan(move |token_val, _| { count += token_val; count > 1000 }); assert_eq!(y, None); assert_eq!(reader.next().unwrap().0, 4); assert_eq!(reader.next().unwrap().0, 10); assert_eq!(reader.next().unwrap().0, 100); assert_eq!(reader.next().unwrap().0, 200); assert!(reader.next().is_none()); } } mod generator_token_queue { use super::{ GeneratorTokenQueue, GeneratorTokenQueueBuffer, Token, TokenReader, TokenSender, }; fn lexer(state: &mut u32, sender: &mut GeneratorTokenQueueBuffer) { *state += 1; match state { 1..=3 => { sender.push(Token(*state * 2, ())); } _ => {} } } #[test] fn next() { let mut reader = GeneratorTokenQueue::new(lexer, 0); assert_eq!(reader.next().unwrap(), Token(2, ())); assert_eq!(reader.next().unwrap(), Token(4, ())); assert_eq!(reader.next().unwrap(), Token(6, ())); assert!(reader.next().is_none()); } #[test] fn peek() { let mut reader = GeneratorTokenQueue::new(lexer, 0); assert_eq!(reader.peek().unwrap(), &Token(2, ())); assert_eq!(reader.next().unwrap(), Token(2, ())); } #[test] fn peek_n() { let mut reader = GeneratorTokenQueue::new(lexer, 0); assert_eq!(reader.peek_n(2).unwrap(), &Token(6, ())); assert_eq!(reader.next().unwrap(), Token(2, ())); assert_eq!(reader.next().unwrap(), Token(4, ())); assert_eq!(reader.next().unwrap(), Token(6, ())); assert!(reader.next().is_none()); } #[test] fn expect_next() { let mut reader = GeneratorTokenQueue::new(lexer, 0); assert!(reader.expect_next(2).is_ok()); assert!(reader.expect_next(5).is_err()); assert!(reader.expect_next(6).is_ok()); } #[test] fn scan() { let mut reader = GeneratorTokenQueue::new(lexer, 0); let mut count = 0; let x = reader.scan(move |token_val, _| { count += token_val; count > 3 }); assert_eq!(x.unwrap().0, 6); assert_eq!(reader.next().unwrap(), Token(2, ())); } } #[test] fn conditional_next() { let mut btq = BufferedTokenQueue::new(); btq.push(Token(12, ())); btq.push(Token(32, ())); assert_eq!(btq.conditional_next(|t| *t == 28), None); assert_eq!(btq.conditional_next(|t| *t == 12), Some(Token(12, ()))); assert_eq!(btq.next().unwrap(), Token(32, ())); assert!(btq.next().is_none()); } mod skippable_token { use super::{BufferedTokenQueue, Token, TokenReader, TokenSender, TokenTrait}; #[derive(PartialEq, Eq, Debug)] enum TokenType { A, B, Ignore, } impl TokenTrait for TokenType { fn is_skippable(&self) -> bool { matches!(self, TokenType::Ignore) } } #[test] fn still_show_with_next() { let mut btq = BufferedTokenQueue::new(); generate_tokens(&mut btq); assert_eq!(btq.next().unwrap(), Token(TokenType::A, ())); assert_eq!(btq.next().unwrap(), Token(TokenType::Ignore, ())); assert_eq!(btq.next().unwrap(), Token(TokenType::B, ())); assert!(btq.next().is_none()); } #[test] fn skipped_under_expect_next() { let mut btq = BufferedTokenQueue::new(); generate_tokens(&mut btq); assert!(btq.expect_next(TokenType::A).is_ok()); assert!(btq.expect_next(TokenType::B).is_ok()); assert!(btq.next().is_none()); } fn generate_tokens(btq: &mut BufferedTokenQueue) { btq.push(Token(TokenType::A, ())); btq.push(Token(TokenType::Ignore, ())); btq.push(Token(TokenType::B, ())); } } }