//! This is a [foreach](https://esolangs.org/wiki/Foreach) lexer.
//! This is intended to show how to use a slightly more complex example compared to brainfuck.
//!
//! Note: Foreach is an esolang, so this lexer may be a bit strange.

#![allow(dead_code)]

use alkale::{span::Spanned, token::Token, FinalizedLexerResult, LexerResult, SourceCodeScanner};

/// Represents the foreach tokens
#[derive(Debug, Clone)]
enum ForeachToken<'a> {
    Identifier(&'a str),
    OpenBracket,  // [
    CloseBracket, // ]
    OpenBrace,    // {
    CloseBrace,   // }
    Semicolon,    // ;
    Assign,       // =
    ConstAssign,  // :=
    Foreach,      // =>
    Return,       // ->
}

/// Tokenizes a string according to Foreach grammar.
fn tokenize(source: &str) -> FinalizedLexerResult<ForeachToken<'_>> {
    use ForeachToken::{
        Assign, CloseBrace, CloseBracket, ConstAssign, Foreach, Identifier, OpenBrace,
        OpenBracket, Return, Semicolon,
    };

    // Create the reader context
    let context = SourceCodeScanner::new(source);
    let mut result = LexerResult::new();

    // Iterate as long as more characters exist in the source
    while context.has_next() {
        let Spanned {
            span,
            data: identifier,
        } = context.capture_str(|| {
            while let Some(c) = context.peek() {
                if is_identifier_char(c) {
                    context.skip();
                } else {
                    break;
                }
            }
        });

        // If the span is empty, then 0 characters were read; i.e. there is no identifier.
        if span.is_empty() {
            // Because there's no identifier here, push a single-character token, if there is one.
            // Consume a single character either way.
            let Spanned { span, data } = context.next_span().unwrap();

            let token = match data {
                '[' => OpenBracket,
                ']' => CloseBracket,
                '{' => OpenBrace,
                '}' => CloseBrace,
                ';' => Semicolon,
                _ => continue, // Any other character will just be ignored.
            };

            result.push_token(Token::new(token, span));
            continue;
        }

        // "//" will be matched as an identifier due to language rules.
        // If it's found, then skip until the next newline and continue.
        // Note: Something like "A//" passes this check; this is correct behavior.
        if identifier.starts_with("//") {
            context.skip_until('\n');
            continue;
        }

        // Create a token from the identifier. Some specific identifiers are their own tokens.
        let token = match identifier {
            "=" => Assign,
            ":=" => ConstAssign,
            "=>" => Foreach,
            "->" => Return,
            _ => Identifier(identifier),
        };

        // Push the token from above along with the identifier's span.
        result.push_token(Token::new(token, span));
    }

    // Return the result
    result.finalize()
}

/// Returns true if the input is a valid identifier char.
/// Valid identifier chars are any non-whitespace character that isn't one of the following: `;{}[]`.
fn is_identifier_char(x: char) -> bool {
    !x.is_whitespace() && x != ';' && x != '[' && x != ']' && x != '{' && x != '}'
}

fn main() {
    let program = r#"
false := [];
true := [[]];

// True -> False, False -> True
! inp {
    v := inp => -> false;
    -> true;
}

// True if input array contains only truthy values.
&& inp {
    v := inp => _ := ! v => -> false;
    -> true;
}

// True if input array contains at least 1 truthy value.
|| inp v := inp => _ := v => -> true;

// True if number of truthy values in input array is odd
^ inp {
    out = false;
    v := inp => _ := v => out = ! out;
    -> out;
}
"#;

    let result = tokenize(program);

    println!("{result:#?}");
}