//! This test just tokenizes Stefan Gustavson's simplex noise java implementation. use alkale::{ common::numeric::ParseNumberResult, format_notification, map_double_char_tokens, map_single_char_token, notification::NotificationSeverity, span::Spanned, token::Token, FinalizedLexerResult, LexerResult, SourceCodeScanner, }; use criterion::{criterion_group, Criterion}; #[expect(dead_code)] pub enum JavaToken<'a> { // Keywords Abstract, Boolean, Byte, Break, Class, Case, Catch, Char, Continue, Default, Do, Double, Else, Extends, Final, Finally, Float, For, If, Implements, Import, Instanceof, Int, Interface, Long, Native, New, Package, Private, Protected, Public, Return, Short, Static, Super, Switch, Synchronized, This, Throw, Throws, Transient, Try, Void, Volatile, While, Assert, Const, Enum, Goto, Strictfp, // Nonterminals Identifier(&'a str), IntValue(u64), FloatValue(f64), CharValue(char), StringValue(String), BooleanValue(bool), // Operators Plus, Minus, Slash, Asterisk, Percent, DoublePlus, DoubleMinus, Exclamation, Equals, PlusEquals, MinusEquals, AsteriskEquals, SlashEquals, PercentEquals, CaretEquals, DoubleEquals, ExclaimEquals, LessThan, GreaterThan, LessThanEqual, GreaterThanEqual, DoubleAmpersand, DoubleVerticalBar, Question, Colon, Ampersand, VerticalBar, Caret, Tilde, DoubleLessThan, DoubleGreaterThan, TripleGreaterThan, // Punctuation OpenBracket, CloseBracket, OpenBrace, CloseBrace, OpenParen, CloseParen, Comma, Period, Semicolon, } // We can assume no errors can occur, even if they do it's not a huge deal // for this benchmark. pub fn lexer() -> FinalizedLexerResult> { let program = include_str!("SimplexNoise.java"); let ctx = SourceCodeScanner::new(program); let mut lexer_result = LexerResult::new(); while ctx.has_next() { use JavaToken::{ Abstract, Ampersand, Assert, Asterisk, AsteriskEquals, Boolean, BooleanValue, Break, Byte, Caret, CaretEquals, Case, Catch, Char, CharValue, Class, CloseBrace, CloseBracket, CloseParen, Colon, Comma, Const, Continue, Default, Do, Double, DoubleAmpersand, DoubleEquals, DoubleGreaterThan, DoubleLessThan, DoubleMinus, DoublePlus, DoubleVerticalBar, Else, Enum, Equals, ExclaimEquals, Exclamation, Extends, Final, Finally, Float, FloatValue, For, Goto, GreaterThan, GreaterThanEqual, Identifier, If, Implements, Import, Instanceof, Int, IntValue, Interface, LessThan, LessThanEqual, Long, Minus, MinusEquals, Native, New, OpenBrace, OpenBracket, OpenParen, Package, Percent, PercentEquals, Period, Plus, PlusEquals, Private, Protected, Public, Question, Return, Semicolon, Short, Slash, SlashEquals, Static, Strictfp, StringValue, Super, Switch, Synchronized, This, Throw, Throws, Tilde, Transient, TripleGreaterThan, Try, VerticalBar, Void, Volatile, While, }; // Parse out single char tokens, we can't do tokens like `+` because they may // be followed by a `=`, which makes them a different token. map_single_char_token!(&ctx, &mut lexer_result, '[' => OpenBracket, ']' => CloseBracket, '{' => OpenBrace, '}' => CloseBrace, '(' => OpenParen, ')' => CloseParen, ',' => Comma, '.' => Period, ';' => Semicolon, '~' => Tilde, '?' => Question, ':' => Colon, ); // Parse out one-to-two character tokens. map_double_char_tokens!(&ctx, &mut lexer_result, '+' => { '+' => DoublePlus, '=' => PlusEquals, _ => Plus, }, '-' => { '-' => DoubleMinus, '=' => MinusEquals, _ => Minus, }, '*' => { '=' => AsteriskEquals, _ => Asterisk, }, '/' => { '/' => { ctx.skip_line(); continue; }, '*' => { loop { ctx.skip_until('*'); ctx.skip(); match ctx.next() { None => break, Some('/') => break, _ => () } } continue; }, '=' => SlashEquals, _ => Slash, }, '%' => { '=' => PercentEquals, _ => Percent, }, '^' => { '=' => CaretEquals, _ => Caret, }, '!' => { '=' => ExclaimEquals, _ => Exclamation, }, '&' => { '&' => DoubleAmpersand, _ => Ampersand, }, '|' => { '|' => DoubleVerticalBar, _ => VerticalBar, }, '<' => { '=' => LessThanEqual, '<' => DoubleLessThan, _ => LessThan, }, '=' => { '=' => DoubleEquals, _ => Equals, }, ); // Special case: >, >=, >>, >>>. if ctx.peek() == Some('>') { let span = ctx.span(); ctx.skip(); let data = match ctx.peek() { Some('=') => { ctx.skip(); GreaterThanEqual } Some('>') => { ctx.skip(); if ctx.peek() == Some('>') { ctx.skip(); TripleGreaterThan } else { DoubleGreaterThan } } _ => GreaterThan, }; let final_span = ctx.span(); lexer_result.push_token(Token::new(data, span.up_to(&final_span))); continue; } // Parse out identifiers and keywords if let Some(Spanned { span, data }) = ctx.try_consume_identifier(first_ident_char, rest_ident_char) { let data = match data { "abstract" => Abstract, "boolean" => Boolean, "byte" => Byte, "break" => Break, "class" => Class, "case" => Case, "catch" => Catch, "char" => Char, "continue" => Continue, "default" => Default, "do" => Do, "double" => Double, "else" => Else, "extends" => Extends, "final" => Final, "finally" => Finally, "float" => Float, "for" => For, "if" => If, "implements" => Implements, "import" => Import, "instanceof" => Instanceof, "int" => Int, "interface" => Interface, "long" => Long, "native" => Native, "new" => New, "package" => Package, "private" => Private, "protected" => Protected, "public" => Public, "return" => Return, "short" => Short, "static" => Static, "super" => Super, "switch" => Switch, "synchronized" => Synchronized, "this" => This, "throw" => Throw, "throws" => Throws, "transient" => Transient, "try" => Try, "void" => Void, "volatile" => Volatile, "while" => While, "assert" => Assert, "const" => Const, "enum" => Enum, "goto" => Goto, "strictfp" => Strictfp, "true" => BooleanValue(true), "false" => BooleanValue(false), _ => Identifier(data), }; lexer_result.push_token(Token::new(data, span)); continue; } // Numbers if let Some(Spanned { span, data: result }) = ctx.try_parse_number::() { match result { ParseNumberResult::Integer(Ok(number)) => { lexer_result.push_token(Token::new(IntValue(number), span)); } ParseNumberResult::Float(Ok(number)) => { lexer_result.push_token(Token::new(FloatValue(number), span)); } ParseNumberResult::Integer(Err(err)) => { format_notification!("Error creating number token: {:?}", err) .span(span) .severity(NotificationSeverity::Error) .report(&mut lexer_result); } ParseNumberResult::Float(Err(err)) => { format_notification!("Error creating number token: {:?}", err) .span(span) .severity(NotificationSeverity::Error) .report(&mut lexer_result); } } continue; } // Chars if let Some(Spanned { span, data: result }) = ctx.try_parse_character_token() { match result { Ok(char) => { lexer_result.push_token(Token::new(CharValue(char), span)); } Err(err) => { format_notification!("Error creating character: {:?}", err) .span(span) .severity(NotificationSeverity::Error) .report(&mut lexer_result); } } continue; } // Strings if let Some(Spanned { span, data: result }) = ctx.try_parse_strict_string() { match result { Ok(str) => { lexer_result.push_token(Token::new(StringValue(str), span)); } Err(errs) => { for err in errs { format_notification!("Error creating string: {:?}", err) .span(span) .severity(NotificationSeverity::Error) .report(&mut lexer_result); } } } continue; } if let Some(last) = ctx.next_span() { if !last.is_whitespace() { format_notification!("Unexpected character '{}'", last.data) .span(last.span) .severity(NotificationSeverity::Error) .report(&mut lexer_result); } } } lexer_result.finalize() } const fn first_ident_char(char: char) -> bool { matches!(char, 'a'..='z' | 'A'..='Z' | '$' | '_') } fn rest_ident_char(char: char) -> bool { first_ident_char(char) || char.is_ascii_digit() } fn bench(criterion: &mut Criterion) { criterion.bench_function("simplex", |x| x.iter(lexer)); } criterion_group!(benches, bench);