//! Here we use conflict resolution to parse mathematical expressions. //! //! The grammar will be : //! ```text //! Start → Statements //! Statements → Statement Statements //! Statements → //! Statement → Function //! Statement → RETURN_KW Expression ; //! Statement → LET_KW IDENT = Expression ; //! Statement → Expression ; //! Function → FN_KW IDENT ( FunctionArgs ) { Statements } //! FunctionArgs → FunctionArg , FunctionArgs //! FunctionArgs → FunctionArg //! FunctionArgs → //! FunctionArg → IDENT //! Expression → INT //! Expression → IDENT //! Expression → Expression ( Args ) //! Expression → Expression + Expression //! Expression → Expression - Expression //! Expression → Expression * Expression //! Expression → Expression / Expression //! Expression → - Expression //! Args → Expression , Args //! Args → Expression //! Args → //! ``` //! With the usual precedence for expressions. /* * Copyright 2022 Arnaud Golfouse * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ use ielr::{ input::{ConflictSolution, ConflictingAction, Grammar, Node, Symbol, Token}, output::Lookahead, }; use std::collections::HashMap; const START: Node = Node(0); const STATEMENTS: Node = Node(1); const STATEMENT: Node = Node(2); const FUNCTION: Node = Node(3); const FUNCTION_ARGS: Node = Node(4); const FUNCTION_ARG: Node = Node(5); const EXPRESSION: Node = Node(6); const ARGS: Node = Node(7); /// workaround for the lack of const unwrap const fn new_token(t: u16) -> Token { match Token::new(t) { Some(t) => t, None => unreachable!(), } } const RETURN_KW: Token = new_token(1); const LET_KW: Token = new_token(2); const IDENT: Token = new_token(3); const SEMICOLON: Token = new_token(4); const EQUAL: Token = new_token(5); const FN_KW: Token = new_token(6); const PARENTHESIS_LEFT: Token = new_token(7); const PARENTHESIS_RIGHT: Token = new_token(8); const BRACE_LEFT: Token = new_token(9); const BRACE_RIGHT: Token = new_token(10); const COMMA: Token = new_token(11); const INT: Token = new_token(12); const PLUS: Token = new_token(13); const MINUS: Token = new_token(14); const STAR: Token = new_token(15); const SLASH: Token = new_token(16); fn main() { use Symbol::{Node as N, Token as T}; let mut grammar = Grammar::new(); // productions that do not need precedence annotations for (lhs, rhs) in [ (START, vec![N(STATEMENTS)]), (STATEMENTS, vec![N(STATEMENT), N(STATEMENTS)]), (STATEMENTS, vec![]), (STATEMENT, vec![N(FUNCTION)]), (STATEMENT, vec![T(RETURN_KW), N(EXPRESSION), T(SEMICOLON)]), ( STATEMENT, vec![T(LET_KW), T(IDENT), T(EQUAL), N(EXPRESSION), T(SEMICOLON)], ), (STATEMENT, vec![N(EXPRESSION), T(SEMICOLON)]), ( FUNCTION, vec![ T(FN_KW), T(IDENT), T(PARENTHESIS_LEFT), N(FUNCTION_ARGS), T(PARENTHESIS_RIGHT), T(BRACE_LEFT), N(STATEMENTS), T(BRACE_RIGHT), ], ), ( FUNCTION_ARGS, vec![N(FUNCTION_ARG), T(COMMA), N(FUNCTION_ARGS)], ), (FUNCTION_ARGS, vec![N(FUNCTION_ARG)]), (FUNCTION_ARGS, vec![]), (FUNCTION_ARG, vec![T(IDENT)]), (EXPRESSION, vec![T(INT)]), (EXPRESSION, vec![T(IDENT)]), (ARGS, vec![N(EXPRESSION), T(COMMA), N(ARGS)]), (ARGS, vec![N(EXPRESSION)]), (ARGS, vec![]), ] { grammar.add_production(lhs, rhs).unwrap(); } let add_prod = grammar .add_production(EXPRESSION, vec![N(EXPRESSION), T(PLUS), N(EXPRESSION)]) .unwrap(); let sub_prod = grammar .add_production(EXPRESSION, vec![N(EXPRESSION), T(MINUS), N(EXPRESSION)]) .unwrap(); let mul_prod = grammar .add_production(EXPRESSION, vec![N(EXPRESSION), T(STAR), N(EXPRESSION)]) .unwrap(); let div_prod = grammar .add_production(EXPRESSION, vec![N(EXPRESSION), T(SLASH), N(EXPRESSION)]) .unwrap(); let neg_prod = grammar .add_production(EXPRESSION, vec![T(MINUS), N(EXPRESSION)]) .unwrap(); // Add conflicts solutions. let operator_to_production = HashMap::from([ (PLUS, add_prod), (MINUS, sub_prod), (STAR, mul_prod), (SLASH, div_prod), ]); // - all binary operators are left-associative here // - '-' has precedence over all binary operators for operator in [PLUS, MINUS, STAR, SLASH] { grammar.add_conflict_solution(ConflictSolution { prefer: ConflictingAction::Reduce(operator_to_production[&operator]), over: ConflictingAction::Shift(Lookahead::Token(operator)), }); grammar.add_conflict_solution(ConflictSolution { prefer: ConflictingAction::Reduce(neg_prod), over: ConflictingAction::Shift(Lookahead::Token(operator)), }); } // operator with the same precedence for (operator1, operator2) in [(PLUS, MINUS), (STAR, SLASH)] { grammar.add_conflict_solution(ConflictSolution { prefer: ConflictingAction::Reduce(operator_to_production[&operator1]), over: ConflictingAction::Shift(Lookahead::Token(operator2)), }); grammar.add_conflict_solution(ConflictSolution { prefer: ConflictingAction::Reduce(operator_to_production[&operator2]), over: ConflictingAction::Shift(Lookahead::Token(operator1)), }); } // operator with the different precedence for (prefer, over) in [(STAR, PLUS), (STAR, MINUS), (SLASH, PLUS), (SLASH, MINUS)] { grammar.add_conflict_solution(ConflictSolution { prefer: ConflictingAction::Shift(Lookahead::Token(prefer)), over: ConflictingAction::Reduce(operator_to_production[&over]), }); grammar.add_conflict_solution(ConflictSolution { prefer: ConflictingAction::Reduce(operator_to_production[&prefer]), over: ConflictingAction::Shift(Lookahead::Token(over)), }); } // Now we build the parsing table ! let (_tables, _statistics) = ielr::compute_table( // We are required to specify LR(1) when using conflict resolution. ielr::Algorithm::Lr(std::num::NonZeroU8::new(1).unwrap()), // We do not care about a maximum number of states &grammar, [START], ) .unwrap(); }