state_machine_parser

Crates.iostate_machine_parser
lib.rsstate_machine_parser
version0.1.0
sourcesrc
created_at2024-09-05 10:11:54.392814
updated_at2024-09-05 10:11:54.392814
descriptionThe parser based on state machine generated by EBNF rules.
homepage
repository
max_upload_size
id1364376
size43,516
Equescript (Equescript)

documentation

README

state_machine_parser

A parser based on a state machine generated from EBNF rules.

Usage

[dependencies]
state_machine_parser = "0.1.0"

Quickstart

use std::collections::HashMap;
use state_machine_parser::{compile_bnf_rules, debug_print_match_record, MatchRecord, StateMachineParser, StateManager, Token, TokenType};

/// EBNF grammar used by this example.
///
/// `MultiplicationExpression` is a `Number` followed by zero or more
/// `(OperatorMul | OperatorDiv) Number` pairs. `Expression` is a
/// `MultiplicationExpression` followed by zero or more
/// `(OperatorAdd | OperatorSub) MultiplicationExpression` pairs.
/// The identifiers on the right-hand sides are either other rule names or the
/// token-type names registered in `TOKEN_TYPE_CONVERTER`.
const RULE: &str = "
MultiplicationExpression = Number {(OperatorMul | OperatorDiv) Number};
Expression               = MultiplicationExpression {(OperatorAdd | OperatorSub) MultiplicationExpression};
";

/// The kinds of token that can appear in a numeric expression.
///
/// The variant names match the terminal names used in `RULE`.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
enum NumericExpressionTokenType {
    /// A numeric literal.
    Number,
    /// The addition operator.
    OperatorAdd,
    /// The subtraction operator.
    OperatorSub,
    /// The multiplication operator.
    OperatorMul,
    /// The division operator.
    OperatorDiv,
}
// Empty impl: no trait items are defined here.
// NOTE(review): presumably `TokenType` is satisfied by the derives above plus
// the `TryFrom<Vec<char>>` impl — confirm against the crate documentation.
impl TokenType for NumericExpressionTokenType {}

/// Global lookup table from a token-type name (as a `Vec<char>`) to its
/// `NumericExpressionTokenType` variant.
///
/// Starts out as `None`; `main` assigns the populated map before compiling
/// the rules, and the `TryFrom<Vec<char>>` impl reads it.
// NOTE(review): every access to a `static mut` needs `unsafe`; a
// `std::sync::OnceLock` would remove that requirement — consider migrating.
static mut TOKEN_TYPE_CONVERTER: Option<HashMap<Vec<char>, NumericExpressionTokenType>> = None;

/// Converts a token-type name, given as its characters, into the matching
/// `NumericExpressionTokenType` variant by looking it up in the global
/// `TOKEN_TYPE_CONVERTER` table.
///
/// # Errors
///
/// Returns `Err(())` when the name is not a key of the table.
///
/// # Panics
///
/// Panics if `TOKEN_TYPE_CONVERTER` is still `None`, i.e. the table has not
/// been initialized before the conversion is attempted.
impl TryFrom<Vec<char>> for NumericExpressionTokenType {
    type Error = ();

    fn try_from(value: Vec<char>) -> Result<Self, Self::Error> {
        // Read the table through a raw pointer instead of naming the
        // `static mut` in a reference expression, which the `static_mut_refs`
        // lint rejects.
        //
        // SAFETY: the pointer comes from a live static, so it is valid and
        // aligned. This assumes the table is assigned once during start-up
        // and never written while a conversion is running, so no write can
        // alias this shared borrow.
        let table = unsafe { &*std::ptr::addr_of!(TOKEN_TYPE_CONVERTER) };

        table
            .as_ref()
            .expect("TOKEN_TYPE_CONVERTER must be initialized before converting token names")
            .get(&value)
            .cloned()
            .ok_or(())
    }
}

/// A single token of a numeric expression: its kind plus a numeric payload.
#[derive(Debug)]
struct NumericExpressionToken {
    /// Which kind of token this is.
    token_type: NumericExpressionTokenType,
    /// The number carried by a `Number` token; the `operator` constructor
    /// sets it to `0`.
    value: usize,
}

impl NumericExpressionToken {
    fn number(number: usize) -> Self {
        Self { token_type: NumericExpressionTokenType::Number, value: number }
    }
    fn operator(operator: NumericExpressionTokenType) -> Self {
        Self { token_type: operator, value: 0 }
    }
}

/// Displays a token using its `Debug` representation.
impl std::fmt::Display for NumericExpressionToken {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // A fresh `{:?}` format is used on purpose: flags given to the outer
        // `Display` call (such as `#` or a width) are not forwarded.
        formatter.write_fmt(format_args!("{:?}", self))
    }
}

/// Exposes the token's kind through the crate's `Token` trait.
impl Token<NumericExpressionTokenType> for NumericExpressionToken {
    /// Returns a reference to this token's `token_type` field.
    fn token_type(&self) -> &NumericExpressionTokenType {
        &self.token_type
    }
}

/// Registers the token-type names, compiles `RULE`, parses a fixed token
/// stream starting from the `Expression` rule, and prints the match records.
fn main() {
    // Names as they are written in `RULE`, paired with their variants.
    let named_types = vec![
        ("Number", NumericExpressionTokenType::Number),
        ("OperatorAdd", NumericExpressionTokenType::OperatorAdd),
        ("OperatorSub", NumericExpressionTokenType::OperatorSub),
        ("OperatorMul", NumericExpressionTokenType::OperatorMul),
        ("OperatorDiv", NumericExpressionTokenType::OperatorDiv),
    ];
    let converter: HashMap<Vec<char>, NumericExpressionTokenType> = named_types
        .into_iter()
        .map(|(name, token_type)| (name.chars().collect::<Vec<char>>(), token_type))
        .collect();

    // SAFETY: this is the first statement that touches the static; it assumes
    // no other thread is running yet that could read or write the table.
    unsafe {
        TOKEN_TYPE_CONVERTER = Some(converter);
    }

    // The table must be in place before this call, because the `TryFrom`
    // impl for the token type reads it.
    let state_manager: StateManager<NumericExpressionTokenType> = compile_bnf_rules(RULE).unwrap();

    // Token stream: 1 OperatorMul 2 OperatorAdd 3 OperatorMul 4.
    let mul = || NumericExpressionToken::operator(NumericExpressionTokenType::OperatorMul);
    let add = || NumericExpressionToken::operator(NumericExpressionTokenType::OperatorAdd);
    let expression: Vec<NumericExpressionToken> = vec![
        NumericExpressionToken::number(1),
        mul(),
        NumericExpressionToken::number(2),
        add(),
        NumericExpressionToken::number(3),
        mul(),
        NumericExpressionToken::number(4),
    ];

    // Look up the numeric id of the rule that parsing starts from.
    let start_name: Vec<char> = "Expression".chars().collect();
    let start_rule: usize = *state_manager.rule_ids.get(&start_name).unwrap();

    let parser = StateMachineParser::new(&state_manager);
    let match_records: Vec<MatchRecord> = parser.parse(&expression, start_rule).unwrap();
    debug_print_match_record(&expression, &match_records, &state_manager.rule_names);
}

Running the above code produces the following output:

{ Expression
    { MultiplicationExpression
        NumericExpressionToken { token_type: Number, value: 1 }
        NumericExpressionToken { token_type: OperatorMul, value: 0 }
        NumericExpressionToken { token_type: Number, value: 2 }
    } MultiplicationExpression
    NumericExpressionToken { token_type: OperatorAdd, value: 0 }
    { MultiplicationExpression
        NumericExpressionToken { token_type: Number, value: 3 }
        NumericExpressionToken { token_type: OperatorMul, value: 0 }
        NumericExpressionToken { token_type: Number, value: 4 }
    } MultiplicationExpression
} Expression

EBNF

Each EBNF rule has four parts: a left-hand side, a right-hand side, the "=" character separating these two sides, and the ";" character marking the end of the rule. The left-hand side is the name of the rule and the right-hand side is the description of the rule. The four description forms are explained below.

Form Semantic
Sequence Items appear left–to–right; their order is important.
Choice Alternative items are enclosed between "(" and ")" (parentheses) and separated by a "|" (stroke); one item is chosen from this list of alternatives, and their order is unimportant.
Option The optional item is enclosed between "[" and "]" (square–brackets), the item can be either included or discarded.
Repetition The repeatable item is enclosed between "{" and "}" (curly–braces), the item can be repeated zero or more times.
Commit count: 0

cargo fmt