// Header
// A bare string literal statement is an include statement
// (it is what the DefineIncludeStatement rule below accepts).
"src/grammar.rs.pp";

// Context type declaration (see DefineContextStatement below).
context "LangBuilder";

/*===================================
  Tokenizer
===================================*/
// Token types are defined with the token keyword
token Keyword;
token Identifier;
token RegExp;
token Literal;
token Symbol;

// The extract keyword will exclude the token type when parsing the AST
extract token Comment;

// Ignore whitespaces
ignore /\s+/;

// Comment
// Line comment: "//" up to the end of the line; the trailing newline is optional.
Comment /\/\/[^\n]*\n?/;
// Block comment: "/*" up to the first "*/".
// Inside the body, a run of stars is only consumed when it is followed by a
// character that is neither "*" nor "/", and the terminator accepts one or
// more stars before the closing slash. This way a comment such as "/***/" or
// one ending in "**/" is recognized, and the match can never run past the
// first "*/" (a body alternative of the form "star followed by any non-slash"
// would swallow the star that begins the terminator).
Comment /\/\*([^\*]|\*+[^\*\/])*\*+\//;

// Literal and RegExp, which are surrounded by either "" or //
// Both allow backslash escapes (a backslash followed by any character) inside.
Literal /"((\\.)|[^\\"])*"/;
RegExp /\/((\\.)|[^\\\/])*\//;

// Keywords
// NOTE(review): the keyword literals are listed before the Identifier pattern,
// which would also match them; presumably rule order or literal-vs-pattern
// priority decides the token type - confirm against the tokenizer.
Keyword "ignore";
Keyword "extract";
Keyword "token";
Keyword "semantic";
Keyword "rule";
Keyword "optional";
Keyword "context";

// Special Symbols (single characters)
Symbol /[{};|()=,:\.\[\]\+]/;

// Identifier is alphanumeric and underscore, but does not start with digit
Identifier /[_a-zA-Z]\w*/;

/*===================================
  Semantics
===================================*/
// Semantic types are used to augment the token types.
// For example, the token type "Identifier" can be either a rule name or a variable name.
// When defining the rules, you can override the semantic type of a token
// or an entire subtree.

// Semantics
semantic Variable;
semantic Token;
semantic Semantic;
semantic Rule;
semantic HookName;
semantic HookType;
semantic ContextType;

/*===================================
  Rules
===================================*/
// There are 2 types of rules:
//   Unions:     rule A = B | C;
//   Functional: rule A(...);
// The first rule is the target of the parser.
// The parser can be configured to generate one root node,
// or keep generating root nodes until the end of the file.
// Parser target: one definition followed by a terminating ";".
// NOTE(review): parameters named "_" are only used for fixed keywords and
// punctuation; presumably they are not kept as members of the generated node - confirm.
rule TopLevelStatement(
    body: TopLevelDefine,
    _: token Symbol";"
);

// Every kind of statement that may appear at the top level.
rule TopLevelDefine =
    DefineIncludeStatement
    | DefineContextStatement
    | DefineRuleStatement
    | DefineTokenTypeStatement
    | DefineIgnoreTokenRuleStatement
    | DefineTokenRuleStatement
    | DefineSemanticStatement;

// Include: a bare string literal, e.g. "src/grammar.rs.pp"
rule ("parse_include":"()")
DefineIncludeStatement(
    path: token Literal
);

// Context: the "context" keyword followed by a literal tagged as ContextType
rule ("parse_context":"()")
DefineContextStatement(
    _: token Keyword"context",
    (ContextType) contextType: token Literal
);

// Rule definition: "rule", an optional hook attribute, the rule name, then the body
rule ("parse_rule":"()")
DefineRuleStatement(
    _: token Keyword"rule",
    hookAttr: optional HookAttribute,
    (Rule) ruleName: token Identifier,
    body: RuleDefineBody
);

// Hook attribute: two literals separated by ":" inside parentheses,
// e.g. ("parse_hook":"Hook"); the first is tagged HookName, the second HookType
rule ("parse_hook":"Hook")
HookAttribute(
    _: token Symbol"(",
    (HookName) hookName: token Literal,
    _: token Symbol":",
    (HookType) hookType: token Literal,
    _: token Symbol")"
);

// A rule body is either a union body or a functional body
rule ("parse_rule_value":"RuleValue")
RuleDefineBody =
    UnionRuleBody
    | FunctionalRuleBody;

// Union body: "=" followed by rule names separated by "|"
// (both the first name and the tail are marked optional in this grammar)
rule UnionRuleBody(
    _: token Symbol"=",
    (Rule) first: optional token Identifier,
    rest: optional UnionRuleListTail+
);

// One "| Name" continuation of a union body
rule UnionRuleListTail(
    _: token Symbol"|",
    (Rule) r: token Identifier
);

// Functional body: a parenthesized, comma-separated parameter list
rule FunctionalRuleBody(
    _: token Symbol"(",
    firstParam: optional Parameter,
    restParams: optional ParamListTail+,
    _: token Symbol")"
);

// One ", parameter" continuation of a parameter list
rule ParamListTail(
    _: token Symbol",",
    p: Parameter
);

// Parameter: an optional semantic attribute, the variable name, ":", then the type
// (the type itself is marked optional in this grammar)
rule ("parse_param":"Param")
Parameter(
    semAttr: optional ParamSemantic,
    (Variable) variable: token Identifier,
    _: token Symbol":",
    type: optional RuleType
);

// Semantic attribute of a parameter: a parenthesized semantic name, e.g. (Rule)
rule ParamSemantic(
    _: token Symbol"(",
    (Semantic) semanticName: optional token Identifier,
    _: token Symbol")"
);

// Parameter type: optional "optional" and "token" keywords, an identifier,
// an optional literal (e.g. the ";" in: token Symbol";"), and an optional "+" list marker
rule RuleType(
    kwOptional: optional token Keyword"optional",
    kwToken: optional token Keyword"token",
    id: token Identifier,
    tokenContent: optional token Literal,
    isList: optional token Symbol"+"
);

// Token type definition, e.g.: token Keyword;  or:  extract token Comment;
rule ("parse_token_def":"()")
DefineTokenTypeStatement(
    kwExtract: optional token Keyword"extract",
    _: token Keyword"token",
    (Token) tokenType: token Identifier
);

// Ignore rule, e.g.: ignore /\s+/;
rule ("parse_token_ignore_rule":"()")
DefineIgnoreTokenRuleStatement(
    _: token Keyword"ignore",
    value: LiteralOrRegExp
);

// Token rule: a token type name followed by a literal or a regular expression,
// e.g.: Keyword "rule";  or:  Identifier /[_a-zA-Z]\w*/;
rule ("parse_token_rule":"()")
DefineTokenRuleStatement(
    (Token) tokenType: token Identifier,
    value: LiteralOrRegExp
);

// The value of a token rule or ignore rule
rule LiteralOrRegExp =
    TokenLiteral
    | TokenRegExp;

rule TokenLiteral(
    t: token Literal
);

rule TokenRegExp(
    t: token RegExp
);

// Semantic definition, e.g.: semantic Variable;
rule ("parse_semantic":"()")
DefineSemanticStatement(
    _: token Keyword"semantic",
    (Semantic) id: token Identifier
);