// the constant contains the order of precedence. // the lower the value, higher the precedence. const PRECEDENCE = { // A TERM has the highest precedence in Perl. // They include variables, quote and quote-like operators, any expression in parentheses, // and any function whose arguments are parenthesized TERM: 1, ARROW_OPERATOR: 2, COMMENTS: 3, // comments over anything. Except in strings or regex. // begin of operators AUTO_INCREMENT_DECREMENT: 10, EXPONENTIATION: 11, SYMBOLIC_UNARY: 12, BINDING_OPERATORS: 13, BODMAS_1: 14, BODMAS_2: 15, SHIFT_OPERATORS: 16, RELATIONAL_OPERATORS: 17, EQUALITY_OPERATORS: 18, ISA_OPERATOR: 19, BITWISE_AND: 20, BITWISE_OR_XOR: 21, LOGICAL_AND: 22, LOGICAL_ORS: 23, RANGE_OPERATOR: 24, TERNARY_OPERATOR: 25, ASSIGNMENT_OPERATORS: 26, COMMA_OPERATORS: 27, UNARY_NOT: 28, UNARY_AND: 29, OR_XOR: 30, // end of operators HASH: 31, ARRAY: 32, ESCAPE_SEQ: 33, LOWEST: 100 }; /// // @ts-check module.exports = grammar({ name: 'perl', inline: $ => [ $.semi_colon, ], conflicts: $ => [ [$._auto_increment_decrement], [$._range_exp], [$._class_instance_exp], [$.package_name], [$.hash_ref], [$._block_statements], // they are the same, but we want a different token for each [$.parenthesized_argument, $.array], [$.parenthesized_expression, $.array], [$.function_prototype, $.array], ], externals: $ => [ $._scalar_variable_external, $._start_delimiter, $._end_delimiter, $._string_content, $._string_single_quoted_content, $._string_qq_quoted_content, $._string_double_quoted_content, $._start_delimiter_qw, $._element_in_qw, $._end_delimiter_qw, $._start_delimiter_regex, $._regex_pattern, // supports interpolation $._end_delimiter_regex, $._start_delimiter_search_replace, $._search_replace_content, $._separator_delimiter_search_replace, $._end_delimiter_search_replace, $._start_delimiter_transliteration, $._transliteration_content, $._separator_delimiter_transliteration, $._end_delimiter_transliteration, // heredocs $._imaginary_heredoc_start, $.heredoc_start_identifier, $._heredoc_content, $.heredoc_end_identifier, // end of heredocs $._pod_content, $._automatic_semicolon, ], extras: $ => [ $.comments, /[\s\uFEFF\u2060\u200B\u00A0]/, ], precedences: $ => [ ], word: $ => $.identifier, rules: { source_file: $ => repeat($._statement), _statement: $ => prec(PRECEDENCE.LOWEST, choice( $.package_statement, $.use_no_statement, $.use_no_if_statement, $.bareword_import, $.use_no_subs_statement, $.use_no_feature_statement, $.use_no_version, $.require_statement, $.use_constant_statement, $.use_parent_statement, $._expression_statement, $._declaration, $.single_line_statement, $._compound_statement, $.standalone_block, $.named_block_statement, $.ellipsis_statement, $.special_block, $.special_literal, $.heredoc_body_statement, $.pod_statement, )), // pseudocode // ------------ // have start identifier as external. then parse till end of line // then \n, then start hereodc body. heredoc_initializer: $ => prec(PRECEDENCE.TERM, seq( $._heredoc_operator, $.heredoc_start_identifier, )), _heredoc_operator: $ => '<<', heredoc_body_statement: $ => prec(PRECEDENCE.TERM, seq( $._imaginary_heredoc_start, // just to track between initializer and body start repeat(choice( $.interpolation, $.escape_sequence, $._heredoc_content, )), $.heredoc_end_identifier )), pod_statement: $ => prec(PRECEDENCE.COMMENTS, seq( /=[\w]+/, $._pod_content, )), special_literal: $ => prec(PRECEDENCE.LOWEST, choice( '__FILE__', '__LINE__', '__PACKAGE__', '__SUB__', '__END__', '__DATA__', )), use_parent_statement: $ => seq( 'use', 'parent', optional(seq($.no_require, ',')), choice( commaSeparated($._string), $.word_list_qw, ), $.semi_colon, ), use_constant_statement: $ => seq( 'use', 'constant', choice( seq( field('constant', choice($.identifier, $._string)), choice('=>', ','), field('value', $._expression) ), field('value', $.hash_ref), ), $.semi_colon, ), special_block: $ => seq( optional('sub'), // but this is often frowned upon choice( 'BEGIN', 'UNITCHECK', 'CHECK', 'INIT', 'END', ), field('body', $.block), ), package_statement: $ => seq( 'package', $.package_name, optional(field('version', $.version)), choice( $.semi_colon, field('body', $.block) ) ), ellipsis_statement: $ => seq( '...', optional($.semi_colon), ), use_no_version: $ => seq( choice( field('use', 'use'), field('no', 'no'), ), field('version', $.version), $.semi_colon, ), use_no_feature_statement: $ => seq( choice( field('use', 'use'), field('no', 'no'), ), 'feature', optional(choice($._list, $._string)), $.semi_colon, ), _expression_or_return_expression: $ => choice( $._expression, $.return_expression, ), // aka _statement_modifiers_expression single_line_statement: $ => prec.right(seq( $._expression_or_return_expression, $._statement_modifiers, )), _statement_modifiers: $ => choice( $.if_simple_statement, $.unless_simple_statement, $.while_simple_statement, $.until_simple_statement, $.for_simple_statement, $.when_simple_statement, ), _compound_statement: $ => choice( // conditional statements $.if_statement, $.unless_statement, // $.given_statement, // loops $.while_statement, $.until_statement, $.for_statement_1, $.for_statement_2, ), _expression_statement: $ => seq( $._expression, $.semi_colon, ), use_no_statement: $ => seq( choice( field('use', 'use'), field('no', 'no'), ), choice($.package_name, $.module_name), optional($.version), optional(choice($._list, $._string)), $.semi_colon, ), use_no_if_statement: $ => seq( choice( field('use', 'use'), field('no', 'no'), ), seq( $._if_simple, ',', ), choice($.package_name, $.module_name, $._string), optional($.version), optional($._comma_operator), optional(choice($._list, $._string)), $.semi_colon, ), // Module->import( LIST ); bareword_import: $ => seq( field('module', $.identifier), '->', 'import', $._list, $.semi_colon, ), use_no_subs_statement: $ => seq( choice( field('use', 'use'), field('no', 'no'), ), 'subs', $._list, $.semi_colon, ), require_statement: $ => seq( 'require', $.package_name, $.semi_colon, ), if_simple_statement: $ => prec.right(seq( $._if_simple, $.semi_colon, )), _if_simple: $ => prec.right(seq( 'if', field('condition', choice($.parenthesized_expression, $._expression)), )), unless_simple_statement: $ => prec.right(seq( 'unless', field('condition', choice($.parenthesized_expression, $._expression)), $.semi_colon, )), while_simple_statement: $ => prec.right(seq( 'while', field('condition', choice($.parenthesized_expression, $._expression)), $.semi_colon, )), until_simple_statement: $ => prec.right(seq( 'until', field('condition', choice($.parenthesized_expression, $._expression)), $.semi_colon, )), for_simple_statement: $ => prec.right(seq( choice('for', 'foreach'), field('list', with_or_without_brackets($._expression)), $.semi_colon, )), when_simple_statement: $ => prec.right(seq( 'when', field('condition', choice($.parenthesized_expression, $._expression)), $.semi_colon, )), // TODO: should be a boolean expression and not the current one? if_statement: $ => prec.left(seq( 'if', field('condition', $.parenthesized_expression), field('consequence', $.block), repeat(field('alternative', $.elsif_clause)), optional(field('alternative', $.else_clause)), )), unless_statement: $ => prec.left(seq( 'unless', field('condition', $.parenthesized_expression), field('consequence', $.block), repeat(field('alternative', $.elsif_clause)), optional(field('alternative', $.else_clause)), )), elsif_clause: $ => seq( 'elsif', field('condition', $.parenthesized_expression), field('alternative_if_consequence', $.block), ), else_clause: $ => seq( 'else', field('alternative', $.block), ), // given_statement: $ => seq( // 'given', // '(', // field('value', choice($.scalar_variable, $._scalar_type)), // ')', // field('body', $.given_body), // ), // given_body: $ => seq( // 'when', // $.parenthesized_expression, // $.block, // ), while_statement: $ => seq( optional(seq(field('label', $.identifier), ':')), 'while', field('condition', $.parenthesized_expression), field('body', $.block), optional(field('flow', $.continue)), ), continue: $ => seq( 'continue', field('body', $.block), ), until_statement: $ => seq( optional(seq(field('label', $.identifier), ':')), 'until', field('condition', $.parenthesized_expression), field('body', $.block), optional(field('flow', $.continue)), ), // the C - style for loop for_statement_1: $ => seq( optional(seq(field('label', $.identifier), ':')), choice('for', 'foreach'), $._for_parenthesize, field('body', $.block), ), for_statement_2: $ => seq( optional(seq(field('label', $.identifier), ':')), choice('for', 'foreach'), optional(choice( seq(optional($.scope), $.scalar_variable), seq('\\', optional($.scope), $.hash_variable), // \my %hash )), $.array, field('body', $.block), optional(field('flow', $.continue)), ), _for_parenthesize: $ => choice( seq( '(', field('initializer', $._expression), $.semi_colon, field('condition', $._expression), $.semi_colon, field('incrementor', $._expression), ')' ), seq( '(', $.semi_colon, $.semi_colon, ')' ) ), _declaration: $ => choice( $.function_definition, // moving variable_declaration to expressioin ), variable_declaration: $ => prec.left(seq( $.scope, // multi declaration // or single declaration without brackets choice( $.multi_var_declaration, field('name', $._variables), ), optional($._initializer), )), multi_var_declaration: $ => seq( '(', commaSeparated(field('name', $._variables)), ')', ), _initializer: $ => seq( '=', field('value', $._expression), ), scope: $ => choice( 'our', 'state', 'my', 'local', ), // why perl, why! function_definition: $ => prec.left(seq( optional($.scope), 'sub', field('name', $.identifier), choice( seq( optional($.function_prototype), optional($.function_attribute), choice( $.semi_colon, field('body', $.block), ), ), seq( $.function_prototype, optional($._function_signature), choice( $.semi_colon, field('body', $.block), ), ), seq( optional($.function_prototype), $._function_signature, choice( $.semi_colon, field('body', $.block), ), ), seq( ':', 'prototype', $.function_prototype, $._function_signature, choice( $.semi_colon, field('body', $.block), ), ), ) )), anonymous_function: $ => seq( 'sub', $.block, ), block: $ => prec(PRECEDENCE.TERM, seq( '{', optional(repeat($._block_statements)), '}' )), _function_signature: $ => alias($.array, $.function_signature), function_prototype: $ => prec(PRECEDENCE.TERM, seq( '(', optional($.prototype), ')', )), prototype: $ => /[&$@%;*\[\]\\]+/, // (\[$@%;&*]) // sub test2 : Path('/') Args(0) {} // colon and space are separators // basically they are :call_expressions() function_attribute: $ => seq( ':', repeat1( seq( $.identifier, optional($._function_signature), ) ) ), standalone_block: $ => prec(PRECEDENCE.TERM, seq( optional( seq(field('label', $.identifier), ':'), ), '{', optional(repeat($._block_statements)), '}', optional(field('flow', $.continue)), )), _block_statements: $ => choice( $._statement, seq($.return_expression, $.semi_colon), $.loop_control_statement, ), loop_control_statement: $ => seq( $.loop_control_keyword, optional(field('label', $.identifier)), choice( $._statement_modifiers, $.semi_colon ), ), // there are function calls to be precise // see - https://stackoverflow.com/questions/24526885/does-anyone-know-how-to-understand-such-kind-of-perl-code-blocks named_block_statement: $ => prec(PRECEDENCE.TERM, seq( repeat1(seq( field('function_name', $.identifier), '{', repeat(choice( $._statement, seq($.return_expression, $.semi_colon), )), '}' )), $.semi_colon, )), parenthesized_expression: $ => prec(PRECEDENCE.TERM, seq( '(', optional(commaSeparated(prec.left($._expression))), ')' )), // TODO: do this // parenthesized_condition: $ => seq( // '(', // ')' // ), // TODO: return hello => 'dsfs' && meow => 'dsf'; return_expression: $ => seq( 'return', optional($._expression), ), _expression: $ => with_or_without_brackets(choice( $._expression_without_bareword, $.call_expression_with_bareword, )), // TODO: change this to _expression_without_bareword // NOTE: just a hack to handle identifier vs subroutine call _expression_without_bareword: $ => with_or_without_brackets(choice( $._primitive_expression, $._string, $._variables, $.special_scalar_variable, $._dereference, $.package_variable, $.binary_expression, $.unary_expression, $.ternary_expression, $.call_expression, $.call_expression_recursive, $.method_invocation, $.goto_expression, // quote-like operators $.command_qx_quoted, $.backtick_quoted, $.patter_matcher_m, $.regex_pattern_qr, $.substitution_pattern_s, $.transliteration_tr_or_y, $.heredoc_initializer, $.pattern_matcher, $._i_o_operator, $.anonymous_function, // object oriented stuffs $.bless, // $.grep_or_map_function, // $.join_function, // $.sort_function, // $.unpack_function, // $.push_function, $.array_function, $.variable_declaration, $.key_value_pair, )), array_function: $ => prec(PRECEDENCE.TERM, seq( alias($.identifier, $.function_name), $.block, commaSeparated($._expression), )), // TODO: the output tree is wrong for this. fix it. package_variable: $ => seq( alias(seq( $.scalar_variable, token.immediate('::'), repeat(seq($.identifier, '::')), ), $.package_name), alias($.identifier, $.scalar_variable), ), push_function: $ => prec.right(PRECEDENCE.TERM, seq( alias('push', $.push), with_or_without_brackets(commaSeparated($._expression)), )), grep_or_map_function: $ => prec.right(PRECEDENCE.TERM, seq( choice( alias('grep', $.grep), alias('map', $.map), ), choice( seq($.list_block, $._expression), with_or_without_brackets(commaSeparated($._expression)), ), )), join_function: $ => prec.right(PRECEDENCE.TERM, seq( choice( alias('join', $.join), ), with_or_without_brackets(commaSeparated($._expression)), )), reverse_function: $ => prec.right(PRECEDENCE.TERM, seq( alias('reverse', $.reverse), optional(with_or_without_brackets(commaSeparated($._expression))), )), sort_function: $ => prec.right(PRECEDENCE.TERM, seq( alias('sort', $.sort), choice( $._expression, seq($.list_block, $._expression), seq($.call_expression, $._expression), ), )), unpack_function: $ => prec.right(PRECEDENCE.TERM, seq( alias('unpack', $.alias), with_or_without_brackets(commaSeparated($._expression)), )), // TODO: this needs more cases coverage list_block: $ => seq( '{', repeat1(choice($._statement, $._expression_without_bareword)), '}' ), bless: $ => prec.right(seq( 'bless', with_or_without_brackets( seq( field('self', $._reference), optional(seq( ',', // comma separated field('class', $._expression), )), ), ), )), goto_expression: $ => prec.right(PRECEDENCE.ASSIGNMENT_OPERATORS, seq( 'goto', choice( seq(field('label', $.identifier), ':'), field('expression', $._expression), field('subroutine', $.call_expression), ), )), // begin of operators binary_expression: $ => choice( $._exponentiation, $._binding_expression, $._bodmas_1, $._bodmas_2, $._shift_expression, $._relational_expression, $._equality_expression, $._class_instance_exp, $._bitwise_and_exp, $._bitwise_or_xor_exp, $._logical_and_exp, $._logical_ors_exp, $._range_exp, $._assignment_exp, $._logical_verbal_or_xor, ), unary_expression: $ => choice( $._auto_increment_decrement, $._symbolic_unary, // TODO: named_unary_expression $._unary_not, $._unary_and, ), ternary_expression: $ => prec.right(PRECEDENCE.TERNARY_OPERATOR, seq( field('condition', $._expression), field('operator', '?'), field('true', $._expression), field('operator', ':'), field('false', $._expression), )), // no associativity // auto increment and auto decrement _auto_increment_decrement: $ => prec(PRECEDENCE.AUTO_INCREMENT_DECREMENT, choice( seq( field('operator', choice('++', '--')), field('variable', $._expression), ), seq( field('variable', $._expression), field('operator', choice('++', '--')), ), )), // It binds even more tightly than unary minus, so -2**4 is -(2**4), not (-2)**4 _exponentiation: $ => prec.right(PRECEDENCE.EXPONENTIATION, seq( field('variable', $._expression), field('operator', '**'), field('variable', $._expression), )), _symbolic_unary: $ => prec.right(PRECEDENCE.SYMBOLIC_UNARY, choice( seq( field('operator', '!'), field('variable', $._expression), ), seq( field('operator', '~'), field('variable', $._expression), ), $.to_reference, seq( field('operator', '+'), field('variable', $._expression), ), seq( field('operator', '-'), field('variable', $._expression), ), )), _binding_expression: $ => prec.left(PRECEDENCE.BINDING_OPERATORS, choice( seq( field('variable', $._expression), field('operator', '=~'), field('variable', $._expression), ), seq( field('variable', $._expression), field('operator', '!~'), field('variable', $._expression), ), )), _bodmas_1: $ => prec.left(PRECEDENCE.BODMAS_1, choice( seq( field('variable', $._expression), field('operator', '*'), field('variable', $._expression), ), seq( field('variable', $._expression), field('operator', '/'), field('variable', $._expression), ), seq( field('variable', $._expression), field('operator', '%'), field('variable', $._expression), ), seq( field('variable', $._expression), field('operator', 'x'), field('variable', $._expression), ), )), _bodmas_2: $ => prec.left(PRECEDENCE.BODMAS_2, choice( seq( field('variable', $._expression), field('operator', '+'), field('variable', $._expression), ), seq( field('variable', $._expression), field('operator', '-'), field('variable', $._expression), ), seq( field('variable', $._expression), field('operator', '.'), field('variable', $._expression), ), )), _shift_expression: $ => prec.left(PRECEDENCE.SHIFT_OPERATORS, choice( seq( field('variable', $._expression_without_bareword), field('operator', '<<'), field('variable', $._expression_without_bareword), ), seq( field('variable', $._expression_without_bareword), field('operator', '>>'), field('variable', $._expression_without_bareword), ), )), // has chaining. example: $a > $b > $c _relational_expression: $ => prec.left(PRECEDENCE.RELATIONAL_OPERATORS, seq( field('variable', $._expression), repeat1(seq( choice( '<', '>', '<=', '>=', 'lt', 'gt', 'le', 'ge', ), $._expression, )) )), // first few has chaining _equality_expression: $ => prec.left(PRECEDENCE.EQUALITY_OPERATORS, choice( seq( field('variable', $._expression), repeat1(seq( choice( '==', '!=', 'eq', 'ne', ), $._expression, )) ), seq( field('variable', $._expression), field('operator', '<=>'), field('variable', $._expression), ), seq( field('variable', $._expression), field('operator', 'cmp'), field('variable', $._expression), ), seq( field('variable', $._expression), field('operator', '~~'), field('variable', $._expression), ), )), _class_instance_exp: $ => prec(PRECEDENCE.ISA_OPERATOR, seq( field('variable', $._expression), field('operator', 'isa'), field('variable', $._expression), )), _bitwise_and_exp: $ => prec.left(PRECEDENCE.BITWISE_AND, seq( field('variable', $._expression), field('operator', '&'), field('variable', $._expression), )), _bitwise_or_xor_exp: $ => prec.left(PRECEDENCE.BITWISE_OR_XOR, choice( seq( field('variable', $._expression), field('operator', '|'), field('variable', $._expression), ), seq( field('variable', $._expression), field('operator', '^'), field('variable', $._expression), ), )), _logical_and_exp: $ => prec.left(PRECEDENCE.LOGICAL_AND, seq( field('variable', $._expression), field('operator', '&&'), field('variable', $._expression), )), _logical_ors_exp: $ => prec.left(PRECEDENCE.LOGICAL_ORS, choice( seq( field('variable', $._expression), field('operator', '||'), field('variable', $._expression), ), seq( field('variable', $._expression), field('operator', '//'), field('variable', $._expression), ), )), _range_exp: $ => prec(PRECEDENCE.RANGE_OPERATOR, choice( seq( field('variable', $._expression), field('operator', '..'), field('variable', $._expression), ), seq( field('variable', $._expression), field('operator', '...'), field('variable', $._expression), ), )), // **= += *= &= &.= <<= &&= // -= /= |= |.= >>= ||= // .= %= ^= ^.= //= // x= _assignment_exp: $ => prec.right(PRECEDENCE.ASSIGNMENT_OPERATORS, choice( ...[ '=', '**=', '+=', '*=', '&=', '&.=', '<<=', '&&=', '-=', '/=', '|=', '|.=', '>>=', '||=', '.=', '%=', '^=', '^.=', '//=', 'X=', ].map((operator) => seq( field('variable', $._expression), field('operator', operator), field('variable', $._expression), ), ) )), _comma_operator: $ => choice( ',', '=>', ), _unary_not: $ => prec.right(PRECEDENCE.UNARY_NOT, seq( field('operator', 'not'), field('variable', $._expression), )), _unary_and: $ => prec.left(PRECEDENCE.UNARY_AND, seq( field('operator', 'and'), field('variable', $._expression), )), _logical_verbal_or_xor: $ => prec.left(PRECEDENCE.OR_XOR, choice( seq( field('variable', $._expression), field('operator', 'or'), field('variable', $._expression), ), seq( field('variable', $._expression), field('operator', 'xor'), field('variable', $._expression), ) )), // end of operators _i_o_operator: $ => choice( $.standard_input, $.file_handle, $.standard_input_to_identifier, $.standard_input_to_variable, ), standard_input: $ => choice( /<>/, /<<>>/, //, /\\\*STDIN/, // a reference to the STDIN ), file_handle: $ => //, standard_input_to_identifier: $ => seq( '<', $.identifier, token.immediate('>'), ), standard_input_to_variable: $ => seq( '<', $.scalar_variable, token.immediate('>'), ), call_expression: $ => prec.left(PRECEDENCE.TERM, seq( optional(token.immediate('&')), optional(seq( field('package_name', $.package_name), token.immediate('::'), )), field('function_name', $.identifier), field('args', choice($.parenthesized_argument, $.arguments)), )), call_expression_with_bareword: $ => prec.left(seq( optional(token.immediate('&')), optional(seq( field('package_name', $.package_name), token.immediate('::'), )), field('function_name', $.identifier), )), method_invocation: $ => prec.left(PRECEDENCE.TERM, seq( choice( field('package_name', choice($.identifier, $.package_name, $.string_single_quoted)), field('object', $.scalar_variable), field('object_return_value', $._expression), ), prec.right(repeat1( seq( $.arrow_operator, choice( seq( optional(seq($.super, token.immediate('::'))), field('function_name', $.identifier) ), $.special_scalar_variable, $.scalar_variable, $.scalar_dereference, ), // anything with bracket is higher precedence. // so args without brackets is lower precedence. optional(field('args', choice($.parenthesized_argument, $.arguments))), // TODO: make this optional and fix errors ), ), ))), parenthesized_argument: $ => prec(PRECEDENCE.TERM, seq( '(', optional($.arguments), ')', )), arguments: $ => prec.left(PRECEDENCE.COMMA_OPERATORS, commaSeparated($.argument), ), argument: $ => prec.left(PRECEDENCE.LOWEST, $._expression, ), call_expression_recursive: $ => seq( '__SUB__', field('operator', '->'), $.parenthesized_argument, ), _primitive_expression: $ => choice( // data-types $._scalar_type, $._boolean, $._array_type, ), _variables: $ => choice( $.scalar_variable, $.array_variable, $.hash_variable, ), _scalar_type: $ => choice( $.string_single_quoted, $.string_q_quoted, // TODO: handle escape sequences $.string_double_quoted, $.string_qq_quoted, $._numeric_literals, $.array_ref, $.hash_ref, $.array_access_variable, $.hash_access_variable, ), // the strings _string: $ => prec.left(PRECEDENCE.TERM, choice( $.string_single_quoted, $.string_q_quoted, $.string_double_quoted, $.string_qq_quoted, $.heredoc_initializer, )), _resolves_to_digit: $ => choice( $.string_single_quoted, $.string_q_quoted, // TODO: handle escape sequences $.string_qq_quoted, $._numeric_literals, ), _array_type: $ => choice( $.array, $.word_list_qw, ), _numeric_literals: $ => choice( $.integer, $.floating_point, $.scientific_notation, $.hexadecimal, $.octal, ), integer: $ => /-?\d+/, floating_point: $ => /-?\d+\.\d+/, // copied shamelessly from https://stackoverflow.com/questions/638565/parsing-scientific-notation-sensibly scientific_notation: $ => /[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?/, hexadecimal: $ => /0[xX][0-9a-fA-F]+/, octal: $ => /0[1-7][0-7]*/, version: $ => choice( /v[\d.]+/, // v5.24.1 /[\d.]+/, // 5.24.1 /[\d._]+/, // 5.024_001 older syntax compatible with perl 5.6 ), identifier: $ => /[a-zA-Z0-9_]+/, // bareword: $ => /[a-zA-Z0-9_$]+/, // any characters or just bareword(s) and variables identifier_2: $ => /[a-zA-Z0-9_$:\.@%\^]+/, // TODO: this should be operators. Check. loop_control_keyword: $ => prec(PRECEDENCE.ASSIGNMENT_OPERATORS, choice( 'next', 'last', 'redo', )), package_name: $ => choice( $.identifier, seq( $.identifier, repeat1(seq( token.immediate('::'), $.identifier, )), ), // /[A-Z_a-z][0-9A-Z_a-z]*(?:::[0-9A-Z_a-z]+)*/, // /\$[0-9A-Z_a-z]*(?:::[0-9A-Z_a-z]+)*/, // TODO fix this // /\*[0-9A-Z_a-z]*(?:::[0-9A-Z_a-z]+)*/, // type glob stuff // TODO: put in other package name structures ), module_name: $ => choice( seq('\'', /.*pm/, '\''), seq('\"', /.*pm/, '\"'), ), semi_colon: $ => choice(';', $._automatic_semicolon), string_single_quoted: $ => prec(PRECEDENCE.TERM, seq( "'", repeat($._string_single_quoted_content), "'", )), // TODO change all + to * in regex // NOTE/TODO: // we are currently only supporting {, /, (, \ as delimiters // in future release should use external scanners for delimiters string_q_quoted: $ => prec(PRECEDENCE.TERM, seq( 'q', choice( seq('{', token(prec(PRECEDENCE.TERM, /[^}]+/)), '}'), seq('/', token(prec(PRECEDENCE.TERM, /[^/]+/)), '/'), seq('(', token(prec(PRECEDENCE.TERM, /[^)]+/)), ')'), seq('\'', token(prec(PRECEDENCE.TERM, /[^']+/)), '\''), ), )), string_double_quoted: $ => prec(PRECEDENCE.TERM, seq( '"', repeat(choice($.interpolation, $.escape_sequence, $._string_double_quoted_content)), '"', )), string_qq_quoted: $ => prec(PRECEDENCE.TERM, seq( 'qq', alias($._start_delimiter, $.start_delimiter), repeat(choice($._string_qq_quoted_content, $.interpolation, $.escape_sequence)), alias($._end_delimiter, $.end_delimiter), )), command_qx_quoted: $ => prec(PRECEDENCE.TERM, seq( 'qx', choice( $.string_single_quoted, // TODO: this is not working seq( alias($._start_delimiter, $.start_delimiter), repeat(choice($._string_qq_quoted_content, $.interpolation, $.escape_sequence)), alias($._end_delimiter, $.end_delimiter), ), ), )), // same as command_qx_quoted backtick_quoted: $ => prec(PRECEDENCE.TERM, seq( '`', repeat(choice($.interpolation, $.escape_sequence, token(/[^`]+/))), '`', )), word_list_qw: $ => prec(PRECEDENCE.TERM, seq( 'qw', alias($._start_delimiter_qw, $.start_delimiter_qw), repeat(alias($._element_in_qw, $.list_item)), alias($._end_delimiter_qw, $.end_delimiter_qw), )), patter_matcher_m: $ => prec(PRECEDENCE.TERM, seq( 'm', // /'.*'/, // don't interpolate for a single quote. TODO: not working alias($._start_delimiter_regex, $.start_delimiter), repeat(choice($._regex_pattern, $.interpolation, $.escape_sequence)), alias($._end_delimiter_regex, $.end_delimiter), optional($.regex_option), )), pattern_matcher: $ => prec(PRECEDENCE.TERM, seq( '/', $.regex_pattern, '/', optional($.regex_option), )), regex_pattern_qr: $ => prec(PRECEDENCE.TERM, seq( 'qr', // /'.*'/, // don't interpolate for a single quote. TODO: not working alias($._start_delimiter_regex, $.start_delimiter), repeat(choice($._regex_pattern, $.interpolation, $.escape_sequence)), alias($._end_delimiter_regex, $.end_delimiter), optional($.regex_option), )), substitution_pattern_s: $ => prec(PRECEDENCE.TERM, seq( 's', alias($._start_delimiter_search_replace, $.start_delimiter), repeat(choice($._search_replace_content, $.interpolation, $.escape_sequence)), alias($._separator_delimiter_search_replace, $.separator_delimiter), repeat(choice($._search_replace_content, $.interpolation, $.escape_sequence)), alias($._end_delimiter_search_replace, $.end_delimiter), field('regex_option', optional($.regex_option_for_substitution)), )), // TODO: revisit this transliteration_tr_or_y: $ => prec(PRECEDENCE.TERM, seq( choice('tr', 'y'), alias($._start_delimiter_transliteration, $.start_delimiter), repeat($._transliteration_content), alias($._separator_delimiter_transliteration, $.separator_delimiter), repeat($._transliteration_content), alias($._end_delimiter_transliteration, $.end_delimiter), field('regex_option', optional($.regex_option_for_transliteration)), )), // shamelessly copied from the tree-sitter-javascript regex_pattern: $ => prec(PRECEDENCE.TERM, repeat1( choice( seq( '[', repeat(choice( seq('\\', /./), // escaped character /[^\]\n\\]/ // any character besides ']' or '\n' )), ']' ), // square-bracket-delimited character class seq('\\', /./), // escaped character /[^/\\\[\n]/, // any character besides '[', '\', '/', '\n' ), )), regex_option: $ => /[msixpodualng]+/, regex_option_for_substitution: $ => /[msixpodualngcer]+/, regex_option_for_transliteration: $ => /[cdsr]+/, // https://perldoc.perl.org/perlop#Quote-and-Quote-like-Operators escape_sequence: $ => prec(PRECEDENCE.ESCAPE_SEQ, seq( '\\', /[tnrfbae]/, )), // escape_character: $ => '\\[.]+', interpolation: $ => choice( prec(PRECEDENCE.TERM, $._variables), $.scalar_dereference, $.special_scalar_variable, $.hash_access_in_interpolation, $.array_access_in_interpolation, // $.function_call_in_interpolation, ), // print "${\(hello())}"; // function_call_in_interpolation: $ => seq( // '$', '{', '\\{', '(', // $.call_expression, // ')', '}' // ), hash_access_in_interpolation: $ => prec(PRECEDENCE.HASH, seq( $.scalar_variable, repeat1(seq( choice( token.immediate('->{'), token.immediate('{'), ), field('hash_key', choice($.identifier, $.scalar_variable, $._string)), '}', )), )), array_access_in_interpolation: $ => prec(PRECEDENCE.ARRAY, seq( $.scalar_variable, repeat1(seq( choice( token.immediate('->['), token.immediate('[') ), token.immediate('['), field('index', choice($._resolves_to_digit, $.scalar_variable, $._string)), ']', )), )), _boolean: $ => choice( $.true, $.false, ), true: $ => 'true', false: $ => 'false', special_scalar_variable: $ => prec.right(PRECEDENCE.TERM + 2, seq( '$', with_or_without_curly_brackets(/#*|[!"#$%&'()*+,-./0123456789:;<=>?@\]\[\\_`|~]/), // NOTE: ab is removed as my $a = 1 is possible )), scalar_variable: $ => prec(PRECEDENCE.TERM, seq( '$', with_or_without_curly_brackets($._scalar_variable_external) )), array_access_variable: $ => prec(PRECEDENCE.ARRAY, seq( field('array_variable', $._expression), repeat1( seq( choice( token.immediate('->['), token.immediate('[') ), field('index', $._expression), ']', ) ), )), hash_access_variable: $ => prec(PRECEDENCE.HASH, seq( field('hash_variable', $._expression), repeat1( seq( choice( token.immediate('->{'), token.immediate('{'), ), field('key', choice( alias($.identifier, $.bareword), alias($.key_words_in_hash_key, $.bareword), $._expression_without_bareword, )), '}', ) ), )), array_variable: $ => prec(PRECEDENCE.TERM, choice( /@[+-_!]/, // special array variable /@\^[A-Z]/, // %^H /@[a-zA-Z0-9_]+/ )), hash_variable: $ => prec(PRECEDENCE.TERM, choice( /%[!+-]/, // special hash variables /%\^[A-Z]/, // %^H /%[a-zA-Z0-9_]+/ )), _list: $ => choice( $._array_type, $.array_variable, ), array: $ => prec(PRECEDENCE.TERM, seq( '(', optional(commaSeparated($._expression)), ')', )), array_ref: $ => seq( '[', optional(commaSeparated($._expression)), ']', ), hash_ref: $ => prec(PRECEDENCE.HASH, seq( optional('+'), // to make into a hash_ref rather than a block '{', optional(binaryCommaSeparated(choice( $.ternary_expression, $.key_value_pair, $.hash_dereference, $.hash_variable, ))), '}' )), // refer - https://perldoc.perl.org/perlref _reference: $ => choice( $.array_ref, $.hash_ref, $.scalar_variable, // doubtful $.to_reference, $.anonymous_function, ), to_reference: $ => prec.right(PRECEDENCE.SYMBOLIC_UNARY, seq( field('operator', '\\'), field('variable', $._expression), // this is to make anything a reference )), _dereference: $ => choice( $.scalar_dereference, $.array_dereference, $.hash_dereference, ), // ${\(1)} // lower precedance than a scalar variable? scalar_dereference: $ => prec.right(PRECEDENCE.TERM + 1, seq( '$', choice( with_curly_brackets($._expression), with_or_without_curly_brackets($.scalar_dereference), with_or_without_curly_brackets($.scalar_variable), ), )), array_dereference: $ => prec.left(PRECEDENCE.TERM, seq( '@', with_or_without_curly_brackets($._expression), )), hash_dereference: $ => prec.left(PRECEDENCE.TERM ,seq( '%', with_or_without_curly_brackets($._expression), )), // cat => 'meow', meta => {} key_value_pair: $ => prec.left(PRECEDENCE.COMMA_OPERATORS, seq( field('key', choice( alias($.identifier, $.bareword), alias($.key_words_in_hash_key, $.bareword), $.variable_declaration, $._expression_without_bareword, )), $.hash_arrow_operator, field('value', $._expression), )), // NOTE: this is a hack for keys that are keywords key_words_in_hash_key: $ => choice( 'sub' ), arrow_operator: $ => prec.left(PRECEDENCE.ARROW_OPERATOR, /->/), hash_arrow_operator: $ => prec.left(PRECEDENCE.COMMA_OPERATORS, /=>/), // alias comma operator // some key words super: $ => 'SUPER', no_require: $ => '-norequire', // single line comment comments: $ => prec(PRECEDENCE.COMMENTS, seq( '#', /.*/ )), } }); /** * repeats the rule comma separated, like * rule, rule => rule, rule => rule * example: my (a, b); * using it in the above. * @param {*} rule */ function commaSeparated(rule) { return prec.left(PRECEDENCE.COMMA_OPERATORS, seq( rule, repeat(seq(choice(',', '=>'), rule)), optional(','), // in perl so far you could have this )); } /** * repeats the rule binary comma (,) separated, like * rule, rule * example: my (a, b); * using it in the above. * @param {*} rule */ function binaryCommaSeparated(rule) { return prec.left(PRECEDENCE.COMMA_OPERATORS, seq( rule, repeat(seq(',', rule)), optional(','), // in perl so far you could have this )); } /** * Given a rule, returns back a rule with and without * brackets on them. * * print ("hello"); vs print "hello" * * @param {any} rule the rule * @returns choice of rules */ function with_or_without_brackets(rule) { return choice( rule, prec.left(PRECEDENCE.TERM, seq('(', rule, ')')), ); } // TODO: the above should be like this, test it // function with_or_without_brackets(rule) { // return choice( // rule, // prec(PRECEDENCE.TERM, seq('(', rule, ')')), // ); // } /** * Given a rule, returns back a rule with and without * curly brackets on them. * * @$array vs @{$array} * * @param {any} rule the rule * @returns choice of rules */ function with_or_without_curly_brackets(rule) { return choice( rule, prec.left(PRECEDENCE.TERM, seq('{', rule, '}')), ); } function with_curly_brackets(rule) { return prec.left(PRECEDENCE.TERM, seq('{', rule, '}')); } /** * Given a rule, returns back a choice of rule with and * without quotes surrounded by the rule. * * $hash->{'romantic'} vs $hash->{romantic} * * @param {any} rule the rule * @returns choice of rules */ function with_or_without_quotes(rule) { return choice( rule, seq('\'', rule, '\''), seq('"', rule, '"'), ); } // TODO: move this to a custom scanner so that it matches - https://stackoverflow.com/questions/22492028/regex-that-start-and-end-with-same-letter // /([^a-z]).*\1/, function delimited_with_interpolation($) { return choice( ); }