// LAMMPS rules for parsing // https://docs.lammps.org/Commands_parse.html // // No guarantee that this grammar totally matches /// // @ts-check // From tree-sitter python grammar. Not all is relevant // TODO: Use and tidy if need be const PREC = { conditional: -1, parenthesized_expression: 1, parenthesized_list_splat: 1, string: -1, under_var: 1, or: 10, and: 11, not: 12, compare: 13, bitwise_or: 14, bitwise_and: 15, xor: 16, shift: 17, plus: 18, times: 19, unary: 20, power: 21, call: 22, }; // Stolen from bash grammar // TODO: Some of these are not valid for lammps const SPECIAL_CHARACTERS = [ "'", '"', // '<', '>', '{', '}', '\\[', '\\]', // TODO Readd if appropriate // '(', ')', // '`', '$', // '|', '&', ';', '&', '\\', '\\s', '\\n', '#', ]; module.exports = grammar({ name: 'lammps', extras: $ => [ $.comment, /\s/, // Whitespace not parsed as node // /[ \t\r\f]/, // Whitespace except newline // /\\\r?\n/, // new line $._line_continuation, ], supertypes: $ => [$._statement], inline: $ => [$._primary_expression, $._concat_expression, $._statement, $._terminator], externals: $ => [$._concat, $._eof], word: $ => $._identifier, rules: { // TODO: Match bash grammar. Make last line terminator optional input_script: $ => optional($._statements), // As is currently need a new-line on every line. // Get some strange parsing issues without _statements: $ => prec(1, repeat1(seq( $._statement, $._terminator, )), ), _statement: $ => // Choice of LAMMPS commands. // Starts with 'keyword' commands then others choice( $._definition, $.shell, $.command,), _definition: $ => choice( $.fix, $.compute, $.variable_def, $.variable_del, ), command: $ => seq($.command_name, optional($.args_under)), fix: $ => seq('fix', field('fix_id', $.fix_id), field('group', $.group_id), field('style', $.fix_style), field('arguments', optional($.args_under))), compute: $ => seq('compute', field('compute_id', $.compute_id), field('group', $.group_id), field('style', $.compute_style), field('arguments', optional($.args_under))), variable_def: $ => choice( $._variable_equal, $._variable_atom, $._variable_vector, $._variable_other), // TODO: add other variable types _variable_equal: $ => seq('variable', field("name", $.variable), field('style', alias('equal', $.variable_style)), field("rhs", choice($.expression, $.quoted_expression))), _variable_atom: $ => seq('variable', field("name", $.variable), field('style', alias('atom', $.variable_style)), field("rhs", choice($.expression, $.quoted_expression))), _variable_vector: $ => seq('variable', field("name", $.variable), field('style', alias('vector', $.variable_style)), field("rhs", choice($.expression, $.quoted_expression, $.vector_expression))), // String-like variable styles _string_like: $ => choice($.concatenation, $.string, $.raw_string, $.triple_string, $.word, $.var_curly, $.var_round, $.simple_expansion), variable_style: _ => choice('atomfile', 'file', 'format', 'getenv', 'index', 'internal', 'loop', 'python', 'string', 'timer', 'uloop', 'universe', 'world', ), // equal and atom don't appear here due to special handling // All other styles not explicitly looked at _variable_other: $ => seq('variable', field("name", $.variable), field('style', $.variable_style), field("args", repeat1($._string_like))), variable_del: $ => seq('variable', field("name", $.variable), field('style', alias('delete', $.variable_style))), // TODO: Decide if string is a good type. // TODO: Properly match the shell syntax. Expand into arguments shell: $ => seq('shell', field('shell_cmd', $._any_string)), parens: $ => prec(PREC.parenthesized_expression, seq('(', $.expression, ')')), // Builtin LAMMPS functions in variable commands. func: $ => prec(PREC.call, seq(field("function", $.identifier,), field("args", $.argument_list))), // argument_list: $ => seq("(", commaSep1($.expression), optional(","), ")"), unary_op: $ => prec(PREC.unary, seq(choice('-', '!'), $.expression)), binary_op: $ => { const table = [ [prec.left, '+', PREC.plus], [prec.left, '-', PREC.plus], [prec.left, '*', PREC.times], [prec.left, '/', PREC.times], [prec.left, '%', PREC.times], [prec.right, '^', PREC.power], [prec.left, '==', PREC.compare], [prec.left, '!=', PREC.compare], [prec.left, '<', PREC.compare], [prec.left, '<=', PREC.compare], [prec.left, '>', PREC.compare], [prec.left, '>=', PREC.compare], [prec.left, '&&', PREC.bitwise_and], [prec.left, '||', PREC.bitwise_or], [prec.left, '|^', PREC.bitwise_or], ]; // @ts-ignore return choice(...table.map(([fn, operator, precedence]) => fn(precedence, seq( field('left', $.expression), // @ts-ignore field('operator', operator), field('right', $.expression), )))); }, // TODO: Verify this prec // TODO: create "arithmetic literal" and set that prec to 1 expression: $ => choice( $.binary_op, // $.underscore_ident, // $.identifier, $._expr_ident, $.int, $.float, $.bool, $.unary_op, // Built-in Function calls $.func, $.constant, $.simple_expansion, $.thermo_kwarg, $.atom_property, $.var_curly, $.var_round, $.parens, $.indexing, ), quoted_expression: $ => seq('"', $.expression, '"'), vector_expression: $ => seq(optional('"'), '[', commaSep1($.expression), ']', optional('"')), // TODO: Reduce to one and make a field. This would require changes to the queries fix_id: $ => $.identifier, compute_id: $ => $.identifier, //group_id: $ => choice("all", $.identifier), // TODO: make this the definition? group_id: $ => $.identifier, fix_style: $ => $._style_name, // TODO: add other fixes!!! compute_style: $ => $._style_name, // TODO: add other fixes!!! args_under: $ => repeat1(field("arg", $._arg_under)), //TODO: think carefully about how this interacts with line continuations and triple quotes _terminator: $ => token(prec(PREC.under_var, '\n',)), // TODO: add keywords for deletion commands // TODO: change identifiers to just be words or args. // True identifiers only show up in within ${} and $() in args // TODO: Adding string literal seems to have harmed performance??? // TODO: make these have fields with names // TODO: Add a concatenation arg // TODO: Remove args_under and add underscore_ident to args. _arg_under: $ => choice($._primary_expression, $.concatenation, $.raw_string), // $.string // LAMMPS builtin variables that can be accessed in expressions. thermo_kwarg: _ => token( choice('step', 'elapsed', 'elaplong', 'dt', 'time', 'cpu', 'tpcpu', 'spcpu', 'cpuremain', 'part', 'timeremain', 'atoms', 'temp', 'press', 'pe', 'ke', 'etotal', 'evdwl', 'ecoul', 'epair', 'ebond', 'eangle', 'edihed', 'eimp', 'emol', 'elong', 'etail', 'enthalpy', 'ecouple', 'econserve', 'vol', 'density', 'lx', 'ly', 'lz', 'xlo', 'xhi', 'ylo', 'yhi', 'zlo', 'zhi', 'xy', 'xz', 'yz', 'xlat', 'ylat', 'zlat', 'bonds', 'angles', 'dihedrals', 'impropers', 'pxx', 'pyy', 'pzz', 'pxy', 'pxz', 'pyz', 'fmax', 'fnorm', 'nbuild', 'ndanger', 'cella', 'cellb', 'cellc', 'cellalpha', 'cellbeta', 'cellgamma',)), // Per atom properties // Only truly valid in expressions in atom variables. // Or with indexing. atom_property: _ => token( choice('d', 'mass', 'type', 'mol', 'radius', 'q', 'x', 'y', 'z', 'vx', 'vy', 'vz', 'fx', 'fy', 'fz') ), constant: _ => token(choice('PI', 'version')), var_curly: $ => seq('${', $.variable, '}'), var_round: $ => seq('$(', $.expression, ')'), // A single character variable name simple_expansion: $ => seq('\$', $._concat, alias(/[a-zA-Z0-9]/, $.variable)), // TODO: Have this match with no whitespace between them!! // TODO: Have this beat out an `argname` in an args_under // underscore_ident: $ => seq( // $.prefix, // // $._concat, // $.bare_ident, // ), underscore_ident: $ => choice( $._variable_under, $._fix_under, $._compute_under, ), _expr_ident: $ => choice($.underscore_ident, $.identifier,), indexed_ident: $ => prec(PREC.call, seq( field("ident", choice($.underscore_ident, $.indexed_ident)), "[", choice($.int, $.glob), "]", ) ), // TODO: make prefix a dedicated token? _v_prefix: _ => token(prec(PREC.under_var, 'v_')), _f_prefix: _ => token(prec(PREC.under_var, 'f_')), _c_prefix: _ => token(prec(PREC.under_var, 'c_')), // Increased prefix to beat out raw identifiers. // prefix: _ => token(prec(PREC.under_var, choice('v_', 'f_', 'c_'))), _variable_under: $ => seq($._v_prefix, $._concat, $.variable,), _fix_under: $ => seq($._f_prefix, $._concat, $.fix_id,), _compute_under: $ => seq($._c_prefix, $._concat, $.compute_id,), // Indexing an expression. indexing: $ => prec(PREC.call, seq( field("value", $.expression), '[', field("index", choice($.int, $.glob, $.expression)), ']')), // TODO: re-add indexing int: _ => token(/[0-9]+/), // Stolen from python grammar float: _ => { const digits = repeat1(/[0-9]+_?/); const exponent = seq(/[eE][\+-]?/, digits); return token(seq( choice( seq(digits, '.', optional(digits), optional(exponent)), seq(optional(digits), '.', digits, optional(exponent)), seq(digits, exponent), ), optional(choice(/[Ll]/, /[jJ]/)), // TODO: I doubt this J thing works with lammps )); }, // TODO: rename primarmary argument? _primary_expression: $ => choice( $.word, $.underscore_ident, // Only parse as an indexed variable if underscore ident $.indexed_ident, $.string, $.triple_string, $.int, $.float, $.bool, $.var_curly, $.var_round, $.simple_expansion, ), // TODO: Perhaps readd number and bool types to this _concat_expression: $ => choice($.word, $.string, $.var_curly, $.var_round, $.simple_expansion), concatenation: $ => prec(-1, seq( choice( $._concat_expression, // alias($._special_character, $.word), ), repeat1(seq( choice($._concat, // alias(/`\s*`/, '``') ), choice( $._concat_expression, // alias($._special_character, $.word), // alias($._comment_word, $.word), // alias($._bare_dollar, '$'), ), )), optional(seq($._concat, '$')), )), // _special_character: _ => token(prec(-1, choice('{', '}', '[', ']'))), string: $ => seq( '"', repeat(seq( choice( seq(optional('$'), $.string_content), $.var_curly, $.var_round, $.simple_expansion, $.raw_string, ), )), optional('$'), '"', ), _line_continuation: _ => /&\s*\n/, // Line Continuation seems like it should have optional `\\\r?` added // NOTE: Unlike lammps, allows newlines in raw string without the & prefix // This will be parsed and checked in the grammar /** A string type that does not expand variables */ raw_string: $ => seq("'", repeat( seq( alias(/[^'\n]+/, $.string_content), // Strings that span multiple lines should be joined with continuations optional($._line_continuation) ) ), "'"), // Like double quoted strings these exapnd variables. // Unlike them, `&` characters are not needed for new lines // Double quotes are also valid in triple strings triple_string: $ => seq( '"""', repeat(seq( choice( seq(optional('$'), alias($._triple_string_content, $.string_content)), $.var_curly, $.var_round, $.simple_expansion, // variables in nested strings aren't expanded alias(/"([^"])+"/, $.sub_string_content), alias(/'([^'])+'/, $.sub_string_content), ), )), optional('$'), '"""', ), _triple_string_content: _ => token(prec(-1, /([^$"'\\\r\n]|\\(.|\r?\n))+/)), // triple_sub_string_content: _ => token(prec(-1, // /([^"])+/)), // TODO: Verify this works with LAMMPS too. // TODO: modify so the newline joiner is used not bashes one // Doing so would mean 'gaps' between string_content only at variable substitutions // NOTE: Single quotes aren't allowed because they treated as a // a sub string string_content: _ => token(prec(-1, /([^"'$\\\r\n]|\\(.|\r?\n))+/)), _any_string: $ => token(prec(-1, /[^\n\&]+/)), // TODO: Anything except & and newline. Bit of a bandage. Should really read `shell` arguments separately? command_name: $ => $._name, _name: _ => /[a-z0-9A-Z_]+/, _style_name: _ => /[a-z0-9A-Z_\/]+/, // Lammps computes and fixes can have / in their names // TODO: make this a field? variable: $ => $._identifier, // TODO: better name // A free identifier that does not have a prefix // identifier: $ => $._identifier, // Double check that the identifier // Matches the LAMMPS identifier rules // NOTE: For some reason having `_` last in regex means it can't be used for keyword extraction _identifier: _ => /[_a-z0-9A-Z]+/, identifier: $ => $._identifier, glob: _ => '*', comment: _ => token(prec(-10, /#.*/)), // comment: _ => token(/#.*/), // Want bool to not be picked out in words such as `once` etc or `region` so has lower prec bool: $ => choice($.true, $.false), true: _ => token(choice('true', 'yes', 'on')), false: _ => token(choice('false', 'no', 'off')), word: $ => token( repeat1(choice( noneOf(...SPECIAL_CHARACTERS), seq('\\', noneOf('\\s')) )) ), } }); function noneOf(...characters) { const negatedString = characters.map(c => c == '\\' ? '\\\\' : c).join('') return new RegExp('[^' + negatedString + ']') } /** * Creates a rule to match one or more of the rules separated by a comma * * @param {RuleOrLiteral} rule * * @return {SeqRule} * */ function commaSep1(rule) { return sep1(rule, ','); } /** * Creates a rule to match one or more occurrences of `rule` separated by `sep` * * @param {RuleOrLiteral} rule * * @param {RuleOrLiteral} separator * * @return {SeqRule} * */ function sep1(rule, separator) { return seq(rule, repeat(seq(separator, rule))); }