const PREC = { impl: 1, or: 2, and: 3, eq: 4, neq: 4, '<': 5, '>': 5, leq: 5, geq: 5, update: 6, not: 7, '+': 8, '-': 8, '*': 9, '/': 9, concat: 10, '?': 11, negate: 12 } module.exports = grammar({ name: 'nix', extras: $ => [ /\s/, $.comment, ], supertypes: $ => [ $._expression ], inline: $ => [ ], externals: $ => [ $.string_fragment, $._indented_string_fragment, $._path_start, $.path_fragment, $.dollar_escape, $._indented_dollar_escape, ], word: $ => $.keyword, conflicts: $ => [ ], rules: { source_code: $ => optional(field('expression', $._expression)), _expression: $ => $._expr_function_expression, // Keywords go before identifiers to let them take precedence when both are expected. // Workaround before https://github.com/tree-sitter/tree-sitter/pull/246 keyword: $ => /if|then|else|let|inherit|in|rec|with|assert/, identifier: $ => /[a-zA-Z_][a-zA-Z0-9_\'\-]*/, variable_expression: $ => field('name', $.identifier), integer_expression: $ => /[0-9]+/, float_expression: $ => /(([1-9][0-9]*\.[0-9]*)|(0?\.[0-9]+))([Ee][+-]?[0-9]+)?/, path_expression: $=> seq( alias($._path_start, $.path_fragment), repeat( choice( $.path_fragment, alias($._immediate_interpolation, $.interpolation), ) ), ), _hpath_start: $ => /\~\/[a-zA-Z0-9\._\-\+\/]+/, hpath_expression: $=> seq( alias($._hpath_start, $.path_fragment), repeat( choice( $.path_fragment, alias($._immediate_interpolation, $.interpolation), ) ), ), spath_expression: $ => /<[a-zA-Z0-9\._\-\+]+(\/[a-zA-Z0-9\._\-\+]+)*>/, uri_expression: $ => /[a-zA-Z][a-zA-Z0-9\+\-\.]*:[a-zA-Z0-9%\/\?:@\&=\+\$,\-_\.\!\~\*\']+/, _expr_function_expression: $ => choice( $.function_expression, $.assert_expression, $.with_expression, $.let_expression, $._expr_if ), function_expression: $ => choice( seq(field('universal', $.identifier), ':', field('body', $._expr_function_expression)), seq(field('formals', $.formals), ":", field('body', $._expr_function_expression)), seq(field('formals', $.formals), '@', field('universal', $.identifier), ':', field('body', $._expr_function_expression)), seq(field('universal', $.identifier), '@', field('formals', $.formals), ':', field('body', $._expr_function_expression)), ), formals: $ => choice( seq('{', '}'), seq('{', commaSep1(field('formal', $.formal)), '}'), seq('{', commaSep1(field('formal', $.formal)), ',', field('ellipses', $.ellipses), '}'), seq('{', field('ellipses', $.ellipses), '}'), ), formal: $ => seq(field("name", $.identifier), optional(seq('?', field('default', $._expression)))), ellipses: $ => '...', assert_expression: $ => seq('assert', field('condition', $._expression), ';', field('body', $._expr_function_expression)), with_expression: $ => seq('with', field('environment', $._expression), ';', field('body', $._expr_function_expression)), let_expression: $ => seq('let', optional($.binding_set), 'in', field('body', $._expr_function_expression)), _expr_if: $ => choice( $.if_expression, $._expr_op ), if_expression: $ => seq('if', field('condition', $._expression), 'then', field('consequence', $._expression), 'else', field('alternative', $._expression)), _expr_op: $ => choice( $.has_attr_expression, $.unary_expression, $.binary_expression, $._expr_apply_expression ), // I choose to *not* have this among the binary operators because // this is the sole exception that takes an attrpath (instead of expression) // as its right operand. // My gut feeling is that this is: // 1) better in theory, and // 2) will be easier to work with in practice. has_attr_expression: $ => prec(PREC['?'], seq( field('expression', $._expr_op), field('operator', '?'), field('attrpath', $.attrpath) ) ), unary_expression: $ => choice( ...[ ['!', PREC.not], ['-', PREC.negate], ].map(([operator, precedence]) => prec(precedence, seq( field('operator', operator), field('argument', $._expr_op) )) ) ), binary_expression: $ => choice( // left assoc. ...[ ['==', PREC.eq], ['!=', PREC.neq], ['<', PREC['<']], ['<=', PREC.leq], ['>', PREC['>']], ['>=', PREC.geq], ['&&', PREC.and], ['||', PREC.or], ['+', PREC['+']], ['-', PREC['-']], ['*', PREC['*']], ['/', PREC['/']], ].map(([operator, precedence]) => prec.left(precedence, seq( field('left', $._expr_op), field('operator', operator), field('right', $._expr_op) ))), // right assoc. ...[ ['->', PREC.impl], ['//', PREC.update], ['++', PREC.concat], ].map(([operator, precedence]) => prec.right(precedence, seq( field('left', $._expr_op), field('operator', operator), field('right', $._expr_op) ))) ), _expr_apply_expression: $ => choice( $.apply_expression, $._expr_select_expression ), apply_expression: $ => seq(field('function', $._expr_apply_expression), field('argument', $._expr_select_expression)), _expr_select_expression: $ => choice( $.select_expression, $._expr_simple ), select_expression: $ => choice( seq(field('expression', $._expr_simple), '.', field('attrpath', $.attrpath)), seq(field('expression', $._expr_simple), '.', field('attrpath', $.attrpath), 'or', field('default', $._expr_select_expression)), ), _expr_simple: $ => choice( $.variable_expression, $.integer_expression, $.float_expression, $.string_expression, $.indented_string_expression, $.path_expression, $.hpath_expression, $.spath_expression, $.uri_expression, $.parenthesized_expression, $.attrset_expression, $.let_attrset_expression, $.rec_attrset_expression, $.list_expression ), parenthesized_expression: $ => seq('(', field('expression', $._expression), ')'), attrset_expression: $ => seq('{', optional($.binding_set), '}'), let_attrset_expression: $ => seq('let', '{', optional($.binding_set), '}'), rec_attrset_expression: $ => seq('rec', '{', optional($.binding_set), '}'), string_expression: $ => seq( '"', repeat(choice( $.string_fragment, $.interpolation, choice($.escape_sequence, seq($.dollar_escape, alias('$', $.string_fragment)) ) )), '"' ), escape_sequence: $ => token.immediate(/\\([^$]|\s)/), // Can also escape newline. indented_string_expression: $ => seq( "''", repeat(choice( alias($._indented_string_fragment, $.string_fragment), $.interpolation, choice(alias($._indented_escape_sequence, $.escape_sequence), seq(alias($._indented_dollar_escape, $.dollar_escape), alias('$', $.string_fragment)) ))), "''" ), _indented_escape_sequence: $ => token.immediate(/'''|''\\([^$]|\s)/), // Can also escape newline. binding_set: $ => repeat1(field('binding', choice($.binding, $.inherit, $.inherit_from))), binding: $ => seq(field('attrpath', $.attrpath), '=', field('expression', $._expression), ';'), inherit: $ => seq('inherit', field('attrs', $.inherited_attrs), ';'), inherit_from: $ => seq('inherit', '(', field('expression', $._expression), ')', field('attrs', $.inherited_attrs), ';'), attrpath: $ => sep1(field('attr', choice( $.identifier, $.string_expression, $.interpolation, )), "."), inherited_attrs: $ => repeat1(field('attr', choice( $.identifier, $.string_expression, $.interpolation, ))), _immediate_interpolation: $ => seq(token.immediate('${'), field('expression', $._expression), '}'), interpolation: $ => seq('${', field('expression', $._expression), '}'), list_expression: $ => seq('[', repeat(field('element', $._expr_select_expression)), ']'), comment: $ => token(choice( seq('#', /.*/), seq( '/*', /[^*]*\*+([^/*][^*]*\*+)*/, '/' ) )), }, }); function sep(rule, separator) { return optional(sep1(rule, separator)); } function sep1(rule, separator) { return seq(rule, repeat(seq(separator, rule))); } function commaSep1(rule) { return sep1(rule, ','); } function commaSep(rule) { return optional(commaSep1(rule)); }