// NOTE[builtin] Nickel lexes all builtin functions in lexer.rs. This is
// possible for us too, but we can also choose to parse them in the parser
// instead. This prevents the grammar from having to be updated when a new
// builtin function is added. Additionally, it keeps the grammar smaller. You
// will not see rules in this grammar that match on each builtin function
// separately.

// NOTE[typerule] In the lalrpop grammar there is a FixedType rule that is just
// a Types rule, but with post-processing. We don't do any post-processing in
// tree-sitter, so we just parse them as a `Types`.

// NOTE[special-infix] The lalrpop grammar produces an AST. In tree-sitter we
// don't have to do this. Hence, we don't have to treat the "|>" and "!="
// operators differently from others. This means we can unify them with the
// other _b_op rules.

// NOTE[scanner] The Nickel scanner is a modal one; this parser and grammar
// take a different approach. Several special characters that may occur in
// strings (", %, #) are scanned in src/scanner.c. In particular for % we have
// to take a strange approach. Scanners in tree-sitter may only produce a
// single token and may not look ahead. So, when scanning %-signs in multiline
// strings, we always attempt to scan the end of the multistring. If that
// fails, we abort, and parse a single %-sign. This requires several changes in
// the grammar (compared to the lalrpop grammar). For instance, static
// strings may consist of multiple chunks, and interpolation and string chunks
// do not strictly alternate.

// Tree-sitter grammar definition for Nickel. The parser rules mirror the
// upstream LALRPOP grammar (the `grammar.lalrpop: N` comments give the
// corresponding line there); deliberate deviations are explained in the
// NOTE[...] entries at the top of this file.
module.exports = grammar({
  name: 'nickel',

  // Tokens that may appear anywhere between other tokens: whitespace and
  // comments.
  extras: $ => [
    /\s/,
    $.comment,
  ],

  // Currently empty.
  supertypes: _ => [
  ],

  // Currently empty.
  inline: _ => [
  ],

  // Tokens produced by the external scanner instead of by rules in this
  // file. See NOTE[scanner]. Note that `comment` is also listed in `extras`.
  externals: $ => [
    $.multstr_start,
    $.multstr_end,
    $._str_start,
    $._str_end,
    $.interpolation_start,
    $.interpolation_end,
    $.quoted_enum_tag_start,
    $.comment,
  ],

  // Token handed to tree-sitter's keyword extraction.
  word: $ => $.keyword,

  rules: {
    // First rule of the grammar, i.e. the start rule.
    term: $ => $.uni_term,

    ////////////////////////////
    // LEXER RELATED RULES (lexer.rs)
    ////////////////////////////
    // All keywords, matched as a single token.
    keyword: _ => token(/if|then|else|forall|in|let|rec|match|null|true|false|fun|import|merge|default|doc|force|optional|priority|not_exported/),

    // Either a decimal literal (optional integer part, optional fraction,
    // optional exponent) or a `0b`/`0o`/`0x` prefixed binary/octal/hex literal.
    num_literal: _ => /([0-9]*\.?[0-9]+([eE][+\-]?[0-9]+)?)|0((b[01]+)|(o[0-7]+)|(x[0-9a-fA-F]+))/,

    // Same as `num_literal`, with an optional leading minus sign.
    signed_num_literal: _ => /-?(([0-9]*\.?[0-9]+([eE][+\-]?[0-9]+)?)|0((b[01]+)|(o[0-7]+)|(x[0-9a-fA-F]+)))/,

    // **IMPORTANT**
    // This regex should be kept in sync with the one for raw_enum_tag below.
    ident: _ => /_*[a-zA-Z][_a-zA-Z0-9-']*/,

    // Standard, unquoted enum tag.
    // **IMPORTANT**
    // This regex should be kept in sync with the one for ident above.
    raw_enum_tag: _ => /'_*[a-zA-Z][_a-zA-Z0-9-']*/,

    ////////////////////////////
    // PARSER RULES (grammar.lalrpop)
    ////////////////////////////

    //grammar.lalrpop: 94
    // In the lalrpop grammar this rule (and the annot rule) is parameterized.
    // See NOTE[typerule].
    annot_atom: $ => choice(
      seq("|", field("ty", $.types)),
      seq("|", "default"),
      seq("|", "force"),
      seq("|", "optional"),
      seq("|", "priority", field("priority", $.signed_num_literal)),
      seq("|", "doc", field("doc", $.static_string)),
      seq("|", "rec", "force"),
      seq("|", "rec", "default"),
      seq("|", "not_exported"),
      seq(":", field("ty", $.types)),
    ),

    //grammar.lalrpop: 126
    // See NOTE[typerule].
    annot: $ => field("anns", repeat1($.annot_atom)),

    //grammar.lalrpop: 133
    types: $ => choice(
      $.infix_expr,
      $.forall,
    ),

    //grammar.lalrpop: 165
    uni_term: $ => choice(
      $.infix_expr,
      // NOTE: We separate the alternatives out into their own rules, otherwise
      // it would get a little much for this single rule.
      $.annotated_infix_expr,
      $.forall,
      $.let_expr,
      $.fun_expr,
      $.ite_expr, // if then else
    ),

    let_expr: $ => seq(
      field("binding", $.let_in_block),
      field("t2", $.term),
    ),

    let_binding: $ => seq(
      field("pat", $.pattern),
      field("meta", optional($.annot)),
      "=",
      field("t1", $.term),
    ),

    // `let [rec] <bindings> in`; the binding list may end with a trailing
    // comma.
    let_in_block: $ => seq(
      "let",
      optional("rec"),
      field("bindings", seq(commaSep($.let_binding), optional(","))),
      "in",
    ),

    fun_expr: $ => seq(
      "fun",
      field("pats", repeat1($.pattern_fun)),
      "=>",
      field("t", $.term),
    ),

    // `match { <branches> }`; the branch list may end with a trailing comma.
    match_expr: $ => seq(
      "match",
      "{",
      field("cases", seq(commaSep($.match_branch), optional(","))),
      "}",
    ),

    ite_expr: $ => seq(
      "if",
      field("cond", $.term),
      "then",
      field("t1", $.term),
      "else",
      field("t2", $.term),
    ),

    //grammar.lalrpop: 224
    annotated_infix_expr: $ => seq(
      field("t", $.infix_expr),
      field("meta", $.annot),
    ),

    //grammar.lalrpop: 232
    forall: $ => seq(
      "forall",
      field("ids", repeat1($.ident)),
      ".",
      field("ty", $.types),
    ),

    //grammar.lalrpop: 242
    applicative: $ => choice(
      seq("import", field("s", $.static_string)),
      $.type_array,
      // To avoid ambiguity with vanilla function application, we use
      // precedence, while the original LALRPOP grammar does things a bit
      // differently (see the `enum_variant` rule for more details).
      $.enum_variant,
      seq(field("t1", $.applicative), field("t2", $.record_operand)),
      // We don't explicitly have the following three rules. Instead we
      // match generically on builtin functions.
      // This is different from the lalrpop grammar. See NOTE[builtin].
      //seq($.u_op, $.record_operand),
      //seq($.b_op_pre, $.record_operand, $.atom),
      //seq($.n_op_pre),
      $.match_expr,
      $.record_operand,
    ),

    //grammar.lalrpop: 255
    type_array: $ => seq("Array", $.record_operand),

    //grammar.lalrpop: 258
    record_operand: $ => choice(
      $.atom,
      $.record_operation_chain,
    ),

    //grammar.lalrpop: 264
    // Field access on a record operand, either by identifier or by string.
    record_operation_chain: $ => choice(
      seq(field("t", $.record_operand), ".", field("id", $.ident)),
      seq(field("t", $.record_operand), ".", field("t_id", $.str_chunks)),
    ),

    //grammar.lalrpop: 269
    row_tail: $ => choice(
      $.ident,
      "Dyn",
    ),

    //grammar.lalrpop: 276
    uni_record: $ => seq(
      "{",
      field("fields", repeat(seq($.record_field, ","))),
      field("last", optional($.record_last_field)),
      field("tail", optional(seq(";", $.row_tail))),
      "}",
    ),

    //grammar.lalrpop: 306
    atom: $ => choice(
      parens($.curried_op),
      parens($.uni_term),
      $.num_literal,
      "null",
      $.bool,
      $.str_chunks,
      $.ident,
      // DIFFERENT from lalrpop grammar. See NOTE[builtin].
      $.builtin,
      $.uni_record,
      $.enum_tag,
      // NOTE: Arrays may have a trailing comma in Nickel
      square(field("terms", seq(commaSep($.term), optional(",")))),
      $.type_atom,
    ),

    //grammar.lalrpop: 328
    record_field: $ => seq(
      field("path", $.field_path),
      field("ann", optional($.annot)),
      field("t", optional(seq("=", $.term))),
    ),

    //grammar.lalrpop: 348
    record_last_field: $ => choice(
      $.record_field,
      "..",
    ),

    // No field since we only have one child here
    field_path: $ => sep1($.field_path_elem, "."),

    field_path_elem: $ => choice(
      $.ident,
      $.str_chunks,
    ),

    // Last entry of a record pattern: a field pattern, or `..` optionally
    // followed by an identifier.
    last_field_pat: $ => choice(
      $.field_pattern,
      seq("..", optional($.ident)),
    ),

    // Last entry of an array pattern: a pattern, or `..` optionally followed
    // by an identifier.
    last_elem_pat: $ => choice(
      $.pattern,
      seq("..", optional($.ident)),
    ),

    // Patterns
    //
    // The LALRPOP grammar uses macros to derive various flavors of patterns and
    // their constituent parts, around parenthesization and parsing or-patterns.
    //
    // We mirror the LALRPOP grammar with the helper functions `patternF` and
    // `pattern_dataF` (defined after the grammar), which take the enum-pattern
    // rule and the or-pattern rule to use as parameters.
    //
    // The tree-sitter grammar doesn't currently correctly handle the `or`
    // keyword used as an identifier in a pattern. This requires either
    // updating the scanner or playing with token precedence, which is left for
    // future work.
    pattern: $ => patternF($, $.enum_pattern, $.or_pattern),
    pattern_fun: $ => patternF($, $.enum_pattern_parens, $.or_pattern_parens),
    pattern_or_branch: $ => pattern_dataF($, $.enum_pattern_parens, $.or_pattern_parens),

    constant_pattern: $ => choice(
      $.signed_num_literal,
      $.bool,
      $.static_string,
      "null",
    ),

    record_pattern: $ => seq(
      "{",
      field("patterns", repeat(seq($.field_pattern, ","))),
      field("last", optional($.last_field_pat)),
      "}",
    ),

    array_pattern: $ => seq(
      "[",
      field("patterns", repeat(seq($.pattern, ","))),
      field("last", optional($.last_elem_pat)),
      "]",
    ),

    field_pattern: $ => seq(
      field("id", $.ident),
      field("anns", optional($.annot)),
      field("default", optional($.default_annot)),
      field("pat", optional(seq("=", $.pattern))),
    ),

    enum_variant_pattern: $ => seq(
      field("tag", $.enum_tag),
      field("pat", $.pattern_fun),
    ),

    // NOTE(review): not referenced by any other rule in this file.
    enum_pattern_unparens: $ => choice(
      $.enum_tag,
      $.enum_variant_pattern,
    ),

    // Enum pattern where a variant (tag applied to a pattern) must be
    // parenthesized.
    enum_pattern_parens: $ => choice(
      $.enum_tag,
      parens($.enum_variant_pattern),
    ),

    // Enum pattern where a variant may appear with or without parentheses.
    enum_pattern: $ => choice(
      $.enum_tag,
      $.enum_variant_pattern,
      parens($.enum_variant_pattern),
    ),

    // Two or more branches separated by `or`.
    or_pattern_unparens: $ => seq(
      field("patterns", repeat1(seq($.pattern_or_branch, "or"))),
      field("last", $.pattern_or_branch),
    ),

    or_pattern_parens: $ => parens($.or_pattern_unparens),

    or_pattern: $ => choice(
      $.or_pattern_unparens,
      $.or_pattern_parens,
    ),

    //grammar.lalrpop: 428
    default_annot: $ => seq(
      "?",
      field("t", $.term),
    ),

    //grammar.lalrpop: 437
    bool: _ => choice(
      "true",
      "false",
    ),

    //grammar.lalrpop: 443
    // Different from lalrpop grammar since we cannot assert. Instead split up
    // into two rules
    str_chunks: $ => choice(
      $.str_chunks_single,
      $.str_chunks_multi,
    ),

    str_chunks_single: $ => seq(
      $._str_start,
      field("chunks", repeat(choice(
        $.chunk_expr,
        $.chunk_literal_single,
      ))),
      $._str_end,
    ),

    str_chunks_multi: $ => seq(
      field("start", $.multstr_start),
      field("chunks", repeat(choice(
        $.chunk_expr,
        $.chunk_literal_multi,
      ))),
      field("end", $.multstr_end),
    ),

    //grammar.lalrpop: 480
    //NOTE: Because we cannot parameterize grammar rules, we instead create two
    //versions: `chunk_literal_single` and `chunk_literal_multi`.
    //chunk_literal: $ => repeat1($.chunk_literal_part),

    //grammar.lalrpop: 492
    //Field names not from lalrpop grammar
    chunk_expr: $ => seq(
      field("start", $.interpolation_start),
      field("t", $.term),
      field("end", $.interpolation_end),
    ),

    //grammar.lalrpop: 492
    //NOTE: We deal with this in the lexer.
    //interpolation: $ => choice(
    //  $._interpolation_start,
    //  $.multstr_start,
    //),

    //grammar.lalrpop: 496
    //Field names differ from lalrpop grammar.
    //See NOTE[scanner].
    static_string: $ => choice(
      // "Single line"
      seq($._str_start, repeat($.chunk_literal_single), $._str_end),
      // m%"Multi line"%m
      seq($.multstr_start, repeat($.chunk_literal_multi), $.multstr_end),
    ),

    // grammar.lalrpop (c30ad1fc6cf43a450126b3c9dd4bbe68d53ca3b2): L55
    // An enum tag escaped with double quotes, like `"enum$tag$with$spec$chars"`
    quoted_enum_tag: $ =>
      seq($.quoted_enum_tag_start, repeat($.chunk_literal_single), $._str_end),

    //grammar.lalrpop: 498
    enum_tag: $ => choice(
      $.raw_enum_tag,
      $.quoted_enum_tag,
    ),

    // There's no enum_variant rule in the original grammar: an enum variant is
    // parsed as an enum tag applied (as a function) to an argument, and this
    // special case is then matched on in the action code. We don't have actions
    // in tree-sitter (we don't build the AST explicitly), and for highlighting
    // and formatting purposes, it's better to have a dedicated rule for enum
    // variants.
    enum_variant: $ => prec(1, seq(
      field("tag", $.enum_tag),
      field("arg", $.record_operand),
    )),

    enum: $ => choice(
      $.enum_tag,
      $.enum_variant,
    ),

    //grammar.lalrpop: 503
    //See NOTE[scanner].
    chunk_literal_single: $ => choice(
      $.str_esc_char,
      $.str_literal,
      $.percent,
    ),

    //See NOTE[scanner].
    chunk_literal_multi: $ => choice(
      $.str_esc_char,
      $.mult_str_literal,
      $.percent,
      $.double_quote,
    ),

    percent: _ => "%",
    double_quote: _ => "\"",

    // Run of characters other than `"`, `%` and backslash.
    str_literal: _ => /[^"%\\]+/,
    // Run of characters other than `"` and `%` (backslash is allowed here).
    mult_str_literal: _ => /[^"%]+/,
    // A backslash followed by any single character matched by `.`.
    str_esc_char: _ => /\\./,

    //grammar.lalrpop: 509
    // Different from lalrpop grammar, we parse all possible builtins, not just
    // the defined ones.
    builtin: _ => seq(
      "%",
      // We are a bit more liberal with what can go in a builtin function than
      // for identifiers, because builtins are properly delimited by `%`.
      // Upstream Nickel added `/` as a valid character already, so there's a
      // precedent for extensions (although it's not very likely), so we try to
      // be a bit future-proof. We just make sure the name starts with a
      // letter, optionally preceded by underscores, to ensure reasonable names.
      /_*[a-zA-Z][a-zA-Z0-9./_'-]*/,
      "%",
    ),

    pattern_guard: $ => seq(
      "if",
      $.term,
    ),

    match_branch: $ => seq(
      field("pat", $.pattern),
      field("guard", optional($.pattern_guard)),
      "=>",
      field("body", $.term)
    ),

    //grammar.lalrpop: 554
    infix_b_op_2: _ => choice(
      "++",
      "@",
    ),

    //grammar.lalrpop: 559
    infix_b_op_3: _ => choice(
      "*",
      "/",
      "%",
    ),

    //grammar.lalrpop: 565
    infix_b_op_4: _ => choice(
      "+",
      "-",
    ),

    //grammar.lalrpop: 570
    infix_u_op_5: _ => choice(
      "!",
    ),

    //grammar.lalrpop: 574
    infix_b_op_6: _ => choice(
      "&",
      "|>",
    ),

    //grammar.lalrpop: 578
    infix_b_op_7: _ => choice(
      "<",
      "<=",
      ">",
      ">=",
    ),

    //grammar.lalrpop: 585
    infix_b_op_8: _ => choice(
      "==",
      "!=",
    ),

    //grammar.lalrpop: 589
    infix_lazy_b_op_9: _ => choice(
      "&&",
    ),

    //grammar.lalrpop: 593
    infix_lazy_b_op_10: _ => choice(
      "||",
    ),

    //grammar.lalrpop: 597
    infix_b_op: $ => choice(
      $.infix_b_op_2,
      $.infix_b_op_3,
      $.infix_b_op_4,
      $.infix_b_op_6,
      $.infix_b_op_7,
      $.infix_b_op_8,
    ),

    //grammar.lalrpop: 606
    infix_u_op_or_lazy_b_op: $ => choice(
      $.infix_u_op_5,
      $.infix_lazy_b_op_9,
      $.infix_lazy_b_op_10,
    ),

    //grammar.lalrpop: 606
    infix_op: $ => choice(
      $.infix_b_op,
      $.infix_u_op_or_lazy_b_op,
    ),

    //grammar.lalrpop: 617
    curried_op: $ => choice(
      $.infix_op,
      // Field access isn't a proper infix operator, it's a form with special
      // treatment (in particular because the second argument isn't necessarily
      // a string but can be a statically known identifier, which can't be
      // encoded as a generic operator application). It is thus not included in
      // `infix_op`.
      //
      // Still, Nickel allows using it as a curried operator as in `(.) foo
      // bar`, so we add it here manually.
      ".",
      // NOTE: Removed, see NOTE[special-infix].
      //"|>",
      //"!=",
    ),

    //grammar.lalrpop: 662
    // Precedence values are taken from lalrpop grammar
    // https://github.com/tweag/nickel/blob/master/src/grammar.lalrpop
    // In lalrpop the highest precedence is 0.
    // Higher numbers imply a lower precedence. In tree-sitter however, a high
    // number implies a high precedence. To solve this issue, while keeping
    // the numbers the same, all precedence values are negated.
    //
    // Additionally, we don't actually construct an AST, so special rules (such
    // as |> and !=) are standardised.
    infix_expr: $ => choice(
      prec.left(-0, $.applicative),
      prec(-1, seq(field("op", "-"), field("t", $.infix_expr))),
      prec.left(-2, seq(field("t1", $.infix_expr), field("op", $.infix_b_op_2), field("t2", $.infix_expr))),
      prec.left(-3, seq(field("t1", $.infix_expr), field("op", $.infix_b_op_3), field("t2", $.infix_expr))),
      prec.left(-4, seq(field("t1", $.infix_expr), field("op", $.infix_b_op_4), field("t2", $.infix_expr))),
      prec.left(-5, seq(field("op", $.infix_u_op_5), field("t", $.infix_expr))),
      // NOTE: The "|>" rule is part of the infix_b_op_6 rule because we don't
      // need to treat it specially. See NOTE[special-infix].
      prec.left(-6, seq(field("t1", $.infix_expr), field("op", $.infix_b_op_6), field("t2", $.infix_expr))),
      prec.left(-7, seq(field("t1", $.infix_expr), field("op", $.infix_b_op_7), field("t2", $.infix_expr))),
      // NOTE: The "!=" rule is part of the infix_b_op_8 rule because we don't
      // need to treat it specially. See NOTE[special-infix].
      prec.left(-8, seq(field("t1", $.infix_expr), field("op", $.infix_b_op_8), field("t2", $.infix_expr))),
      prec.left(-9, seq(field("t1", $.infix_expr), field("op", $.infix_lazy_b_op_9), field("t2", $.infix_expr))),
      prec.left(-10, seq(field("t1", $.infix_expr), field("op", $.infix_lazy_b_op_10), field("t2", $.infix_expr))),
      // The arrow is the only right-associative operator in this rule.
      prec.right(-11, seq(field("t1", $.infix_expr), field("op", "->"), field("t2", $.infix_expr))),
    ),

    //grammar.lalrpop: 736
    type_builtin: _ => choice(
      "Dyn",
      "Number",
      "Bool",
      "String",
    ),

    //grammar.lalrpop: 743
    type_atom: $ => choice(
      $.type_builtin,
      "_",
      // `[| <enum rows> [; <ident>] |]`
      seq(
        "[|",
        field("rows", commaSep($.enum)),
        field("tail", optional(seq(";", $.ident))),
        "|]",
      ),
      // `{ _ : <types> }`
      seq(
        "{",
        "_",
        ":",
        field("types", $.types),
        "}",
      ),
      // `{ _ | <types> }`
      seq(
        "{",
        "_",
        "|",
        field("types", $.types),
        "}",
      ),
    ),

  },
});

// Because tree-sitter rules can't be proper functions, we need to relocate the
// macro rule definitions here as free standing functions.
/**
 * Builds the "pattern data" choice shared by the pattern flavors: record,
 * array and constant patterns, the supplied enum-pattern rule, a plain
 * identifier, the `_` wildcard, and the supplied or-pattern rule.
 *
 * Must stay a hoisted function declaration: the grammar rules call it before
 * this point in the file is reached.
 *
 * @param $ - the rule-reference object tree-sitter passes to each rule.
 * @param enum_rule - rule to use for enum patterns.
 * @param or_rule - rule to use for or-patterns.
 * @returns the resulting `choice(...)` rule.
 */
function pattern_dataF($, enum_rule, or_rule) {
  const alternatives = [
    $.record_pattern,
    $.array_pattern,
    $.constant_pattern,
    enum_rule,
    $.ident,
    "_",
    or_rule,
  ];
  return choice(...alternatives);
}

/**
 * Builds a full pattern rule: an optional `<ident> @` alias (field `alias`)
 * followed by the pattern data (field `pat`) produced by `pattern_dataF`.
 *
 * Must stay a hoisted function declaration: the grammar rules call it before
 * this point in the file is reached.
 *
 * @param $ - the rule-reference object tree-sitter passes to each rule.
 * @param enum_rule - rule to use for enum patterns.
 * @param or_rule - rule to use for or-patterns.
 * @returns the resulting `choice(...)` rule (with a single alternative).
 */
function patternF($, enum_rule, or_rule) {
  const aliasPrefix = optional(field("alias", seq($.ident, "@")));
  const patternData = field("pat", pattern_dataF($, enum_rule, or_rule));
  return choice(seq(aliasPrefix, patternData));
}

/**
 * Possibly empty list of `rule` separated by `separator`.
 *
 * @param rule - rule for each list element.
 * @param separator - rule placed between consecutive elements.
 * @returns `optional(...)` wrapped around the non-empty list from `sep1`.
 */
function sep(rule, separator) {
  const nonEmptyList = sep1(rule, separator);
  return optional(nonEmptyList);
}

/**
 * Non-empty list of `rule` separated by `separator`: one `rule`, then any
 * number of `separator rule` pairs. A trailing separator is not accepted.
 *
 * @param rule - rule for each list element.
 * @param separator - rule placed between consecutive elements.
 * @returns the resulting `seq(...)` rule.
 */
function sep1(rule, separator) {
  const moreElements = repeat(seq(separator, rule));
  return seq(rule, moreElements);
}

/**
 * Possibly empty, comma-separated list of `rule` (no trailing comma).
 *
 * @param rule - rule for each list element.
 * @returns the rule built by `sep(rule, ",")`.
 */
function commaSep(rule) {
  const comma = ",";
  return sep(rule, comma);
}

/**
 * Wraps `rule` in round parentheses.
 *
 * @param rule - rule to enclose.
 * @returns `seq("(", rule, ")")`.
 */
function parens(rule) {
  const [open, close] = ["(", ")"];
  return seq(open, rule, close);
}

/**
 * Wraps `rule` in square brackets.
 *
 * @param rule - rule to enclose.
 * @returns `seq("[", rule, "]")`.
 */
function square(rule) {
  const [open, close] = ["[", "]"];
  return seq(open, rule, close);
}