/*
 * Copyright 2022 Arnaud Golfouse
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

// Note that we could technically use this crate to parse this... but the errors would
// be near impossible to debug :/

use ielr::{
    input::{Grammar, Node, ProdIdx, Symbol, Token},
    Algorithm,
};
use std::{
    collections::{hash_map::Entry, HashMap},
    num::NonZeroU8,
};

mod conflict;
mod example;
mod forbid;
mod precedence;

pub struct GrammarFormat {
    pub algorithm: Algorithm,
    pub grammar: Grammar,
    pub symbols: HashMap<Symbol, String>,
    pub examples: Vec<Vec<Token>>,
    pub examples_fail: Vec<Vec<Token>>,
}

#[derive(Clone, Copy)]
enum Modifier {
    None,
    /// 0 or 1
    Question,
    /// 0 or more
    Star,
    /// 1 or more
    Plus,
}

/// The grammar format is:
/// - One production per line.
/// - Empty lines are ignored.
/// - Start with the left-hand side, then a ':', then the right-hand side.
/// - Nodes are CamelCased, tokens are SCREAMING_CASE.
/// - Symbols other than ':' and letters are individual tokens.
/// - Multiple productions with the same lhs may be specified by starting a new line
/// with `|`, then the rhs.
///
/// # Special annotations
///
/// ## Examples
/// `@example(...)` will try to read the list of tokens in `...` with the generated
/// parser.
///
/// ## Conflicts
/// A conflict resolution has the form `@conflict(c1 > c2)` or `@conflict(c1 < c2)`,
/// where
/// - `c1` and `c2` are either:
///   - `reduce(Node, i)`: means 'reduction of the `i`-th production with lhs `Node`
///   - `shift(Token)`
///
/// ## Precedence
/// TODO: handle multiple precedence families.
///
/// A precedence annotation has the form `@precedence(Node, i, left = a, right = b)`,
/// where
/// - `Node, i` selects the `i`-th production with lhs `Node`.
/// - `a` is the left precedence, `b` is the right precedence. Both `left = a` and
/// `right = b` are optional.
pub fn read_grammar(mut file: &str) -> Result<GrammarFormat, String> {
    // skip leading whitespace and comments
    while file.starts_with("//") || file.starts_with('\n') {
        if let Some(end_line) = file.find('\n') {
            file = &file[end_line + 1..];
        } else {
            file = ""
        }
    }

    let algorithm = if let Some(new_file) = file.strip_prefix("@Algorithm(") {
        let close_parent = new_file
            .find(')')
            .ok_or("no closing parenthesis for @Algorithm")?;
        let algo_type = &new_file[..close_parent];
        file = &new_file[close_parent + 1..];
        let comma = algo_type.find(',').ok_or("no comma for @Algorithm")?;
        let before_comma = algo_type[..comma].trim();
        let after_comma = algo_type[comma + 1..].trim();
        let k = after_comma
            .parse::<NonZeroU8>()
            .map_err(|err| err.to_string())?;
        match before_comma {
            "lalr" => Algorithm::Lalr(k),
            "lr" => Algorithm::Lr(k),
            _ => return Err(format!("unknown algorithm '{before_comma}'")),
        }
    } else {
        Algorithm::default()
    };
    let mut nodes: HashMap<&str, (Node, Vec<ProdIdx>)> = HashMap::new();
    let mut tokens: HashMap<&str, Token> = HashMap::new();
    let mut next_node = 0;
    let mut next_node = move || {
        next_node += 1;
        (Node(next_node - 1), Vec::new())
    };
    let mut next_token = 1;
    let mut next_token = move || {
        next_token += 1;
        Token::new(next_token - 1).unwrap()
    };
    let mut grammar = Grammar::new();
    let mut examples = Vec::new();
    let mut examples_fail = Vec::new();
    let mut last_lhs: Option<(&str, Node)> = None;
    let precedence_family = grammar.add_precedence_family();
    for (line_number, mut line) in file.lines().enumerate() {
        let line_number = line_number + 1;
        line = line.trim();
        if line.is_empty() || line.starts_with("//") {
            continue;
        } else if let Some(line) = line.strip_prefix("@example_fail") {
            let example = example::parse_example_fail(line, &tokens)?;
            examples_fail.push(example);
            continue;
        } else if let Some(line) = line.strip_prefix("@example") {
            let example = example::parse_example(line, &tokens)?;
            examples.push(example);
            continue;
        } else if let Some(line) = line.strip_prefix("@conflict") {
            let conflict_solution = conflict::parse_conflict(line, &nodes, &tokens)?;
            grammar.add_conflict_solution(conflict_solution);
            continue;
        } else if let Some(line) = line.strip_prefix("@forbid") {
            let (prod_idx, index, forbidden) = forbid::parse_forbid(line, &nodes)?;
            grammar
                .get_production_mut(prod_idx)
                .unwrap()
                .forbid_derivation(index, forbidden)
                .map_err(|_| "invalid forbid".to_string())?;
            continue;
        } else if let Some(line) = line.strip_prefix("@precedence") {
            let (prod, left, right) = precedence::parse_precedence(line, &nodes)?;
            let prod = grammar.get_production_mut(prod).unwrap();
            if let Some(left) = left {
                prod.set_left_precedence(precedence_family, left);
            }
            if let Some(right) = right {
                prod.set_right_precedence(precedence_family, right);
            }
            continue;
        }
        let (lhs_word, lhs, words) = if let Some(rest) = line.strip_prefix('|') {
            let (word, lhs) = match last_lhs {
                Some(lhs) => lhs,
                None => return Err(format!("[line {line_number}] no lhs before '|'")),
            };
            (word, lhs, rest.split_whitespace())
        } else {
            match next_word(line) {
                None => continue,
                Some((word, rest)) => {
                    let rest = match rest.strip_prefix(':') {
                        Some(r) => r,
                        None => {
                            return Err(format!("[line {line_number}] Expected ':' after node"))
                        }
                    };
                    if !is_node(word) {
                        return Err(format!(
                            "[line {line_number}] word '{word}' is not a node !"
                        ));
                    }
                    #[allow(clippy::needless_borrow)]
                    let lhs = nodes.entry(word).or_insert_with(&mut next_node).0;
                    (word, lhs, rest.split_whitespace())
                }
            }
        };
        last_lhs = Some((lhs_word, lhs));

        let mut rhs = Vec::new();
        for word in words {
            let (word, modified_word, modifier) = if word.len() == 1 {
                (word, word, Modifier::None)
            } else if let Some(modified) = word.strip_suffix('?') {
                (word, modified, Modifier::Question)
            } else if let Some(modified) = word.strip_suffix('*') {
                (word, modified, Modifier::Star)
            } else if let Some(modified) = word.strip_suffix('+') {
                (word, modified, Modifier::Plus)
            } else {
                (word, word, Modifier::None)
            };
            if modified_word == "EOF" {
                return Err(String::from("EOF is a reserved identifier"));
            }
            #[allow(clippy::needless_borrow)]
            let symbol = if is_node(modified_word) {
                Symbol::Node(nodes.entry(modified_word).or_insert_with(&mut next_node).0)
            } else {
                Symbol::Token(*tokens.entry(modified_word).or_insert_with(&mut next_token))
            };
            let symbol = match modifier {
                Modifier::None => symbol,
                Modifier::Question => {
                    let question = match nodes.entry(word) {
                        Entry::Occupied(occupied) => occupied.get().0,
                        Entry::Vacant(vacant) => {
                            let (question, productions) = vacant.insert(next_node());
                            productions
                                .push(grammar.add_production(*question, vec![symbol]).unwrap());
                            productions
                                .push(grammar.add_production(*question, Vec::new()).unwrap());
                            *question
                        }
                    };
                    Symbol::Node(question)
                }
                Modifier::Star => {
                    let star = match nodes.entry(word) {
                        Entry::Occupied(occupied) => occupied.get().0,
                        Entry::Vacant(vacant) => {
                            let (star, productions) = vacant.insert(next_node());
                            productions.push(
                                grammar
                                    .add_production(*star, vec![symbol, Symbol::Node(*star)])
                                    .unwrap(),
                            );
                            productions.push(grammar.add_production(*star, Vec::new()).unwrap());
                            *star
                        }
                    };
                    Symbol::Node(star)
                }
                Modifier::Plus => {
                    let plus = match nodes.entry(word) {
                        Entry::Occupied(occupied) => occupied.get().0,
                        Entry::Vacant(vacant) => {
                            let (plus, productions) = vacant.insert(next_node());
                            productions.push(
                                grammar
                                    .add_production(*plus, vec![symbol, Symbol::Node(*plus)])
                                    .unwrap(),
                            );
                            productions.push(grammar.add_production(*plus, vec![symbol]).unwrap());
                            *plus
                        }
                    };
                    Symbol::Node(plus)
                }
            };
            rhs.push(symbol);
        }
        nodes
            .get_mut(lhs_word)
            .unwrap()
            .1
            .push(grammar.add_production(lhs, rhs).unwrap());
    }
    let mut symbol_to_str = HashMap::new();
    for (s, (node, _)) in nodes {
        symbol_to_str.insert(Symbol::Node(node), s.to_string());
    }
    for (s, token) in tokens {
        symbol_to_str.insert(Symbol::Token(token), s.to_string());
    }
    Ok(GrammarFormat {
        algorithm,
        grammar,
        symbols: symbol_to_str,
        examples,
        examples_fail,
    })
}

fn next_word(mut input: &str) -> Option<(&str, &str)> {
    input = input.trim_start();
    let mut chars_indices = input.char_indices();
    let (_, next_char) = chars_indices.next()?;
    if next_char.is_alphabetic() {
        for (index, c) in chars_indices {
            if !(c.is_alphanumeric() || c == '_') {
                let to_return = &input[..index];
                input = &input[index..];
                return Some((to_return, input));
            }
        }
        let to_return = input;
        Some((to_return, ""))
    } else {
        let to_return = match chars_indices.next() {
            Some((index, _)) => {
                let to_return = &input[..index];
                input = &input[index..];
                to_return
            }
            None => {
                let to_return = input;
                input = "";
                to_return
            }
        };
        Some((to_return, input))
    }
}

fn is_node(word: &str) -> bool {
    let mut chars = word.chars();
    let mut all_uppercase = true;
    match chars.next() {
        Some(c) if c.is_alphabetic() && c.is_uppercase() => {}
        _ => return false,
    }
    for ch in chars {
        if !(ch.is_alphanumeric() || ch == '_') {
            return false;
        }
        if ch.is_alphabetic() && ch.is_lowercase() {
            all_uppercase = false;
        }
    }
    !all_uppercase
}

fn parse_number(line: &str) -> Result<(&str, u64), String> {
    let mut num_index = 0;
    for c in line.chars() {
        if ('0'..='9').contains(&c) {
            num_index += 1;
        } else {
            break;
        }
    }
    let (number, line) = line.split_at(num_index);
    Ok((line, number.parse::<u64>().map_err(|err| err.to_string())?))
}