// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use icu_casemap::greek_to_me::{
    self, GreekDiacritics, GreekPrecomposedLetterData, GreekVowel, PackedGreekPrecomposedLetterData,
};
use icu_casemap::CaseMapper;
use icu_normalizer::DecomposingNormalizer;
use icu_properties::{maps, GeneralCategoryGroup, Script};
use std::collections::BTreeMap;
use std::fmt::Write;

fn main() {
    let decomposer = DecomposingNormalizer::new_nfd();
    let script = maps::script();
    let gc = maps::general_category();
    let cm = CaseMapper::new();

    let mut vec_370 = vec![0; 0x400 - 0x370];
    let mut vec_1f00 = vec![0; 0x100];
    let mut extras = BTreeMap::new();

    // Loop over all codepoints
    for range in script.iter_ranges_for_value(Script::Greek) {
        for ch in range {
            if let Ok(ch) = char::try_from(ch) {
                if !GeneralCategoryGroup::Letter.contains(gc.get(ch)) {
                    continue;
                }

                let nfd = decomposer.normalize_utf8(ch.encode_utf8(&mut [0; 4]).as_bytes());
                let mut data: Option<GreekPrecomposedLetterData> = None;

                for nfd_ch in nfd.chars() {
                    match nfd_ch {
                        // accented: [:toNFD=/[\u0300\u0301\u0342\u0302\u0303\u0311]/:]&[:Grek:]&[:L:] (from the JSPs: toNFD is an extension).
                        greek_to_me::diacritics!(ACCENTS) => {
                            if let Some(GreekPrecomposedLetterData::Vowel(_, ref mut diacritics)) =
                                data
                            {
                                diacritics.accented = true;
                            } else {
                                panic!("Found character {ch} that has diacritics but is not a Greek vowel");
                            }
                        }
                        // dialytika: [:toNFD=/[\u0308]/:]&[:Grek:]&[:L:] (from the JSPs: toNFD is an extension).
                        greek_to_me::diacritics!(DIALYTIKA) => {
                            if let Some(GreekPrecomposedLetterData::Vowel(_, ref mut diacritics)) =
                                data
                            {
                                diacritics.dialytika = true;
                            } else {
                                panic!("Found character {ch} that has diacritics but is not a Greek vowel");
                            }
                        }
                        // precomposed_ypogegrammeni [:toNFD=/[\u0345]/:]&[:Grek:]&[:L:] (from the JSPs: toNFD is an extension).
                        greek_to_me::diacritics!(YPOGEGRAMMENI) => {
                            if let Some(GreekPrecomposedLetterData::Vowel(_, ref mut diacritics)) =
                                data
                            {
                                diacritics.ypogegrammeni = true;
                            } else {
                                panic!("Found character {ch} that has diacritics but is not a Greek vowel");
                            }
                        }
                        greek_to_me::diacritics!(BREATHING_AND_LENGTH)
                        | greek_to_me::diacritics!(ACCENTS) => {
                            // Rho takes breathing marks but other consonants should not
                            if let Some(GreekPrecomposedLetterData::Consonant(false)) = data {
                                panic!("Found character {ch} that has diacritics but is not a Greek vowel");
                            }
                        }
                        // Ignore all small letters
                        '\u{1D00}'..='\u{1DBF}' | '\u{AB65}' => (),
                        // caps: [[:Grek:]&[:L:]-[\u1D00-\u1DBF\uAB65]] . NFD, remove non-letters, uppercase
                        letter if GeneralCategoryGroup::Letter.contains(gc.get(letter)) => {
                            let uppercased = cm.uppercase_to_string(
                                letter.encode_utf8(&mut [0; 4]),
                                &Default::default(),
                            );
                            let mut iter = uppercased.chars();
                            let uppercased = iter.next().unwrap();
                            assert!(
                                iter.next().is_none(),
                                "{letter} Should uppercase to a single letter char, instead uppercased to {uppercased:?}"
                            );

                            if let Ok(vowel) = GreekVowel::try_from(uppercased) {
                                data = Some(GreekPrecomposedLetterData::Vowel(
                                    vowel,
                                    GreekDiacritics::default(),
                                ))
                            } else {
                                let is_rho = uppercased == greek_to_me::CAPITAL_RHO;
                                data = Some(GreekPrecomposedLetterData::Consonant(is_rho))
                            };
                        }
                        _ => (),
                    }
                }

                if let Some(data) = data {
                    let packed = PackedGreekPrecomposedLetterData::from(data);
                    assert_eq!(
                        Ok(data),
                        packed.try_into(),
                        "packed data for {ch} ({packed:?}) must roundtrip!"
                    );
                    let ch_i = ch as usize;
                    if (0x370..=0x3FF).contains(&ch_i) {
                        vec_370[ch_i - 0x370] = packed.0;
                    } else if (0x1f00..0x1fff).contains(&ch_i) {
                        vec_1f00[ch_i - 0x1f00] = packed.0;
                    } else {
                        extras.insert(ch, packed.0);
                    }
                }
            }
        }
    }

    vec_370.truncate(
        vec_370
            .iter()
            .rposition(|x| *x != 0)
            .expect("must have some greek data")
            + 1,
    );
    vec_1f00.truncate(
        vec_1f00
            .iter()
            .rposition(|x| *x != 0)
            .expect("must have some greek data")
            + 1,
    );

    let mut others = String::new();
    for (ch, data) in extras {
        writeln!(&mut others, "        '{ch}' => {data},").unwrap();
    }

    let output = format!(
        r#"// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

// This file is generated by running `cargo test --test gen_greek_to_me --features compiled_data,datagen
//
// Do not edit manually

// All u8s in this file are PackedGreekPrecomposedLetterDatas, see parent module

/// Data for characters in U+370-U+3FF
pub(crate) const DATA_370: [u8; 0x{:x}] = {vec_370:?};
/// Data for characters in U+1F00-U+1FFF
pub(crate) const DATA_1F00: [u8; 0x{:x}] = {vec_1f00:?};

/// Characters like the ohm sign that do not belong in the two blocks above
pub(crate) fn match_extras(ch: char) -> Option<u8> {{
    Some(match ch {{
{others}
        _ => return None
    }})
}}
"#,
        vec_370.len(),
        vec_1f00.len(),
    );

    let local = include_str!("../src/greek_to_me/data.rs");

    // We cannot just check if the two are unequal because on Windows core.autocrlf
    // may have messed with the line endings on the file, or it may have not (either
    // due to a changed setting, or due to the code being in a cargo cache/vendor. We also
    // cannot fix this by passing `--config newline_style=unix` to rustfmt. We must
    // perform a `\r`-agnostic comparison.
    //
    // (technically this should only catch `\r\n` and not just `\r` but for a golden
    // test on rustfmt output it does not matter)
    if local
        .trim()
        .chars()
        .filter(|&x| x != '\r')
        .ne(output.trim().chars().filter(|&x| x != '\r'))
    {
        println!(
            r#"Please copy the following file to src/greek_to_me/data.rs:
========================================================
{output}
========================================================"#
        );

        panic!("Found mismatch between generated Greek specialcasing data and checked-in data. Please check in the updated file shown above.");
    }
}