use std::env; use std::fs::OpenOptions; use std::io::BufWriter; use std::path::Path; use std::result::Result; use crate::gen_table::GenTable; use crate::Error; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd)] enum Version { SinceBeginningOfTime = 0, Since1983 = 1983, Since1997 = 1997, Since2000 = 2000, Since2004 = 2004, } #[derive(Debug)] enum Value { Unicode1(u32), Unicode2(u32, u32), Reserved(), } #[inline] fn parse_hex(s: &str) -> Result<u32, Error> { assert!(s.starts_with("0x"), "too long"); Ok(u32::from_str_radix(&s[2..], 16)?) } #[inline] fn parse_unicode(s: &str) -> Result<Vec<u32>, Error> { assert!(s.starts_with("U+"), "invalid unicode"); let v = s[2..] .split('+') .map(|x| u32::from_str_radix(x, 16)) .collect::<Result<Vec<_>, _>>(); Ok(v?) } struct Data { byte0: u8, byte1: u8, version: Version, value: Value, } fn parse_line(line: &str) -> Result<Data, Error> { let parts: Vec<&str> = line.split('#').collect(); assert!(parts.len() == 2, "invalid line"); let before: Vec<&str> = parts[0].split(' ').filter(|s| !s.is_empty()).collect(); let after: Vec<&str> = parts[1].split(' ').filter(|s| !s.is_empty()).collect(); assert!(before.len() == 1 || before.len() == 2, "invalid line"); assert!(!after.is_empty(), "invalid line"); let mut version = Version::SinceBeginningOfTime; for v in &after { match *v { "[1983]" => version = Version::Since1983, "[1997]" => version = Version::Since1997, "[2000]" => version = Version::Since2000, "[2004]" => version = Version::Since2004, _ => { continue; }, } break; } let code = parse_hex(before[0])?; let byte0 = (code >> 8) as u8; let byte1 = code as u8; if byte0 == 0x00 { return Err(Error::Unknown); } if before.len() == 1 { match after[0] { "<reserved>" => Ok(Data { byte0, byte1, version, value: Value::Reserved(), }), _ => unreachable!(), } } else { let unicode = parse_unicode(before[1])?; if unicode.len() == 1 { assert!(unicode[0] <= 0x10FFFF, "invalid unicode"); Ok(Data { byte0, byte1, version, value: Value::Unicode1(unicode[0]), }) } else if unicode.len() == 2 { assert!(unicode[0] <= 0x10FFFF, "invalid unicode"); assert!(unicode[1] <= 0x10FFFF, "invalid unicode"); Ok(Data { byte0, byte1, version, value: Value::Unicode2(unicode[0], unicode[1]), }) } else { unreachable!() } } } fn generate_table( path: &Path, name: &'static str, version: Version, data: &[Data], ) -> Result<(), Error> { let data = data .iter() .filter(|d| d.version <= version) .filter(|d| match d.value { Value::Unicode1(_) => true, Value::Unicode2(_, _) => true, Value::Reserved() => false, }) .collect::<Vec<_>>(); let lookup_byte = (0x80..0xA0).chain(0xE0..0xFF).collect::<Vec<_>>(); let mut single_lookup = Vec::<u32>::new(); let mut double_lookup = Vec::<(u32, u32)>::new(); let mut table_lookup = vec![(0_u8, 0_u8, 0_usize); 256]; for byte0 in lookup_byte { let mut u1 = data .iter() .filter(|x| x.byte0 == byte0) .filter_map(|x| match x.value { Value::Unicode1(u) => Some((x.byte1, u)), _ => None, }) .collect::<Vec<_>>(); let mut u2 = data .iter() .filter(|x| x.byte0 == byte0) .filter_map(|x| match x.value { Value::Unicode2(u1, u2) => Some((x.byte1, u1, u2)), _ => None, }) .collect::<Vec<_>>(); u1.sort_by(|a, b| a.0.cmp(&b.0)); u2.sort_by(|a, b| a.0.cmp(&b.0)); let first = u1.iter().map(|x| x.0).chain(u2.iter().map(|x| x.0)).min(); let last = u1.iter().map(|x| x.0).chain(u2.iter().map(|x| x.0)).max(); if first.is_none() || last.is_none() { continue; } let first = first.unwrap(); let last = last.unwrap(); let count = last - first + 1; let offset = single_lookup.len(); single_lookup.extend(vec![0; count as usize]); table_lookup[byte0 as usize] = (first, last, offset); for (byte1, unicode) in u1 { let relative = (byte1 - first) as usize; let index = offset + relative; assert!(unicode != 0); assert!(single_lookup[index] == 0); single_lookup[index] = unicode; } for (byte1, unicode0, unicode1) in u2 { let relative = (byte1 - first) as usize; let index = offset + relative; let value = 0x8000_0000 | (double_lookup.len() as u32); assert!(single_lookup[index] == 0); single_lookup[index] = value; double_lookup.push((unicode0, unicode1)); } } let mut output_file = OpenOptions::new() .write(true) .truncate(true) .create(true) .open(path)?; let mut buffer = BufWriter::new(&mut output_file); table_lookup.gen_table(format!("{name}_UTF8_T"), &mut buffer)?; single_lookup.gen_table(format!("{name}_UTF8_S"), &mut buffer)?; if !double_lookup.is_empty() { double_lookup.gen_table(format!("{name}_UTF8_D"), &mut buffer)?; } Ok(()) } static SHIFT_JIS_1983_2004: &str = include_str!("../assets/build/shift-jis.txt"); pub fn generate() -> Result<(), Error> { let data = SHIFT_JIS_1983_2004 .replace('\t', " ") .lines() .filter(|x| !x.starts_with('#')) .flat_map(parse_line) .collect::<Vec<_>>(); let dir = env::var_os("OUT_DIR").unwrap(); let path_1997 = Path::new(&dir).join("shift_jis_1997.rs"); generate_table(&path_1997, "SJIS_1997", Version::Since1997, &data)?; let path_2004 = Path::new(&dir).join("shift_jis_2004.rs"); generate_table(&path_2004, "SJIS_2004", Version::Since2004, &data)?; Ok(()) }