//! GB2312 Parser //! //! 1. Build generator: rustc parser.rs //! 2. Launch: ./parser >../../src/charset/data/gb2312.rs use { std::{ fs::File, io::{ self, BufReader, BufRead, }, path::Path, }, super::push_unicode, }; fn read_file>(path: P) -> io::Result<()> { let hi_limit = 0x77; let hi_size = hi_limit - 0x21 + 1; let lo_size = 0x7F - 0x21; let mut arr_decode: Vec = vec![0; hi_size * lo_size]; let mut arr_encode: Vec<(u16, u16)> = Vec::new(); let file = File::open(path)?; for line in BufReader::new(file).lines() { let line = line?; let line = line.trim_start(); if line.starts_with("#") { continue; } let line = line.trim_end(); if line.is_empty() { continue; } let mut split = line.split_whitespace(); let code = split.next().unwrap(); let code = usize::from_str_radix(&code[2 ..], 16).unwrap(); let hi = code >> 8; let lo = code & 0xFF; if hi < 0x21 || hi > 0x77 || lo < 0x21 || lo > 0x7F { continue; } let unicode = split.next().unwrap(); let unicode = u16::from_str_radix(&unicode[2 ..], 16).unwrap(); arr_encode.push((unicode, code as u16)); let code = ((hi - 0x21) * lo_size) + (lo - 0x21); arr_decode[code as usize] = unicode; } println!(""); println!("pub const DECODE_MAP_GB2312: [u16; {}] = [", arr_decode.len()); for (n, &unicode) in arr_decode.iter().enumerate() { if (n % 8) == 0 { if n > 0 { println!(""); } print!(" "); } else { print!(" "); } print!("0x{:04x},", unicode); } println!(""); println!("];"); // encode push_unicode(&mut arr_encode); arr_encode.sort_by(|a, b| { (a.0).cmp(&b.0) }); let mut code_map: Vec = vec![0; 0x100]; let mut hi_map: Vec = vec![0; 0x100]; let mut hi_byte = 0u8; let mut hi_skip = 0usize; for (unicode, code) in arr_encode.iter() { if *unicode == 0 { continue; } let hi = (unicode >> 8) as u8; let lo = (unicode & 0xFF) as u8; if hi_byte != hi { hi_byte = hi; hi_skip += 1; hi_map[usize::from(hi)] = hi_skip; for _ in 0 .. 0x100 { code_map.push(0) } } let pos = hi_skip * 0x100 + usize::from(lo); code_map[pos] = *code; } println!(""); println!("pub const HI_MAP_GB2312: [usize; {}] = [", hi_map.len()); for (n, &pos) in hi_map.iter().enumerate() { if (n % 8) == 0 { if n > 0 { println!(""); } print!(" "); } else { print!(" "); } print!("{},", pos); } println!(""); println!("];"); println!(""); println!("pub const ENCODE_MAP_GB2312: [u16; {}] = [", code_map.len()); for (n, &code) in code_map.iter().enumerate() { if (n % 8) == 0 { if n > 0 { println!(""); } print!(" "); } else { print!(" "); } let code = if code != 0 { code | 0x8080 } else { code }; print!("0x{:04x},", code); } println!(""); println!("];"); Ok(()) } pub fn build() -> io::Result<()> { let base_path = std::env::current_exe()?; let base_path = base_path.parent().unwrap(); let base_path = base_path.join("data"); println!("// Simplified Chinese. File generated with build/gb2312.rs"); println!(""); read_file(base_path.join("GB2312.TXT"))?; Ok(()) }