#!/usr/bin/env python3.9 import re import sys f = open(r"./iso639-1.txt", "r+") f2 = open(r"./iso639-2.txt", "r+") f3 = open(r"./iso-639-3.tab", "r+") f31 = open(r"./iso-639-3-macrolanguages.tab", "r+") f32 = open(r"./iso-639-3_Name_Index.tab", "r+") codes = {} code_2ts = {} code_2bs = {} code_3s = {} all_codes = [] all_map = {} name_3_map = {} for x in f.readlines(): x = x.strip() if x == "|-": continue ts = x.split("||") name = ts[0].split('|').pop() name = name.replace(']', '') name = name.replace('[', '').strip() code = ts[1].split('|').pop().replace('}}', '').strip() code_2t = ts[2].replace('', "").replace('', '').replace("'''", '').strip() code_2b = ts[3].replace('', "").replace('', '').replace("'''", '').strip() code_3 = ts[4].split('')[0].replace('', "").replace('', '').replace("'''", '').strip() c = {"code": code, "name": name, "code_2t": code_2t, "code_2b": code_2b, "code_3": code_3, "id": code} if id !="" and c["code"] not in all_map: all_codes.append(c) all_map[c["code"]] = c codes[c["code"]] = c code_2ts[c["code_2t"]] = c code_2bs[c["code_2b"]] = c code_3s[c["code_3"]] = c for x in f2.readlines(): x = x.strip() if x == "|-": continue if re.compile("\|\- id=\"\w\"").match(x): continue ts = x.split("||") code = "" code_2b = "" code_2t = "" code_3 = "" name = "" code_2 = ts[0].replace("| {{iso639-2|", "").replace("}}", "").split(" /") if len(code_2) == 1: code_2b = code_2t = code_2[0].strip() elif len(code_2) == 2: code_2b = code_2[1].replace("*", "").strip() code_2t = code_2[0].strip() code_3 = ts[1].strip() code = ts[3].strip() name = ts[4].replace("]]", "").replace("[[", "").strip() name = re.sub(re.compile("\w+ language\|"), "", name).strip() c = {"code": code, "name": name, "code_2t": code_2t, "code_2b": code_2b, "code_3": code_3} id = "" if c["code"] not in codes and c["code"] != "": codes[c["code"]] = c if id == "": id = c["code"] if c["code_2t"] not in code_2ts and c["code_2t"] != "": code_2ts[c["code_2t"]] = c if id == "": id = c["code_2t"] if c["code_2b"] not in code_2bs and c["code_2b"] != "": code_2bs[c["code_2b"]] = c if id == "": id = c["code_2b"] if c["code_3"] not in code_3s and c["code_3"] != "": code_3s[c["code_3"]] = c if id == "": id = c["code_3"] if id !="" and id not in all_map: all_codes.append(c) all_map[id] = c #print(all_map) #sys.exit(0) for x in f3.readlines(): x = x.strip() ts = x.split("\t") code = ts[3].strip() code_2b = ts[1].strip() code_2t = ts[2].strip() code_3 = ts[0].strip() if code_3 == "Id": continue name = ts[6].strip() c = {"code": code, "name": name, "code_2t": code_2t, "code_2b": code_2b, "code_3": code_3} id = "" if c["code"] not in codes and c["code"] != "": codes[c["code"]] = c if id == "": id = c["code"] if c["code_2t"] not in code_2ts and c["code_2t"] != "": code_2ts[c["code_2t"]] = c if id == "": id = c["code_2t"] if c["code_2b"] not in code_2bs and c["code_2b"] != "": code_2bs[c["code_2b"]] = c if id == "": id = c["code_2b"] if c["code_3"] not in code_3s and c["code_3"] != "": code_3s[c["code_3"]] = c if id == "": id = c["code_3"] if id !="" and id not in all_map: all_codes.append(c) all_map[id] = c # print(code_2ts) # sys.exit(0) for x in f31.readlines(): x = x.strip() ts = x.split("\t") if ts[0] == "M_Id": continue code_3 = ts[0].strip() individual_language = ts[1].strip() for x in all_codes: if "individual_languages" not in x: x["individual_languages"] = [] if code_3 == x["code_3"]: x["individual_languages"].append(individual_language) for key in all_map: if "individual_languages" not in all_map[key]: all_map[key]["individual_languages"] = [] if code_3 == x["code_3"]: all_map[key]["individual_languages"].append(individual_language) for key in codes: if "individual_languages" not in codes[key]: codes[key]["individual_languages"] = [] if code_3 == x["code_3"]: codes[key]["individual_languages"].append(individual_language) for key in code_2ts: if "individual_languages" not in code_2ts[key]: code_2ts[key]["individual_languages"] = [] if code_3 == x["code_3"]: code_2ts[key]["individual_languages"].append(individual_language) for key in code_2bs: if "individual_languages" not in code_2bs[key]: code_2bs[key]["individual_languages"] = [] if code_3 == x["code_3"]: code_2bs[key]["individual_languages"].append(individual_language) for key in code_3s: if "individual_languages" not in code_3s[key]: code_3s[key]["individual_languages"] = [] if code_3 == x["code_3"]: code_3s[key]["individual_languages"].append(individual_language) for x in f32.readlines(): x = x.strip() ts = x.split("\t") if ts[0] == "Id": continue name_3_map[ts[0]] = ts[1] for x in all_codes: if not "id" in x: x["id"] = "" if x["id"] == "": if x["code"] != "": x["id"] = x["code"] elif x["code_2t"] != "": x["id"] = x["code_2t"] elif x["code_2b"] != "": x["id"] = x["code_2b"] elif x["code_3"] != "": x["id"] = x["code_3"] prefix = """use phf::{phf_map, Map}; #[cfg(target_arch = "wasm32")] use wasm_bindgen::prelude::*; #[cfg(target_arch = "wasm32")] use js_sys::Array; #[cfg(test)] mod tests { #[test] fn test_from_code_1() { let l = crate::from_code_1("zh"); print!("test_from_code result {:?}", l) } #[test] fn test_from_code_2t() { let l = crate::from_code_2t("zho"); print!("test_from_code_2t result {:?}", l) } #[test] fn test_from_code_2b() { let l = crate::from_code_2b("chi"); print!("test_from_code_2b result {:?}", l) } #[test] fn test_from_code_3() { let l = crate::from_code_3("zho"); print!("test_from_code_3 result {:?}", l) } #[test] fn test_all() { println!("{:?}", crate::ALL); println!("{:?}", crate::ALL_1); println!("{:?}", crate::ALL_2B); println!("{:?}", crate::ALL_2T); println!("{:?}", crate::ALL_3); println!("{:?}", crate::ALL_MAP); println!("{:?}", crate::ALL_1_MAP); println!("{:?}", crate::ALL_2B_MAP); println!("{:?}", crate::ALL_2T_MAP); println!("{:?}", crate::ALL_3_MAP); } } #[cfg(target_arch = "wasm32")] #[wasm_bindgen] #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub struct LanguageCode { ///ISO Language Name name: &'static str, ///639-1 code: &'static str, ///639-2/T code_2t: &'static str, ///639-2/B code_2b: &'static str, //639-3 Macrolanguage code_3: &'static str, individual_languages: &'static[IndividualLanguages], } #[cfg(not(target_arch = "wasm32"))] #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub struct LanguageCode<'a> { ///ISO Language Name pub name: &'static str, ///639-1 pub code: &'static str, ///639-2/T pub code_2t: &'static str, ///639-2/B pub code_2b: &'static str, //639-3 Macrolanguage pub code_3: &'static str, pub individual_languages: &'a [IndividualLanguages], } #[cfg(target_arch = "wasm32")] #[cfg_attr(target_arch = "wasm32", wasm_bindgen)] impl LanguageCode { #[wasm_bindgen(getter)] pub fn name(&self) -> String { self.name.into() } #[wasm_bindgen(getter)] pub fn code(&self) -> String { self.code.into() } #[wasm_bindgen(getter)] pub fn code_2t(&self) -> String { self.code_2t.into() } #[wasm_bindgen(getter)] pub fn code_2b(&self) -> String { self.code_2b.into() } #[wasm_bindgen(getter)] pub fn code_3(&self) -> String { self.code_3.into() } #[wasm_bindgen(getter)] pub fn individual_languages(&self) -> Array { let mut vector: Vec = Vec::new(); // self.individual_languages.into_serde().unwrap(); for i in 0..self.individual_languages.len() { vector.push(self.individual_languages[i]) } vector.into_iter().map(JsValue::from).collect() } } #[cfg(target_arch = "wasm32")] #[wasm_bindgen] #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub struct IndividualLanguages { ///Name name: &'static str, ///Code code: &'static str, } #[cfg(not(target_arch = "wasm32"))] #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub struct IndividualLanguages { ///Name pub name: &'static str, ///Code pub code: &'static str, } #[cfg(target_arch = "wasm32")] #[cfg_attr(target_arch = "wasm32", wasm_bindgen)] impl IndividualLanguages { #[wasm_bindgen(getter)] pub fn name(&self) -> String { self.name.into() } #[wasm_bindgen(getter)] pub fn code(&self) -> String { self.code.into() } } #[cfg_attr(target_arch = "wasm32", wasm_bindgen)] pub fn from_code_1(code: &str) -> Option { let up = code.to_lowercase(); ALL_1_MAP.get(up.as_str()).cloned() } #[cfg_attr(target_arch = "wasm32", wasm_bindgen)] pub fn from_code_2t(code: &str) -> Option { let up = code.to_lowercase(); ALL_2T_MAP.get(up.as_str()).cloned() } #[cfg_attr(target_arch = "wasm32", wasm_bindgen)] pub fn from_code_2b(code: &str) -> Option { let up = code.to_lowercase(); ALL_2B_MAP.get(up.as_str()).cloned() } #[cfg_attr(target_arch = "wasm32", wasm_bindgen)] pub fn from_code_3(code: &str) -> Option { let up = code.to_lowercase(); ALL_3_MAP.get(up.as_str()).cloned() } """ print(prefix) for x in all_codes: x["id"] = re.sub(re.compile("\-.*$"), "", x["id"]).strip() x["name"] = x["name"].replace('"', '\\"') for x in all_codes: rs_code = """ pub const %s: LanguageCode = LanguageCode { name: "%s", code: "%s", code_2t: "%s", code_2b: "%s", code_3: "%s", individual_languages: &[ """ rs_code = rs_code % (x["id"].upper(), x["name"], x["code"].lower(), x["code_2t"].lower(), x["code_2b"].lower(), x["code_3"].lower()) if len(x["individual_languages"]) > 0: for individual_language in x["individual_languages"]: name = "" if individual_language in name_3_map: name = name_3_map[individual_language] rs_code += """ IndividualLanguages { name: "%s", code: "%s", },\n""" % (name, individual_language) rs_code += """ ], }; """ print(rs_code) rs_code = """ pub const ALL_CODES: & [LanguageCode] = &[ """ for x in all_codes: rs_code += x["id"].upper() + ",\n" rs_code += """ ]; """ print(rs_code) rs_code = """ pub const ALL_1: & [LanguageCode] = &[ """ for x in all_codes: if x["code"] in codes: rs_code += x["id"].upper() + ",\n" rs_code += """ ]; """ print(rs_code) rs_code = """ pub const ALL_2T: & [LanguageCode] = &[ """ for x in all_codes: if x["code_2t"] in code_2ts: rs_code += x["id"].upper() + ",\n" rs_code += """ ]; """ print(rs_code) rs_code = """ pub const ALL_2B: & [LanguageCode] = &[ """ for x in all_codes: if x["code_2b"] in code_2bs: rs_code += x["id"].upper() + ",\n" rs_code += """ ]; """ print(rs_code) rs_code = """ pub const ALL_3: & [LanguageCode] = &[ """ for x in all_codes: if x["code_3"] in code_3s: rs_code += x["id"].upper() + ",\n" rs_code += """ ]; """ print(rs_code) rs_code = """ pub const ALL_MAP: Map<&str, LanguageCode> = phf_map! { """ for x in all_codes: rs_code += "\"" + x["id"].lower() + "\" => " + x["id"].upper() + ",\n" rs_code += """ }; """ print(rs_code) rs_code = """ pub const ALL_1_MAP: Map<&str, LanguageCode> = phf_map! { """ for x in all_codes: if x["code"] in codes: rs_code += "\"" + x["id"].lower() + "\" => " + x["id"].upper() + ",\n" rs_code += """ }; """ print(rs_code) rs_code = """ pub const ALL_2B_MAP: Map<&str, LanguageCode> = phf_map! { """ for x in all_codes: if x["code_2b"] in code_2bs: rs_code += "\"" + x["code_2b"].lower() + "\" => " + \ x["id"].upper() + ",\n" rs_code += """ }; """ print(rs_code) rs_code = """ pub const ALL_2T_MAP: Map<&str, LanguageCode> = phf_map! { """ for x in all_codes: if x["code_2t"] in code_2bs: rs_code += "\"" + x["code_2t"].lower() + "\" => " + \ x["id"].upper() + ",\n" rs_code += """ }; """ print(rs_code) rs_code = """ pub const ALL_3_MAP: Map<&str, LanguageCode> = phf_map! { """ for x in all_codes: if x["code_3"] in code_3s: rs_code += "\"" + x["code_3"].lower() + "\" => " + \ x["id"].upper() + ",\n" rs_code += """ }; """ print(rs_code) sys.exit(0)