use std::io::Write; use std::path::{Path, PathBuf}; #[path = "src/cl/mod.rs"] mod cl; use cl::model::{CodeList, DataSource}; use raxb::de::from_str; use raxb::quick_xml::events::Event; #[derive(serde::Serialize)] pub struct Data { items: Vec, } impl Default for Data { fn default() -> Self { Self { items: Vec::new() } } } pub struct DataSet { file: std::fs::File, data: Data, } impl DataSet { pub fn insert(&mut self, entry: T) { self.data.items.push(entry); } } impl DataSet where T: serde::ser::Serialize, { pub fn save(&mut self) -> anyhow::Result<()> { self.file.write_all(&bson::to_vec(&self.data)?)?; Ok(()) } } pub struct DataSetFactory { data_set_path: PathBuf, } impl DataSetFactory { pub fn new>(data_set_path: P) -> anyhow::Result { if !data_set_path.as_ref().exists() { std::fs::create_dir_all(data_set_path.as_ref())?; } Ok(Self { data_set_path: data_set_path.as_ref().to_path_buf(), }) } pub fn create(&self, items: Vec) -> anyhow::Result> where T: DataSource, { let file_path = self.data_set_path.join(T::name()); if file_path.exists() { std::fs::remove_file(&file_path)?; } Ok(DataSet { file: std::fs::File::create(&file_path)?, data: Data { items }, }) } } fn write_codelist_docs(s: &mut String, codelist: &CodeList) -> anyhow::Result<()> { use std::fmt::Write; let field_ids = codelist .header .fields .iter() .enumerate() .map(|(id, v)| format!("`{}` ({id})", v.id.as_ref())) .collect::>(); writeln!( s, "## {}", codelist.header.identification.long_name.as_ref() )?; writeln!(s)?; writeln!(s, "{}", codelist.header.description.codelist_description)?; writeln!(s)?; writeln!(s, "| | |")?; writeln!(s, "| -- | -- |")?; writeln!( s, "| short name | {} |", codelist.header.identification.short_name.as_ref() )?; writeln!( s, "| canonical uri | `{}` |", codelist.header.identification.canonical_uri.as_ref() )?; writeln!( s, "| canonical version uri | {} |", codelist .header .identification .canonical_version_uri .as_ref() )?; writeln!(s)?; writeln!(s, "| Field | {} |", field_ids.join(" | "))?; write!(s, "|")?; for _ in 0..field_ids.len() { write!(s, " -- |")?; } writeln!(s, " -- |")?; write!(s, "| type ")?; for field in codelist.header.fields.iter() { write!(s, "| {:?} ", field.field_type)?; } writeln!(s, "|")?; write!(s, "| Usage ")?; for field in codelist.header.fields.iter() { write!(s, "| {:?} ", field.usage)?; } writeln!(s, "|")?; write!(s, "| Lang ")?; for field in codelist.header.fields.iter() { write!(s, "| {:?} ", field.lang)?; } writeln!(s, "|")?; writeln!(s)?; Ok(()) } fn write_readme(items: &[CodeList], version: &str) -> anyhow::Result { let mut content = format!("# XWasser codelists - version `{version}`\n\n"); for item in items { write_codelist_docs(&mut content, item)?; } Ok(content) } fn main() -> anyhow::Result<()> { let public_out = Path::new("./public"); let data_dir = Path::new("./data"); let versions = std::fs::read_dir(data_dir)?; for e in versions { let version = e?.file_name(); let version_dir = data_dir.join(&version); let json_dir = public_out.join(&version); let xml_files = std::fs::read_dir(&version_dir)?; let mut items = Vec::new(); let mut json_content = Vec::new(); std::fs::create_dir_all(&json_dir)?; let json_file = json_dir.join("codelist.json"); let readme_file = json_dir.join("README.md"); for xml_file in xml_files { let xml_file = xml_file?; eprintln!("process {}", xml_file.path().display()); if xml_file .path() .extension() .map(|ext| ext.to_ascii_lowercase().to_string_lossy().as_ref() == "xml") .unwrap_or(false) { let xml = std::fs::read(xml_file.path())?; let mut rdr = raxb::quick_xml::reader::Reader::from_reader(std::io::Cursor::new(xml.clone())); let mut buf = Vec::new(); let mut s = String::default(); let mut is_valid_utf_8 = true; loop { match rdr.read_event_into(&mut buf) { Ok(Event::Decl(decl)) => match decl.encoding() { Some(Ok(encoding)) => { if encoding.as_ref() == b"ISO-8859-1" { let encoder = encoding_rs::Encoding::for_label(b"iso-8859-1") .ok_or(anyhow::anyhow!( "unknown encoding ISO_8859_1" ))?; let (result, valid_to) = encoder.decode_with_bom_removal(&xml); eprintln!("{valid_to}"); s = result.replace("ISO-8859-1", "UTF-8"); std::fs::write(xml_file.path(), s.as_bytes())?; is_valid_utf_8 = false } } _ => break, }, Err(err) => { eprintln!("{err:#?}"); std::process::exit(1); } _ => { break; } } } if is_valid_utf_8 { s = String::from_utf8(rdr.into_inner().into_inner())?; } let result: cl::parser::input::CodeList = from_str(&s).expect("unable to deserialize xml"); let parsed: CodeList = result.into(); if !parsed.values.is_empty() { let json_obj = serde_json::to_string(&parsed)?; json_content.push(json_obj); items.push(parsed); } } } std::fs::write( &readme_file, &write_readme(&items, version.to_str().unwrap())?, )?; DataSetFactory::new( PathBuf::from(std::env::var("OUT_DIR").expect("OUT_DIR variable")) .join("data") .join(version), )? .create::(items)? .save()?; std::fs::write( &json_file, format!("[\n {}\n]", json_content.join(",\n ")), )?; } Ok(()) }