use std::env; use std::fs::{self, File}; use std::io::{Write, BufReader}; use std::path::PathBuf; use std::collections::HashMap; use heck::{CamelCase, KebabCase}; use rio_api::{ parser::TriplesParser, model::{ Triple, Term, NamedOrBlankNode, NamedNode, BlankNode, Literal, }, }; use rio_turtle::{self, TurtleParser, TurtleError}; use serde::{Serialize, Deserialize}; use serde_json; static SCHEMA_LOCATION: &'static str = "./schema/om-2.0.ttl"; // ----------------------------------------------------------------------------- // Utils // ----------------------------------------------------------------------------- /// Makes it eas(ier) to write out code struct StringWriter { string: String, indent: usize, } impl StringWriter { fn new() -> Self { Self { string: String::from(""), indent: 0 } } fn write(&mut self, val: T) where T: Into { self.string.push_str(val.into().as_str()); } fn line(&mut self, val: T) where T: Into { let indent: String = (0..(self.indent * 4)).map(|_| " ").collect::>().concat(); self.write(&indent); self.write(val); self.nl(); } fn nl(&mut self) { self.write("\n"); } fn inc_indent(&mut self) { self.indent += 1; } fn dec_indent(&mut self) { if self.indent > 0 { self.indent -= 1; } } fn to_string(self) -> String { let Self { string: val, .. } = self; val } } // ----------------------------------------------------------------------------- // Parsing enums // ----------------------------------------------------------------------------- /// This (very important) enum translates between string ids and rust types, but /// also has a number of implementation functions that help us along the way. #[derive(Debug, PartialEq, Clone, Serialize, Deserialize)] enum DataType { #[serde(rename = "bool")] #[serde(alias = "http://www.w3.org/2001/XMLSchema#boolean")] Boolean, #[serde(rename = "f64")] #[serde(alias = "http://www.w3.org/2001/XMLSchema#double")] Double, #[serde(alias = "http://www.w3.org/2001/XMLSchema#string")] String, #[serde(rename = "dtype::NumericUnion")] #[serde(alias = "http://www.linkedmodel.org/schema/dtype#numericUnion")] NumericUnion, // catch-all type, mainly for things like om2 and stuff Literal(String), // used for post-processing mainly RangeEnum(String), } /// Helps us parse out what type of node we're dealing with when looping over /// our triples. #[derive(Debug, PartialEq, Clone, Serialize, Deserialize)] enum NodeType { #[serde(rename = "http://www.w3.org/2002/07/owl#Ontology")] Ontology, #[serde(rename = "http://www.w3.org/2002/07/owl#Class")] StructOrEnum, #[serde(rename = "http://www.w3.org/2002/07/owl#ObjectProperty")] Field, #[serde(rename = "http://www.w3.org/2002/07/owl#NamedIndividual")] EnumVal, #[serde(rename = "http://www.w3.org/2002/07/owl#DatatypeProperty")] DataType, // for values we can't classify on the first round of parsing. in the case // of enums, a second type with a #vf:* id signifies the parent, which would // have the same effect as using `domain` Literal(String), } /// Encodes the various relationships used between our RDF nodes #[derive(Debug, PartialEq, Clone, Deserialize)] // aka predicate enum Relationship { #[serde(rename = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type")] Type, #[serde(rename = "http://www.w3.org/2000/01/rdf-schema#domain")] Domain, #[serde(rename = "http://www.w3.org/2000/01/rdf-schema#range")] Range, #[serde(rename = "http://www.w3.org/2000/01/rdf-schema#label")] Label, #[serde(rename = "http://www.w3.org/2000/01/rdf-schema#comment")] Comment, #[serde(rename = "http://www.w3.org/2000/01/rdf-schema#subClassOf")] Subclass, #[serde(rename = "http://www.w3.org/2003/06/sw-vocab-status/ns#term_status")] Status, #[serde(rename = "http://www.w3.org/2002/07/owl#unionOf")] Union, #[serde(rename = "http://www.w3.org/2002/07/owl#oneOf")] OneOf, #[serde(rename = "http://www.w3.org/2002/07/owl#equivalentClass")] EquivalentClass, #[serde(rename = "http://www.w3.org/1999/02/22-rdf-syntax-ns#first")] First, #[serde(rename = "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest")] Rest, #[serde(rename = "http://www.w3.org/2002/07/owl#propertyChainAxiom")] PropertyChainAxiom, #[serde(rename = "http://purl.org/dc/elements/1.1/creator")] Creator, #[serde(rename = "http://purl.org/dc/elements/1.1/date")] Date, #[serde(rename = "http://purl.org/dc/elements/1.1/identifier")] Identifier, #[serde(rename = "http://purl.org/dc/elements/1.1/title")] Title, #[serde(rename = "http://purl.org/ontology/bibo/uri")] Uri, // for values we can't classify on the first round of parsing mainly other // vf:* types that aren't in the class space yet Literal(String), } /// Helps us convert a string to an enum, as long as the enum has the Literal /// value to act as a catch-all. Maybe this could be written as a function, but /// I will be damned if I write a bunch of stupid traits/impls to avoid a four- /// line macro. macro_rules! to_enum { ($enumty:ty, $val:expr) => { match serde_json::from_str::<$enumty>(&format!(r#""{}""#, $val)) { Ok(x) => x, Err(_) => <$enumty>::Literal($val.into()) } } } // ----------------------------------------------------------------------------- // Node struct for storing and grouping parsed values // ----------------------------------------------------------------------------- #[derive(Debug, Default, PartialEq, Clone, Serialize)] struct Node { id: Option, ty: Option, label: Option, comment: Option, status: Option, // vec (good for processing unions) domain: Vec, range: Vec, subclass: Vec, rel_pairs: Vec<(String, String)>, // filled in on our second pass subnodes: Vec>, // (TypeName, Namespace) custom: Option<(String, String)>, } impl Node { fn new(id: &str) -> Self { let mut node = Self::default(); node.id = Some(id.to_string()); node } fn id_noprefix(&self) -> String { self.id.as_ref().unwrap().trim_start_matches("http://www.ontology-of-units-of-measure.org/resource/om-2/").to_string() } fn typename(&self) -> String { let id_noprefix = self.id_noprefix(); if id_noprefix.starts_with("_") { match id_noprefix.as_str() { "_0-100" => "ZeroToOneHundred", "_0-5" => "ZeroToFive", "_1-0" => "OneDividedByZero", "_1-10" => "OneToTen", "_1-5" => "OneToFive", "_1000ColonyFormingUnitPerMillilitre" => "OneThousandColonyFormingUnitPerMillilitre", _ => panic!("Node.typename() -- unknown underscore-prefixed id encountered: {}", id_noprefix), }.to_string() } else if id_noprefix.is_ascii() { id_noprefix.to_camel_case() } else { // going to have to hardcode this shit match id_noprefix.as_str() { "röntgen" => "Rontgen", _ => panic!("Node.typename() -- unknown non-ascii identifier encountered: {}", id_noprefix), }.to_string() } } fn ser_name(&self) -> String { if let Some(label) = self.label.as_ref() { label.to_string() } else { self.id_noprefix() } } } // ----------------------------------------------------------------------------- // Parsing logic // ----------------------------------------------------------------------------- /// Parses our heroic .ttl file and turns all the triples into a namespace -> /// struct/enum -> field hierarchy (sorry, anarchists) fn gen_schema() -> String { let mut out = StringWriter::new(); let file = fs::File::open(SCHEMA_LOCATION).expect("error opening schema file"); let bufread = BufReader::new(file); // our saved nodes from the first round of parsing let mut nodemap: HashMap = HashMap::new(); // first pass! we loop over the parsed turtle file and group all of our // triples by their ids effectively. this gives us a more structured set of // data we can use to make our graph let mut cur_node_id: String = "".to_string(); let mut cur_list_id: Option = None; let mut cur_list: Vec = vec![]; TurtleParser::new(bufread, "file:vf.ttl").unwrap().parse_all(&mut |t| -> Result<(), TurtleError> { // destructure our triple let Triple { subject, predicate: predicate_named, object } = t; let NamedNode { iri: predicate } = predicate_named; // grab our id, but check if the node is named or blank let (id, blank): (String, bool) = match subject { NamedOrBlankNode::NamedNode(NamedNode { iri }) => (iri.into(), false), NamedOrBlankNode::BlankNode(BlankNode { id }) => (id.into(), true), }; // destructure our object a bit let blank_id: Option = if id != "" && blank { Some(id.clone()) } else { None }; let (obj_id, obj_val, _obj_blank): (Option, Option, bool) = match object.clone() { Term::Literal(Literal::Simple { value: string }) => (None, Some(string.into()), false), // only grab english because i am super racist Term::Literal(Literal::LanguageTaggedString { value: string, language: lang }) if lang == "en" => (None, Some(string.into()), false), Term::NamedNode(NamedNode { iri }) => (Some(iri.into()), None, false), Term::BlankNode(BlankNode { id }) => (Some(id.into()), None, true), // kinda don't care... _ => return Ok(()), }; // if we have a named node, set the current id as id if !blank { cur_node_id = id.clone(); } // pull out our current node, or create if needed let cur_node = nodemap.entry(cur_node_id.clone()).or_insert(Node::new(&cur_node_id)); // we can skip parsing the ontology record itself if cur_node.id == Some("http://www.ontology-of-units-of-measure.org/resource/om-2/".to_string()) { return Ok(()); } // process the relationship let rel = to_enum!(Relationship, predicate); match rel { Relationship::Type => { let ty = to_enum!(NodeType, obj_id.as_ref().unwrap()); if cur_node.ty.is_some() { cur_node.domain.push(obj_id.unwrap()); } else { cur_node.ty = Some(ty); } } Relationship::Domain => { if obj_id.is_some() && obj_id == cur_list_id { // really ties the list together cur_node.domain = cur_list.clone(); cur_list = vec![]; } else if let Some(type_id) = obj_id { cur_node.domain = vec![type_id]; } } Relationship::Range => { if obj_id.is_some() && obj_id == cur_list_id { // really ties the list together cur_node.range = cur_list.clone(); cur_list = vec![]; } else if let Some(type_id) = obj_id { cur_node.range = vec![type_id]; } } Relationship::Subclass => { if obj_id.is_some() && obj_id == cur_list_id { // really ties the list together cur_node.subclass = cur_list.clone(); cur_list = vec![]; } else if let Some(type_id) = obj_id { cur_node.subclass = vec![type_id]; } } Relationship::Label => { cur_node.label = obj_val; } Relationship::Comment => { cur_node.comment = obj_val; } Relationship::Status => { cur_node.status = obj_val; } Relationship::Union | Relationship::OneOf | Relationship::EquivalentClass => { cur_list_id = blank_id; cur_list = vec![]; } Relationship::First => { cur_list.push(obj_id.unwrap()); } // note that we *could* implement "correct" first/rest parsing, but // because our triples are *in order* we don't really need to. so, // fuck off, Rest... Relationship::Rest => {} Relationship::PropertyChainAxiom => {} Relationship::Creator => {} Relationship::Date => {} Relationship::Identifier => {} Relationship::Title => {} Relationship::Uri => {} Relationship::Literal(val) => { cur_node.rel_pairs.push((val, obj_id.or(obj_val).unwrap())); } } Ok(()) }).expect("error parsing"); let unit_node = nodemap.get("http://www.ontology-of-units-of-measure.org/resource/om-2/Unit").unwrap().clone(); let mut units: Vec = Vec::new(); let unit_classes = vec![ "CompoundUnit", "LengthUnit", "PrefixedUnit", "TemperatureUnit", "Unit", "UnitDivision", "UnitExponentiation", "UnitMultiplication", ]; for (_, node) in nodemap { let mut node_saved = false; for class in &unit_classes { if node.domain.contains(&format!("http://www.ontology-of-units-of-measure.org/resource/om-2/{}", class)) { units.push(node.clone()); node_saved = true; break; } if node.ty == Some(NodeType::Literal(format!("http://www.ontology-of-units-of-measure.org/resource/om-2/{}", class))) { units.push(node.clone()); node_saved = true; break; } } if node_saved { continue; } } units.sort_by_key(|x| x.typename()); out.line(format!("/// {}", unit_node.comment.as_ref().unwrap())); out.line("///"); out.line(format!("/// ID: {}", unit_node.id.as_ref().unwrap())); out.line("#[derive(Debug, PartialEq, Clone)]"); out.line(r#"#[cfg_attr(feature = "with_serde", derive(Serialize, Deserialize))]"#); out.line("pub enum Unit {"); out.inc_indent(); for unit in &units { let typename = unit.typename(); let label = unit.ser_name(); let alias = label.to_kebab_case(); if let Some(comment) = unit.comment.as_ref() { out.line(format!("/// {}", comment)); } out.line(format!(r#"#[cfg_attr(feature = "with_serde", serde(rename = "{}"))]"#, label)); if alias != label { out.line(format!(r#"#[cfg_attr(feature = "with_serde", serde(alias = "{}"))]"#, alias)); } out.line(format!("{},", typename.to_camel_case())); } out.dec_indent(); out.line("}"); out.nl(); out.line("impl Unit {"); out.inc_indent(); out.line("pub fn label(&self) -> Option {"); out.inc_indent(); out.line("match self {"); out.inc_indent(); for unit in &units { let typename = unit.typename(); if let Some(label) = unit.label.as_ref() { out.line(format!(r#"Unit::{} => Some(String::from(r#"{}"{})),"#, typename, label, "#")); } else { out.line(format!(r#"Unit::{} => None,"#, typename)); } } out.dec_indent(); out.line("}"); out.dec_indent(); out.line("}"); out.nl(); out.line("pub fn symbol(&self) -> Option {"); out.inc_indent(); out.line("match self {"); out.inc_indent(); for unit in &units { let typename = unit.typename(); let symbol = unit.rel_pairs.iter() .filter(|x| x.0 == "http://www.ontology-of-units-of-measure.org/resource/om-2/symbol") .map(|x| x.1.clone()) .collect::>(); if symbol.len() > 0 { out.line(format!(r#"Unit::{} => Some(String::from(r#"{}"{})),"#, typename, symbol[0], "#")); } else { out.line(format!(r#"Unit::{} => None,"#, typename)); } } out.dec_indent(); out.line("}"); out.dec_indent(); out.line("}"); out.dec_indent(); out.line("}"); out.nl(); out.line("#[cfg(test)]"); //out.line("#[cfg(with_serde)]"); out.line(r#"#[cfg(feature = "with_serde")]"#); out.line("mod test {"); out.inc_indent(); out.line("use super::*;"); out.line("use serde_json;"); out.line("#[test]"); out.line("fn serde() {"); out.inc_indent(); for i in 0..units.len() { // fuck it, going to just test a sample. full tests keep crashing my // compiler... =[ if i % 5 != 0 { continue; } let unit = &units[i]; let typename = unit.typename().to_camel_case(); let label = unit.ser_name(); let alias = label.to_kebab_case(); out.line(format!(r#"assert_eq!(Unit::{}, serde_json::from_str(r#""{}""{}).unwrap());"#, typename, label, "#")); if alias != label { out.line(format!(r#"assert_eq!(Unit::{}, serde_json::from_str(r#""{}""{}).unwrap());"#, typename, alias, "#")); } out.line(format!(r#"assert_eq!(serde_json::to_string(&Unit::{}).unwrap(), r#""{}""{});"#, typename, label, "#")); } out.dec_indent(); out.line("}"); out.dec_indent(); out.line("}"); out.to_string() } // ----------------------------------------------------------------------------- // Output // ----------------------------------------------------------------------------- /// Prints the standard header for our generated output fn print_header() -> String { let mut header = String::new(); header.push_str(r#"#[cfg(feature = "with_serde")]"#); header.push_str("use serde_derive::{Serialize, Deserialize};\n"); header } /// Given a dump of generated code, save it to the output dir fn save(contents: String) { // write it all out to our src/gen.rs file, included by lib let out_dir = env::var("OUT_DIR").unwrap(); let mut dest_path = PathBuf::from(&out_dir); dest_path.push("om_gen.rs"); let mut f = File::create(&dest_path).unwrap(); f.write_all(contents.as_bytes()).unwrap(); } fn main() { let header = print_header(); let contents = gen_schema(); save(format!("{}\n{}", header, contents)); }