//! Rust library implementing a custom text generation/templating system. Genex
//! is similar to [Tracery](https://tracery.io), but with some extra
//! functionality around using external data.
//!
//! # Usage
//!
//! First create a grammar, then generate an expansion or multiple expansions
//! from it.
//!
//! ```rust
//! use std::collections::HashSet;
//! use std::str::FromStr;
//! use maplit::hashmap;
//! use genex::Grammar;
//!
//! let grammar = Grammar::from_str(
//!     r#"
//! RULES:
//! top = The <adj> <noun> #action|ed# #object|a#?:[ with gusto] in <place>.
//! adj = [glistening|#adj#]
//! noun = key
//! place = [the #room#|#city#]
//!
//! WEIGHTS:
//! room = 2
//! city = 1
//! "#,
//! )
//! .unwrap();
//!
//! let data = hashmap! {
//!     "action".to_string() => "pick".to_string(),
//!     "object".to_string() => "lizard".to_string(),
//!     "room".to_string() => "kitchen".to_string(),
//!     "city".to_string() => "New York".to_string(),
//! };
//!
//! // Now we find the top-scoring expansion. The score is the sum of the
//! // weights of all variables used in an expansion. We know that the top
//! // scoring expansion is going to end with "the kitchen" because we gave
//! // `room` a higher weight than `city`.
//!
//! let best_expansion = grammar.generate("top", &data).unwrap().unwrap();
//!
//! assert_eq!(
//!     best_expansion,
//!     "The glistening key picked a lizard in the kitchen.".to_string()
//! );
//!
//! // Now get all possible expansions:
//!
//! let all_expansions = grammar.generate_all("top", &data).unwrap();
//!
//! assert_eq!(
//!     HashSet::<_>::from_iter(all_expansions),
//!     HashSet::<_>::from_iter(vec![
//!         "The glistening key picked a lizard in New York.".to_string(),
//!         "The glistening key picked a lizard with gusto in New York.".to_string(),
//!         "The glistening key picked a lizard with gusto in the kitchen.".to_string(),
//!         "The glistening key picked a lizard in the kitchen.".to_string(),
//!     ])
//! );
//! ```
//!
//! # Features
//!
//! Genex tries to make it easy to generate text based on varying amounts of
//! external data. For example you can write a single expansion grammar that
//! works when all you know is the name of an object, but uses the additional
//! information if you know the object's size, location, color, or other
//! qualities.
//!
//! The default behavior is for genex to try to find an expansion that uses the
//! most external data possible, but by changing the weights assigned to
//! variables you can prioritize which variables are used, even prioritizing the
//! use of a single important variable over the use of multiple, less important
//! variables.
//!
//! # Grammar syntax
//!
//! ## Rules
//!
//! "`RULES:`" indicates the rules section of the grammar. Rules are defined by
//! a left-hand side (LHS) and a right-hand side (RHS). The LHS is the name of
//! the rule. The RHS is a sequence of terms.
//!
//! Terms:
//! * Sequence: `[term1 term2 ...]`
//! * Choice: `[term1|term2|...]` (You can put a newline after a `|` character.)
//! * Optional: `?:[term1 term2 ...]`
//! * Variable: `#variable#` or `#variable|modifier#`
//! * Non-terminal: `<rule-name>`
//! * Plain text: `I am some plain text. I hope I get expanded.`
//!
//! ## Weights
//!
//! "`WEIGHTS:`" indicates the weights section of the grammar. Weights are of
//! the form <_rule-name_> = <_number_>.
//!
//! ## Modifiers
//!
//! Modifiers are used to transform variable values during expansion.
//!
//! Modifiers:
//! * `capitalize`: Capitalizes the first letter of the value.
//! * `capitalizeAll`: Capitalizes the first letter of each word in the value.
//! * `inQuotes`: Surrounds the value with double quotes.
//! * `comma`: Adds a comma after the value, if it doesn't already end with punctuation.
//! * `s`: Pluralizes the value.
//! * `a`: Prefixes the value with an "a"/"an" article as appropriate.
//! * `ed`: Changes the first word of the value to be past tense.
//!
pub mod error; mod modifiers; mod parser; use std::{collections::HashMap, rc::Rc, str::FromStr}; pub use crate::error::Error; use itertools::Itertools; use ordered_float::OrderedFloat; #[macro_use] extern crate lazy_static; /// A convenience type for a `Result` of `T` or [`Error`] /// /// [`Error`]: enum.Error.html pub type Result = ::std::result::Result; #[derive(Debug, Clone, PartialEq, Eq, Hash, Default)] struct Expansion { varrefs: Vec, text: String, } impl Expansion { fn concat(self, expansion: Expansion) -> Self { let mut varrefs = self.varrefs.clone(); varrefs.extend(expansion.varrefs); let mut text = self.text; text.push_str(&expansion.text); Expansion { varrefs, text } } } #[derive(Debug, Clone, PartialEq, Eq, Hash)] struct VarRef { var: String, modifier: Option, } impl VarRef { #[allow(dead_code)] fn with_variable(var: &str) -> Self { VarRef { var: var.to_string(), modifier: None, } } #[allow(dead_code)] fn with_variable_and_modifier(var: &str, modifier: &str) -> Self { VarRef { var: var.to_string(), modifier: Some(modifier.to_string()), } } } #[derive(Debug, Clone, PartialEq, Eq, Hash)] enum Node { Sequence(Vec), Optional(Box), Choice(Vec), Text(String), VarRef(VarRef), NonTerminal(String), } impl Node { fn expand(&self, grammar: &Grammar, data: &HashMap) -> Result> { match self { Node::Text(text) => Ok(vec![Expansion { varrefs: vec![], text: text.clone(), }]), Node::VarRef(var) => match data.get(&var.var) { Some(value) => { let text = match &var.modifier { Some(modifier) => match grammar.get_modifier(modifier) { Some(modifier) => Ok(modifier(value)), None => Err(Error::UnknownModifierError(modifier.to_string())), }, None => Ok(value.clone()), }?; Ok(vec![Expansion { varrefs: vec![var.var.clone()], text, }]) } None => Ok(vec![]), }, Node::NonTerminal(lhs) => match grammar.rules.get(lhs) { Some(rhs) => rhs.expand(grammar, data), None => Err(Error::UnknownNonTerminalError(lhs.clone())), }, Node::Sequence(nodes) => { let x: Vec> = nodes .iter() .map(|n| 
n.expand(grammar, data)) .collect::>>()?; let y: Vec = x .iter() .multi_cartesian_product() .map(|c| { c.into_iter() .fold(Expansion::default(), |a, b| a.concat(b.clone())) }) .collect(); Ok(y) } Node::Optional(node) => { let mut expansions = node.expand(grammar, data)?; expansions.push(Expansion::default()); Ok(expansions) } Node::Choice(nodes) => { let expansions: Vec = nodes .iter() // See https://stackoverflow.com/a/59852696/122762, "How to // handle Result in flat_map" .map(|n| n.expand(grammar, data)) .flat_map(|result| match result { Ok(vec) => vec.into_iter().map(Ok).collect(), Err(e) => vec![Err(e)], }) .collect::>>()?; Ok(expansions) } } } } impl ToString for Node { fn to_string(&self) -> String { match self { Node::Text(text) => text.to_string(), Node::Sequence(children) => { format!("[{}]", children.iter().map(|n| n.to_string()).join("")) } Node::VarRef(var) => match &var.modifier { Some(modifier) => format!("#{}|{}#", var.var, modifier), None => format!("#{}#", var.var), }, Node::NonTerminal(id) => format!("<{}>", id), Node::Optional(ref node) => format!("?:[{}]", node.to_string()), Node::Choice(nodes) => { format!("[{}]", nodes.iter().map(|n| n.to_string()).join("|")) } } } } /// A grammar is a set of expansion rules. #[derive(Clone)] pub struct Grammar { rules: HashMap, modifiers: HashMap String>>, default_weights: HashMap, } impl Grammar { fn new() -> Grammar { Grammar { rules: HashMap::new(), modifiers: HashMap::new(), default_weights: HashMap::new(), } } fn add_rule(&mut self, name: &str, node: Node) { self.rules.insert(name.to_string(), node); } fn get_rule(&self, name: &str) -> Option<&Node> { self.rules.get(name) } fn get_modifier(&self, modifier: &str) -> Option<&dyn Fn(&str) -> String> { self.modifiers.get(modifier).map(|x| x.as_ref()) } /// Returns the top-scoring expansion of the given rule, using the supplied /// data. 
pub fn generate(&self, name: &str, data: &HashMap) -> Result> { self.generate_with_weights(name, data, &self.default_weights) } /// Generates all possible expansions of the given rule, using the supplied /// data. /// /// Returns expansions in descending order by score. pub fn generate_all(&self, name: &str, data: &HashMap) -> Result> { self.generate_all_with_weights(name, data, &self.default_weights) } /// Generates the top-scoring expansion of the given rule, using the /// supplied data and weights. pub fn generate_with_weights( &self, name: &str, data: &HashMap, weights: &HashMap, ) -> Result> { let node = self.get_rule(name).unwrap(); let mut expansions = node.expand(self, data)?; expansions.sort_by_cached_key(|e| OrderedFloat(score_by_varref_weights(e, weights))); Ok(expansions.last().map(|e| e.text.clone())) } /// Generates all possible expansions of the given rule, using the supplied /// data and weights. /// /// Returns expansions in descending order by score. pub fn generate_all_with_weights( &self, name: &str, data: &HashMap, weights: &HashMap, ) -> Result> { let node = self .get_rule(name) .ok_or_else(|| Error::UnknownNonTerminalError(name.to_string()))?; let mut expansions = node.expand(self, data)?; expansions.sort_by_cached_key(|e| OrderedFloat(score_by_varref_weights(e, weights))); Ok(expansions.into_iter().rev().map(|e| e.text).collect()) } } fn score_by_varref_weights(expansion: &Expansion, weights: &HashMap) -> f64 { expansion .varrefs .iter() .map(|varref| weights.get(varref).unwrap_or(&1.0)) .sum() } impl Default for Grammar { fn default() -> Self { let mut grammar = Grammar::new(); grammar.modifiers = modifiers::get_default_modifiers(); grammar } } impl ToString for Grammar { fn to_string(&self) -> String { let mut s = String::new(); for (id, node) in &self.rules { // If the RHS is a sequence, we take advantage of the fact that // RHSes are an implicit sequence, and do not print the brackets // around it. 
match node { Node::Sequence(children) => { s.push_str(&format!( "{} = {}\n", id, children.iter().map(|n| n.to_string()).join("") )); } _ => { s.push_str(&format!("{} = {}\n", id, node.to_string())); } } } s } } impl FromStr for Grammar { type Err = Error; fn from_str(s: &str) -> Result { let mut grammar = parser::parse_grammar(s)?; grammar.modifiers = modifiers::get_default_modifiers(); Ok(grammar) } } #[cfg(test)] mod tests { use std::collections::HashSet; use super::*; use maplit::hashmap; fn grammar_and_data() -> (Grammar, HashMap) { let mut grammar = Grammar::default(); grammar.add_rule( "location", Node::VarRef(VarRef::with_variable_and_modifier("city", "capitalize")), ); let data = hashmap! { "name".to_string() => "John".to_string(), "city".to_string() => "london".to_string(), }; (grammar, data) } #[test] fn test_expand_text() { let (grammar, data) = grammar_and_data(); let node = Node::Text("hello".to_string()); let expansions = node.expand(&grammar, &data).unwrap(); assert_eq!( expansions, vec![Expansion { varrefs: vec![], text: "hello".to_string(), }] ); } #[test] fn test_expand_varref() { let (grammar, data) = grammar_and_data(); let node = Node::VarRef(VarRef::with_variable("name")); let expansions = node.expand(&grammar, &data).unwrap(); assert_eq!( expansions, vec![Expansion { varrefs: vec!["name".to_string()], text: "John".to_string(), }] ); } #[test] fn test_expand_nonterminal() { let (grammar, data) = grammar_and_data(); let node = Node::NonTerminal("location".to_string()); let expansions = node.expand(&grammar, &data).unwrap(); assert_eq!( expansions, vec![Expansion { varrefs: vec!["city".to_string()], text: "London".to_string(), }] ); } #[test] fn test_expand_sequence() { let (grammar, data) = grammar_and_data(); let c1 = Node::Text("in ".to_string()); let c2 = Node::NonTerminal("location".to_string()); let node = Node::Sequence(vec![c1, c2]); let expansions = node.expand(&grammar, &data).unwrap(); assert_eq!( expansions, vec![Expansion { varrefs: 
vec!["city".to_string()], text: "in London".to_string(), }] ); } #[test] fn test_expand_optional() { let (grammar, data) = grammar_and_data(); let hello = Node::Text("Hello ".to_string()); let dear = Node::Text("dear ".to_string()); let maybe_dear = Node::Optional(Box::new(dear)); let friend = Node::Text("friend".to_string()); let seq = Node::Sequence(vec![hello, maybe_dear, friend]); let expansions = seq.expand(&grammar, &data).unwrap(); assert_eq!( HashSet::<_>::from_iter(expansions), HashSet::from_iter(vec![ Expansion { varrefs: vec![], text: "Hello friend".to_string(), }, Expansion { varrefs: vec![], text: "Hello dear friend".to_string(), } ]) ); } #[test] fn test_expand_choice() { let (grammar, data) = grammar_and_data(); let snoopy = Node::Text("Snoopy".to_string()); let name = Node::VarRef(VarRef::with_variable("name")); let linus = Node::Text("Linus".to_string()); let choice = Node::Choice(vec![snoopy, name, linus]); let expansions = choice.expand(&grammar, &data).unwrap(); assert_eq!( HashSet::<_>::from_iter(expansions), HashSet::from_iter(vec![ Expansion { varrefs: vec![], text: "Snoopy".to_string(), }, Expansion { varrefs: vec!["name".to_string()], text: "John".to_string(), }, Expansion { varrefs: vec![], text: "Linus".to_string(), }, ]) ); } #[test] fn test_to_string() { let mut grammar = Grammar::default(); grammar.add_rule( "top", Node::Sequence(vec![ Node::Text("hi ".to_string()), Node::VarRef(VarRef::with_variable("name")), Node::Text(" in ".to_string()), Node::NonTerminal("location".to_string()), ]), ); grammar.add_rule( "location", Node::Sequence(vec![ Node::Text("city of ".to_string()), Node::VarRef(VarRef::with_variable("city")), ]), ); assert_eq!( HashSet::<_>::from_iter(grammar.to_string().split('\n').filter(|s| !s.is_empty())), HashSet::from_iter(vec![ "top = hi #name# in ", "location = city of #city#", ]) ); } #[test] fn test_generate() { let grammar = Grammar::from_str( r#" top = Hi ?:[, my dear #gender#,] in . 
name = #name# location = [city of #city#|#city# in #county# county] "#, ) .unwrap(); let data = hashmap! { "name".to_string() => "John".to_string(), "city".to_string() => "Janesville".to_string(), "county".to_string() => "Rock".to_string(), }; let r = grammar.generate("top", &data).unwrap().unwrap(); assert_eq!(r, "Hi John in Janesville in Rock county."); let exps = HashSet::<_>::from_iter(grammar.generate_all("top", &data).unwrap()); assert_eq!( exps, HashSet::from_iter(vec![ "Hi John in Janesville in Rock county.".to_string(), "Hi John in city of Janesville.".to_string(), ]) ); } }