//A very basic xml grammar extern crate gramatica; use std::cmp::Ordering; use gramatica::{Associativity,EarleyKind,State,Parser,ParsingTablesTrait,AmbiguityInfo}; // see https://www.w3.org/People/Bos/meta-bnf // also http://cs.lmu.edu/~ray/notes/xmlgrammar/ use std::rc::Rc; //We define an auxiliar type to store XML elements #[derive(Clone,Debug,PartialEq)] struct XMLElement { name: String, attrs: Vec<(String,String)>, contents: Vec, } #[derive(Clone,Debug,PartialEq)] enum XMLContent { Element(XMLElement), Data(String), } // ---- Start of the grammar ---- re_terminal!(Space(String),"(\\s|\n)+"); re_terminal!(Ident(String),"[a-zA-Z\\x80-\\xff_][a-zA-Z0-9\\x80-\\xff_]*"); terminal LitStr(String) { fn _match(parser: &mut Parser, source:&str) -> Option<(usize,String)> { let mut ret=None; let mut characters=source.chars(); if (characters.next())!=(Some('"')) { } else { let mut size=1; let mut r=String::from("\""); while true { match characters.next() { None => break, Some('"') => { ret=(Some((size+1,r+&"\""))); break; }, Some('\\') => { match characters.next() { None => break, //Some(c) => r+='\\'+c, Some(c) => { r.push('\\'); r.push(c); } }; size+=2; }, Some(c) => { //r+=&String::from(c); r.push(c); size+=1; }, }; } } ret } } re_terminal!(CloseEmpty,"/>"); re_terminal!(BeginClose,""); re_terminal!(Other(char),"."); nonterminal Document(XMLElement) { (Element(ref elem)) => elem.clone(), } nonterminal Element(XMLElement) { (EmptyElemTag(ref name,ref attrs)) => XMLElement{name:name.clone(),attrs:attrs.clone(),contents:vec![]}, (STag(ref name, ref attrs),Content(ref content),ETag) => XMLElement{name:name.clone(),attrs:attrs.clone(),contents:content.clone()}, } nonterminal EmptyElemTag(String,Vec<(String,String)>) { (LT,Ident(ref name),Attributes(ref attrs),MaybeSpace,CloseEmpty) => (name.clone(),attrs.clone()), } nonterminal Attributes(Vec<(String,String)>) { () => vec![], (Attributes(ref attrs),Space,Attribute(ref a, ref b)) => { let mut new=(attrs.clone()); new.push((a.clone(),b.clone())); new }, } nonterminal Attribute(String,String) { (Ident(ref a),Equal,LitStr(ref b)) => (a.clone(),b.clone()), } nonterminal STag(String,Vec<(String,String)>) { (LT,Ident(ref name),Attributes(ref attrs),MaybeSpace,GT) => (name.clone(),attrs.clone()), } nonterminal ETag(String) { (BeginClose,Ident(ref s),MaybeSpace,GT) => s.clone(), } nonterminal Content(Vec) { (CharData(ref s)) => vec![XMLContent::Data(s.clone())], (CharData(ref s),Contents(ref list)) => { let mut new=vec![XMLContent::Data(s.clone())]; new.extend(list.iter().map(|x|x.clone())); new }, } nonterminal Contents(Vec) { () => vec![], (Contents(ref list),Element(ref elem),CharData(ref s)) => { let mut new=(list.clone()); new.push(XMLContent::Element(elem.clone())); if s!="" { new.push(XMLContent::Data(s.clone())); } new }, } nonterminal MaybeSpace { () => (), (Space) => (), } nonterminal CharData(String) { () => String::new(), (CharData(ref s),Space(ref o)) => format!("{}{}",s,o), (CharData(ref s),Ident(ref o)) => format!("{}{}",s,o), (CharData(ref s),Equal) => format!("{}=",s), (CharData(ref s),Other(o)) => format!("{}{}",s,o), } // ---- End of the grammar ---- use std::io::{BufRead,Read}; //As example, we parse stdin for a XML element fn main() { let stdin=std::io::stdin(); let mut buf=String::new(); stdin.lock().read_to_string(&mut buf); match Parser::::parse(&buf,None) { Err(x) => println!("error parsing: {:?}",x), Ok(x) => println!("parsed correctly: {:?}",x), }; }