//! Rustlr is an LR-style parser generator for Rust. Advanced features
//! include:
//! 1. Option to automatically generate the AST datatypes and semantic actions, with manual overrides possible. Rustlr's grammar format contains a sublanguage
//! that controls how ASTs are created, so that the generated types do
//! not necessarily reflect the format of the grammar.
//! 2. Option to use [bumpalo](https://docs.rs/bumpalo/latest/bumpalo/index.html) to create
//! AST types that enable *nested* pattern matching against recursive types.
//!
//! 3. Recognizes regex-style operators `*`, `+` and `?`, which simplify
//! the writing of grammars and allow better ASTs to be created.
//! 4. An experimental feature that recognizes *Selective Marcus-Leermakers*
//! grammars. This is a class of unambiguous grammars that's
//! larger than traditional LR grammars. They are especially helpful
//! in avoiding conflicts when new production rules are added to a grammar.
//! 5. The ability to train the parser interactively for better error reporting.
//! 6. Also generates parsers for F# and other .Net languages
//!
//! A **[tutorial]()**
//! is separately available that will explain the
//! format of grammars and how to generate and deploy parsers for several
//! examples. The documentation found here should be used as a technical
//! reference.
//!
//! Rustlr should be installed as an executable (**cargo install rustlr**),
//! although parser generation can also be invoked with the [rustle] function.
//! Many of the items exported by this crate are only required by the parsers
//! that are generated, and are not intended to be used in other programs.
//! However, rustlr uses traits and trait objects to loosely couple the
//! various components of the runtime parser so that custom interfaces, such as
//! those for graphical IDEs, can be built around a basic [ZCParser::parse_core]
//! function.
//!
#![allow(dead_code)] #![allow(unused_variables)] #![allow(non_snake_case)] #![allow(non_camel_case_types)] #![allow(unused_parens)] #![allow(unused_assignments)] #![allow(unused_doc_comments)] #![allow(unused_imports)] #[cfg(feature = "generator")] mod generator_lib; #[cfg(feature = "generator")] pub use generator_lib::*; pub mod zc_parser; use zc_parser::*; mod grammar_processor; use grammar_processor::*; mod lr_statemachine; use lr_statemachine::*; pub mod lexer_interface; pub use lexer_interface::*; pub mod runtime_parser; use runtime_parser::*; mod augmenter; use augmenter::*; pub mod generic_absyn; pub use generic_absyn::*; pub mod zc_parser; use zc_parser::*; mod parser_writer; mod sd_parserwriter; mod fs_parserwriter; mod ast_writer; mod fs_astwriter; mod bumpast_writer; mod lalr_statemachine; mod selmlk; // experimental //mod logos_lexer; mod yacc_ast; mod yaccparser; use lalr_statemachine::LALRMachine; use selmlk::{MLStatemachine}; pub use lr_statemachine::{Stateaction,decode_action}; pub use runtime_parser::{RuntimeParser,RProduction}; pub use zc_parser::{ZCParser,ZCRProduction}; //pub use enhancements::{ParseValue,ParseResult,Enhanced_Lexer}; pub const VERSION:&'static str = "0.4.13"; /// This function can be called from within Rust to generate a parser/lexer. /// It takes the same arguments as the rustlr command-line application. /// Furthermore, if given the `-trace 0` option, no output will be /// sent to stdout or stderr. Instead, a log of events is recorded and /// is returned. An `Ok(_)` result indicates that some parser was created /// and an `Err(_)` result indicates failure. /// Example: /// ```ignore /// let report = rustlr::generate("simplecalc.grammar -o src/main.rs -trace 0"); /// ``` pub fn generate(argv:&str) -> Result { let asplit:Vec<_> = argv.split_whitespace().collect(); rustle1(&asplit) } /// This function is retained for backwards compatiblity. It is recommended /// to call [generate] instead. 
pub fn rustle(args:&Vec) -> Result // called from main { let mut args2 = Vec::new(); for s in args { args2.push(&s[..]); } rustle1(&args2[..]) } fn rustle1(args:&[&str]) -> Result // called from main { let argc = args.len(); if argc<2 { //eprintln!("Must give path of .grammar file"); return; return Err("Must give path of .grammar file".to_owned()); } let mut filepath = ""; let mut parserfile = String::from(""); // -o target let mut lalr = false; // changed from false in version 0.2.0 let mut newlalr = true; let mut tracelev:usize = 1; // trace-level let mut verbose = false; let mut zc = true; let mut genlex = false; let mut genabsyn = false; let mut lrsd = false; let mut lrsdmaxk:usize = selmlk::MAXK; let mut regenerate = false; let mut mode = 0; let mut conv_yacc = false; let mut argi = 1; // next argument position while argi {filepath = args[argi];}, filen if filen.ends_with(".y") => { filepath=args[argi]; conv_yacc=true; break; }, "lr1" | "LR1" | "-lr1" => { lalr=false; newlalr=false; }, "lalr" | "LALR" | "-lalr" => {newlalr=true; }, "lalr1" | "LALR1" | "-lalr1" => {newlalr=true; }, "oldlalr" | "-oldlalr" | "-selML" => {newlalr=false; lalr=true;} "-lrsd" | "lrsd" => { newlalr=false; lalr=false; lrsd=true; if argi+1() { lrsdmaxk=mk; argi+=1; } // next arg is number }//if next arg exists }, "-regenerate" => { regenerate=true; }, "-fsharp" => {mode=1;}, "-trace" => { argi+=1; if argi() {tracelev=lv; } if tracelev>0 {println!("trace-level set to {}",tracelev);} } }, "verbose" | "-verbose" => { verbose=true; }, "-zc" | "zero_copy" => {zc=true;}, "genlex" | "-genlex" => {genlex=true; }, "-genabsyn" | "-ast" | "-auto" => {genabsyn = true; }, "-nozc" => {zc=false;}, "binary" | "-binary" => { verbose=false; }, "-o" => { argi+=1; if argi {}, }//match directive argi+=1; }//while there are command-line args if filepath.len()==0 { //eprintln!("Must give path of .grammar file or .y file to convert from"); return Err("Must give path of .grammar file or .y file to convert 
from".to_owned()); } if conv_yacc { yaccparser::convert_from_yacc(filepath); return Ok(String::new()); //return Ok(".y grammar converted to .grammar\n".to_owned()); } if zc && verbose { //eprintln!("verbose mode not compatible with -zc option"); return Err("verbose mode not compatible with -zc option".to_owned()); } if tracelev>0 && verbose {println!("verbose parsers should be used for diagnositic purposes and cannot be trained/augmented");} if tracelev>1 {println!("parsing grammar from {}",&filepath);} let mut grammar1 = Grammar::new(); grammar1.genlex = genlex; grammar1.genabsyn = genabsyn; grammar1.tracelev = tracelev; grammar1.mode = mode; // 0 for rust, 1 for fsharp let parsedok = grammar1.parse_grammar(filepath); // *** if !parsedok { //println!("\nFailed to process grammar"); return Err(format!("\nFailed to process grammar at {}",filepath)); } // Check grammar integrity: now done inside parse if grammar1.name.len()<2 { // derive grammar name from filepath let doti = if let Some(p)= filepath.rfind('.') {p} else {filepath.len()}; let mut slashi = if let Some(p) = filepath.rfind('/') {p+1} else {0}; if slashi==0 { slashi = if let Some(p) = filepath.rfind('\\') {p+1} else {0}; } grammar1.name = filepath[slashi..doti].to_string(); }// derive grammar name let gramname = grammar1.name.clone(); let pfsuffix = if mode==1 {"fs"} else {"rs"}; if grammar1.genabsyn { let mut slashpos = parserfile.rfind('/'); if let None = slashpos {slashpos = parserfile.rfind('\\');} let mut astpath = format!("{}_ast.{}",&gramname,pfsuffix); if let Some(pos) = slashpos { astpath=format!("{}{}",&parserfile[..pos+1],&astpath); } let wres; if mode==1 {wres = grammar1.write_fsast(&astpath); } else if !grammar1.bumpast { wres = grammar1.writeabsyn(&astpath); } else {wres = grammar1.write_bumpast(&astpath); } if !wres.is_ok() { //eprintln!("Failed to generate abstract syntax"); return Err("Failed to generate abstract syntax".to_owned()); } } grammar1.delay_transform(); // static delayed 
reduction markers if tracelev>2 {println!("computing Nullable set");} grammar1.compute_NullableRf(); if tracelev>2 {println!("computing First sets");} //grammar1.compute_FirstIM(); grammar1.compute_First(); let mut fsm0; if lrsd { grammar1.logprint(&format!("Generating Experimental LR-Selective Delay State Machine with Max Delay = {}",lrsdmaxk)); let mut lrsdfsm = MLStatemachine::new(grammar1); lrsdfsm.regenerate = regenerate; lrsdfsm.selml(lrsdmaxk); //fsm0 = lrsdfsm.to_statemachine(); if lrsdfsm.failed { //println!("NO PARSER GENERATED"); return; return Err("LR SELECTIVE DELAY FAILURE. NO PARSER GENERATED".to_owned()); } if !lrsdfsm.failed && lrsdfsm.regenerate { lrsdfsm.Gmr.logprint("Re-Generating LR(1) machine for transformed grammar..."); lrsd = false; fsm0 = Statemachine::new(lrsdfsm.Gmr); fsm0.lalr = false; fsm0.generatefsm(); //GENERATE THE FSM } else { fsm0 = lrsdfsm.to_statemachine(); } // but of course there will be more conflicts since there will be // more rules. The original rules that caused conflicts for LR are // still there?? 
} else // not lrsd if newlalr { // newlalr takes precedence over other flags grammar1.logprint("Generating LALR(1) state machine"); let mut lalrfsm = LALRMachine::new(grammar1); lalrfsm.generatefsm(); fsm0 = lalrfsm.to_statemachine(); } else { grammar1.logprint(&format!("Generating {} state machine for grammar {}...",if lalr {"older LALR"} else {"LR1"},&gramname)); fsm0 = Statemachine::new(grammar1); fsm0.lalr = lalr; if lalr {fsm0.Open = Vec::with_capacity(1024); } // important fsm0.generatefsm(); //GENERATE THE FSM } // old code if tracelev>2 && !newlalr && !lrsd { for state in &fsm0.States {printstate(state,&fsm0.Gmr);} } else if tracelev>1 && !newlalr && !lrsd { printstate(&fsm0.States[0],&fsm0.Gmr); }//print states if parserfile.len()<1 || parserfile.ends_with('/') || parserfile.ends_with('\\') {parserfile.push_str(&format!("{}parser.{}",&gramname,pfsuffix));} if fsm0.States.len()>65536 { return Err(format!("too many states: {} execeeds limit of 65536",fsm0.States.len())); } let write_result = if mode==1 { fsm0.writefsparser(&parserfile) } else if zc { // write zero-copy parser //fsm0.writezcparser(&parserfile) //fsm0.writelbaparser(&parserfile) if !lrsd {fsm0.writeenumparser(&parserfile)} else {fsm0.writelrsdparser(&parserfile)} } else { // non-zc, original before version 0.2.0 if verbose /*fsm0.States.len()<=16*/ {fsm0.write_verbose(&parserfile)} else {fsm0.writeparser(&parserfile)} }; // write_result = //if tracelev>0 && !lrsd {eprintln!("{} total states",fsm0.FSM.len());} fsm0.Gmr.logprint(&format!("{} total states",fsm0.FSM.len())); if let Ok(_) = write_result { fsm0.Gmr.logprint(&format!("Parser saved in {}",&parserfile)); } else if let Err(err) = write_result { return Err(format!("failed to write parser, likely due to invalid -o destination\n{:?}",err)); } let mut savedlog = String::new(); if tracelev==0 {fsm0.Gmr.swap_log(&mut savedlog);} Ok(savedlog) }//rustle