//! Some common and useful nom recipes, shared by all other modules. use nom::{ branch::alt, bytes::complete::{tag, take_until}, character::complete::{char, multispace1}, combinator::{cut, opt, value}, error::{ErrorKind as NomErrorKind, ParseError}, multi::many0, sequence::tuple, Parser, }; use super::error::{Error, ErrorKind}; use super::types::{Input, ParseResult}; /// Right trim after the given parser. pub(crate) fn rtrim<'a, F, O>(mut inner: F) -> impl FnMut(Input<'a>) -> ParseResult<'a, O> where F: Parser, O, Error> + 'a, { move |input| { let (mut input, output) = inner.parse(input)?; input.save_cursor_before_rtrim(); let (input, _) = opt(many0(alt(( multiline_comment, singleline_comment, value((), multispace1), )))) .parse(input)?; Ok((input, output)) } } /// Left trim the input. pub(crate) fn ltrim(mut input: Input) -> ParseResult<()> { loop { match alt(( multiline_comment, singleline_comment, value((), multispace1), ))(input) { Ok((i, ())) => input = i, Err(nom::Err::Error(_)) => return Ok((input, ())), err @ Err(_) => return err, } } } /// Accepts a first parser, only if the second one does not match afterwards pub(crate) fn not_followed<'a, F, G, OF, OG>( mut f: F, mut g: G, ) -> impl FnMut(Input<'a>) -> ParseResult<'a, OF> where F: Parser, OF, Error> + 'a, G: Parser, OG, Error> + 'a, { move |input| { let (input, output) = f.parse(input)?; if g.parse(input).is_ok() { return Err(nom::Err::Error(Error::from_error_kind( input, NomErrorKind::IsNot, ))); } Ok((input, output)) } } /// Accepts a single character if the passed function returns true on it. pub(crate) fn take_one(f: F) -> impl for<'a> Fn(Input<'a>) -> ParseResult<'a, char> where F: Fn(char) -> bool, { move |mut input| match input.cursor().chars().next().map(|c| (c, f(c))) { Some((c, true)) => { input.advance(c.len_utf8()); Ok((input, c)) } _ => Err(nom::Err::Error(Error::from_char(input, '0'))), } } /// Recognize a textual tag. /// /// This is the same as [`nom::bytes::complete::tag`], but ensures the /// following character is not alphanumeric. /// This avoids recognizing a tag inside a word, for example, recognizing /// `foo` in `foobar`. pub(crate) fn textual_tag( tag: &'static str, ) -> impl for<'a> Fn(Input<'a>) -> ParseResult<'a, &'static str> { move |input: Input| { if let Some(input) = input.strip_prefix(tag) { match input.cursor().chars().next() { Some(c) if c.is_alphanumeric() => Err(nom::Err::Error(Error::from_error_kind( input, NomErrorKind::Tag, ))), _ => Ok((input, tag)), } } else { Err(nom::Err::Error(Error::from_error_kind( input, NomErrorKind::Tag, ))) } } } /// Parse a C-style /* ... */ comment. /// /// Equivalent to the `comment` state in libyara. fn multiline_comment(input: Input) -> ParseResult<()> { value( (), tuple((tag("/*"), cut(take_until("*/")), cut(tag("*/")))), )(input) } /// Parse single line // ... comments. fn singleline_comment(input: Input) -> ParseResult<()> { value( (), tuple((tag("//"), cut(take_until("\n")), cut(char('\n')))), )(input) } /// Equivalent to [`nom::combinator::map_res`] but expects an /// [`super::types::ErrorKind`] type of error. /// /// This allows using the starting input to generate a proper span /// for the error. pub(crate) fn map_res<'a, O1, O2, F, G>( mut parser: F, mut f: G, ) -> impl FnMut(Input<'a>) -> ParseResult where F: Parser, O1, Error>, G: FnMut(O1) -> Result, { move |input: Input| { let start = input.pos(); let (input, o1) = parser.parse(input)?; match f(o1) { Ok(o2) => Ok((input, o2)), Err(kind) => Err(nom::Err::Failure(Error::new( input.get_span_from(start), kind, ))), } } } #[cfg(test)] mod tests { use super::*; use crate::test_helpers::{parse, parse_err}; fn dummy_parser(input: Input) -> ParseResult { char('-')(input) } #[test] fn test_rtrim() { parse(dummy_parser, "- b", " b", '-'); parse(rtrim(dummy_parser), "- b", "b", '-'); parse(rtrim(dummy_parser), "-/* */ b", "b", '-'); parse(rtrim(dummy_parser), "- /* */b", "b", '-'); parse(rtrim(dummy_parser), "- /* */ /* */ b", "b", '-'); parse(rtrim(dummy_parser), "- // /* foo\n /**/ b", "b", '-'); } #[test] fn test_ltrim() { parse(ltrim, " - b", "- b", ()); parse(ltrim, "/* */ - b", "- b", ()); parse(ltrim, " /* */- b", "- b", ()); parse(ltrim, "/* */ /* */ b", "b", ()); parse(ltrim, "// /* foo\n /**/ ", "", ()); parse_err(ltrim, "/*"); parse_err(ltrim, "//"); } #[test] fn test_take_one() { parse(take_one(char::is_lowercase), "bc", "c", 'b'); parse_err(take_one(char::is_lowercase), "Bc"); } #[test] fn test_multiline_comment() { parse(multiline_comment, "/**/a", "a", ()); parse(multiline_comment, "/* a\n */\n", "\n", ()); parse(multiline_comment, "/*** a\n\n**//* a */c", "/* a */c", ()); parse(multiline_comment, "/*** a\n//*/\n*/", "\n*/", ()); parse_err(multiline_comment, "/"); parse_err(multiline_comment, "/*"); parse_err(multiline_comment, "/*/"); parse_err(multiline_comment, "/*\n/*"); parse_err(multiline_comment, "/ * */"); parse_err(multiline_comment, "/* * /"); } #[test] fn test_singleline_comment() { parse(singleline_comment, "//\n", "", ()); parse(singleline_comment, "// comment\n// 2", "// 2", ()); parse_err(singleline_comment, "/"); parse_err(singleline_comment, "//"); parse_err(singleline_comment, "// comment"); parse_err(singleline_comment, "// comment //"); } }