use { grep_matcher::{Captures, Match, Matcher}, regex::bytes::Regex, }; use crate::util::{RegexMatcher, RegexMatcherNoCaps}; fn matcher(pattern: &str) -> RegexMatcher { RegexMatcher::new(Regex::new(pattern).unwrap()) } fn matcher_no_caps(pattern: &str) -> RegexMatcherNoCaps { RegexMatcherNoCaps(Regex::new(pattern).unwrap()) } fn m(start: usize, end: usize) -> Match { Match::new(start, end) } #[test] fn find() { let matcher = matcher(r"(\w+)\s+(\w+)"); assert_eq!(matcher.find(b" homer simpson ").unwrap(), Some(m(1, 14))); } #[test] fn find_iter() { let matcher = matcher(r"(\w+)\s+(\w+)"); let mut matches = vec![]; matcher .find_iter(b"aa bb cc dd", |m| { matches.push(m); true }) .unwrap(); assert_eq!(matches, vec![m(0, 5), m(6, 11)]); // Test that find_iter respects short circuiting. matches.clear(); matcher .find_iter(b"aa bb cc dd", |m| { matches.push(m); false }) .unwrap(); assert_eq!(matches, vec![m(0, 5)]); } #[test] fn try_find_iter() { #[derive(Clone, Debug, Eq, PartialEq)] struct MyError; let matcher = matcher(r"(\w+)\s+(\w+)"); let mut matches = vec![]; let err = matcher .try_find_iter(b"aa bb cc dd", |m| { if matches.is_empty() { matches.push(m); Ok(true) } else { Err(MyError) } }) .unwrap() .unwrap_err(); assert_eq!(matches, vec![m(0, 5)]); assert_eq!(err, MyError); } #[test] fn shortest_match() { let matcher = matcher(r"a+"); // This tests that the default impl isn't doing anything smart, and simply // defers to `find`. assert_eq!(matcher.shortest_match(b"aaa").unwrap(), Some(3)); // The actual underlying regex is smarter. assert_eq!(matcher.re.shortest_match(b"aaa"), Some(1)); } #[test] fn captures() { let matcher = matcher(r"(?P\w+)\s+(?P\w+)"); assert_eq!(matcher.capture_count(), 3); assert_eq!(matcher.capture_index("a"), Some(1)); assert_eq!(matcher.capture_index("b"), Some(2)); assert_eq!(matcher.capture_index("nada"), None); let mut caps = matcher.new_captures().unwrap(); assert!(matcher.captures(b" homer simpson ", &mut caps).unwrap()); assert_eq!(caps.get(0), Some(m(1, 14))); assert_eq!(caps.get(1), Some(m(1, 6))); assert_eq!(caps.get(2), Some(m(7, 14))); } #[test] fn captures_iter() { let matcher = matcher(r"(?P\w+)\s+(?P\w+)"); let mut caps = matcher.new_captures().unwrap(); let mut matches = vec![]; matcher .captures_iter(b"aa bb cc dd", &mut caps, |caps| { matches.push(caps.get(0).unwrap()); matches.push(caps.get(1).unwrap()); matches.push(caps.get(2).unwrap()); true }) .unwrap(); assert_eq!( matches, vec![m(0, 5), m(0, 2), m(3, 5), m(6, 11), m(6, 8), m(9, 11),] ); // Test that captures_iter respects short circuiting. matches.clear(); matcher .captures_iter(b"aa bb cc dd", &mut caps, |caps| { matches.push(caps.get(0).unwrap()); matches.push(caps.get(1).unwrap()); matches.push(caps.get(2).unwrap()); false }) .unwrap(); assert_eq!(matches, vec![m(0, 5), m(0, 2), m(3, 5),]); } #[test] fn try_captures_iter() { #[derive(Clone, Debug, Eq, PartialEq)] struct MyError; let matcher = matcher(r"(?P\w+)\s+(?P\w+)"); let mut caps = matcher.new_captures().unwrap(); let mut matches = vec![]; let err = matcher .try_captures_iter(b"aa bb cc dd", &mut caps, |caps| { if matches.is_empty() { matches.push(caps.get(0).unwrap()); matches.push(caps.get(1).unwrap()); matches.push(caps.get(2).unwrap()); Ok(true) } else { Err(MyError) } }) .unwrap() .unwrap_err(); assert_eq!(matches, vec![m(0, 5), m(0, 2), m(3, 5)]); assert_eq!(err, MyError); } // Test that our default impls for capturing are correct. Namely, when // capturing isn't supported by the underlying matcher, then all of the // various capturing related APIs fail fast. #[test] fn no_captures() { let matcher = matcher_no_caps(r"(?P\w+)\s+(?P\w+)"); assert_eq!(matcher.capture_count(), 0); assert_eq!(matcher.capture_index("a"), None); assert_eq!(matcher.capture_index("b"), None); assert_eq!(matcher.capture_index("nada"), None); let mut caps = matcher.new_captures().unwrap(); assert!(!matcher.captures(b"homer simpson", &mut caps).unwrap()); let mut called = false; matcher .captures_iter(b"homer simpson", &mut caps, |_| { called = true; true }) .unwrap(); assert!(!called); } #[test] fn replace() { let matcher = matcher(r"(\w+)\s+(\w+)"); let mut dst = vec![]; matcher .replace(b"aa bb cc dd", &mut dst, |_, dst| { dst.push(b'z'); true }) .unwrap(); assert_eq!(dst, b"z z"); // Test that replacements respect short circuiting. dst.clear(); matcher .replace(b"aa bb cc dd", &mut dst, |_, dst| { dst.push(b'z'); false }) .unwrap(); assert_eq!(dst, b"z cc dd"); } #[test] fn replace_with_captures() { let matcher = matcher(r"(\w+)\s+(\w+)"); let haystack = b"aa bb cc dd"; let mut caps = matcher.new_captures().unwrap(); let mut dst = vec![]; matcher .replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| { caps.interpolate( |name| matcher.capture_index(name), haystack, b"$2 $1", dst, ); true }) .unwrap(); assert_eq!(dst, b"bb aa dd cc"); // Test that replacements respect short circuiting. dst.clear(); matcher .replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| { caps.interpolate( |name| matcher.capture_index(name), haystack, b"$2 $1", dst, ); false }) .unwrap(); assert_eq!(dst, b"bb aa cc dd"); }