use std::{fs, ops::Range}; use iregex::{Alternation, Atom, CompoundAutomaton, Concatenation, IRegEx}; use iregex_automata::{any_char, dot::DotDisplay, nfa::U32StateBuilder, Map, RangeSet, NFA}; #[test] fn no_matches_anchored() { let vectors = [( Atom::<_, ()>::star(Atom::Token(['a', 'b', 'c'].into_iter().collect()).into()).into(), "abcd", )]; for (root, haystack) in vectors { let ire = IRegEx::anchored(root); let aut = ire.compile(U32StateBuilder::default()).unwrap(); let mut matches = aut.matches(haystack.chars()); assert_eq!(matches.next(), None); } } #[test] fn single_match_anchored() { let vectors = [ (Concatenation::<_, ()>::new().into(), ""), (Atom::Token(any_char()).into(), "a"), (Atom::star(Atom::Token(any_char()).into()).into(), "abcd"), ]; for (root, haystack) in vectors { let ire = IRegEx::anchored(root); let aut = ire.compile(U32StateBuilder::default()).unwrap(); let mut matches = aut.matches(haystack.chars()); assert_eq!(matches.next(), Some(0..haystack.len())); assert_eq!(matches.next(), None); } } #[test] fn single_match_unanchored() { let b: RangeSet = ['b'].into_iter().collect(); let vectors = [ (Atom::Token(b.clone()).into(), "aba", 1..2), ( [Atom::Token(b.clone()).into(), Atom::Token(b).into()] .into_iter() .collect::() .into(), "abba", 1..3, ), ]; for (root, haystack, expected) in vectors { let ire = IRegEx::unanchored(root); let aut = ire.compile(U32StateBuilder::default()).unwrap(); let mut matches = aut.matches(haystack.chars()); assert_eq!(matches.next(), Some(expected)); assert_eq!(matches.next(), None); } } #[test] fn many_matches_unanchored() { let a = Atom::Token(['a'].into_iter().collect()); let b = Atom::Token(['b'].into_iter().collect()); let vectors: [(Alternation, &str, &[Range]); 3] = [ ( Concatenation::new().into(), "aaa", &[0..0, 1..1, 2..2, 3..3], ), (Atom::Token(any_char()).into(), "aaa", &[0..1, 1..2, 2..3]), ( [Concatenation::from(a), Concatenation::from(b)] .into_iter() .collect(), "abab", &[0..1, 1..2, 2..3, 3..4], ), ]; for (i, (root, haystack, expected)) in vectors.into_iter().enumerate() { let ire = IRegEx::unanchored(root); let aut = ire.compile(U32StateBuilder::default()).unwrap(); let matches: Vec<_> = aut.matches(haystack.chars()).collect(); if matches != expected { write_compound_automaton(format!("many_matches_unanchored_{i}"), &aut); } assert_eq!(matches, expected); } } fn write_compound_automaton(basename: String, aut: &CompoundAutomaton) { write_automaton(format!("{basename}_prefix.dot"), &aut.prefix); write_automaton(format!("{basename}_root.dot"), &aut.root.get(&()).unwrap()); write_automaton( format!("{basename}_suffix.dot"), &aut.suffix.get(&()).unwrap(), ); } fn write_automaton(path: String, aut: &NFA) { fs::write(&path, aut.dot().to_string()).unwrap(); }