/*! Provides implementations of `fst::Automaton` for Aho-Corasick automata. This works by providing two wrapper types, [`Anchored`] and [`Unanchored`]. The former executes an anchored search on an FST while the latter executes an unanchored search. Building these wrappers is fallible and will fail if the underlying Aho-Corasick automaton does not support the type of search it represents. */ use crate::{ automaton::{Automaton, StateID}, Anchored as AcAnchored, Input, MatchError, }; /// Represents an unanchored Aho-Corasick search of a finite state transducer. /// /// Wrapping an Aho-Corasick automaton in `Unanchored` will fail if the /// underlying automaton does not support unanchored searches. /// /// # Example /// /// This shows how to build an FST of keys and then run an unanchored search on /// those keys using an Aho-Corasick automaton. /// /// ``` /// use aho_corasick::{nfa::contiguous::NFA, transducer::Unanchored}; /// use fst::{Automaton, IntoStreamer, Set, Streamer}; /// /// let set = Set::from_iter(&["abcd", "bc", "bcd", "xyz"]).unwrap(); /// let nfa = NFA::new(&["bcd", "x"]).unwrap(); /// // NFAs always support both unanchored and anchored searches. /// let searcher = Unanchored::new(&nfa).unwrap(); /// /// let mut stream = set.search(searcher).into_stream(); /// let mut results = vec![]; /// while let Some(key) = stream.next() { /// results.push(std::str::from_utf8(key).unwrap().to_string()); /// } /// assert_eq!(vec!["abcd", "bcd", "xyz"], results); /// ``` #[derive(Clone, Debug)] pub struct Unanchored(A); impl Unanchored { /// Create a new `Unanchored` implementation of the `fst::Automaton` trait. /// /// If the given Aho-Corasick automaton does not support unanchored /// searches, then this returns an error. pub fn new(aut: A) -> Result, MatchError> { let input = Input::new("").anchored(AcAnchored::No); let _ = aut.start_state(&input)?; Ok(Unanchored(aut)) } /// Returns a borrow to the underlying automaton. pub fn as_ref(&self) -> &A { &self.0 } /// Unwrap this value and return the inner automaton. pub fn into_inner(self) -> A { self.0 } } impl fst::Automaton for Unanchored { type State = StateID; #[inline] fn start(&self) -> StateID { let input = Input::new("").anchored(AcAnchored::No); self.0.start_state(&input).expect("support for unanchored searches") } #[inline] fn is_match(&self, state: &StateID) -> bool { self.0.is_match(*state) } #[inline] fn accept(&self, state: &StateID, byte: u8) -> StateID { if fst::Automaton::is_match(self, state) { return *state; } self.0.next_state(AcAnchored::No, *state, byte) } #[inline] fn can_match(&self, state: &StateID) -> bool { !self.0.is_dead(*state) } } /// Represents an anchored Aho-Corasick search of a finite state transducer. /// /// Wrapping an Aho-Corasick automaton in `Unanchored` will fail if the /// underlying automaton does not support unanchored searches. /// /// # Example /// /// This shows how to build an FST of keys and then run an anchored search on /// those keys using an Aho-Corasick automaton. /// /// ``` /// use aho_corasick::{nfa::contiguous::NFA, transducer::Anchored}; /// use fst::{Automaton, IntoStreamer, Set, Streamer}; /// /// let set = Set::from_iter(&["abcd", "bc", "bcd", "xyz"]).unwrap(); /// let nfa = NFA::new(&["bcd", "x"]).unwrap(); /// // NFAs always support both unanchored and anchored searches. /// let searcher = Anchored::new(&nfa).unwrap(); /// /// let mut stream = set.search(searcher).into_stream(); /// let mut results = vec![]; /// while let Some(key) = stream.next() { /// results.push(std::str::from_utf8(key).unwrap().to_string()); /// } /// assert_eq!(vec!["bcd", "xyz"], results); /// ``` /// /// This is like the example above, except we use an Aho-Corasick DFA, which /// requires explicitly configuring it to support anchored searches. (NFAs /// unconditionally support both unanchored and anchored searches.) /// /// ``` /// use aho_corasick::{dfa::DFA, transducer::Anchored, StartKind}; /// use fst::{Automaton, IntoStreamer, Set, Streamer}; /// /// let set = Set::from_iter(&["abcd", "bc", "bcd", "xyz"]).unwrap(); /// let dfa = DFA::builder() /// .start_kind(StartKind::Anchored) /// .build(&["bcd", "x"]) /// .unwrap(); /// // We've explicitly configured our DFA to support anchored searches. /// let searcher = Anchored::new(&dfa).unwrap(); /// /// let mut stream = set.search(searcher).into_stream(); /// let mut results = vec![]; /// while let Some(key) = stream.next() { /// results.push(std::str::from_utf8(key).unwrap().to_string()); /// } /// assert_eq!(vec!["bcd", "xyz"], results); /// ``` #[derive(Clone, Debug)] pub struct Anchored(A); impl Anchored { /// Create a new `Anchored` implementation of the `fst::Automaton` trait. /// /// If the given Aho-Corasick automaton does not support anchored searches, /// then this returns an error. pub fn new(aut: A) -> Result, MatchError> { let input = Input::new("").anchored(AcAnchored::Yes); let _ = aut.start_state(&input)?; Ok(Anchored(aut)) } /// Returns a borrow to the underlying automaton. pub fn as_ref(&self) -> &A { &self.0 } /// Unwrap this value and return the inner automaton. pub fn into_inner(self) -> A { self.0 } } impl fst::Automaton for Anchored { type State = StateID; #[inline] fn start(&self) -> StateID { let input = Input::new("").anchored(AcAnchored::Yes); self.0.start_state(&input).expect("support for unanchored searches") } #[inline] fn is_match(&self, state: &StateID) -> bool { self.0.is_match(*state) } #[inline] fn accept(&self, state: &StateID, byte: u8) -> StateID { if fst::Automaton::is_match(self, state) { return *state; } self.0.next_state(AcAnchored::Yes, *state, byte) } #[inline] fn can_match(&self, state: &StateID) -> bool { !self.0.is_dead(*state) } } #[cfg(test)] mod tests { use alloc::{string::String, vec, vec::Vec}; use fst::{Automaton, IntoStreamer, Set, Streamer}; use crate::{ dfa::DFA, nfa::{contiguous, noncontiguous}, StartKind, }; use super::*; fn search>( set: &Set, aut: A, ) -> Vec { let mut stream = set.search(aut).into_stream(); let mut results = vec![]; while let Some(key) = stream.next() { results.push(String::from(core::str::from_utf8(key).unwrap())); } results } #[test] fn unanchored() { let set = Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"]) .unwrap(); let patterns = vec!["baz", "bax"]; let expected = vec!["baz", "xbax"]; let aut = Unanchored(noncontiguous::NFA::new(&patterns).unwrap()); let got = search(&set, &aut); assert_eq!(got, expected); let aut = Unanchored(contiguous::NFA::new(&patterns).unwrap()); let got = search(&set, &aut); assert_eq!(got, expected); let aut = Unanchored(DFA::new(&patterns).unwrap()); let got = search(&set, &aut); assert_eq!(got, expected); } #[test] fn anchored() { let set = Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"]) .unwrap(); let patterns = vec!["baz", "bax"]; let expected = vec!["baz"]; let aut = Anchored(noncontiguous::NFA::new(&patterns).unwrap()); let got = search(&set, &aut); assert_eq!(got, expected); let aut = Anchored(contiguous::NFA::new(&patterns).unwrap()); let got = search(&set, &aut); assert_eq!(got, expected); let aut = Anchored( DFA::builder() .start_kind(StartKind::Anchored) .build(&patterns) .unwrap(), ); let got = search(&set, &aut); assert_eq!(got, expected); } }