use std::io; use grep_matcher::LineTerminator; use crate::{ lines::LineIter, searcher::{ConfigError, Searcher}, }; /// A trait that describes errors that can be reported by searchers and /// implementations of `Sink`. /// /// Unless you have a specialized use case, you probably don't need to /// implement this trait explicitly. It's likely that using `std::io::Error` /// (which implements this trait) for your error type is good enough, /// largely because most errors that occur during search will likely be an /// `std::io::Error`. pub trait SinkError: Sized { /// A constructor for converting any value that satisfies the /// `std::fmt::Display` trait into an error. fn error_message(message: T) -> Self; /// A constructor for converting I/O errors that occur while searching into /// an error of this type. /// /// By default, this is implemented via the `error_message` constructor. fn error_io(err: io::Error) -> Self { Self::error_message(err) } /// A constructor for converting configuration errors that occur while /// building a searcher into an error of this type. /// /// By default, this is implemented via the `error_message` constructor. fn error_config(err: ConfigError) -> Self { Self::error_message(err) } } /// An `std::io::Error` can be used as an error for `Sink` implementations out /// of the box. impl SinkError for io::Error { fn error_message(message: T) -> io::Error { io::Error::new(io::ErrorKind::Other, message.to_string()) } fn error_io(err: io::Error) -> io::Error { err } } /// A `Box` can be used as an error for `Sink` /// implementations out of the box. impl SinkError for Box { fn error_message( message: T, ) -> Box { Box::::from(message.to_string()) } } /// A trait that defines how results from searchers are handled. /// /// In this crate, a searcher follows the "push" model. What that means is that /// the searcher drives execution, and pushes results back to the caller. This /// is in contrast to a "pull" model where the caller drives execution and /// takes results as they need them. These are also known as "internal" and /// "external" iteration strategies, respectively. /// /// For a variety of reasons, including the complexity of the searcher /// implementation, this crate chooses the "push" or "internal" model of /// execution. Thus, in order to act on search results, callers must provide /// an implementation of this trait to a searcher, and the searcher is then /// responsible for calling the methods on this trait. /// /// This trait defines several behaviors: /// /// * What to do when a match is found. Callers must provide this. /// * What to do when an error occurs. Callers must provide this via the /// [`SinkError`] trait. Generally, callers can just use `std::io::Error` for /// this, which already implements `SinkError`. /// * What to do when a contextual line is found. By default, these are /// ignored. /// * What to do when a gap between contextual lines has been found. By /// default, this is ignored. /// * What to do when a search has started. By default, this does nothing. /// * What to do when a search has finished successfully. By default, this does /// nothing. /// /// Callers must, at minimum, specify the behavior when an error occurs and /// the behavior when a match occurs. The rest is optional. For each behavior, /// callers may report an error (say, if writing the result to another /// location failed) or simply return `false` if they want the search to stop /// (e.g., when implementing a cap on the number of search results to show). /// /// When errors are reported (whether in the searcher or in the implementation /// of `Sink`), then searchers quit immediately without calling `finish`. /// /// For simpler uses of `Sink`, callers may elect to use one of /// the more convenient but less flexible implementations in the /// [`sinks`] module. pub trait Sink { /// The type of an error that should be reported by a searcher. /// /// Errors of this type are not only returned by the methods on this /// trait, but the constructors defined in `SinkError` are also used in /// the searcher implementation itself. e.g., When a I/O error occurs when /// reading data from a file. type Error: SinkError; /// This method is called whenever a match is found. /// /// If multi line is enabled on the searcher, then the match reported here /// may span multiple lines and it may include multiple matches. When multi /// line is disabled, then the match is guaranteed to span exactly one /// non-empty line (where a single line is, at minimum, a line terminator). /// /// If this returns `true`, then searching continues. If this returns /// `false`, then searching is stopped immediately and `finish` is called. /// /// If this returns an error, then searching is stopped immediately, /// `finish` is not called and the error is bubbled back up to the caller /// of the searcher. fn matched( &mut self, _searcher: &Searcher, _mat: &SinkMatch<'_>, ) -> Result; /// This method is called whenever a context line is found, and is optional /// to implement. By default, it does nothing and returns `true`. /// /// In all cases, the context given is guaranteed to span exactly one /// non-empty line (where a single line is, at minimum, a line terminator). /// /// If this returns `true`, then searching continues. If this returns /// `false`, then searching is stopped immediately and `finish` is called. /// /// If this returns an error, then searching is stopped immediately, /// `finish` is not called and the error is bubbled back up to the caller /// of the searcher. #[inline] fn context( &mut self, _searcher: &Searcher, _context: &SinkContext<'_>, ) -> Result { Ok(true) } /// This method is called whenever a break in contextual lines is found, /// and is optional to implement. By default, it does nothing and returns /// `true`. /// /// A break can only occur when context reporting is enabled (that is, /// either or both of `before_context` or `after_context` are greater than /// `0`). More precisely, a break occurs between non-contiguous groups of /// lines. /// /// If this returns `true`, then searching continues. If this returns /// `false`, then searching is stopped immediately and `finish` is called. /// /// If this returns an error, then searching is stopped immediately, /// `finish` is not called and the error is bubbled back up to the caller /// of the searcher. #[inline] fn context_break( &mut self, _searcher: &Searcher, ) -> Result { Ok(true) } /// This method is called whenever binary detection is enabled and binary /// data is found. If binary data is found, then this is called at least /// once for the first occurrence with the absolute byte offset at which /// the binary data begins. /// /// If this returns `true`, then searching continues. If this returns /// `false`, then searching is stopped immediately and `finish` is called. /// /// If this returns an error, then searching is stopped immediately, /// `finish` is not called and the error is bubbled back up to the caller /// of the searcher. /// /// By default, it does nothing and returns `true`. #[inline] fn binary_data( &mut self, _searcher: &Searcher, _binary_byte_offset: u64, ) -> Result { Ok(true) } /// This method is called when a search has begun, before any search is /// executed. By default, this does nothing. /// /// If this returns `true`, then searching continues. If this returns /// `false`, then searching is stopped immediately and `finish` is called. /// /// If this returns an error, then searching is stopped immediately, /// `finish` is not called and the error is bubbled back up to the caller /// of the searcher. #[inline] fn begin(&mut self, _searcher: &Searcher) -> Result { Ok(true) } /// This method is called when a search has completed. By default, this /// does nothing. /// /// If this returns an error, the error is bubbled back up to the caller of /// the searcher. #[inline] fn finish( &mut self, _searcher: &Searcher, _: &SinkFinish, ) -> Result<(), Self::Error> { Ok(()) } } impl<'a, S: Sink> Sink for &'a mut S { type Error = S::Error; #[inline] fn matched( &mut self, searcher: &Searcher, mat: &SinkMatch<'_>, ) -> Result { (**self).matched(searcher, mat) } #[inline] fn context( &mut self, searcher: &Searcher, context: &SinkContext<'_>, ) -> Result { (**self).context(searcher, context) } #[inline] fn context_break( &mut self, searcher: &Searcher, ) -> Result { (**self).context_break(searcher) } #[inline] fn binary_data( &mut self, searcher: &Searcher, binary_byte_offset: u64, ) -> Result { (**self).binary_data(searcher, binary_byte_offset) } #[inline] fn begin(&mut self, searcher: &Searcher) -> Result { (**self).begin(searcher) } #[inline] fn finish( &mut self, searcher: &Searcher, sink_finish: &SinkFinish, ) -> Result<(), S::Error> { (**self).finish(searcher, sink_finish) } } impl Sink for Box { type Error = S::Error; #[inline] fn matched( &mut self, searcher: &Searcher, mat: &SinkMatch<'_>, ) -> Result { (**self).matched(searcher, mat) } #[inline] fn context( &mut self, searcher: &Searcher, context: &SinkContext<'_>, ) -> Result { (**self).context(searcher, context) } #[inline] fn context_break( &mut self, searcher: &Searcher, ) -> Result { (**self).context_break(searcher) } #[inline] fn binary_data( &mut self, searcher: &Searcher, binary_byte_offset: u64, ) -> Result { (**self).binary_data(searcher, binary_byte_offset) } #[inline] fn begin(&mut self, searcher: &Searcher) -> Result { (**self).begin(searcher) } #[inline] fn finish( &mut self, searcher: &Searcher, sink_finish: &SinkFinish, ) -> Result<(), S::Error> { (**self).finish(searcher, sink_finish) } } /// Summary data reported at the end of a search. /// /// This reports data such as the total number of bytes searched and the /// absolute offset of the first occurrence of binary data, if any were found. /// /// A searcher that stops early because of an error does not call `finish`. /// A searcher that stops early because the `Sink` implementor instructed it /// to will still call `finish`. #[derive(Clone, Debug)] pub struct SinkFinish { pub(crate) byte_count: u64, pub(crate) binary_byte_offset: Option, } impl SinkFinish { /// Return the total number of bytes searched. #[inline] pub fn byte_count(&self) -> u64 { self.byte_count } /// If binary detection is enabled and if binary data was found, then this /// returns the absolute byte offset of the first detected byte of binary /// data. /// /// Note that since this is an absolute byte offset, it cannot be relied /// upon to index into any addressable memory. #[inline] pub fn binary_byte_offset(&self) -> Option { self.binary_byte_offset } } /// A type that describes a match reported by a searcher. #[derive(Clone, Debug)] pub struct SinkMatch<'b> { pub(crate) line_term: LineTerminator, pub(crate) bytes: &'b [u8], pub(crate) absolute_byte_offset: u64, pub(crate) line_number: Option, pub(crate) buffer: &'b [u8], pub(crate) bytes_range_in_buffer: std::ops::Range, } impl<'b> SinkMatch<'b> { /// Returns the bytes for all matching lines, including the line /// terminators, if they exist. #[inline] pub fn bytes(&self) -> &'b [u8] { self.bytes } /// Return an iterator over the lines in this match. /// /// If multi line search is enabled, then this may yield more than one /// line (but always at least one line). If multi line search is disabled, /// then this always reports exactly one line (but may consist of just /// the line terminator). /// /// Lines yielded by this iterator include their terminators. #[inline] pub fn lines(&self) -> LineIter<'b> { LineIter::new(self.line_term.as_byte(), self.bytes) } /// Returns the absolute byte offset of the start of this match. This /// offset is absolute in that it is relative to the very beginning of the /// input in a search, and can never be relied upon to be a valid index /// into an in-memory slice. #[inline] pub fn absolute_byte_offset(&self) -> u64 { self.absolute_byte_offset } /// Returns the line number of the first line in this match, if available. /// /// Line numbers are only available when the search builder is instructed /// to compute them. #[inline] pub fn line_number(&self) -> Option { self.line_number } /// Exposes as much of the underlying buffer that was search as possible. #[inline] pub fn buffer(&self) -> &'b [u8] { self.buffer } /// Returns a range that corresponds to where [`SinkMatch::bytes`] appears /// in [`SinkMatch::buffer`]. #[inline] pub fn bytes_range_in_buffer(&self) -> std::ops::Range { self.bytes_range_in_buffer.clone() } } /// The type of context reported by a searcher. #[derive(Clone, Debug, Eq, PartialEq)] pub enum SinkContextKind { /// The line reported occurred before a match. Before, /// The line reported occurred after a match. After, /// Any other type of context reported, e.g., as a result of a searcher's /// "passthru" mode. Other, } /// A type that describes a contextual line reported by a searcher. #[derive(Clone, Debug)] pub struct SinkContext<'b> { #[cfg(test)] pub(crate) line_term: LineTerminator, pub(crate) bytes: &'b [u8], pub(crate) kind: SinkContextKind, pub(crate) absolute_byte_offset: u64, pub(crate) line_number: Option, } impl<'b> SinkContext<'b> { /// Returns the context bytes, including line terminators. #[inline] pub fn bytes(&self) -> &'b [u8] { self.bytes } /// Returns the type of context. #[inline] pub fn kind(&self) -> &SinkContextKind { &self.kind } /// Return an iterator over the lines in this match. /// /// This always yields exactly one line (and that one line may contain just /// the line terminator). /// /// Lines yielded by this iterator include their terminators. #[cfg(test)] pub(crate) fn lines(&self) -> LineIter<'b> { LineIter::new(self.line_term.as_byte(), self.bytes) } /// Returns the absolute byte offset of the start of this context. This /// offset is absolute in that it is relative to the very beginning of the /// input in a search, and can never be relied upon to be a valid index /// into an in-memory slice. #[inline] pub fn absolute_byte_offset(&self) -> u64 { self.absolute_byte_offset } /// Returns the line number of the first line in this context, if /// available. /// /// Line numbers are only available when the search builder is instructed /// to compute them. #[inline] pub fn line_number(&self) -> Option { self.line_number } } /// A collection of convenience implementations of `Sink`. /// /// Each implementation in this module makes some kind of sacrifice in the name /// of making common cases easier to use. Most frequently, each type is a /// wrapper around a closure specified by the caller that provides limited /// access to the full suite of information available to implementors of /// `Sink`. /// /// For example, the `UTF8` sink makes the following sacrifices: /// /// * All matches must be UTF-8. An arbitrary `Sink` does not have this /// restriction and can deal with arbitrary data. If this sink sees invalid /// UTF-8, then an error is returned and searching stops. (Use the `Lossy` /// sink instead to suppress this error.) /// * The searcher must be configured to report line numbers. If it isn't, /// an error is reported at the first match and searching stops. /// * Context lines, context breaks and summary data reported at the end of /// a search are all ignored. /// * Implementors are forced to use `std::io::Error` as their error type. /// /// If you need more flexibility, then you're advised to implement the `Sink` /// trait directly. pub mod sinks { use std::io; use crate::searcher::Searcher; use super::{Sink, SinkError, SinkMatch}; /// A sink that provides line numbers and matches as strings while ignoring /// everything else. /// /// This implementation will return an error if a match contains invalid /// UTF-8 or if the searcher was not configured to count lines. Errors /// on invalid UTF-8 can be suppressed by using the `Lossy` sink instead /// of this one. /// /// The closure accepts two parameters: a line number and a UTF-8 string /// containing the matched data. The closure returns a /// `Result`. If the `bool` is `false`, then the /// search stops immediately. Otherwise, searching continues. /// /// If multi line mode was enabled, the line number refers to the line /// number of the first line in the match. #[derive(Clone, Debug)] pub struct UTF8(pub F) where F: FnMut(u64, &str) -> Result; impl Sink for UTF8 where F: FnMut(u64, &str) -> Result, { type Error = io::Error; fn matched( &mut self, _searcher: &Searcher, mat: &SinkMatch<'_>, ) -> Result { let matched = match std::str::from_utf8(mat.bytes()) { Ok(matched) => matched, Err(err) => return Err(io::Error::error_message(err)), }; let line_number = match mat.line_number() { Some(line_number) => line_number, None => { let msg = "line numbers not enabled"; return Err(io::Error::error_message(msg)); } }; (self.0)(line_number, &matched) } } /// A sink that provides line numbers and matches as (lossily converted) /// strings while ignoring everything else. /// /// This is like `UTF8`, except that if a match contains invalid UTF-8, /// then it will be lossily converted to valid UTF-8 by substituting /// invalid UTF-8 with Unicode replacement characters. /// /// This implementation will return an error on the first match if the /// searcher was not configured to count lines. /// /// The closure accepts two parameters: a line number and a UTF-8 string /// containing the matched data. The closure returns a /// `Result`. If the `bool` is `false`, then the /// search stops immediately. Otherwise, searching continues. /// /// If multi line mode was enabled, the line number refers to the line /// number of the first line in the match. #[derive(Clone, Debug)] pub struct Lossy(pub F) where F: FnMut(u64, &str) -> Result; impl Sink for Lossy where F: FnMut(u64, &str) -> Result, { type Error = io::Error; fn matched( &mut self, _searcher: &Searcher, mat: &SinkMatch<'_>, ) -> Result { use std::borrow::Cow; let matched = match std::str::from_utf8(mat.bytes()) { Ok(matched) => Cow::Borrowed(matched), // TODO: In theory, it should be possible to amortize // allocation here, but `std` doesn't provide such an API. // Regardless, this only happens on matches with invalid UTF-8, // which should be pretty rare. Err(_) => String::from_utf8_lossy(mat.bytes()), }; let line_number = match mat.line_number() { Some(line_number) => line_number, None => { let msg = "line numbers not enabled"; return Err(io::Error::error_message(msg)); } }; (self.0)(line_number, &matched) } } /// A sink that provides line numbers and matches as raw bytes while /// ignoring everything else. /// /// This implementation will return an error on the first match if the /// searcher was not configured to count lines. /// /// The closure accepts two parameters: a line number and a raw byte string /// containing the matched data. The closure returns a /// `Result`. If the `bool` is `false`, then the /// search stops immediately. Otherwise, searching continues. /// /// If multi line mode was enabled, the line number refers to the line /// number of the first line in the match. #[derive(Clone, Debug)] pub struct Bytes(pub F) where F: FnMut(u64, &[u8]) -> Result; impl Sink for Bytes where F: FnMut(u64, &[u8]) -> Result, { type Error = io::Error; fn matched( &mut self, _searcher: &Searcher, mat: &SinkMatch<'_>, ) -> Result { let line_number = match mat.line_number() { Some(line_number) => line_number, None => { let msg = "line numbers not enabled"; return Err(io::Error::error_message(msg)); } }; (self.0)(line_number, mat.bytes()) } } }