#![doc = include_str!("./README.md")]
#![cfg_attr(not(feature = "std"), no_std)]

mod core_wrapper;
pub use core_wrapper::core as core_transpiled;

pub mod ffi;
mod util;

#[cfg(not(feature = "std"))]
extern crate alloc;

#[cfg(not(feature = "std"))]
use alloc::{boxed::Box, format, string::String, string::ToString, vec::Vec};
use core::{
    char,
    ffi::{c_char, c_void, CStr},
    fmt::{self, Write},
    hash, iter,
    marker::PhantomData,
    mem::MaybeUninit,
    num::NonZeroU16,
    ops::{self, Deref},
    ptr::{self, NonNull},
    slice, str,
    sync::atomic::AtomicUsize,
};
#[cfg(feature = "std")]
use std::error;
#[cfg(feature = "capi")]
#[cfg(all(feature = "std", any(unix, target_os = "wasi")))]
use std::os::fd::AsRawFd;
#[cfg(all(windows, feature = "std"))]
use std::os::windows::io::AsRawHandle;

use streaming_iterator::{StreamingIterator, StreamingIteratorMut};
use tree_sitter_language::LanguageFn;

#[cfg(feature = "wasm")]
mod wasm_language;
#[cfg(feature = "wasm")]
pub use wasm_language::*;

/// The latest ABI version that is supported by the current version of the
/// library.
///
/// When Languages are generated by the Tree-sitter CLI, they are
/// assigned an ABI version number that corresponds to the current CLI version.
/// The Tree-sitter library is generally backwards-compatible with languages
/// generated using older CLI versions, but is not forwards-compatible.
#[doc(alias = "TREE_SITTER_LANGUAGE_VERSION")]
pub const LANGUAGE_VERSION: usize = ffi::TREE_SITTER_LANGUAGE_VERSION as usize;

/// The earliest ABI version that is supported by the current version of the
/// library.
#[doc(alias = "TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION")]
pub const MIN_COMPATIBLE_LANGUAGE_VERSION: usize =
    ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION as usize;

/// The contents of the C header `parser.h`, exposed so that generated
/// parsers can be compiled against the exact header this crate was built with.
pub const PARSER_HEADER: &str = include_str!("../src/parser.h");

/// An opaque object that defines how to parse a particular language. The code
/// for each `Language` is generated by the Tree-sitter CLI.
#[doc(alias = "TSLanguage")]
#[derive(Debug, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct Language(*const ffi::TSLanguage);

/// A borrowed reference to a [`Language`], tied to the lifetime of whatever
/// owns the underlying `TSLanguage` (e.g. a [`Tree`] or [`Parser`]).
pub struct LanguageRef<'a>(*const ffi::TSLanguage, PhantomData<&'a ()>);

/// A tree that represents the syntactic structure of a source code file.
#[doc(alias = "TSTree")]
pub struct Tree(NonNull<ffi::TSTree>);

/// A position in a multi-line text document, in terms of rows and columns.
///
/// Rows and columns are zero-based.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Point {
    pub row: usize,
    pub column: usize,
}

/// A range of positions in a multi-line text document, both in terms of bytes
/// and of rows and columns.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct Range {
    pub start_byte: usize,
    pub end_byte: usize,
    pub start_point: Point,
    pub end_point: Point,
}

/// A summary of a change to a text document.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct InputEdit {
    pub start_byte: usize,
    pub old_end_byte: usize,
    pub new_end_byte: usize,
    pub start_position: Point,
    pub old_end_position: Point,
    pub new_end_position: Point,
}

/// A single node within a syntax [`Tree`].
#[doc(alias = "TSNode")]
#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct Node<'tree>(ffi::TSNode, PhantomData<&'tree ()>);

/// A stateful object that this is used to produce a [`Tree`] based on some
/// source code.
#[doc(alias = "TSParser")]
pub struct Parser(NonNull<ffi::TSParser>);

/// A stateful object that is used to look up symbols valid in a specific parse
/// state
#[doc(alias = "TSLookaheadIterator")]
pub struct LookaheadIterator(NonNull<ffi::TSLookaheadIterator>);

/// Iterator over the *names* of the symbols yielded by a [`LookaheadIterator`].
struct LookaheadNamesIterator<'a>(&'a mut LookaheadIterator);

/// A type of log message.
#[derive(Debug, PartialEq, Eq)]
pub enum LogType {
    Parse,
    Lex,
}

/// Numerical id of a field; zero is reserved as "no field", hence `NonZeroU16`
/// so `Option<FieldId>` is pointer-free.
type FieldId = NonZeroU16;

/// A callback that receives log messages during parser.
type Logger<'a> = Box<dyn FnMut(LogType, &str) + 'a>;

/// A stateful object for walking a syntax [`Tree`] efficiently.
#[doc(alias = "TSTreeCursor")]
pub struct TreeCursor<'cursor>(ffi::TSTreeCursor, PhantomData<&'cursor ()>);

/// A set of patterns that match nodes in a syntax tree.
#[doc(alias = "TSQuery")]
#[derive(Debug)]
#[allow(clippy::type_complexity)]
pub struct Query {
    ptr: NonNull<ffi::TSQuery>,
    capture_names: Box<[&'static str]>,
    capture_quantifiers: Box<[Box<[CaptureQuantifier]>]>,
    text_predicates: Box<[Box<[TextPredicateCapture]>]>,
    property_settings: Box<[Box<[QueryProperty]>]>,
    property_predicates: Box<[Box<[(QueryProperty, bool)]>]>,
    general_predicates: Box<[Box<[QueryPredicate]>]>,
}

/// A quantifier for captures
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CaptureQuantifier {
    Zero,
    ZeroOrOne,
    ZeroOrMore,
    One,
    OneOrMore,
}

impl From<ffi::TSQuantifier> for CaptureQuantifier {
    fn from(value: ffi::TSQuantifier) -> Self {
        match value {
            ffi::TSQuantifierZero => Self::Zero,
            ffi::TSQuantifierZeroOrOne => Self::ZeroOrOne,
            ffi::TSQuantifierZeroOrMore => Self::ZeroOrMore,
            ffi::TSQuantifierOne => Self::One,
            ffi::TSQuantifierOneOrMore => Self::OneOrMore,
            _ => panic!("Unrecognized quantifier: {value}"),
        }
    }
}

/// A stateful object for executing a [`Query`] on a syntax [`Tree`].
#[doc(alias = "TSQueryCursor")]
pub struct QueryCursor {
    ptr: NonNull<ffi::TSQueryCursor>,
}

/// A key-value pair associated with a particular pattern in a [`Query`].
#[derive(Debug, PartialEq, Eq)]
pub struct QueryProperty {
    pub key: Box<str>,
    pub value: Option<Box<str>>,
    pub capture_id: Option<usize>,
}

#[derive(Debug, PartialEq, Eq)]
pub enum QueryPredicateArg {
    Capture(u32),
    String(Box<str>),
}

/// A key-value pair associated with a particular pattern in a [`Query`].
#[derive(Debug, PartialEq, Eq)]
pub struct QueryPredicate {
    pub operator: Box<str>,
    pub args: Box<[QueryPredicateArg]>,
}

/// A match of a [`Query`] to a particular set of [`Node`]s.
pub struct QueryMatch<'cursor, 'tree> {
    pub pattern_index: usize,
    pub captures: &'cursor [QueryCapture<'tree>],
    id: u32,
    cursor: *mut ffi::TSQueryCursor,
}

/// A sequence of [`QueryMatch`]es associated with a given [`QueryCursor`].
pub struct QueryMatches<'query, 'tree: 'query, T: TextProvider, I: AsRef<[u8]>> { ptr: *mut ffi::TSQueryCursor, query: &'query Query, text_provider: T, buffer1: Vec, buffer2: Vec, current_match: Option>, _phantom: PhantomData<(&'tree (), I)>, } /// A sequence of [`QueryCapture`]s associated with a given [`QueryCursor`]. pub struct QueryCaptures<'query, 'tree: 'query, T: TextProvider, I: AsRef<[u8]>> { ptr: *mut ffi::TSQueryCursor, query: &'query Query, text_provider: T, buffer1: Vec, buffer2: Vec, current_match: Option<(QueryMatch<'query, 'tree>, usize)>, _phantom: PhantomData<(&'tree (), I)>, } pub trait TextProvider where I: AsRef<[u8]>, { type I: Iterator; fn text(&mut self, node: Node) -> Self::I; } /// A particular [`Node`] that has been captured with a particular name within a /// [`Query`]. #[derive(Clone, Copy, Debug)] #[repr(C)] pub struct QueryCapture<'tree> { pub node: Node<'tree>, pub index: u32, } /// An error that occurred when trying to assign an incompatible [`Language`] to /// a [`Parser`]. #[derive(Debug, PartialEq, Eq)] pub struct LanguageError { version: usize, } /// An error that occurred in [`Parser::set_included_ranges`]. #[derive(Debug, PartialEq, Eq)] pub struct IncludedRangesError(pub usize); /// An error that occurred when trying to create a [`Query`]. 
#[derive(Debug, PartialEq, Eq)] pub struct QueryError { pub row: usize, pub column: usize, pub offset: usize, pub message: String, pub kind: QueryErrorKind, } #[derive(Debug, PartialEq, Eq)] pub enum QueryErrorKind { Syntax, NodeType, Field, Capture, Predicate, Structure, Language, } #[derive(Debug)] /// The first item is the capture index /// The next is capture specific, depending on what item is expected /// The first bool is if the capture is positive /// The last item is a bool signifying whether or not it's meant to match /// any or all captures enum TextPredicateCapture { EqString(u32, Box, bool, bool), EqCapture(u32, u32, bool, bool), MatchString(u32, regex::bytes::Regex, bool, bool), AnyString(u32, Box<[Box]>, bool), } // TODO: Remove this struct at at some point. If `core::str::lossy::Utf8Lossy` // is ever stabilized. pub struct LossyUtf8<'a> { bytes: &'a [u8], in_replacement: bool, } impl Language { #[must_use] pub fn new(builder: LanguageFn) -> Self { Self(unsafe { builder.into_raw()().cast() }) } /// Get the ABI version number that indicates which version of the /// Tree-sitter CLI that was used to generate this [`Language`]. #[doc(alias = "ts_language_version")] #[must_use] pub fn version(&self) -> usize { unsafe { ffi::ts_language_version(self.0) as usize } } /// Get the number of distinct node types in this language. #[doc(alias = "ts_language_symbol_count")] #[must_use] pub fn node_kind_count(&self) -> usize { unsafe { ffi::ts_language_symbol_count(self.0) as usize } } /// Get the number of valid states in this language. #[doc(alias = "ts_language_state_count")] #[must_use] pub fn parse_state_count(&self) -> usize { unsafe { ffi::ts_language_state_count(self.0) as usize } } /// Get the name of the node kind for the given numerical id. 
#[doc(alias = "ts_language_symbol_name")] #[must_use] pub fn node_kind_for_id(&self, id: u16) -> Option<&'static str> { let ptr = unsafe { ffi::ts_language_symbol_name(self.0, id) }; (!ptr.is_null()).then(|| unsafe { CStr::from_ptr(ptr) }.to_str().unwrap()) } /// Get the numeric id for the given node kind. #[doc(alias = "ts_language_symbol_for_name")] #[must_use] pub fn id_for_node_kind(&self, kind: &str, named: bool) -> u16 { unsafe { ffi::ts_language_symbol_for_name( self.0, kind.as_bytes().as_ptr().cast::(), kind.len() as u32, named, ) } } /// Check if the node type for the given numerical id is named (as opposed /// to an anonymous node type). #[must_use] pub fn node_kind_is_named(&self, id: u16) -> bool { unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolTypeRegular } } #[doc(alias = "ts_language_symbol_type")] #[must_use] pub fn node_kind_is_visible(&self, id: u16) -> bool { unsafe { ffi::ts_language_symbol_type(self.0, id) <= ffi::TSSymbolTypeAnonymous } } /// Get the number of distinct field names in this language. #[doc(alias = "ts_language_field_count")] #[must_use] pub fn field_count(&self) -> usize { unsafe { ffi::ts_language_field_count(self.0) as usize } } /// Get the field names for the given numerical id. #[doc(alias = "ts_language_field_name_for_id")] #[must_use] pub fn field_name_for_id(&self, field_id: u16) -> Option<&'static str> { let ptr = unsafe { ffi::ts_language_field_name_for_id(self.0, field_id) }; (!ptr.is_null()).then(|| unsafe { CStr::from_ptr(ptr) }.to_str().unwrap()) } /// Get the numerical id for the given field name. #[doc(alias = "ts_language_field_id_for_name")] #[must_use] pub fn field_id_for_name(&self, field_name: impl AsRef<[u8]>) -> Option { let field_name = field_name.as_ref(); let id = unsafe { ffi::ts_language_field_id_for_name( self.0, field_name.as_ptr().cast::(), field_name.len() as u32, ) }; FieldId::new(id) } /// Get the next parse state. 
Combine this with /// [`lookahead_iterator`](Language::lookahead_iterator) to /// generate completion suggestions or valid symbols in error nodes. /// /// Example: /// ``` /// let state = language.next_state(node.parse_state(), node.grammar_id()); /// ``` #[doc(alias = "ts_language_next_state")] #[must_use] pub fn next_state(&self, state: u16, id: u16) -> u16 { unsafe { ffi::ts_language_next_state(self.0, state, id) } } /// Create a new lookahead iterator for this language and parse state. /// /// This returns `None` if state is invalid for this language. /// /// Iterating [`LookaheadIterator`] will yield valid symbols in the given /// parse state. Newly created lookahead iterators will return the `ERROR` /// symbol from [`LookaheadIterator::current_symbol`]. /// /// Lookahead iterators can be useful to generate suggestions and improve /// syntax error diagnostics. To get symbols valid in an ERROR node, use the /// lookahead iterator on its first leaf node state. For `MISSING` nodes, a /// lookahead iterator created on the previous non-extra leaf node may be /// appropriate. #[doc(alias = "ts_lookahead_iterator_new")] #[must_use] pub fn lookahead_iterator(&self, state: u16) -> Option { let ptr = unsafe { ffi::ts_lookahead_iterator_new(self.0, state) }; (!ptr.is_null()).then(|| unsafe { LookaheadIterator::from_raw(ptr) }) } } impl From for Language { fn from(value: LanguageFn) -> Self { Self::new(value) } } impl Clone for Language { fn clone(&self) -> Self { unsafe { Self(ffi::ts_language_copy(self.0)) } } } impl Drop for Language { fn drop(&mut self) { unsafe { ffi::ts_language_delete(self.0) } } } impl<'a> Deref for LanguageRef<'a> { type Target = Language; fn deref(&self) -> &Self::Target { unsafe { &*(core::ptr::addr_of!(self.0).cast::()) } } } impl Default for Parser { fn default() -> Self { Self::new() } } impl Parser { /// Create a new parser. 
#[doc(alias = "ts_parser_new")] #[must_use] pub fn new() -> Self { unsafe { let parser = ffi::ts_parser_new(); Self(NonNull::new_unchecked(parser)) } } /// Set the language that the parser should use for parsing. /// /// Returns a Result indicating whether or not the language was successfully /// assigned. True means assignment succeeded. False means there was a /// version mismatch: the language was generated with an incompatible /// version of the Tree-sitter CLI. Check the language's version using /// [`Language::version`] and compare it to this library's /// [`LANGUAGE_VERSION`](LANGUAGE_VERSION) and /// [`MIN_COMPATIBLE_LANGUAGE_VERSION`](MIN_COMPATIBLE_LANGUAGE_VERSION) /// constants. #[doc(alias = "ts_parser_set_language")] pub fn set_language(&mut self, language: &Language) -> Result<(), LanguageError> { let version = language.version(); if (MIN_COMPATIBLE_LANGUAGE_VERSION..=LANGUAGE_VERSION).contains(&version) { unsafe { ffi::ts_parser_set_language(self.0.as_ptr(), language.0); } Ok(()) } else { Err(LanguageError { version }) } } /// Get the parser's current language. #[doc(alias = "ts_parser_language")] #[must_use] pub fn language(&self) -> Option { let ptr = unsafe { ffi::ts_parser_language(self.0.as_ptr()) }; (!ptr.is_null()).then(|| Language(ptr)) } /// Get the parser's current logger. #[doc(alias = "ts_parser_logger")] #[must_use] pub fn logger(&self) -> Option<&Logger> { let logger = unsafe { ffi::ts_parser_logger(self.0.as_ptr()) }; unsafe { logger.payload.cast::().as_ref() } } /// Set the logging callback that a parser should use during parsing. 
#[doc(alias = "ts_parser_set_logger")] pub fn set_logger(&mut self, logger: Option) { let prev_logger = unsafe { ffi::ts_parser_logger(self.0.as_ptr()) }; if !prev_logger.payload.is_null() { drop(unsafe { Box::from_raw(prev_logger.payload.cast::()) }); } let c_logger; if let Some(logger) = logger { let container = Box::new(logger); unsafe extern "C" fn log( payload: *mut c_void, c_log_type: ffi::TSLogType, c_message: *const c_char, ) { let callback = payload.cast::().as_mut().unwrap(); if let Ok(message) = CStr::from_ptr(c_message).to_str() { let log_type = if c_log_type == ffi::TSLogTypeParse { LogType::Parse } else { LogType::Lex }; callback(log_type, message); } } let raw_container = Box::into_raw(container); c_logger = ffi::TSLogger { payload: raw_container.cast::(), log: Some(log), }; } else { c_logger = ffi::TSLogger { payload: ptr::null_mut(), log: None, }; } unsafe { ffi::ts_parser_set_logger(self.0.as_ptr(), c_logger) }; } #[cfg(feature = "capi")] /// Set the destination to which the parser should write debugging graphs /// during parsing. The graphs are formatted in the DOT language. You may /// want to pipe these graphs directly to a `dot(1)` process in order to /// generate SVG output. #[doc(alias = "ts_parser_print_dot_graphs")] #[cfg(not(target_os = "wasi"))] #[cfg(feature = "std")] pub fn print_dot_graphs( &mut self, #[cfg(unix)] file: &impl AsRawFd, #[cfg(windows)] file: &impl AsRawHandle, ) { #[cfg(unix)] { let fd = file.as_raw_fd(); unsafe { ffi::ts_parser_print_dot_graphs(self.0.as_ptr(), ffi::_ts_dup(fd)); } } #[cfg(windows)] { let handle = file.as_raw_handle(); unsafe { ffi::ts_parser_print_dot_graphs(self.0.as_ptr(), ffi::_ts_dup(handle)); } } } /// Stop the parser from printing debugging graphs while parsing. #[doc(alias = "ts_parser_print_dot_graphs")] pub fn stop_printing_dot_graphs(&mut self) { unsafe { ffi::ts_parser_print_dot_graphs(self.0.as_ptr(), -1) } } /// Parse a slice of UTF8 text. 
/// /// # Arguments: /// * `text` The UTF8-encoded text to parse. /// * `old_tree` A previous syntax tree parsed from the same document. If the text of the /// document has changed since `old_tree` was created, then you must edit `old_tree` to match /// the new text using [`Tree::edit`]. /// /// Returns a [`Tree`] if parsing succeeded, or `None` if: /// * The parser has not yet had a language assigned with [`Parser::set_language`] /// * The timeout set with [`Parser::set_timeout_micros`] expired /// * The cancellation flag set with [`Parser::set_cancellation_flag`] was flipped #[doc(alias = "ts_parser_parse")] pub fn parse(&mut self, text: impl AsRef<[u8]>, old_tree: Option<&Tree>) -> Option { let bytes = text.as_ref(); let len = bytes.len(); self.parse_with( &mut |i, _| (i < len).then(|| &bytes[i..]).unwrap_or_default(), old_tree, ) } /// Parse a slice of UTF16 text. /// /// # Arguments: /// * `text` The UTF16-encoded text to parse. /// * `old_tree` A previous syntax tree parsed from the same document. If the text of the /// document has changed since `old_tree` was created, then you must edit `old_tree` to match /// the new text using [`Tree::edit`]. pub fn parse_utf16( &mut self, input: impl AsRef<[u16]>, old_tree: Option<&Tree>, ) -> Option { let code_points = input.as_ref(); let len = code_points.len(); self.parse_utf16_with( &mut |i, _| (i < len).then(|| &code_points[i..]).unwrap_or_default(), old_tree, ) } /// Parse UTF8 text provided in chunks by a callback. /// /// # Arguments: /// * `callback` A function that takes a byte offset and position and returns a slice of /// UTF8-encoded text starting at that byte offset and position. The slices can be of any /// length. If the given position is at the end of the text, the callback should return an /// empty slice. /// * `old_tree` A previous syntax tree parsed from the same document. 
If the text of the /// document has changed since `old_tree` was created, then you must edit `old_tree` to match /// the new text using [`Tree::edit`]. pub fn parse_with, F: FnMut(usize, Point) -> T>( &mut self, callback: &mut F, old_tree: Option<&Tree>, ) -> Option { // A pointer to this payload is passed on every call to the `read` C function. // The payload contains two things: // 1. A reference to the rust `callback`. // 2. The text that was returned from the previous call to `callback`. This allows the // callback to return owned values like vectors. let mut payload: (&mut F, Option) = (callback, None); // This C function is passed to Tree-sitter as the input callback. unsafe extern "C" fn read, F: FnMut(usize, Point) -> T>( payload: *mut c_void, byte_offset: u32, position: ffi::TSPoint, bytes_read: *mut u32, ) -> *const c_char { let (callback, text) = payload.cast::<(&mut F, Option)>().as_mut().unwrap(); *text = Some(callback(byte_offset as usize, position.into())); let slice = text.as_ref().unwrap().as_ref(); *bytes_read = slice.len() as u32; slice.as_ptr().cast::() } let c_input = ffi::TSInput { payload: core::ptr::addr_of_mut!(payload).cast::(), read: Some(read::), encoding: ffi::TSInputEncodingUTF8, }; let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0.as_ptr()); unsafe { let c_new_tree = ffi::ts_parser_parse(self.0.as_ptr(), c_old_tree, c_input); NonNull::new(c_new_tree).map(Tree) } } /// Parse UTF16 text provided in chunks by a callback. /// /// # Arguments: /// * `callback` A function that takes a code point offset and position and returns a slice of /// UTF16-encoded text starting at that byte offset and position. The slices can be of any /// length. If the given position is at the end of the text, the callback should return an /// empty slice. /// * `old_tree` A previous syntax tree parsed from the same document. 
If the text of the /// document has changed since `old_tree` was created, then you must edit `old_tree` to match /// the new text using [`Tree::edit`]. pub fn parse_utf16_with, F: FnMut(usize, Point) -> T>( &mut self, callback: &mut F, old_tree: Option<&Tree>, ) -> Option { // A pointer to this payload is passed on every call to the `read` C function. // The payload contains two things: // 1. A reference to the rust `callback`. // 2. The text that was returned from the previous call to `callback`. This allows the // callback to return owned values like vectors. let mut payload: (&mut F, Option) = (callback, None); // This C function is passed to Tree-sitter as the input callback. unsafe extern "C" fn read, F: FnMut(usize, Point) -> T>( payload: *mut c_void, byte_offset: u32, position: ffi::TSPoint, bytes_read: *mut u32, ) -> *const c_char { let (callback, text) = payload.cast::<(&mut F, Option)>().as_mut().unwrap(); *text = Some(callback( (byte_offset / 2) as usize, Point { row: position.row as usize, column: position.column as usize / 2, }, )); let slice = text.as_ref().unwrap().as_ref(); *bytes_read = slice.len() as u32 * 2; slice.as_ptr().cast::() } let c_input = ffi::TSInput { payload: core::ptr::addr_of_mut!(payload).cast::(), read: Some(read::), encoding: ffi::TSInputEncodingUTF16, }; let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0.as_ptr()); unsafe { let c_new_tree = ffi::ts_parser_parse(self.0.as_ptr(), c_old_tree, c_input); NonNull::new(c_new_tree).map(Tree) } } /// Instruct the parser to start the next parse from the beginning. /// /// If the parser previously failed because of a timeout or a cancellation, /// then by default, it will resume where it left off on the next call /// to [`parse`](Parser::parse) or other parsing functions. If you don't /// want to resume, and instead intend to use this parser to parse some /// other document, you must call `reset` first. 
#[doc(alias = "ts_parser_reset")] pub fn reset(&mut self) { unsafe { ffi::ts_parser_reset(self.0.as_ptr()) } } /// Get the duration in microseconds that parsing is allowed to take. /// /// This is set via [`set_timeout_micros`](Parser::set_timeout_micros). #[doc(alias = "ts_parser_timeout_micros")] #[must_use] pub fn timeout_micros(&self) -> u64 { unsafe { ffi::ts_parser_timeout_micros(self.0.as_ptr()) } } /// Set the maximum duration in microseconds that parsing should be allowed /// to take before halting. /// /// If parsing takes longer than this, it will halt early, returning `None`. /// See [`parse`](Parser::parse) for more information. #[doc(alias = "ts_parser_set_timeout_micros")] pub fn set_timeout_micros(&mut self, timeout_micros: u64) { unsafe { ffi::ts_parser_set_timeout_micros(self.0.as_ptr(), timeout_micros) } } /// Set the ranges of text that the parser should include when parsing. /// /// By default, the parser will always include entire documents. This /// function allows you to parse only a *portion* of a document but /// still return a syntax tree whose ranges match up with the document /// as a whole. You can also pass multiple disjoint ranges. /// /// If `ranges` is empty, then the entire document will be parsed. /// Otherwise, the given ranges must be ordered from earliest to latest /// in the document, and they must not overlap. That is, the following /// must hold for all `i` < `length - 1`: /// ```text /// ranges[i].end_byte <= ranges[i + 1].start_byte /// ``` /// If this requirement is not satisfied, method will return /// [`IncludedRangesError`] error with an offset in the passed ranges /// slice pointing to a first incorrect range. 
#[doc(alias = "ts_parser_set_included_ranges")] pub fn set_included_ranges(&mut self, ranges: &[Range]) -> Result<(), IncludedRangesError> { let ts_ranges = ranges.iter().copied().map(Into::into).collect::>(); let result = unsafe { ffi::ts_parser_set_included_ranges( self.0.as_ptr(), ts_ranges.as_ptr(), ts_ranges.len() as u32, ) }; if result { Ok(()) } else { let mut prev_end_byte = 0; for (i, range) in ranges.iter().enumerate() { if range.start_byte < prev_end_byte || range.end_byte < range.start_byte { return Err(IncludedRangesError(i)); } prev_end_byte = range.end_byte; } Err(IncludedRangesError(0)) } } /// Get the ranges of text that the parser will include when parsing. #[doc(alias = "ts_parser_included_ranges")] #[must_use] pub fn included_ranges(&self) -> Vec { let mut count = 0u32; unsafe { let ptr = ffi::ts_parser_included_ranges(self.0.as_ptr(), core::ptr::addr_of_mut!(count)); let ranges = slice::from_raw_parts(ptr, count as usize); let result = ranges.iter().copied().map(Into::into).collect(); result } } /// Get the parser's current cancellation flag pointer. /// /// # Safety /// /// It uses FFI #[doc(alias = "ts_parser_cancellation_flag")] #[must_use] pub unsafe fn cancellation_flag(&self) -> Option<&AtomicUsize> { ffi::ts_parser_cancellation_flag(self.0.as_ptr()) .cast::() .as_ref() } /// Set the parser's current cancellation flag pointer. /// /// If a pointer is assigned, then the parser will periodically read from /// this pointer during parsing. If it reads a non-zero value, it will halt /// early, returning `None`. See [`parse`](Parser::parse) for more /// information. 
/// /// # Safety /// /// It uses FFI #[doc(alias = "ts_parser_set_cancellation_flag")] pub unsafe fn set_cancellation_flag(&mut self, flag: Option<&AtomicUsize>) { if let Some(flag) = flag { ffi::ts_parser_set_cancellation_flag( self.0.as_ptr(), (flag as *const AtomicUsize).cast::(), ); } else { ffi::ts_parser_set_cancellation_flag(self.0.as_ptr(), ptr::null()); } } } impl Drop for Parser { fn drop(&mut self) { self.stop_printing_dot_graphs(); self.set_logger(None); unsafe { ffi::ts_parser_delete(self.0.as_ptr()) } } } impl Tree { /// Get the root node of the syntax tree. #[doc(alias = "ts_tree_root_node")] #[must_use] pub fn root_node(&self) -> Node { Node::new(unsafe { ffi::ts_tree_root_node(self.0.as_ptr()) }).unwrap() } /// Get the root node of the syntax tree, but with its position shifted /// forward by the given offset. #[doc(alias = "ts_tree_root_node_with_offset")] #[must_use] pub fn root_node_with_offset(&self, offset_bytes: usize, offset_extent: Point) -> Node { Node::new(unsafe { ffi::ts_tree_root_node_with_offset( self.0.as_ptr(), offset_bytes as u32, offset_extent.into(), ) }) .unwrap() } /// Get the language that was used to parse the syntax tree. #[doc(alias = "ts_tree_language")] #[must_use] pub fn language(&self) -> LanguageRef { LanguageRef( unsafe { ffi::ts_tree_language(self.0.as_ptr()) }, PhantomData, ) } /// Edit the syntax tree to keep it in sync with source code that has been /// edited. /// /// You must describe the edit both in terms of byte offsets and in terms of /// row/column coordinates. #[doc(alias = "ts_tree_edit")] pub fn edit(&mut self, edit: &InputEdit) { let edit = edit.into(); unsafe { ffi::ts_tree_edit(self.0.as_ptr(), &edit) }; } /// Create a new [`TreeCursor`] starting from the root of the tree. 
#[must_use] pub fn walk(&self) -> TreeCursor { self.root_node().walk() } /// Compare this old edited syntax tree to a new syntax tree representing /// the same document, returning a sequence of ranges whose syntactic /// structure has changed. /// /// For this to work correctly, this syntax tree must have been edited such /// that its ranges match up to the new tree. Generally, you'll want to /// call this method right after calling one of the [`Parser::parse`] /// functions. Call it on the old tree that was passed to parse, and /// pass the new tree that was returned from `parse`. #[doc(alias = "ts_tree_get_changed_ranges")] #[must_use] pub fn changed_ranges(&self, other: &Self) -> impl ExactSizeIterator { let mut count = 0u32; unsafe { let ptr = ffi::ts_tree_get_changed_ranges( self.0.as_ptr(), other.0.as_ptr(), core::ptr::addr_of_mut!(count), ); util::CBufferIter::new(ptr, count as usize).map(Into::into) } } /// Get the included ranges that were used to parse the syntax tree. #[doc(alias = "ts_tree_included_ranges")] #[must_use] pub fn included_ranges(&self) -> Vec { let mut count = 0u32; unsafe { let ptr = ffi::ts_tree_included_ranges(self.0.as_ptr(), core::ptr::addr_of_mut!(count)); let ranges = slice::from_raw_parts(ptr, count as usize); let result = ranges.iter().copied().map(Into::into).collect(); ffi::ts_free(ptr.cast::()); result } } #[cfg(feature = "capi")] /// Print a graph of the tree to the given file descriptor. /// The graph is formatted in the DOT language. You may want to pipe this /// graph directly to a `dot(1)` process in order to generate SVG /// output. 
#[doc(alias = "ts_tree_print_dot_graph")] #[cfg(not(target_os = "wasi"))] #[cfg(feature = "std")] pub fn print_dot_graph( &self, #[cfg(unix)] file: &impl AsRawFd, #[cfg(windows)] file: &impl AsRawHandle, ) { #[cfg(unix)] { let fd = file.as_raw_fd(); unsafe { ffi::ts_tree_print_dot_graph(self.0.as_ptr(), fd) } } #[cfg(windows)] { let handle = file.as_raw_handle(); unsafe { ffi::ts_tree_print_dot_graph(self.0.as_ptr(), handle as i32) } } } } impl fmt::Debug for Tree { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{{Tree {:?}}}", self.root_node()) } } impl Drop for Tree { fn drop(&mut self) { unsafe { ffi::ts_tree_delete(self.0.as_ptr()) } } } impl Clone for Tree { fn clone(&self) -> Self { unsafe { Self(NonNull::new_unchecked(ffi::ts_tree_copy(self.0.as_ptr()))) } } } impl<'tree> Node<'tree> { fn new(node: ffi::TSNode) -> Option { (!node.id.is_null()).then_some(Node(node, PhantomData)) } /// Get a numeric id for this node that is unique. /// /// Within a given syntax tree, no two nodes have the same id. However, if /// a new tree is created based on an older tree, and a node from the old /// tree is reused in the process, then that node will have the same id in /// both trees. #[must_use] pub fn id(&self) -> usize { self.0.id as usize } /// Get this node's type as a numerical id. #[doc(alias = "ts_node_symbol")] #[must_use] pub fn kind_id(&self) -> u16 { unsafe { ffi::ts_node_symbol(self.0) } } /// Get the node's type as a numerical id as it appears in the grammar /// ignoring aliases. #[doc(alias = "ts_node_grammar_symbol")] #[must_use] pub fn grammar_id(&self) -> u16 { unsafe { ffi::ts_node_grammar_symbol(self.0) } } /// Get this node's type as a string. #[doc(alias = "ts_node_type")] #[must_use] pub fn kind(&self) -> &'static str { unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) } .to_str() .unwrap() } /// Get this node's symbol name as it appears in the grammar ignoring /// aliases as a string. 
#[doc(alias = "ts_node_grammar_type")] #[must_use] pub fn grammar_name(&self) -> &'static str { unsafe { CStr::from_ptr(ffi::ts_node_grammar_type(self.0)) } .to_str() .unwrap() } /// Get the [`Language`] that was used to parse this node's syntax tree. #[doc(alias = "ts_node_language")] #[must_use] pub fn language(&self) -> LanguageRef { LanguageRef(unsafe { ffi::ts_node_language(self.0) }, PhantomData) } /// Check if this node is *named*. /// /// Named nodes correspond to named rules in the grammar, whereas /// *anonymous* nodes correspond to string literals in the grammar. #[doc(alias = "ts_node_is_named")] #[must_use] pub fn is_named(&self) -> bool { unsafe { ffi::ts_node_is_named(self.0) } } /// Check if this node is *extra*. /// /// Extra nodes represent things like comments, which are not required the /// grammar, but can appear anywhere. #[doc(alias = "ts_node_is_extra")] #[must_use] pub fn is_extra(&self) -> bool { unsafe { ffi::ts_node_is_extra(self.0) } } /// Check if this node has been edited. #[doc(alias = "ts_node_has_changes")] #[must_use] pub fn has_changes(&self) -> bool { unsafe { ffi::ts_node_has_changes(self.0) } } /// Check if this node represents a syntax error or contains any syntax /// errors anywhere within it. #[doc(alias = "ts_node_has_error")] #[must_use] pub fn has_error(&self) -> bool { unsafe { ffi::ts_node_has_error(self.0) } } /// Check if this node represents a syntax error. /// /// Syntax errors represent parts of the code that could not be incorporated /// into a valid syntax tree. #[doc(alias = "ts_node_is_error")] #[must_use] pub fn is_error(&self) -> bool { unsafe { ffi::ts_node_is_error(self.0) } } /// Get this node's parse state. #[doc(alias = "ts_node_parse_state")] #[must_use] pub fn parse_state(&self) -> u16 { unsafe { ffi::ts_node_parse_state(self.0) } } /// Get the parse state after this node. 
#[doc(alias = "ts_node_next_parse_state")]
#[must_use]
pub fn next_parse_state(&self) -> u16 {
    unsafe { ffi::ts_node_next_parse_state(self.0) }
}

/// Check if this node is *missing*.
///
/// Missing nodes are inserted by the parser in order to recover from
/// certain kinds of syntax errors.
#[doc(alias = "ts_node_is_missing")]
#[must_use]
pub fn is_missing(&self) -> bool {
    unsafe { ffi::ts_node_is_missing(self.0) }
}

/// Get the byte offset where this node starts.
#[doc(alias = "ts_node_start_byte")]
#[must_use]
pub fn start_byte(&self) -> usize {
    unsafe { ffi::ts_node_start_byte(self.0) as usize }
}

/// Get the byte offset where this node ends.
#[doc(alias = "ts_node_end_byte")]
#[must_use]
pub fn end_byte(&self) -> usize {
    unsafe { ffi::ts_node_end_byte(self.0) as usize }
}

/// Get the byte range of source code that this node represents.
#[must_use]
pub fn byte_range(&self) -> core::ops::Range<usize> {
    self.start_byte()..self.end_byte()
}

/// Get the range of source code that this node represents, both in terms of
/// raw bytes and of row/column coordinates.
#[must_use]
pub fn range(&self) -> Range {
    Range {
        start_byte: self.start_byte(),
        end_byte: self.end_byte(),
        start_point: self.start_position(),
        end_point: self.end_position(),
    }
}

/// Get this node's start position in terms of rows and columns.
#[doc(alias = "ts_node_start_point")]
#[must_use]
pub fn start_position(&self) -> Point {
    let result = unsafe { ffi::ts_node_start_point(self.0) };
    result.into()
}

/// Get this node's end position in terms of rows and columns.
#[doc(alias = "ts_node_end_point")]
#[must_use]
pub fn end_position(&self) -> Point {
    let result = unsafe { ffi::ts_node_end_point(self.0) };
    result.into()
}

/// Get the node's child at the given index, where zero represents the first
/// child.
///
/// This method is fairly fast, but its cost is technically log(i), so if
/// you might be iterating over a long list of children, you should use
/// [`Node::children`] instead.
#[doc(alias = "ts_node_child")]
#[must_use]
pub fn child(&self, i: usize) -> Option<Self> {
    Self::new(unsafe { ffi::ts_node_child(self.0, i as u32) })
}

/// Get this node's number of children.
#[doc(alias = "ts_node_child_count")]
#[must_use]
pub fn child_count(&self) -> usize {
    unsafe { ffi::ts_node_child_count(self.0) as usize }
}

/// Get this node's *named* child at the given index.
///
/// See also [`Node::is_named`].
/// This method is fairly fast, but its cost is technically log(i), so if
/// you might be iterating over a long list of children, you should use
/// [`Node::named_children`] instead.
#[doc(alias = "ts_node_named_child")]
#[must_use]
pub fn named_child(&self, i: usize) -> Option<Self> {
    Self::new(unsafe { ffi::ts_node_named_child(self.0, i as u32) })
}

/// Get this node's number of *named* children.
///
/// See also [`Node::is_named`].
#[doc(alias = "ts_node_named_child_count")]
#[must_use]
pub fn named_child_count(&self) -> usize {
    unsafe { ffi::ts_node_named_child_count(self.0) as usize }
}

/// Get the first child with the given field name.
///
/// If multiple children may have the same field name, access them using
/// [`children_by_field_name`](Node::children_by_field_name)
#[doc(alias = "ts_node_child_by_field_name")]
#[must_use]
pub fn child_by_field_name(&self, field_name: impl AsRef<[u8]>) -> Option<Self> {
    let field_name = field_name.as_ref();
    Self::new(unsafe {
        ffi::ts_node_child_by_field_name(
            self.0,
            field_name.as_ptr().cast::<c_char>(),
            field_name.len() as u32,
        )
    })
}

/// Get this node's child with the given numerical field id.
///
/// See also [`child_by_field_name`](Node::child_by_field_name). You can
/// convert a field name to an id using [`Language::field_id_for_name`].
#[doc(alias = "ts_node_child_by_field_id")]
#[must_use]
pub fn child_by_field_id(&self, field_id: u16) -> Option<Self> {
    Self::new(unsafe { ffi::ts_node_child_by_field_id(self.0, field_id) })
}

/// Get the field name of this node's child at the given index.
#[doc(alias = "ts_node_field_name_for_child")]
#[must_use]
pub fn field_name_for_child(&self, child_index: u32) -> Option<&'static str> {
    unsafe {
        let ptr = ffi::ts_node_field_name_for_child(self.0, child_index);
        (!ptr.is_null()).then(|| CStr::from_ptr(ptr).to_str().unwrap())
    }
}

/// Get the field name of this node's named child at the given index.
pub fn field_name_for_named_child(&self, named_child_index: u32) -> Option<&'static str> {
    unsafe {
        let ptr = ffi::ts_node_field_name_for_named_child(self.0, named_child_index);
        (!ptr.is_null()).then(|| CStr::from_ptr(ptr).to_str().unwrap())
    }
}

/// Iterate over this node's children.
///
/// A [`TreeCursor`] is used to retrieve the children efficiently. Obtain
/// a [`TreeCursor`] by calling [`Tree::walk`] or [`Node::walk`]. To avoid
/// unnecessary allocations, you should reuse the same cursor for
/// subsequent calls to this method.
///
/// If you're walking the tree recursively, you may want to use the
/// [`TreeCursor`] APIs directly instead.
pub fn children<'cursor>(
    &self,
    cursor: &'cursor mut TreeCursor<'tree>,
) -> impl ExactSizeIterator<Item = Node<'tree>> + 'cursor {
    cursor.reset(*self);
    cursor.goto_first_child();
    (0..self.child_count()).map(move |_| {
        let result = cursor.node();
        cursor.goto_next_sibling();
        result
    })
}

/// Iterate over this node's named children.
///
/// See also [`Node::children`].
pub fn named_children<'cursor>(
    &self,
    cursor: &'cursor mut TreeCursor<'tree>,
) -> impl ExactSizeIterator<Item = Node<'tree>> + 'cursor {
    cursor.reset(*self);
    cursor.goto_first_child();
    (0..self.named_child_count()).map(move |_| {
        // Skip anonymous siblings until the next named node.
        while !cursor.node().is_named() {
            if !cursor.goto_next_sibling() {
                break;
            }
        }
        let result = cursor.node();
        cursor.goto_next_sibling();
        result
    })
}

/// Iterate over this node's children with a given field name.
///
/// See also [`Node::children`].
pub fn children_by_field_name<'cursor>( &self, field_name: &str, cursor: &'cursor mut TreeCursor<'tree>, ) -> impl Iterator> + 'cursor { let field_id = self.language().field_id_for_name(field_name); let mut done = field_id.is_none(); if !done { cursor.reset(*self); cursor.goto_first_child(); } iter::from_fn(move || { if !done { while cursor.field_id() != field_id { if !cursor.goto_next_sibling() { return None; } } let result = cursor.node(); if !cursor.goto_next_sibling() { done = true; } return Some(result); } None }) } /// Iterate over this node's children with a given field id. /// /// See also [`Node::children_by_field_name`]. pub fn children_by_field_id<'cursor>( &self, field_id: FieldId, cursor: &'cursor mut TreeCursor<'tree>, ) -> impl Iterator> + 'cursor { cursor.reset(*self); cursor.goto_first_child(); let mut done = false; iter::from_fn(move || { if !done { while cursor.field_id() != Some(field_id) { if !cursor.goto_next_sibling() { return None; } } let result = cursor.node(); if !cursor.goto_next_sibling() { done = true; } return Some(result); } None }) } /// Get this node's immediate parent. /// Prefer [`child_containing_descendant`](Node::child_containing_descendant) /// for iterating over this node's ancestors. #[doc(alias = "ts_node_parent")] #[must_use] pub fn parent(&self) -> Option { Self::new(unsafe { ffi::ts_node_parent(self.0) }) } /// Get this node's child containing `descendant`. This will not return /// the descendant if it is a direct child of `self`, for that use /// [`Node::child_contains_descendant`]. #[doc(alias = "ts_node_child_containing_descendant")] #[must_use] #[deprecated(since = "0.24.0", note = "Prefer child_with_descendant instead")] pub fn child_containing_descendant(&self, descendant: Self) -> Option { Self::new(unsafe { ffi::ts_node_child_containing_descendant(self.0, descendant.0) }) } /// Get the node that contains `descendant`. 
///
/// Note that this can return `descendant` itself, unlike the deprecated function
/// [`Node::child_containing_descendant`].
#[doc(alias = "ts_node_child_with_descendant")]
#[must_use]
pub fn child_with_descendant(&self, descendant: Self) -> Option<Self> {
    Self::new(unsafe { ffi::ts_node_child_with_descendant(self.0, descendant.0) })
}

/// Get this node's next sibling.
#[doc(alias = "ts_node_next_sibling")]
#[must_use]
pub fn next_sibling(&self) -> Option<Self> {
    Self::new(unsafe { ffi::ts_node_next_sibling(self.0) })
}

/// Get this node's previous sibling.
#[doc(alias = "ts_node_prev_sibling")]
#[must_use]
pub fn prev_sibling(&self) -> Option<Self> {
    Self::new(unsafe { ffi::ts_node_prev_sibling(self.0) })
}

/// Get this node's next named sibling.
#[doc(alias = "ts_node_next_named_sibling")]
#[must_use]
pub fn next_named_sibling(&self) -> Option<Self> {
    Self::new(unsafe { ffi::ts_node_next_named_sibling(self.0) })
}

/// Get this node's previous named sibling.
#[doc(alias = "ts_node_prev_named_sibling")]
#[must_use]
pub fn prev_named_sibling(&self) -> Option<Self> {
    Self::new(unsafe { ffi::ts_node_prev_named_sibling(self.0) })
}

/// Get the node's first child that extends beyond the given byte offset.
#[doc(alias = "ts_node_first_child_for_byte")]
#[must_use]
pub fn first_child_for_byte(&self, byte: usize) -> Option<Self> {
    Self::new(unsafe { ffi::ts_node_first_child_for_byte(self.0, byte as u32) })
}

/// Get the node's first named child that extends beyond the given byte offset.
#[doc(alias = "ts_node_first_named_child_for_byte")]
#[must_use]
pub fn first_named_child_for_byte(&self, byte: usize) -> Option<Self> {
    Self::new(unsafe { ffi::ts_node_first_named_child_for_byte(self.0, byte as u32) })
}

/// Get the node's number of descendants, including one for the node itself.
#[doc(alias = "ts_node_descendant_count")]
#[must_use]
pub fn descendant_count(&self) -> usize {
    unsafe { ffi::ts_node_descendant_count(self.0) as usize }
}

/// Get the smallest node within this node that spans the given range.
#[doc(alias = "ts_node_descendant_for_byte_range")]
#[must_use]
pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Self> {
    Self::new(unsafe {
        ffi::ts_node_descendant_for_byte_range(self.0, start as u32, end as u32)
    })
}

/// Get the smallest named node within this node that spans the given range.
#[doc(alias = "ts_node_named_descendant_for_byte_range")]
#[must_use]
pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Self> {
    Self::new(unsafe {
        ffi::ts_node_named_descendant_for_byte_range(self.0, start as u32, end as u32)
    })
}

/// Get the smallest node within this node that spans the given range.
#[doc(alias = "ts_node_descendant_for_point_range")]
#[must_use]
pub fn descendant_for_point_range(&self, start: Point, end: Point) -> Option<Self> {
    Self::new(unsafe {
        ffi::ts_node_descendant_for_point_range(self.0, start.into(), end.into())
    })
}

/// Get the smallest named node within this node that spans the given range.
#[doc(alias = "ts_node_named_descendant_for_point_range")]
#[must_use]
pub fn named_descendant_for_point_range(&self, start: Point, end: Point) -> Option<Self> {
    Self::new(unsafe {
        ffi::ts_node_named_descendant_for_point_range(self.0, start.into(), end.into())
    })
}

#[doc(alias = "ts_node_string")]
#[must_use]
pub fn to_sexp(&self) -> String {
    // The C library allocates the string; copy it and free the original.
    let c_string = unsafe { ffi::ts_node_string(self.0) };
    let result = unsafe { CStr::from_ptr(c_string) }
        .to_str()
        .unwrap()
        .to_string();
    unsafe { ffi::ts_free(c_string.cast::<c_void>()) };
    result
}

pub fn utf8_text<'a>(&self, source: &'a [u8]) -> Result<&'a str, str::Utf8Error> {
    str::from_utf8(&source[self.start_byte()..self.end_byte()])
}

#[must_use]
pub fn utf16_text<'a>(&self, source: &'a [u16]) -> &'a [u16] {
    &source[self.start_byte()..self.end_byte()]
}

/// Create a new [`TreeCursor`] starting from this node.
#[doc(alias = "ts_tree_cursor_new")] #[must_use] pub fn walk(&self) -> TreeCursor<'tree> { TreeCursor(unsafe { ffi::ts_tree_cursor_new(self.0) }, PhantomData) } /// Edit this node to keep it in-sync with source code that has been edited. /// /// This function is only rarely needed. When you edit a syntax tree with /// the [`Tree::edit`] method, all of the nodes that you retrieve from /// the tree afterward will already reflect the edit. You only need to /// use [`Node::edit`] when you have a specific [`Node`] instance that /// you want to keep and continue to use after an edit. #[doc(alias = "ts_node_edit")] pub fn edit(&mut self, edit: &InputEdit) { let edit = edit.into(); unsafe { ffi::ts_node_edit(core::ptr::addr_of_mut!(self.0), &edit) } } } impl PartialEq for Node<'_> { fn eq(&self, other: &Self) -> bool { self.0.id == other.0.id } } impl Eq for Node<'_> {} impl hash::Hash for Node<'_> { fn hash(&self, state: &mut H) { self.0.id.hash(state); self.0.context[0].hash(state); self.0.context[1].hash(state); self.0.context[2].hash(state); self.0.context[3].hash(state); } } impl fmt::Debug for Node<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, "{{Node {} {} - {}}}", self.kind(), self.start_position(), self.end_position() ) } } impl fmt::Display for Node<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let sexp = self.to_sexp(); if sexp.is_empty() { write!(f, "") } else if !f.alternate() { write!(f, "{sexp}") } else { write!(f, "{}", format_sexp(&sexp, f.width().unwrap_or(0))) } } } impl<'cursor> TreeCursor<'cursor> { /// Get the tree cursor's current [`Node`]. #[doc(alias = "ts_tree_cursor_current_node")] #[must_use] pub fn node(&self) -> Node<'cursor> { Node( unsafe { ffi::ts_tree_cursor_current_node(&self.0) }, PhantomData, ) } /// Get the numerical field id of this tree cursor's current node. /// /// See also [`field_name`](TreeCursor::field_name). 
#[doc(alias = "ts_tree_cursor_current_field_id")]
#[must_use]
pub fn field_id(&self) -> Option<FieldId> {
    // The C API returns 0 when the node has no field; map that to `None`.
    let id = unsafe { ffi::ts_tree_cursor_current_field_id(&self.0) };
    FieldId::new(id)
}

/// Get the field name of this tree cursor's current node.
#[doc(alias = "ts_tree_cursor_current_field_name")]
#[must_use]
pub fn field_name(&self) -> Option<&'static str> {
    unsafe {
        let ptr = ffi::ts_tree_cursor_current_field_name(&self.0);
        (!ptr.is_null()).then(|| CStr::from_ptr(ptr).to_str().unwrap())
    }
}

/// Get the depth of the cursor's current node relative to the original
/// node that the cursor was constructed with.
#[doc(alias = "ts_tree_cursor_current_depth")]
#[must_use]
pub fn depth(&self) -> u32 {
    unsafe { ffi::ts_tree_cursor_current_depth(&self.0) }
}

/// Get the index of the cursor's current node out of all of the
/// descendants of the original node that the cursor was constructed with
#[doc(alias = "ts_tree_cursor_current_descendant_index")]
#[must_use]
pub fn descendant_index(&self) -> usize {
    unsafe { ffi::ts_tree_cursor_current_descendant_index(&self.0) as usize }
}

/// Move this cursor to the first child of its current node.
///
/// This returns `true` if the cursor successfully moved, and returns
/// `false` if there were no children.
#[doc(alias = "ts_tree_cursor_goto_first_child")]
pub fn goto_first_child(&mut self) -> bool {
    unsafe { ffi::ts_tree_cursor_goto_first_child(&mut self.0) }
}

/// Move this cursor to the last child of its current node.
///
/// This returns `true` if the cursor successfully moved, and returns
/// `false` if there were no children.
///
/// Note that this function may be slower than
/// [`goto_first_child`](TreeCursor::goto_first_child) because it needs to
/// iterate through all the children to compute the child's position.
#[doc(alias = "ts_tree_cursor_goto_last_child")]
pub fn goto_last_child(&mut self) -> bool {
    unsafe { ffi::ts_tree_cursor_goto_last_child(&mut self.0) }
}

/// Move this cursor to the parent of its current node.
///
/// This returns `true` if the cursor successfully moved, and returns
/// `false` if there was no parent node (the cursor was already on the
/// root node).
#[doc(alias = "ts_tree_cursor_goto_parent")]
pub fn goto_parent(&mut self) -> bool {
    unsafe { ffi::ts_tree_cursor_goto_parent(&mut self.0) }
}

/// Move this cursor to the next sibling of its current node.
///
/// This returns `true` if the cursor successfully moved, and returns
/// `false` if there was no next sibling node.
#[doc(alias = "ts_tree_cursor_goto_next_sibling")]
pub fn goto_next_sibling(&mut self) -> bool {
    unsafe { ffi::ts_tree_cursor_goto_next_sibling(&mut self.0) }
}

/// Move the cursor to the node that is the nth descendant of
/// the original node that the cursor was constructed with, where
/// zero represents the original node itself.
#[doc(alias = "ts_tree_cursor_goto_descendant")]
pub fn goto_descendant(&mut self, descendant_index: usize) {
    unsafe { ffi::ts_tree_cursor_goto_descendant(&mut self.0, descendant_index as u32) }
}

/// Move this cursor to the previous sibling of its current node.
///
/// This returns `true` if the cursor successfully moved, and returns
/// `false` if there was no previous sibling node.
///
/// Note that this function may be slower than
/// [`goto_next_sibling`](TreeCursor::goto_next_sibling) due to how node
/// positions are stored. In the worst case, this will need to iterate
/// through all the children up to the previous sibling node to recalculate
/// its position.
#[doc(alias = "ts_tree_cursor_goto_previous_sibling")]
pub fn goto_previous_sibling(&mut self) -> bool {
    unsafe { ffi::ts_tree_cursor_goto_previous_sibling(&mut self.0) }
}

/// Move this cursor to the first child of its current node that extends
/// beyond the given byte offset.
///
/// This returns the index of the child node if one was found, and returns
/// `None` if no such child was found.
#[doc(alias = "ts_tree_cursor_goto_first_child_for_byte")] pub fn goto_first_child_for_byte(&mut self, index: usize) -> Option { let result = unsafe { ffi::ts_tree_cursor_goto_first_child_for_byte(&mut self.0, index as u32) }; (result >= 0).then_some(result as usize) } /// Move this cursor to the first child of its current node that extends /// beyond the given byte offset. /// /// This returns the index of the child node if one was found, and returns /// `None` if no such child was found. #[doc(alias = "ts_tree_cursor_goto_first_child_for_point")] pub fn goto_first_child_for_point(&mut self, point: Point) -> Option { let result = unsafe { ffi::ts_tree_cursor_goto_first_child_for_point(&mut self.0, point.into()) }; (result >= 0).then_some(result as usize) } /// Re-initialize this tree cursor to start at the original node that the /// cursor was constructed with. #[doc(alias = "ts_tree_cursor_reset")] pub fn reset(&mut self, node: Node<'cursor>) { unsafe { ffi::ts_tree_cursor_reset(&mut self.0, node.0) }; } /// Re-initialize a tree cursor to the same position as another cursor. /// /// Unlike [`reset`](TreeCursor::reset), this will not lose parent /// information and allows reusing already created cursors. #[doc(alias = "ts_tree_cursor_reset_to")] pub fn reset_to(&mut self, cursor: &Self) { unsafe { ffi::ts_tree_cursor_reset_to(&mut self.0, &cursor.0) }; } } impl Clone for TreeCursor<'_> { fn clone(&self) -> Self { TreeCursor(unsafe { ffi::ts_tree_cursor_copy(&self.0) }, PhantomData) } } impl Drop for TreeCursor<'_> { fn drop(&mut self) { unsafe { ffi::ts_tree_cursor_delete(&mut self.0) } } } impl LookaheadIterator { /// Get the current language of the lookahead iterator. #[doc(alias = "ts_lookahead_iterator_language")] #[must_use] pub fn language(&self) -> LanguageRef<'_> { LanguageRef( unsafe { ffi::ts_lookahead_iterator_language(self.0.as_ptr()) }, PhantomData, ) } /// Get the current symbol of the lookahead iterator. 
#[doc(alias = "ts_lookahead_iterator_current_symbol")]
#[must_use]
pub fn current_symbol(&self) -> u16 {
    unsafe { ffi::ts_lookahead_iterator_current_symbol(self.0.as_ptr()) }
}

/// Get the current symbol name of the lookahead iterator.
#[doc(alias = "ts_lookahead_iterator_current_symbol_name")]
#[must_use]
pub fn current_symbol_name(&self) -> &'static str {
    unsafe {
        CStr::from_ptr(ffi::ts_lookahead_iterator_current_symbol_name(
            self.0.as_ptr(),
        ))
        .to_str()
        .unwrap()
    }
}

/// Reset the lookahead iterator.
///
/// This returns `true` if the language was set successfully and `false`
/// otherwise.
#[doc(alias = "ts_lookahead_iterator_reset")]
pub fn reset(&mut self, language: &Language, state: u16) -> bool {
    unsafe { ffi::ts_lookahead_iterator_reset(self.0.as_ptr(), language.0, state) }
}

/// Reset the lookahead iterator to another state.
///
/// This returns `true` if the iterator was reset to the given state and
/// `false` otherwise.
#[doc(alias = "ts_lookahead_iterator_reset_state")]
pub fn reset_state(&mut self, state: u16) -> bool {
    unsafe { ffi::ts_lookahead_iterator_reset_state(self.0.as_ptr(), state) }
}

/// Iterate symbol names.
pub fn iter_names(&mut self) -> impl Iterator<Item = &'static str> + '_ {
    LookaheadNamesIterator(self)
}
}

impl Iterator for LookaheadNamesIterator<'_> {
    type Item = &'static str;

    #[doc(alias = "ts_lookahead_iterator_next")]
    fn next(&mut self) -> Option<Self::Item> {
        unsafe { ffi::ts_lookahead_iterator_next(self.0 .0.as_ptr()) }
            .then(|| self.0.current_symbol_name())
    }
}

impl Iterator for LookaheadIterator {
    type Item = u16;

    #[doc(alias = "ts_lookahead_iterator_next")]
    fn next(&mut self) -> Option<Self::Item> {
        // the first symbol is always `0` so we can safely skip it
        unsafe { ffi::ts_lookahead_iterator_next(self.0.as_ptr()) }.then(|| self.current_symbol())
    }
}

impl Drop for LookaheadIterator {
    #[doc(alias = "ts_lookahead_iterator_delete")]
    fn drop(&mut self) {
        unsafe { ffi::ts_lookahead_iterator_delete(self.0.as_ptr()) }
    }
}

impl Query {
    /// Create a new query from a string containing one or more S-expression
    /// patterns.
    ///
    /// The query is associated with a particular language, and can only be run
    /// on syntax nodes parsed with that language. References to Queries can be
    /// shared between multiple threads.
    pub fn new(language: &Language, source: &str) -> Result<Self, QueryError> {
        let mut error_offset = 0u32;
        let mut error_type: ffi::TSQueryError = 0;
        let bytes = source.as_bytes();

        // Compile the query.
        let ptr = unsafe {
            ffi::ts_query_new(
                language.0,
                bytes.as_ptr().cast::<c_char>(),
                bytes.len() as u32,
                core::ptr::addr_of_mut!(error_offset),
                core::ptr::addr_of_mut!(error_type),
            )
        };

        // On failure, build an error based on the error code and offset.
if ptr.is_null() { if error_type == ffi::TSQueryErrorLanguage { return Err(QueryError { row: 0, column: 0, offset: 0, message: LanguageError { version: language.version(), } .to_string(), kind: QueryErrorKind::Language, }); } let offset = error_offset as usize; let mut line_start = 0; let mut row = 0; let mut line_containing_error = None; for line in source.lines() { let line_end = line_start + line.len() + 1; if line_end > offset { line_containing_error = Some(line); break; } line_start = line_end; row += 1; } let column = offset - line_start; let kind; let message; match error_type { // Error types that report names ffi::TSQueryErrorNodeType | ffi::TSQueryErrorField | ffi::TSQueryErrorCapture => { let suffix = source.split_at(offset).1; let end_offset = suffix .find(|c| !char::is_alphanumeric(c) && c != '_' && c != '-') .unwrap_or(suffix.len()); message = suffix.split_at(end_offset).0.to_string(); kind = match error_type { ffi::TSQueryErrorNodeType => QueryErrorKind::NodeType, ffi::TSQueryErrorField => QueryErrorKind::Field, ffi::TSQueryErrorCapture => QueryErrorKind::Capture, _ => unreachable!(), }; } // Error types that report positions _ => { message = line_containing_error.map_or_else( || "Unexpected EOF".to_string(), |line| line.to_string() + "\n" + &" ".repeat(offset - line_start) + "^", ); kind = match error_type { ffi::TSQueryErrorStructure => QueryErrorKind::Structure, _ => QueryErrorKind::Syntax, }; } }; return Err(QueryError { row, column, offset, message, kind, }); } unsafe { Self::from_raw_parts(ptr, source) } } #[doc(hidden)] unsafe fn from_raw_parts(ptr: *mut ffi::TSQuery, source: &str) -> Result { let ptr = { struct TSQueryDrop(*mut ffi::TSQuery); impl Drop for TSQueryDrop { fn drop(&mut self) { unsafe { ffi::ts_query_delete(self.0) } } } TSQueryDrop(ptr) }; let string_count = unsafe { ffi::ts_query_string_count(ptr.0) }; let capture_count = unsafe { ffi::ts_query_capture_count(ptr.0) }; let pattern_count = unsafe { 
ffi::ts_query_pattern_count(ptr.0) as usize };
let mut capture_names = Vec::with_capacity(capture_count as usize);
let mut capture_quantifiers_vec = Vec::with_capacity(pattern_count as usize);
let mut text_predicates_vec = Vec::with_capacity(pattern_count);
let mut property_predicates_vec = Vec::with_capacity(pattern_count);
let mut property_settings_vec = Vec::with_capacity(pattern_count);
let mut general_predicates_vec = Vec::with_capacity(pattern_count);

// Build a vector of strings to store the capture names.
for i in 0..capture_count {
    unsafe {
        let mut length = 0u32;
        let name = ffi::ts_query_capture_name_for_id(ptr.0, i, core::ptr::addr_of_mut!(length))
            .cast::<u8>();
        let name = slice::from_raw_parts(name, length as usize);
        let name = str::from_utf8_unchecked(name);
        capture_names.push(name);
    }
}

// Build a vector to store capture quantifiers.
for i in 0..pattern_count {
    let mut capture_quantifiers = Vec::with_capacity(capture_count as usize);
    for j in 0..capture_count {
        unsafe {
            let quantifier = ffi::ts_query_capture_quantifier_for_id(ptr.0, i as u32, j);
            capture_quantifiers.push(quantifier.into());
        }
    }
    capture_quantifiers_vec.push(capture_quantifiers.into());
}

// Build a vector of strings to represent literal values used in predicates.
let string_values = (0..string_count)
    .map(|i| unsafe {
        let mut length = 0u32;
        let value = ffi::ts_query_string_value_for_id(ptr.0, i, core::ptr::addr_of_mut!(length))
            .cast::<u8>();
        let value = slice::from_raw_parts(value, length as usize);
        let value = str::from_utf8_unchecked(value);
        value
    })
    .collect::<Vec<_>>();

// Build a vector of predicates for each pattern.
for i in 0..pattern_count {
    let predicate_steps = unsafe {
        let mut length = 0u32;
        let raw_predicates = ffi::ts_query_predicates_for_pattern(
            ptr.0,
            i as u32,
            core::ptr::addr_of_mut!(length),
        );
        (length > 0)
            .then(|| slice::from_raw_parts(raw_predicates, length as usize))
            .unwrap_or_default()
    };

    // Compute the pattern's row by counting newlines before its start byte,
    // so predicate errors can report a line number.
    let byte_offset = unsafe { ffi::ts_query_start_byte_for_pattern(ptr.0, i as u32) };
    let row = source
        .char_indices()
        .take_while(|(i, _)| *i < byte_offset as usize)
        .filter(|(_, c)| *c == '\n')
        .count();

    use ffi::TSQueryPredicateStepType as T;
    const TYPE_DONE: T = ffi::TSQueryPredicateStepTypeDone;
    const TYPE_CAPTURE: T = ffi::TSQueryPredicateStepTypeCapture;
    const TYPE_STRING: T = ffi::TSQueryPredicateStepTypeString;

    let mut text_predicates = Vec::new();
    let mut property_predicates = Vec::new();
    let mut property_settings = Vec::new();
    let mut general_predicates = Vec::new();
    // Predicate steps are delimited by `Done` markers; each slice between
    // markers is one predicate invocation.
    for p in predicate_steps.split(|s| s.type_ == TYPE_DONE) {
        if p.is_empty() {
            continue;
        }

        if p[0].type_ != TYPE_STRING {
            return Err(predicate_error(
                row,
                format!(
                    "Expected predicate to start with a function name. Got @{}.",
                    capture_names[p[0].value_id as usize],
                ),
            ));
        }

        // Build a predicate for each of the known predicate function names.
        let operator_name = string_values[p[0].value_id as usize];
        match operator_name {
            "eq?" | "not-eq?" | "any-eq?" | "any-not-eq?" => {
                if p.len() != 3 {
                    return Err(predicate_error(
                        row,
                        format!(
                            "Wrong number of arguments to #eq? predicate. Expected 2, got {}.",
                            p.len() - 1
                        ),
                    ));
                }
                if p[1].type_ != TYPE_CAPTURE {
                    return Err(predicate_error(row, format!(
                        "First argument to #eq? predicate must be a capture name. Got literal \"{}\".",
                        string_values[p[1].value_id as usize],
                    )));
                }

                let is_positive = operator_name == "eq?" || operator_name == "any-eq?";
                let match_all = match operator_name {
                    "eq?" | "not-eq?" => true,
                    "any-eq?" | "any-not-eq?"
=> false,
                    _ => unreachable!(),
                };
                text_predicates.push(if p[2].type_ == TYPE_CAPTURE {
                    TextPredicateCapture::EqCapture(
                        p[1].value_id,
                        p[2].value_id,
                        is_positive,
                        match_all,
                    )
                } else {
                    TextPredicateCapture::EqString(
                        p[1].value_id,
                        string_values[p[2].value_id as usize].to_string().into(),
                        is_positive,
                        match_all,
                    )
                });
            }

            "match?" | "not-match?" | "any-match?" | "any-not-match?" => {
                if p.len() != 3 {
                    return Err(predicate_error(row, format!(
                        "Wrong number of arguments to #match? predicate. Expected 2, got {}.",
                        p.len() - 1
                    )));
                }
                if p[1].type_ != TYPE_CAPTURE {
                    return Err(predicate_error(row, format!(
                        "First argument to #match? predicate must be a capture name. Got literal \"{}\".",
                        string_values[p[1].value_id as usize],
                    )));
                }
                if p[2].type_ == TYPE_CAPTURE {
                    return Err(predicate_error(row, format!(
                        "Second argument to #match? predicate must be a literal. Got capture @{}.",
                        capture_names[p[2].value_id as usize],
                    )));
                }

                let is_positive = operator_name == "match?" || operator_name == "any-match?";
                let match_all = match operator_name {
                    "match?" | "not-match?" => true,
                    "any-match?" | "any-not-match?" => false,
                    _ => unreachable!(),
                };
                let regex = &string_values[p[2].value_id as usize];
                text_predicates.push(TextPredicateCapture::MatchString(
                    p[1].value_id,
                    regex::bytes::Regex::new(regex).map_err(|_| {
                        predicate_error(row, format!("Invalid regex '{regex}'"))
                    })?,
                    is_positive,
                    match_all,
                ));
            }

            "set!" => property_settings.push(Self::parse_property(
                row,
                operator_name,
                &capture_names,
                &string_values,
                &p[1..],
            )?),

            "is?" | "is-not?" => property_predicates.push((
                Self::parse_property(
                    row,
                    operator_name,
                    &capture_names,
                    &string_values,
                    &p[1..],
                )?,
                operator_name == "is?",
            )),

            "any-of?" | "not-any-of?" => {
                if p.len() < 2 {
                    return Err(predicate_error(row, format!(
                        "Wrong number of arguments to #any-of? predicate. Expected at least 1, got {}.",
                        p.len() - 1
                    )));
                }
                if p[1].type_ != TYPE_CAPTURE {
                    return Err(predicate_error(row, format!(
                        "First argument to #any-of? predicate must be a capture name. Got literal \"{}\".",
                        string_values[p[1].value_id as usize],
                    )));
                }

                let is_positive = operator_name == "any-of?";
                let mut values = Vec::new();
                for arg in &p[2..] {
                    if arg.type_ == TYPE_CAPTURE {
                        return Err(predicate_error(row, format!(
                            "Arguments to #any-of? predicate must be literals. Got capture @{}.",
                            capture_names[arg.value_id as usize],
                        )));
                    }
                    values.push(string_values[arg.value_id as usize]);
                }
                text_predicates.push(TextPredicateCapture::AnyString(
                    p[1].value_id,
                    values
                        .iter()
                        .map(|x| (*x).to_string().into())
                        .collect::<Vec<_>>()
                        .into(),
                    is_positive,
                ));
            }

            // Unknown operators are kept for the caller to interpret.
            _ => general_predicates.push(QueryPredicate {
                operator: operator_name.to_string().into(),
                args: p[1..]
                    .iter()
                    .map(|a| {
                        if a.type_ == TYPE_CAPTURE {
                            QueryPredicateArg::Capture(a.value_id)
                        } else {
                            QueryPredicateArg::String(
                                string_values[a.value_id as usize].to_string().into(),
                            )
                        }
                    })
                    .collect(),
            }),
        }
    }

    text_predicates_vec.push(text_predicates.into());
    property_predicates_vec.push(property_predicates.into());
    property_settings_vec.push(property_settings.into());
    general_predicates_vec.push(general_predicates.into());
}

let result = Self {
    ptr: unsafe { NonNull::new_unchecked(ptr.0) },
    capture_names: capture_names.into(),
    capture_quantifiers: capture_quantifiers_vec.into(),
    text_predicates: text_predicates_vec.into(),
    property_predicates: property_predicates_vec.into(),
    property_settings: property_settings_vec.into(),
    general_predicates: general_predicates_vec.into(),
};

// Ownership of the raw query has moved into `result`; disarm the guard.
core::mem::forget(ptr);
Ok(result)
}

/// Get the byte offset where the given pattern starts in the query's
/// source.
#[doc(alias = "ts_query_start_byte_for_pattern")]
#[must_use]
pub fn start_byte_for_pattern(&self, pattern_index: usize) -> usize {
    assert!(
        pattern_index < self.text_predicates.len(),
        "Pattern index is {pattern_index} but the pattern count is {}",
        self.text_predicates.len(),
    );
    unsafe {
        ffi::ts_query_start_byte_for_pattern(self.ptr.as_ptr(), pattern_index as u32) as usize
    }
}

/// Get the byte offset where the given pattern ends in the query's
/// source.
#[doc(alias = "ts_query_end_byte_for_pattern")]
#[must_use]
pub fn end_byte_for_pattern(&self, pattern_index: usize) -> usize {
    assert!(
        pattern_index < self.text_predicates.len(),
        "Pattern index is {pattern_index} but the pattern count is {}",
        self.text_predicates.len(),
    );
    unsafe {
        ffi::ts_query_end_byte_for_pattern(self.ptr.as_ptr(), pattern_index as u32) as usize
    }
}

/// Get the number of patterns in the query.
#[doc(alias = "ts_query_pattern_count")]
#[must_use]
pub fn pattern_count(&self) -> usize {
    unsafe { ffi::ts_query_pattern_count(self.ptr.as_ptr()) as usize }
}

/// Get the names of the captures used in the query.
#[must_use]
pub const fn capture_names(&self) -> &[&str] {
    &self.capture_names
}

/// Get the quantifiers of the captures used in the query.
#[must_use]
pub const fn capture_quantifiers(&self, index: usize) -> &[CaptureQuantifier] {
    &self.capture_quantifiers[index]
}

/// Get the index for a given capture name.
#[must_use]
pub fn capture_index_for_name(&self, name: &str) -> Option<u32> {
    self.capture_names
        .iter()
        .position(|n| *n == name)
        .map(|ix| ix as u32)
}

/// Get the properties that are checked for the given pattern index.
///
/// This includes predicates with the operators `is?` and `is-not?`.
#[must_use]
pub const fn property_predicates(&self, index: usize) -> &[(QueryProperty, bool)] {
    &self.property_predicates[index]
}

/// Get the properties that are set for the given pattern index.
///
/// This includes predicates with the operator `set!`.
#[must_use] pub const fn property_settings(&self, index: usize) -> &[QueryProperty] { &self.property_settings[index] } /// Get the other user-defined predicates associated with the given index. /// /// This includes predicate with operators other than: /// * `match?` /// * `eq?` and `not-eq?` /// * `is?` and `is-not?` /// * `set!` #[must_use] pub const fn general_predicates(&self, index: usize) -> &[QueryPredicate] { &self.general_predicates[index] } /// Disable a certain capture within a query. /// /// This prevents the capture from being returned in matches, and also /// avoids any resource usage associated with recording the capture. #[doc(alias = "ts_query_disable_capture")] pub fn disable_capture(&mut self, name: &str) { unsafe { ffi::ts_query_disable_capture( self.ptr.as_ptr(), name.as_bytes().as_ptr().cast::(), name.len() as u32, ); } } /// Disable a certain pattern within a query. /// /// This prevents the pattern from matching, and also avoids any resource /// usage associated with the pattern. #[doc(alias = "ts_query_disable_pattern")] pub fn disable_pattern(&mut self, index: usize) { unsafe { ffi::ts_query_disable_pattern(self.ptr.as_ptr(), index as u32) } } /// Check if a given pattern within a query has a single root node. #[doc(alias = "ts_query_is_pattern_rooted")] #[must_use] pub fn is_pattern_rooted(&self, index: usize) -> bool { unsafe { ffi::ts_query_is_pattern_rooted(self.ptr.as_ptr(), index as u32) } } /// Check if a given pattern within a query has a single root node. #[doc(alias = "ts_query_is_pattern_non_local")] #[must_use] pub fn is_pattern_non_local(&self, index: usize) -> bool { unsafe { ffi::ts_query_is_pattern_non_local(self.ptr.as_ptr(), index as u32) } } /// Check if a given step in a query is 'definite'. /// /// A query step is 'definite' if its parent pattern will be guaranteed to /// match successfully once it reaches the step. 
#[doc(alias = "ts_query_is_pattern_guaranteed_at_step")]
#[must_use]
pub fn is_pattern_guaranteed_at_step(&self, byte_offset: usize) -> bool {
    unsafe { ffi::ts_query_is_pattern_guaranteed_at_step(self.ptr.as_ptr(), byte_offset as u32) }
}

/// Parse the argument list of a property-style predicate into a
/// `QueryProperty` (a key, an optional value, and an optional capture id).
// NOTE(review): the return type reads bare `Result` — its type parameters
// (presumably `Result<QueryProperty, QueryError>`) appear stripped in
// transit; confirm against the original source.
fn parse_property(
    row: usize,
    function_name: &str,
    capture_names: &[&str],
    string_values: &[&str],
    args: &[ffi::TSQueryPredicateStep],
) -> Result {
    // These predicates accept a key, an optional value, and an optional
    // capture — never fewer than 1 or more than 3 arguments.
    if args.is_empty() || args.len() > 3 {
        return Err(predicate_error(
            row,
            format!(
                "Wrong number of arguments to {function_name} predicate. Expected 1 to 3, got {}.",
                args.len(),
            ),
        ));
    }
    let mut capture_id = None;
    let mut key = None;
    let mut value = None;
    for arg in args {
        if arg.type_ == ffi::TSQueryPredicateStepTypeCapture {
            // At most one capture argument is allowed.
            if capture_id.is_some() {
                return Err(predicate_error(
                    row,
                    format!(
                        "Invalid arguments to {function_name} predicate. Unexpected second capture name @{}",
                        capture_names[arg.value_id as usize]
                    ),
                ));
            }
            capture_id = Some(arg.value_id as usize);
        } else if key.is_none() {
            // First string argument is the key ...
            key = Some(&string_values[arg.value_id as usize]);
        } else if value.is_none() {
            // ... the second string argument is the value.
            value = Some(string_values[arg.value_id as usize]);
        } else {
            return Err(predicate_error(
                row,
                format!(
                    "Invalid arguments to {function_name} predicate. Unexpected third argument @{}",
                    string_values[arg.value_id as usize]
                ),
            ));
        }
    }
    if let Some(key) = key {
        Ok(QueryProperty::new(key, value, capture_id))
    } else {
        Err(predicate_error(
            row,
            format!("Invalid arguments to {function_name} predicate. Missing key argument",),
        ))
    }
}
}

impl Default for QueryCursor {
    fn default() -> Self {
        Self::new()
    }
}

impl QueryCursor {
    /// Create a new cursor for executing a given query.
    ///
    /// The cursor stores the state that is needed to iteratively search for
    /// matches.
    #[doc(alias = "ts_query_cursor_new")]
    #[must_use]
    pub fn new() -> Self {
        Self {
            // SAFETY(review): assumes the C constructor never returns null
            // (unchecked here, as elsewhere in this file) — confirm against
            // the C library's allocation behavior.
            ptr: unsafe { NonNull::new_unchecked(ffi::ts_query_cursor_new()) },
        }
    }

    /// Return the maximum number of in-progress matches for this cursor.
#[doc(alias = "ts_query_cursor_match_limit")]
#[must_use]
pub fn match_limit(&self) -> u32 {
    // Read the current cap on concurrently tracked matches from the C cursor.
    let cursor = self.ptr.as_ptr();
    unsafe { ffi::ts_query_cursor_match_limit(cursor) }
}

/// Set the maximum number of in-progress matches for this cursor. The
/// limit must be > 0 and <= 65536.
#[doc(alias = "ts_query_cursor_set_match_limit")]
pub fn set_match_limit(&mut self, limit: u32) {
    // Forward the new cap straight to the underlying C cursor.
    let cursor = self.ptr.as_ptr();
    unsafe {
        ffi::ts_query_cursor_set_match_limit(cursor, limit);
    }
}

/// Set the maximum duration in microseconds that query execution should be allowed to
/// take before halting.
///
/// If query execution takes longer than this, it will halt early, returning None.
#[doc(alias = "ts_query_cursor_set_timeout_micros")]
pub fn set_timeout_micros(&mut self, timeout: u64) {
    let cursor = self.ptr.as_ptr();
    unsafe {
        ffi::ts_query_cursor_set_timeout_micros(cursor, timeout);
    }
}

/// Get the duration in microseconds that query execution is allowed to take.
///
/// This is set via [`set_timeout_micros`](QueryCursor::set_timeout_micros).
#[doc(alias = "ts_query_cursor_timeout_micros")]
#[must_use]
pub fn timeout_micros(&self) -> u64 {
    let cursor = self.ptr.as_ptr();
    unsafe { ffi::ts_query_cursor_timeout_micros(cursor) }
}

/// Check if, on its last execution, this cursor exceeded its maximum number
/// of in-progress matches.
#[doc(alias = "ts_query_cursor_did_exceed_match_limit")]
#[must_use]
pub fn did_exceed_match_limit(&self) -> bool {
    let cursor = self.ptr.as_ptr();
    unsafe { ffi::ts_query_cursor_did_exceed_match_limit(cursor) }
}

/// Iterate over all of the matches in the order that they were found.
///
/// Each match contains the index of the pattern that matched, and a list of
/// captures. Because multiple patterns can match the same set of nodes,
/// one match may contain captures that appear *before* some of the
/// captures from a previous match.
#[doc(alias = "ts_query_cursor_exec")]
// NOTE(review): generic parameters were garbled in this file (angle-bracketed
// segments stripped, leaving `T: TextProvider` and bare `ops::Range`);
// restored below to match tree-sitter's published Rust API.
pub fn matches<'query, 'cursor: 'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>>(
    &'cursor mut self,
    query: &'query Query,
    node: Node<'tree>,
    text_provider: T,
) -> QueryMatches<'query, 'tree, T, I> {
    let ptr = self.ptr.as_ptr();
    // Begin executing `query` against `node`'s subtree; results are pulled
    // lazily through the returned streaming iterator.
    unsafe { ffi::ts_query_cursor_exec(ptr, query.ptr.as_ptr(), node.0) };
    QueryMatches {
        ptr,
        query,
        text_provider,
        // Scratch buffers reused when assembling multi-chunk node text for
        // text-predicate checks.
        buffer1: Vec::default(),
        buffer2: Vec::default(),
        current_match: None,
        _phantom: PhantomData,
    }
}

/// Iterate over all of the individual captures in the order that they
/// appear.
///
/// This is useful if you don't care about which pattern matched, and just
/// want a single, ordered sequence of captures.
#[doc(alias = "ts_query_cursor_exec")]
pub fn captures<'query, 'cursor: 'query, 'tree, T: TextProvider<I>, I: AsRef<[u8]>>(
    &'cursor mut self,
    query: &'query Query,
    node: Node<'tree>,
    text_provider: T,
) -> QueryCaptures<'query, 'tree, T, I> {
    let ptr = self.ptr.as_ptr();
    unsafe { ffi::ts_query_cursor_exec(ptr, query.ptr.as_ptr(), node.0) };
    QueryCaptures {
        ptr,
        query,
        text_provider,
        buffer1: Vec::default(),
        buffer2: Vec::default(),
        current_match: None,
        _phantom: PhantomData,
    }
}

/// Set the range in which the query will be executed, in terms of byte
/// offsets.
#[doc(alias = "ts_query_cursor_set_byte_range")]
pub fn set_byte_range(&mut self, range: ops::Range<usize>) -> &mut Self {
    unsafe {
        ffi::ts_query_cursor_set_byte_range(
            self.ptr.as_ptr(),
            range.start as u32,
            range.end as u32,
        );
    }
    self
}

/// Set the range in which the query will be executed, in terms of rows and
/// columns.
#[doc(alias = "ts_query_cursor_set_point_range")]
pub fn set_point_range(&mut self, range: ops::Range<Point>) -> &mut Self {
    unsafe {
        ffi::ts_query_cursor_set_point_range(
            self.ptr.as_ptr(),
            range.start.into(),
            range.end.into(),
        );
    }
    self
}

/// Set the maximum start depth for a query cursor.
///
/// This prevents cursors from exploring children nodes at a certain depth.
/// Note if a pattern includes many children, then they will still be
/// checked.
///
/// A zero max start depth is a special case: it keeps the cursor on a
/// single node, which is useful for destructuring a subtree via captures.
/// Zero only limits the search depth for a pattern's *root* node; other
/// nodes in the pattern may still be matched at any depth the pattern's
/// structure defines.
///
/// Set to `None` to remove the maximum start depth.
// NOTE(review): `Option` below is missing its type parameter (presumably
// `Option<u32>`); angle-bracketed generics appear stripped in transit.
#[doc(alias = "ts_query_cursor_set_max_start_depth")]
pub fn set_max_start_depth(&mut self, max_start_depth: Option) -> &mut Self {
    unsafe {
        ffi::ts_query_cursor_set_max_start_depth(
            self.ptr.as_ptr(),
            // `u32::MAX` acts as "no limit" on the C side.
            max_start_depth.unwrap_or(u32::MAX),
        );
    }
    self
}
}

impl<'tree> QueryMatch<'_, 'tree> {
    /// The cursor-assigned id of this match.
    #[must_use]
    pub const fn id(&self) -> u32 {
        self.id
    }

    /// Remove this match from the cursor it came from.
    #[doc(alias = "ts_query_cursor_remove_match")]
    pub fn remove(&self) {
        unsafe { ffi::ts_query_cursor_remove_match(self.cursor, self.id) }
    }

    /// Iterate over the nodes this match recorded under capture index
    /// `capture_ix`.
    pub fn nodes_for_capture_index(
        &self,
        capture_ix: u32,
    ) -> impl Iterator> + '_ {
        self.captures
            .iter()
            .filter_map(move |capture| (capture.index == capture_ix).then_some(capture.node))
    }

    /// Build a `QueryMatch` view over a raw `TSQueryMatch` produced by
    /// `cursor`.
    fn new(m: &ffi::TSQueryMatch, cursor: *mut ffi::TSQueryCursor) -> Self {
        QueryMatch {
            cursor,
            id: m.id,
            pattern_index: m.pattern_index as usize,
            // SAFETY(review): reinterprets the C capture array as a slice;
            // assumes `m.captures` points at `m.capture_count` valid,
            // layout-compatible entries — confirm against the FFI contract.
            captures: (m.capture_count > 0)
                .then(|| unsafe {
                    slice::from_raw_parts(
                        m.captures.cast::>(),
                        m.capture_count as usize,
                    )
                })
                .unwrap_or_default(),
        }
    }

    /// Evaluate this match's text predicates against node text supplied by
    /// `text_provider`, returning `true` only if every predicate holds.
    /// `buffer1`/`buffer2` are scratch space for assembling multi-chunk text.
    // NOTE(review): this signature is garbled (`satisfies_text_predicates>`,
    // bare `Vec`/`TextProvider`); generic parameters appear stripped in
    // transit, tokens preserved as-is.
    pub fn satisfies_text_predicates>(
        &self,
        query: &Query,
        buffer1: &mut Vec,
        buffer2: &mut Vec,
        text_provider: &mut impl TextProvider,
    ) -> bool {
        // Accumulates a node's text: borrows the single chunk directly when
        // possible, and only copies into `buffer` when the provider yields
        // more than one chunk.
        struct NodeText<'a, T> {
            buffer: &'a mut Vec,
            first_chunk: Option,
        }
        impl<'a, T: AsRef<[u8]>> NodeText<'a, T> {
            fn new(buffer: &'a mut Vec) -> Self {
                Self {
                    buffer,
                    first_chunk: None,
                }
            }

            fn get_text(&mut self, chunks: &mut impl Iterator) -> &[u8] {
                self.first_chunk = chunks.next();
                if let Some(next_chunk) = chunks.next() {
                    // More than one chunk: concatenate everything into the
                    // reusable buffer.
                    self.buffer.clear();
                    self.buffer
                        .extend_from_slice(self.first_chunk.as_ref().unwrap().as_ref());
                    self.buffer.extend_from_slice(next_chunk.as_ref());
                    for chunk in chunks {
                        self.buffer.extend_from_slice(chunk.as_ref());
                    }
                    self.buffer.as_slice()
                } else if let Some(ref first_chunk) = self.first_chunk {
                    // Exactly one chunk: borrow it without copying.
                    first_chunk.as_ref()
                } else {
                    // No chunks at all: empty text.
                    &[]
                }
            }
        }

        let mut node_text1 = NodeText::new(buffer1);
        let mut node_text2 = NodeText::new(buffer2);

        query.text_predicates[self.pattern_index]
            .iter()
            .all(|predicate| match predicate {
                TextPredicateCapture::EqCapture(i, j, is_positive, match_all_nodes) => {
                    let mut nodes_1 = self.nodes_for_capture_index(*i);
                    let mut nodes_2 = self.nodes_for_capture_index(*j);
                    // Compare the two captures' nodes pairwise.
                    while let (Some(node1), Some(node2)) = (nodes_1.next(), nodes_2.next()) {
                        let mut text1 = text_provider.text(node1);
                        let mut text2 = text_provider.text(node2);
                        let text1 = node_text1.get_text(&mut text1);
                        let text2 = node_text2.get_text(&mut text2);
                        let is_positive_match = text1 == text2;
                        // "all" mode: one failing pair sinks the predicate.
                        if is_positive_match != *is_positive && *match_all_nodes {
                            return false;
                        }
                        // "any" mode: one passing pair satisfies it.
                        if is_positive_match == *is_positive && !*match_all_nodes {
                            return true;
                        }
                    }
                    // Both capture lists must also have equal length.
                    nodes_1.next().is_none() && nodes_2.next().is_none()
                }
                TextPredicateCapture::EqString(i, s, is_positive, match_all_nodes) => {
                    let nodes = self.nodes_for_capture_index(*i);
                    for node in nodes {
                        let mut text = text_provider.text(node);
                        let text = node_text1.get_text(&mut text);
                        let is_positive_match = text == s.as_bytes();
                        if is_positive_match != *is_positive && *match_all_nodes {
                            return false;
                        }
                        if is_positive_match == *is_positive && !*match_all_nodes {
                            return true;
                        }
                    }
                    true
                }
                TextPredicateCapture::MatchString(i, r, is_positive, match_all_nodes) => {
                    let nodes = self.nodes_for_capture_index(*i);
                    for node in nodes {
                        let mut text = text_provider.text(node);
                        let text = node_text1.get_text(&mut text);
                        let is_positive_match = r.is_match(text);
                        if is_positive_match != *is_positive && *match_all_nodes {
                            return false;
                        }
                        if is_positive_match == *is_positive && !*match_all_nodes {
                            return true;
                        }
                    }
                    true
                }
                TextPredicateCapture::AnyString(i, v, is_positive) => {
                    // Every captured node's text must (or, when negated,
                    // must not) equal one of the listed strings.
                    let nodes = self.nodes_for_capture_index(*i);
                    for node in nodes {
                        let mut text = text_provider.text(node);
                        let text = node_text1.get_text(&mut text);
                        if (v.iter().any(|s| text == s.as_bytes())) != *is_positive {
                            return false;
                        }
                    }
                    true
                }
            })
    }
}

impl QueryProperty {
    /// Construct a property from its key, optional value, and optional
    /// capture id, copying the borrowed strings into owned storage.
    // NOTE(review): `Option` (capture_id) is missing its type parameter —
    // angle-bracketed generics appear stripped in transit.
    #[must_use]
    pub fn new(key: &str, value: Option<&str>, capture_id: Option) -> Self {
        Self {
            capture_id,
            key: key.to_string().into(),
            value: value.map(|s| s.to_string().into()),
        }
    }
}

/// Provide StreamingIterator instead of traditional one as the underlying object in the C library
/// gets updated on each iteration. Created copies would have their internal state overwritten,
/// leading to Undefined Behavior
impl<'query, 'tree: 'query, T: TextProvider, I: AsRef<[u8]>> StreamingIterator
    for QueryMatches<'query, 'tree, T, I>
{
    type Item = QueryMatch<'query, 'tree>;

    fn advance(&mut self) {
        // Pull raw matches from the C cursor until one passes this query's
        // text predicates (or the cursor is exhausted).
        self.current_match = unsafe {
            loop {
                let mut m = MaybeUninit::::uninit();
                if ffi::ts_query_cursor_next_match(self.ptr, m.as_mut_ptr()) {
                    let result = QueryMatch::new(&m.assume_init(), self.ptr);
                    if result.satisfies_text_predicates(
                        self.query,
                        &mut self.buffer1,
                        &mut self.buffer2,
                        &mut self.text_provider,
                    ) {
                        break Some(result);
                    }
                } else {
                    break None;
                }
            }
        };
    }

    fn get(&self) -> Option<&Self::Item> {
        self.current_match.as_ref()
    }
}

impl<'query, 'tree: 'query, T: TextProvider, I: AsRef<[u8]>> StreamingIteratorMut
    for QueryMatches<'query, 'tree, T, I>
{
    fn get_mut(&mut self) -> Option<&mut Self::Item> {
        self.current_match.as_mut()
    }
}

impl<'query, 'tree: 'query, T: TextProvider, I: AsRef<[u8]>> StreamingIterator
    for QueryCaptures<'query, 'tree, T, I>
{
    type Item = (QueryMatch<'query, 'tree>, usize);

    fn advance(&mut self) {
        // Like `QueryMatches::advance`, but yields one capture at a time;
        // matches that fail their text predicates are explicitly removed
        // from the cursor to release their resources.
        self.current_match = unsafe {
            loop {
                let mut capture_index = 0u32;
                let mut m = MaybeUninit::::uninit();
                if ffi::ts_query_cursor_next_capture(
                    self.ptr,
                    m.as_mut_ptr(),
                    core::ptr::addr_of_mut!(capture_index),
                ) {
                    let result =
                        QueryMatch::new(&m.assume_init(), self.ptr);
                    if result.satisfies_text_predicates(
                        self.query,
                        &mut self.buffer1,
                        &mut self.buffer2,
                        &mut self.text_provider,
                    ) {
                        break Some((result, capture_index as usize));
                    }
                    // Predicate failure: drop the match from the cursor.
                    result.remove();
                } else {
                    break None;
                }
            }
        }
    }

    fn get(&self) -> Option<&Self::Item> {
        self.current_match.as_ref()
    }
}

impl<'query, 'tree: 'query, T: TextProvider, I: AsRef<[u8]>> StreamingIteratorMut
    for QueryCaptures<'query, 'tree, T, I>
{
    fn get_mut(&mut self) -> Option<&mut Self::Item> {
        self.current_match.as_mut()
    }
}

// Allow narrowing the active range mid-iteration without rebuilding the
// iterator; these forward directly to the shared C cursor.
// NOTE(review): the `impl,` headers and bare `ops::Range` below are garbled
// (generic parameters stripped in transit); tokens preserved as-is.
impl, I: AsRef<[u8]>> QueryMatches<'_, '_, T, I> {
    #[doc(alias = "ts_query_cursor_set_byte_range")]
    pub fn set_byte_range(&mut self, range: ops::Range) {
        unsafe {
            ffi::ts_query_cursor_set_byte_range(self.ptr, range.start as u32, range.end as u32);
        }
    }

    #[doc(alias = "ts_query_cursor_set_point_range")]
    pub fn set_point_range(&mut self, range: ops::Range) {
        unsafe {
            ffi::ts_query_cursor_set_point_range(self.ptr, range.start.into(), range.end.into());
        }
    }
}

impl, I: AsRef<[u8]>> QueryCaptures<'_, '_, T, I> {
    #[doc(alias = "ts_query_cursor_set_byte_range")]
    pub fn set_byte_range(&mut self, range: ops::Range) {
        unsafe {
            ffi::ts_query_cursor_set_byte_range(self.ptr, range.start as u32, range.end as u32);
        }
    }

    #[doc(alias = "ts_query_cursor_set_point_range")]
    pub fn set_point_range(&mut self, range: ops::Range) {
        unsafe {
            ffi::ts_query_cursor_set_point_range(self.ptr, range.start.into(), range.end.into());
        }
    }
}

impl fmt::Debug for QueryMatch<'_, '_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "QueryMatch {{ id: {}, pattern_index: {}, captures: {:?} }}",
            self.id, self.pattern_index, self.captures
        )
    }
}

// Any closure mapping a node to an iterator of byte chunks can serve as a
// text provider.
impl TextProvider for F
where
    F: FnMut(Node) -> R,
    R: Iterator,
    I: AsRef<[u8]>,
{
    type I = R;

    fn text(&mut self, node: Node) -> Self::I {
        (self)(node)
    }
}

// A full source buffer works as a provider by slicing out the node's byte
// range.
impl<'a> TextProvider<&'a [u8]> for &'a [u8] {
    type I = iter::Once<&'a [u8]>;

    fn text(&mut self, node: Node) -> Self::I {
        iter::once(&self[node.byte_range()])
    }
}

impl PartialEq
for Query {
    fn eq(&self, other: &Self) -> bool {
        // Two queries are equal only if they wrap the same C object.
        self.ptr == other.ptr
    }
}

impl Drop for Query {
    fn drop(&mut self) {
        unsafe { ffi::ts_query_delete(self.ptr.as_ptr()) }
    }
}

impl Drop for QueryCursor {
    fn drop(&mut self) {
        unsafe { ffi::ts_query_cursor_delete(self.ptr.as_ptr()) }
    }
}

impl Point {
    /// Create a new zero-based row/column position.
    #[must_use]
    pub const fn new(row: usize, column: usize) -> Self {
        Self { row, column }
    }
}

impl fmt::Display for Point {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "({}, {})", self.row, self.column)
    }
}

// Conversions between the public `Point`/`Range`/`InputEdit` types and their
// C (`ffi`) counterparts: values are narrowed to `u32` going in and widened
// back to `usize` coming out.
// NOTE(review): the `impl From for …` headers below are garbled (the `<T>`
// source-type parameters were stripped in transit); tokens preserved as-is.
impl From for ffi::TSPoint {
    fn from(val: Point) -> Self {
        Self {
            row: val.row as u32,
            column: val.column as u32,
        }
    }
}

impl From for Point {
    fn from(point: ffi::TSPoint) -> Self {
        Self {
            row: point.row as usize,
            column: point.column as usize,
        }
    }
}

impl From for ffi::TSRange {
    fn from(val: Range) -> Self {
        Self {
            start_byte: val.start_byte as u32,
            end_byte: val.end_byte as u32,
            start_point: val.start_point.into(),
            end_point: val.end_point.into(),
        }
    }
}

impl From for Range {
    fn from(range: ffi::TSRange) -> Self {
        Self {
            start_byte: range.start_byte as usize,
            end_byte: range.end_byte as usize,
            start_point: range.start_point.into(),
            end_point: range.end_point.into(),
        }
    }
}

impl From<&'_ InputEdit> for ffi::TSInputEdit {
    fn from(val: &'_ InputEdit) -> Self {
        Self {
            start_byte: val.start_byte as u32,
            old_end_byte: val.old_end_byte as u32,
            new_end_byte: val.new_end_byte as u32,
            start_point: val.start_position.into(),
            old_end_point: val.old_end_position.into(),
            new_end_point: val.new_end_position.into(),
        }
    }
}

impl<'a> LossyUtf8<'a> {
    /// Wrap a byte slice for lossy iteration over its UTF-8 content.
    #[must_use]
    pub const fn new(bytes: &'a [u8]) -> Self {
        LossyUtf8 {
            bytes,
            in_replacement: false,
        }
    }
}

impl<'a> Iterator for LossyUtf8<'a> {
    type Item = &'a str;

    // Yields maximal valid UTF-8 runs interleaved with U+FFFD replacement
    // markers for each invalid byte sequence encountered.
    fn next(&mut self) -> Option<&'a str> {
        if self.bytes.is_empty() {
            return None;
        }
        if self.in_replacement {
            // A replacement was scheduled by the previous call, after a
            // valid prefix was returned.
            self.in_replacement = false;
            return Some("\u{fffd}");
        }
        match core::str::from_utf8(self.bytes) {
            Ok(valid) => {
                // The whole remainder is valid: emit it and finish.
                self.bytes = &[];
                Some(valid)
            }
            Err(error) => {
                if let Some(error_len) = error.error_len() {
                    let error_start = error.valid_up_to();
                    if error_start > 0 {
                        // SAFETY: `valid_up_to` guarantees this prefix is
                        // valid UTF-8.
                        let result = unsafe {
                            core::str::from_utf8_unchecked(&self.bytes[..error_start])
                        };
                        self.bytes = &self.bytes[(error_start + error_len)..];
                        // Emit the replacement character on the next call.
                        self.in_replacement = true;
                        Some(result)
                    } else {
                        // Invalid bytes right at the front: skip them and
                        // emit the replacement immediately.
                        self.bytes = &self.bytes[error_len..];
                        Some("\u{fffd}")
                    }
                } else {
                    // Truncated sequence at the end of input: nothing more
                    // can be produced.
                    None
                }
            }
        }
    }
}

/// Build a `QueryError` of kind `Predicate` for the given source row.
#[must_use]
const fn predicate_error(row: usize, message: String) -> QueryError {
    QueryError {
        kind: QueryErrorKind::Predicate,
        row,
        column: 0,
        offset: 0,
        message,
    }
}

impl fmt::Display for IncludedRangesError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Incorrect range by index: {}", self.0)
    }
}

impl fmt::Display for LanguageError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "Incompatible language version {}. Expected minimum {}, maximum {}",
            self.version, MIN_COMPATIBLE_LANGUAGE_VERSION, LANGUAGE_VERSION,
        )
    }
}

impl fmt::Display for QueryError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Map the error kind to a human-readable prefix; `Language` errors
        // already carry a complete message of their own.
        let msg = match self.kind {
            QueryErrorKind::Field => "Invalid field name ",
            QueryErrorKind::NodeType => "Invalid node type ",
            QueryErrorKind::Capture => "Invalid capture name ",
            QueryErrorKind::Predicate => "Invalid predicate: ",
            QueryErrorKind::Structure => "Impossible pattern:\n",
            QueryErrorKind::Syntax => "Invalid syntax:\n",
            QueryErrorKind::Language => "",
        };
        if msg.is_empty() {
            write!(f, "{}", self.message)
        } else {
            write!(
                f,
                "Query error at {}:{}.
{}{}",
                self.row + 1,
                self.column + 1,
                msg,
                self.message
            )
        }
    }
}

/// Pretty-print an S-expression produced by tree-sitter, starting at
/// `initial_indent_level` and adding one indent level per nesting depth.
#[doc(hidden)]
#[must_use]
pub fn format_sexp(sexp: &str, initial_indent_level: usize) -> String {
    let mut indent_level = initial_indent_level;
    let mut formatted = String::new();
    let mut has_field = false;

    let mut c_iter = sexp.chars().peekable();
    let mut s = String::with_capacity(sexp.len());
    let mut quote = '\0';
    let mut saw_paren = false;
    let mut did_last = false;

    // Tokenizer: fills `next` with the next space/paren-delimited chunk,
    // keeping quoted text (including an escaped closing quote) together.
    // Returns `None` once the input is fully consumed.
    let mut fetch_next_str = |next: &mut String| {
        next.clear();
        while let Some(c) = c_iter.next() {
            if c == '\'' || c == '"' {
                quote = c;
            } else if c == ' ' || (c == ')' && quote != '\0') {
                if let Some(next_c) = c_iter.peek() {
                    if *next_c == quote {
                        next.push(c);
                        next.push(*next_c);
                        c_iter.next();
                        quote = '\0';
                        continue;
                    }
                }
                break;
            }
            if c == ')' {
                saw_paren = true;
                break;
            }
            next.push(c);
        }

        // at the end
        if c_iter.peek().is_none() && next.is_empty() {
            if saw_paren {
                // but did we see a ) before ending?
                saw_paren = false;
                return Some(());
            }
            if !did_last {
                // but did we account for the end empty string as if we're splitting?
                did_last = true;
                return Some(());
            }
            return None;
        }
        Some(())
    };

    while fetch_next_str(&mut s).is_some() {
        if s.is_empty() && indent_level > 0 {
            // ")"
            indent_level -= 1;
            write!(formatted, ")").unwrap();
        } else if s.starts_with('(') {
            if has_field {
                // A field name was just printed inline; keep the node on
                // the same line and clear the flag.
                has_field = false;
            } else {
                if indent_level > 0 {
                    writeln!(formatted).unwrap();
                    for _ in 0..indent_level {
                        write!(formatted, " ").unwrap();
                    }
                }
                indent_level += 1;
            }

            // "(node_name"
            write!(formatted, "{s}").unwrap();

            // "(MISSING node_name" or "(UNEXPECTED 'x'"
            if s.starts_with("(MISSING") || s.starts_with("(UNEXPECTED") {
                fetch_next_str(&mut s).unwrap();
                if s.is_empty() {
                    // Input ended inside the special node: close every open
                    // paren.
                    while indent_level > 0 {
                        indent_level -= 1;
                        write!(formatted, ")").unwrap();
                    }
                } else {
                    write!(formatted, " {s}").unwrap();
                }
            }
        } else if s.ends_with(':') {
            // "field:"
            writeln!(formatted).unwrap();
            for _ in 0..indent_level {
                write!(formatted, " ").unwrap();
            }
            write!(formatted, "{s} ").unwrap();
            has_field = true;
            indent_level += 1;
        }
    }

    formatted
}

/// Iterate over the symbol names in tree-sitter's wasm stdlib symbols file,
/// generated into `OUT_DIR` at build time (quotes and commas are trimmed
/// from each line).
// NOTE(review): the return type reads bare `impl Iterator`; its `Item`
// binding (presumably `Item = &'static str`) looks stripped in transit.
pub fn wasm_stdlib_symbols() -> impl Iterator {
    const WASM_STDLIB_SYMBOLS: &str = include_str!(concat!(env!("OUT_DIR"), "/stdlib-symbols.txt"));

    WASM_STDLIB_SYMBOLS
        .lines()
        .map(|s| s.trim_matches(|c| c == '"' || c == ','))
}

/// Sets the memory allocation functions that the core library should use.
///
/// # Safety
///
/// This function uses FFI and mutates a static global.
#[doc(alias = "ts_set_allocator")] pub unsafe fn set_allocator( new_malloc: Option *mut c_void>, new_calloc: Option *mut c_void>, new_realloc: Option *mut c_void>, new_free: Option, ) { ffi::ts_set_allocator(new_malloc, new_calloc, new_realloc, new_free); } #[cfg(feature = "std")] impl error::Error for IncludedRangesError {} #[cfg(feature = "std")] impl error::Error for LanguageError {} #[cfg(feature = "std")] impl error::Error for QueryError {} unsafe impl Send for Language {} unsafe impl Sync for Language {} unsafe impl Send for Node<'_> {} unsafe impl Sync for Node<'_> {} unsafe impl Send for LookaheadIterator {} unsafe impl Sync for LookaheadIterator {} unsafe impl Send for LookaheadNamesIterator<'_> {} unsafe impl Sync for LookaheadNamesIterator<'_> {} unsafe impl Send for Parser {} unsafe impl Sync for Parser {} unsafe impl Send for Query {} unsafe impl Sync for Query {} unsafe impl Send for QueryCursor {} unsafe impl Sync for QueryCursor {} unsafe impl Send for Tree {} unsafe impl Sync for Tree {} unsafe impl Send for TreeCursor<'_> {} unsafe impl Sync for TreeCursor<'_> {}