summaryrefslogtreecommitdiff
path: root/src/treesit.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/treesit.c')
-rw-r--r--src/treesit.c2327
1 files changed, 2327 insertions, 0 deletions
diff --git a/src/treesit.c b/src/treesit.c
new file mode 100644
index 00000000000..77b48133ba8
--- /dev/null
+++ b/src/treesit.c
@@ -0,0 +1,2327 @@
+/* Tree-sitter integration for GNU Emacs.
+
+Copyright (C) 2021-2022 Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
+
+#include <config.h>
+#include "lisp.h"
+#include "buffer.h"
+#include "treesit.h"
+
+/* Commentary
+
+ The Emacs wrapper of tree-sitter does not expose everything the C
+ API provides, most notably:
+
+ - It doesn't expose a syntax tree, we put the syntax tree in the
+ parser object, and updating the tree is handled in the C level.
+
+ - We don't expose tree cursor either. I think Lisp is slow enough
+ to nullify any performance advantage of using a cursor, though I
+ don't have evidence. Also I want to minimize the number of new
+ types we introduce, currently we only add parser and node type.
+
+ - Because updating the change is handled in the C level as each
+ change is made in the buffer, there is no way for Lisp to update
+ a node. But since we can just retrieve a new node, it shouldn't
+ be a limitation.
+
+ - I didn't expose setting timeout and cancellation flag for a
+ parser, mainly because I don't think they are really necessary
+ in Emacs' use cases.
+
+ - Many tree-sitter functions asks for a TSPoint, basically a (row,
+ column) location. Emacs uses a gap buffer and keeps no
+ information about row and column position. According to the
+ author of tree-sitter, tree-sitter only asks for (row, column)
+ position to carry it around and return back to the user later;
+ and the real position used is the byte position. He also said
+ that he _think_ that it will work to use byte position only.
+ That's why whenever a TSPoint is asked, we pass a dummy one to
+ it. Judging by the nature of parsing algorithms, I think it is
+ safe to use only byte position, and I don't think this will
+ change in the future.
+
+ REF: https://github.com/tree-sitter/tree-sitter/issues/445
+
+ treesit.h has some commentary on the two main data structure
+ for the parser and node. ts_ensure_position_synced has some
+ commentary on how do we make tree-sitter play well with narrowing
+ (tree-sitter parser only sees the visible region, so we need to
+ translate positions back and forth). Most action happens in
+ ts_ensure_parsed, ts_read_buffer and ts_record_change.
+
+ A complete correspondence list between tree-sitter functions and
+ exposed Lisp functions can be found in the manual (elisp)API
+ Correspondence.
+
+ Placement of CHECK_xxx functions: call CHECK_xxx before using any
+ unchecked Lisp values; these include argument of Lisp functions,
+ return value of Fsymbol_value, car of a cons.
+
+ Initializing tree-sitter: there are two entry points to tree-sitter
+ functions: 'treesit-parser-create' and
+ 'treesit-language-available-p'. Therefore we only need to call
+ initialization function in those two functions.
+
+ Tree-sitter offset (0-based) and buffer position (1-based):
+ tree-sitter offset + buffer position = buffer position
+ buffer position - buffer position = tree-sitter offset
+
+ Tree-sitter-related code in other files:
+ - src/alloc.c for gc for parser and node
+ - src/casefiddle.c & src/insdel.c for notifying tree-sitter
+ parser of buffer changes.
+ - lisp/emacs-lisp/cl-preloaded.el & data.c & lisp.h for parser and
+ node type.
+
+ We don't parse at every keystroke. Instead we only record the
+ changes at each keystroke, and only parse when requested. It is
+ possible that lazy parsing is worse: instead of dispersed little
+ pauses, now you have less frequent but larger pauses. I doubt
+ there will be any perceived difference, as the lazy parsing is
+ going to be pretty frequent anyway. Also this (lazy parsing) is
+ what the mailing list guys wanted.
+
+ Because it is pretty slow (comparing to other tree-sitter
+ operations) for tree-sitter to parse the query and produce a query
+ object, it is very wasteful to reparse the query every time
+ treesit-query-capture is called, and it completely kills the
+ performance of querying in a loop for a moderate amount of times
+ (hundreds of queries takes seconds rather than milliseconds to
+ complete). Therefore we want some caching. We can either use a
+ search.c style transparent caching, or simply expose a new type,
+ compiled-ts-query and let the user to manually compile AOT. I
+ believe AOT compiling gives users more control, makes the
+ performance stable and easy to understand (compiled -> fast,
+ uncompiled -> slow), and avoids some edge cases transparent cache
+ could have (see below). So I implemented the AOT compilation.
+
+ Problems a transparent cache could have: Suppose we store cache
+ entries in a fixed-length linked-list, and compare with EQ. 1)
+ One-off query could kick out useful cache. 2) if the user messed
+ up and the query doesn't EQ to the cache anymore, the performance
+ mysteriously drops. 3) what if a user uses so many stuff that the
+ default cache size (20) is not enough and we end up thrashing?
+ These are all imagined scenarios but they are not impossible :-)
+ */
+
+/*** Initialization */
+
+bool ts_initialized = false;
+
+static void *
+ts_calloc_wrapper (size_t n, size_t size)
+{
+ return xzalloc (n * size);
+}
+
+static void
+ts_initialize (void)
+{
+ if (!ts_initialized)
+ {
+ ts_set_allocator (xmalloc, ts_calloc_wrapper, xrealloc, xfree);
+ ts_initialized = true;
+ }
+}
+
+/*** Loading language library */
+
+/* Translates a symbol treesit-<lang> to a C name
+ treesit_<lang>. */
+static void
+ts_symbol_to_c_name (char *symbol_name)
+{
+ for (int idx=0; idx < strlen (symbol_name); idx++)
+ {
+ if (symbol_name[idx] == '-')
+ symbol_name[idx] = '_';
+ }
+}
+
+static bool
+ts_find_override_name
+(Lisp_Object language_symbol, Lisp_Object *name, Lisp_Object *c_symbol)
+{
+ for (Lisp_Object list = Vtreesit_load_name_override_list;
+ !NILP (list); list = XCDR (list))
+ {
+ Lisp_Object lang = XCAR (XCAR (list));
+ CHECK_SYMBOL (lang);
+ if (EQ (lang, language_symbol))
+ {
+ *name = Fnth (make_fixnum (1), XCAR (list));
+ CHECK_STRING (*name);
+ *c_symbol = Fnth (make_fixnum (2), XCAR (list));
+ CHECK_STRING (*c_symbol);
+ return true;
+ }
+ }
+ return false;
+}
+
+/* For example, if Vdynamic_library_suffixes is (".so", ".dylib"),
+ thsi function pushes "lib_base_name.so" and "lib_base_name.dylib"
+ into *path_candidates. Obiviously path_candidates should be a Lisp
+ list of Lisp strings. */
+static void
+ts_load_language_push_for_each_suffix
+(Lisp_Object lib_base_name, Lisp_Object *path_candidates)
+{
+ for (Lisp_Object suffixes = Vdynamic_library_suffixes;
+ !NILP (suffixes); suffixes = XCDR (suffixes)) {
+ *path_candidates = Fcons (concat2 (lib_base_name, XCAR (suffixes)),
+ *path_candidates);
+ }
+}
+
+/* Load the dynamic library of LANGUAGE_SYMBOL and return the pointer
+ to the language definition. Signals
+ Qtreesit_load_language_error if something goes wrong.
+ Qtreesit_load_language_error carries the error message from
+ trying to load the library with each extension.
+
+ If SIGNAL is true, signal an error when failed to load LANGUAGE; if
+ false, return NULL when failed. */
+static TSLanguage *
+ts_load_language (Lisp_Object language_symbol, bool signal)
+{
+ Lisp_Object symbol_name = Fsymbol_name (language_symbol);
+
+ /* Figure out the library name and C name. */
+ Lisp_Object lib_base_name =
+ (concat2 (build_pure_c_string ("libtree-sitter-"), symbol_name));
+ Lisp_Object base_name =
+ (concat2 (build_pure_c_string ("tree-sitter-"), symbol_name));
+ char *c_name = strdup (SSDATA (base_name));
+ ts_symbol_to_c_name (c_name);
+
+ /* Override the library name and C name, if appropriate. */
+ Lisp_Object override_name;
+ Lisp_Object override_c_name;
+ bool found_override = ts_find_override_name
+ (language_symbol, &override_name, &override_c_name);
+ if (found_override)
+ {
+ lib_base_name = override_name;
+ c_name = SSDATA (override_c_name);
+ }
+
+ /* Now we generate a list of possible library paths. */
+ Lisp_Object path_candidates = Qnil;
+ /* First push just the filenames to the candidate list, which will
+ make dynlib_open look under standard system load paths. */
+ ts_load_language_push_for_each_suffix
+ (lib_base_name, &path_candidates);
+ /* Then push ~/.emacs.d/tree-sitter paths. */
+ ts_load_language_push_for_each_suffix
+ (Fexpand_file_name
+ (concat2 (build_string ("tree-sitter/"), lib_base_name),
+ Fsymbol_value (Quser_emacs_directory)),
+ &path_candidates);
+ /* Then push paths from treesit-extra-load-path. */
+ for (Lisp_Object tail = Freverse (Vtreesit_extra_load_path);
+ !NILP (tail); tail = XCDR (tail))
+ {
+ ts_load_language_push_for_each_suffix
+ (Fexpand_file_name (lib_base_name, XCAR (tail)),
+ &path_candidates);
+ }
+
+ /* Try loading the dynamic library by each path candidate. Stop
+ when succeed, record the error message and try the next one when
+ fail. */
+ dynlib_handle_ptr handle;
+ char const *error;
+ Lisp_Object error_list = Qnil;
+ for (Lisp_Object tail = path_candidates;
+ !NILP (tail); tail = XCDR (tail))
+ {
+ char *library_name = SSDATA (XCAR (tail));
+ dynlib_error ();
+ handle = dynlib_open (library_name);
+ error = dynlib_error ();
+ if (error == NULL)
+ break;
+ else
+ error_list = Fcons (build_string (error), error_list);
+ }
+ if (error != NULL)
+ {
+ if (signal)
+ xsignal2 (Qtreesit_load_language_error,
+ symbol_name, Fnreverse (error_list));
+ else
+ return NULL;
+ }
+
+ /* Load TSLanguage. */
+ dynlib_error ();
+ TSLanguage *(*langfn) (void);
+ langfn = dynlib_sym (handle, c_name);
+ error = dynlib_error ();
+ if (error != NULL)
+ {
+ if (signal)
+ xsignal1 (Qtreesit_load_language_error,
+ build_string (error));
+ else
+ return NULL;
+ }
+ TSLanguage *lang = (*langfn) ();
+
+ /* Check if language version matches tree-sitter version. */
+ TSParser *parser = ts_parser_new ();
+ bool success = ts_parser_set_language (parser, lang);
+ ts_parser_delete (parser);
+ if (!success)
+ {
+ if (signal)
+ xsignal2 (Qtreesit_load_language_error,
+ build_pure_c_string ("Language version doesn't match tree-sitter version, language version:"),
+ make_fixnum (ts_language_version (lang)));
+ else
+ return NULL;
+ }
+ return lang;
+}
+
+DEFUN ("treesit-language-available-p",
+ Ftreesit_langauge_available_p,
+ Streesit_language_available_p,
+ 1, 1, 0,
+ doc: /* Return non-nil if LANGUAGE exists and is loadable. */)
+ (Lisp_Object language)
+{
+ CHECK_SYMBOL (language);
+ ts_initialize ();
+ if (ts_load_language(language, false) == NULL)
+ return Qnil;
+ else
+ return Qt;
+}
+
+DEFUN ("treesit-language-version",
+ Ftreesit_language_version,
+ Streesit_language_version,
+ 0, 1, 0,
+ doc: /* Return the language version of tree-sitter library.
+If MIN-COMPATIBLE non-nil, return the minimal compatible version. */)
+ (Lisp_Object min_compatible)
+{
+ if (NILP (min_compatible))
+ return make_fixnum (TREE_SITTER_LANGUAGE_VERSION);
+ else
+ return make_fixnum (TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION);
+}
+
+/*** Parsing functions */
+
+static void
+ts_check_parser (Lisp_Object obj)
+{
+ CHECK_TS_PARSER (obj);
+ if (XTS_PARSER (obj)->deleted)
+ xsignal1 (Qtreesit_parser_deleted, obj);
+}
+
+/* An auxiliary function that saves a few lines of code. Assumes TREE
+ is not NULL. */
+static inline void
+ts_tree_edit_1 (TSTree *tree, ptrdiff_t start_byte,
+ ptrdiff_t old_end_byte, ptrdiff_t new_end_byte)
+{
+ eassert (start_byte >= 0);
+ eassert (start_byte <= old_end_byte);
+ eassert (start_byte <= new_end_byte);
+ TSPoint dummy_point = {0, 0};
+ TSInputEdit edit = {(uint32_t) start_byte,
+ (uint32_t) old_end_byte,
+ (uint32_t) new_end_byte,
+ dummy_point, dummy_point, dummy_point};
+ ts_tree_edit (tree, &edit);
+}
+
+/* Update each parser's tree after the user made an edit. This
+function does not parse the buffer and only updates the tree. (So it
+should be very fast.) */
+void
+ts_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
+ ptrdiff_t new_end_byte)
+{
+ for (Lisp_Object parser_list
+ = BVAR (current_buffer, ts_parser_list);
+ !NILP (parser_list);
+ parser_list = XCDR (parser_list))
+ {
+ CHECK_CONS (parser_list);
+ Lisp_Object lisp_parser = XCAR (parser_list);
+ ts_check_parser (lisp_parser);
+ TSTree *tree = XTS_PARSER (lisp_parser)->tree;
+ if (tree != NULL)
+ {
+ eassert (start_byte <= old_end_byte);
+ eassert (start_byte <= new_end_byte);
+ /* Think the recorded change as a delete followed by an
+ insert, and think of them as moving unchanged text back
+ and forth. After all, the whole point of updating the
+ tree is to update the position of unchanged text. */
+ ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg;
+ ptrdiff_t visible_end = XTS_PARSER (lisp_parser)->visible_end;
+ eassert (visible_beg >= 0);
+ eassert (visible_beg <= visible_end);
+
+ /* AFFECTED_START/OLD_END/NEW_END are (0-based) offsets from
+ VISIBLE_BEG. min(visi_end, max(visi_beg, value)) clips
+ value into [visi_beg, visi_end], and subtracting visi_beg
+ gives the offset from visi_beg. */
+ ptrdiff_t start_offset =
+ min (visible_end,
+ max (visible_beg, start_byte)) - visible_beg;
+ ptrdiff_t old_end_offset =
+ min (visible_end,
+ max (visible_beg, old_end_byte)) - visible_beg;
+ ptrdiff_t new_end_offset =
+ min (visible_end,
+ max (visible_beg, new_end_byte)) - visible_beg;
+ eassert (start_offset <= old_end_offset);
+ eassert (start_offset <= new_end_offset);
+
+ ts_tree_edit_1 (tree, start_offset, old_end_offset,
+ new_end_offset);
+ XTS_PARSER (lisp_parser)->need_reparse = true;
+ XTS_PARSER (lisp_parser)->timestamp++;
+
+ /* VISIBLE_BEG/END records tree-sitter's range of view in
+ the buffer. Ee need to adjust them when tree-sitter's
+ view changes. */
+ ptrdiff_t visi_beg_delta;
+ if (old_end_byte > new_end_byte)
+ {
+ /* Move backward. */
+ visi_beg_delta = min (visible_beg, new_end_byte)
+ - min (visible_beg, old_end_byte);
+ }
+ else
+ {
+ /* Move forward. */
+ visi_beg_delta = old_end_byte < visible_beg
+ ? new_end_byte - old_end_byte : 0;
+ }
+ XTS_PARSER (lisp_parser)->visible_beg
+ = visible_beg + visi_beg_delta;
+ XTS_PARSER (lisp_parser)->visible_end
+ = visible_end + visi_beg_delta
+ + (new_end_offset - old_end_offset);
+ eassert (XTS_PARSER (lisp_parser)->visible_beg >= 0);
+ eassert (XTS_PARSER (lisp_parser)->visible_beg
+ <= XTS_PARSER (lisp_parser)->visible_end);
+ }
+ }
+}
+
+static void
+ts_ensure_position_synced (Lisp_Object parser)
+{
+ TSTree *tree = XTS_PARSER (parser)->tree;
+
+ if (tree == NULL)
+ return;
+
+ struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
+ ptrdiff_t visible_beg = XTS_PARSER (parser)->visible_beg;
+ ptrdiff_t visible_end = XTS_PARSER (parser)->visible_end;
+ eassert (0 <= visible_beg);
+ eassert (visible_beg <= visible_end);
+
+ /* Before we parse or set ranges, catch up with the narrowing
+ situation. We change visible_beg and visible_end to match
+ BUF_BEGV_BYTE and BUF_ZV_BYTE, and inform tree-sitter of the
+ change. We want to move the visible range of tree-sitter to
+ match the narrowed range. For example,
+ from ________|xxxx|__
+ to |xxxx|__________ */
+
+ /* 1. Make sure visible_beg <= BUF_BEGV_BYTE. */
+ if (visible_beg > BUF_BEGV_BYTE (buffer))
+ {
+ /* Tree-sitter sees: insert at the beginning. */
+ ts_tree_edit_1 (tree, 0, 0, visible_beg - BUF_BEGV_BYTE (buffer));
+ visible_beg = BUF_BEGV_BYTE (buffer);
+ eassert (visible_beg <= visible_end);
+ }
+ /* 2. Make sure visible_end = BUF_ZV_BYTE. */
+ if (visible_end < BUF_ZV_BYTE (buffer))
+ {
+ /* Tree-sitter sees: insert at the end. */
+ ts_tree_edit_1 (tree, visible_end - visible_beg,
+ visible_end - visible_beg,
+ BUF_ZV_BYTE (buffer) - visible_beg);
+ visible_end = BUF_ZV_BYTE (buffer);
+ eassert (visible_beg <= visible_end);
+ }
+ else if (visible_end > BUF_ZV_BYTE (buffer))
+ {
+ /* Tree-sitter sees: delete at the end. */
+ ts_tree_edit_1 (tree, BUF_ZV_BYTE (buffer) - visible_beg,
+ visible_end - visible_beg,
+ BUF_ZV_BYTE (buffer) - visible_beg);
+ visible_end = BUF_ZV_BYTE (buffer);
+ eassert (visible_beg <= visible_end);
+ }
+ /* 3. Make sure visible_beg = BUF_BEGV_BYTE. */
+ if (visible_beg < BUF_BEGV_BYTE (buffer))
+ {
+ /* Tree-sitter sees: delete at the beginning. */
+ ts_tree_edit_1 (tree, 0, BUF_BEGV_BYTE (buffer) - visible_beg, 0);
+ visible_beg = BUF_BEGV_BYTE (buffer);
+ eassert (visible_beg <= visible_end);
+ }
+ eassert (0 <= visible_beg);
+ eassert (visible_beg <= visible_end);
+
+ XTS_PARSER (parser)->visible_beg = visible_beg;
+ XTS_PARSER (parser)->visible_end = visible_end;
+}
+
+static void
+ts_check_buffer_size (struct buffer *buffer)
+{
+ ptrdiff_t buffer_size =
+ (BUF_Z (buffer) - BUF_BEG (buffer));
+ if (buffer_size > UINT32_MAX)
+ xsignal2 (Qtreesit_buffer_too_large,
+ build_pure_c_string ("Buffer size larger than 4GB, size:"),
+ make_fixnum (buffer_size));
+}
+
+/* Parse the buffer. We don't parse until we have to. When we have
+to, we call this function to parse and update the tree. */
+static void
+ts_ensure_parsed (Lisp_Object parser)
+{
+ if (!XTS_PARSER (parser)->need_reparse)
+ return;
+ TSParser *ts_parser = XTS_PARSER (parser)->parser;
+ TSTree *tree = XTS_PARSER(parser)->tree;
+ TSInput input = XTS_PARSER (parser)->input;
+ struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
+ ts_check_buffer_size (buffer);
+
+ /* Before we parse, catch up with the narrowing situation. */
+ ts_ensure_position_synced (parser);
+
+ TSTree *new_tree = ts_parser_parse(ts_parser, tree, input);
+ /* This should be very rare (impossible, really): it only happens
+ when 1) language is not set (impossible in Emacs because the user
+ has to supply a language to create a parser), 2) parse canceled
+ due to timeout (impossible because we don't set a timeout), 3)
+ parse canceled due to cancellation flag (impossible because we
+ don't set the flag). (See comments for ts_parser_parse in
+ tree_sitter/api.h.) */
+ if (new_tree == NULL)
+ {
+ Lisp_Object buf;
+ XSETBUFFER (buf, buffer);
+ xsignal1 (Qtreesit_parse_error, buf);
+ }
+
+ if (tree != NULL)
+ ts_tree_delete (tree);
+ XTS_PARSER (parser)->tree = new_tree;
+ XTS_PARSER (parser)->need_reparse = false;
+}
+
+/* This is the read function provided to tree-sitter to read from a
+ buffer. It reads one character at a time and automatically skips
+ the gap. */
+static const char*
+ts_read_buffer (void *parser, uint32_t byte_index,
+ TSPoint position, uint32_t *bytes_read)
+{
+ struct buffer *buffer =
+ XBUFFER (((struct Lisp_TS_Parser *) parser)->buffer);
+ ptrdiff_t visible_beg = ((struct Lisp_TS_Parser *) parser)->visible_beg;
+ ptrdiff_t visible_end = ((struct Lisp_TS_Parser *) parser)->visible_end;
+ ptrdiff_t byte_pos = byte_index + visible_beg;
+ /* We will make sure visible_beg = BUF_BEGV_BYTE before re-parse (in
+ ts_ensure_parsed), so byte_pos will never be smaller than
+ BUF_BEG_BYTE. */
+ eassert (visible_beg = BUF_BEGV_BYTE (buffer));
+ eassert (visible_end = BUF_ZV_BYTE (buffer));
+
+ /* Read one character. Tree-sitter wants us to set bytes_read to 0
+ if it reads to the end of buffer. It doesn't say what it wants
+ for the return value in that case, so we just give it an empty
+ string. */
+ char *beg;
+ int len;
+ /* This function could run from a user command, so it is better to
+ do nothing instead of raising an error. (It was a pain in the a**
+ to decrypt mega-if-conditions in Emacs source, so I wrote the two
+ branches separately, you are welcome.) */
+ if (!BUFFER_LIVE_P (buffer))
+ {
+ beg = NULL;
+ len = 0;
+ }
+ /* Reached visible end-of-buffer, tell tree-sitter to read no more. */
+ else if (byte_pos >= visible_end)
+ {
+ beg = NULL;
+ len = 0;
+ }
+ /* Normal case, read a character. */
+ else
+ {
+ beg = (char *) BUF_BYTE_ADDRESS (buffer, byte_pos);
+ len = BYTES_BY_CHAR_HEAD ((int) *beg);
+ }
+ *bytes_read = (uint32_t) len;
+ return beg;
+}
+
+/*** Functions for parser and node object*/
+
+/* Wrap the parser in a Lisp_Object to be used in the Lisp machine. */
+Lisp_Object
+make_ts_parser (Lisp_Object buffer, TSParser *parser,
+ TSTree *tree, Lisp_Object language_symbol)
+{
+ struct Lisp_TS_Parser *lisp_parser
+ = ALLOCATE_PSEUDOVECTOR
+ (struct Lisp_TS_Parser, buffer, PVEC_TS_PARSER);
+
+ lisp_parser->language_symbol = language_symbol;
+ lisp_parser->buffer = buffer;
+ lisp_parser->parser = parser;
+ lisp_parser->tree = tree;
+ TSInput input = {lisp_parser, ts_read_buffer, TSInputEncodingUTF8};
+ lisp_parser->input = input;
+ lisp_parser->need_reparse = true;
+ lisp_parser->visible_beg = BUF_BEGV (XBUFFER (buffer));
+ lisp_parser->visible_end = BUF_ZV (XBUFFER (buffer));
+ lisp_parser->timestamp = 0;
+ lisp_parser->deleted = false;
+ eassert (lisp_parser->visible_beg <= lisp_parser->visible_end);
+ return make_lisp_ptr (lisp_parser, Lisp_Vectorlike);
+}
+
+/* Wrap the node in a Lisp_Object to be used in the Lisp machine. */
+Lisp_Object
+make_ts_node (Lisp_Object parser, TSNode node)
+{
+ struct Lisp_TS_Node *lisp_node
+ = ALLOCATE_PSEUDOVECTOR (struct Lisp_TS_Node, parser, PVEC_TS_NODE);
+ lisp_node->parser = parser;
+ lisp_node->node = node;
+ lisp_node->timestamp = XTS_PARSER (parser)->timestamp;
+ return make_lisp_ptr (lisp_node, Lisp_Vectorlike);
+}
+
+/* Make a compiled query struct. Return NULL if error occurs. QUERY
+ has to be either a cons or a string. */
+static struct Lisp_TS_Query *
+make_ts_query (Lisp_Object query, const TSLanguage *language,
+ uint32_t *error_offset, TSQueryError *error_type)
+{
+ if (CONSP (query))
+ query = Ftreesit_query_expand (query);
+ char *source = SSDATA (query);
+
+ TSQuery *ts_query = ts_query_new (language, source, strlen (source),
+ error_offset, error_type);
+ TSQueryCursor *ts_cursor = ts_query_cursor_new ();
+
+ if (ts_query == NULL)
+ return NULL;
+
+ struct Lisp_TS_Query *lisp_query
+ = ALLOCATE_PLAIN_PSEUDOVECTOR (struct Lisp_TS_Query,
+ PVEC_TS_COMPILED_QUERY);
+ lisp_query->query = ts_query;
+ lisp_query->cursor = ts_cursor;
+ return lisp_query;
+}
+
+DEFUN ("treesit-parser-p",
+ Ftreesit_parser_p, Streesit_parser_p, 1, 1, 0,
+ doc: /* Return t if OBJECT is a tree-sitter parser. */)
+ (Lisp_Object object)
+{
+ if (TS_PARSERP (object))
+ return Qt;
+ else
+ return Qnil;
+}
+
+DEFUN ("treesit-node-p",
+ Ftreesit_node_p, Streesit_node_p, 1, 1, 0,
+ doc: /* Return t if OBJECT is a tree-sitter node. */)
+ (Lisp_Object object)
+{
+ if (TS_NODEP (object))
+ return Qt;
+ else
+ return Qnil;
+}
+
+DEFUN ("treesit-compiled-query-p",
+ Ftreesit_compiled_query_p, Streesit_compiled_query_p, 1, 1, 0,
+ doc: /* Return t if OBJECT is a compiled tree-sitter query. */)
+ (Lisp_Object object)
+{
+ if (TS_COMPILED_QUERY_P (object))
+ return Qt;
+ else
+ return Qnil;
+}
+
+DEFUN ("treesit-query-p",
+ Ftreesit_query_p, Streesit_query_p, 1, 1, 0,
+ doc: /* Return t if OBJECT is a generic tree-sitter query. */)
+ (Lisp_Object object)
+{
+ if (TS_COMPILED_QUERY_P (object)
+ || CONSP (object) || STRINGP (object))
+ return Qt;
+ else
+ return Qnil;
+}
+
+DEFUN ("treesit-node-parser",
+ Ftreesit_node_parser, Streesit_node_parser,
+ 1, 1, 0,
+ doc: /* Return the parser to which NODE belongs. */)
+ (Lisp_Object node)
+{
+ CHECK_TS_NODE (node);
+ return XTS_NODE (node)->parser;
+}
+
+DEFUN ("treesit-parser-create",
+ Ftreesit_parser_create, Streesit_parser_create,
+ 1, 3, 0,
+ doc: /* Create and return a parser in BUFFER for LANGUAGE.
+
+The parser is automatically added to BUFFER's `treesit-parser-list'.
+LANGUAGE is a language symbol. If BUFFER is nil, use the current
+buffer. If BUFFER already has a parser for LANGUAGE, return that
+parser. If NO-REUSE is non-nil, always create a new parser. */)
+ (Lisp_Object language, Lisp_Object buffer, Lisp_Object no_reuse)
+{
+ ts_initialize ();
+
+ CHECK_SYMBOL (language);
+ struct buffer *buf;
+ if (NILP (buffer))
+ buf = current_buffer;
+ else
+ {
+ CHECK_BUFFER (buffer);
+ buf = XBUFFER (buffer);
+ }
+ ts_check_buffer_size (buf);
+
+ /* See if we can reuse a parser. */
+ for (Lisp_Object tail = BVAR (buf, ts_parser_list);
+ NILP (no_reuse) && !NILP (tail);
+ tail = XCDR (tail))
+ {
+ struct Lisp_TS_Parser *parser = XTS_PARSER (XCAR (tail));
+ if (EQ (parser->language_symbol, language))
+ {
+ return XCAR (tail);
+ }
+ }
+
+ TSParser *parser = ts_parser_new ();
+ TSLanguage *lang = ts_load_language (language, true);
+ /* We check language version when loading a language, so this should
+ always succeed. */
+ ts_parser_set_language (parser, lang);
+
+ Lisp_Object lisp_parser
+ = make_ts_parser (Fcurrent_buffer (), parser, NULL, language);
+
+ BVAR (buf, ts_parser_list)
+ = Fcons (lisp_parser, BVAR (buf, ts_parser_list));
+
+ return lisp_parser;
+}
+
+DEFUN ("treesit-parser-delete",
+ Ftreesit_parser_delete, Streesit_parser_delete,
+ 1, 1, 0,
+ doc: /* Delete PARSER from its buffer. */)
+ (Lisp_Object parser)
+{
+ ts_check_parser (parser);
+
+ Lisp_Object buffer = XTS_PARSER (parser)->buffer;
+ struct buffer *buf = XBUFFER (buffer);
+ BVAR (buf, ts_parser_list)
+ = Fdelete (parser, BVAR (buf, ts_parser_list));
+
+ XTS_PARSER (parser)->deleted = true;
+ return Qnil;
+}
+
+DEFUN ("treesit-parser-list",
+ Ftreesit_parser_list, Streesit_parser_list,
+ 0, 1, 0,
+ doc: /* Return BUFFER's parser list.
+BUFFER defaults to the current buffer. */)
+ (Lisp_Object buffer)
+{
+ struct buffer *buf;
+ if (NILP (buffer))
+ buf = current_buffer;
+ else
+ {
+ CHECK_BUFFER (buffer);
+ buf = XBUFFER (buffer);
+ }
+ /* Return a fresh list so messing with that list doesn't affect our
+ internal data. */
+ Lisp_Object return_list = Qnil;
+ for (Lisp_Object tail = BVAR (buf, ts_parser_list);
+ !NILP (tail);
+ tail = XCDR (tail))
+ {
+ return_list = Fcons (XCAR (tail), return_list);
+ }
+ return Freverse (return_list);
+}
+
+DEFUN ("treesit-parser-buffer",
+ Ftreesit_parser_buffer, Streesit_parser_buffer,
+ 1, 1, 0,
+ doc: /* Return the buffer of PARSER. */)
+ (Lisp_Object parser)
+{
+ ts_check_parser (parser);
+ Lisp_Object buf;
+ XSETBUFFER (buf, XBUFFER (XTS_PARSER (parser)->buffer));
+ return buf;
+}
+
+DEFUN ("treesit-parser-language",
+ Ftreesit_parser_language, Streesit_parser_language,
+ 1, 1, 0,
+ doc: /* Return parser's language symbol.
+This symbol is the one used to create the parser. */)
+ (Lisp_Object parser)
+{
+ ts_check_parser (parser);
+ return XTS_PARSER (parser)->language_symbol;
+}
+
+/*** Parser API */
+
+DEFUN ("treesit-parser-root-node",
+ Ftreesit_parser_root_node, Streesit_parser_root_node,
+ 1, 1, 0,
+ doc: /* Return the root node of PARSER. */)
+ (Lisp_Object parser)
+{
+ ts_check_parser (parser);
+ ts_ensure_parsed (parser);
+ TSNode root_node = ts_tree_root_node (XTS_PARSER (parser)->tree);
+ return make_ts_node (parser, root_node);
+}
+
+/* Checks that the RANGES argument of
+ treesit-parser-set-included-ranges is valid. */
+static void
+ts_check_range_argument (Lisp_Object ranges)
+{
+ struct buffer *buffer = current_buffer;
+ ptrdiff_t point_min = BUF_BEGV (buffer);
+ ptrdiff_t point_max = BUF_ZV (buffer);
+ EMACS_INT last_point = point_min;
+
+ for (Lisp_Object tail = ranges;
+ !NILP (tail); tail = XCDR (tail))
+ {
+ CHECK_CONS (tail);
+ Lisp_Object range = XCAR (tail);
+ CHECK_CONS (range);
+ CHECK_FIXNUM (XCAR (range));
+ CHECK_FIXNUM (XCDR (range));
+ EMACS_INT beg = XFIXNUM (XCAR (range));
+ EMACS_INT end = XFIXNUM (XCDR (range));
+ if (!(last_point <= beg && beg <= end && end <= point_max))
+ xsignal2 (Qtreesit_range_invalid,
+ build_pure_c_string
+ ("RANGE is either overlapping or out-of-order or out-of-range"),
+ ranges);
+ last_point = end;
+ }
+}
+
+DEFUN ("treesit-parser-set-included-ranges",
+ Ftreesit_parser_set_included_ranges,
+ Streesit_parser_set_included_ranges,
+ 2, 2, 0,
+ doc: /* Limit PARSER to RANGES.
+
+RANGES is a list of (BEG . END), each (BEG . END) confines a range in
+which the parser should operate in. Each range must not overlap, and
+each range should come in order. Signal `treesit-set-range-error'
+if the argument is invalid, or something else went wrong. If RANGES
+is nil, set PARSER to parse the whole buffer. */)
+ (Lisp_Object parser, Lisp_Object ranges)
+{
+ ts_check_parser (parser);
+ CHECK_CONS (ranges);
+ ts_check_range_argument (ranges);
+
+ /* Before we parse, catch up with narrowing/widening. */
+ ts_ensure_position_synced (parser);
+
+ bool success;
+ if (NILP (ranges))
+ {
+ /* If RANGES is nil, make parser to parse the whole document.
+ To do that we give tree-sitter a 0 length, the range is a
+ dummy. */
+ TSRange ts_range = {{0, 0}, {0, 0}, 0, 0};
+ success = ts_parser_set_included_ranges
+ (XTS_PARSER (parser)->parser, &ts_range , 0);
+ }
+ else
+ {
+ /* Set ranges for PARSER. */
+ ptrdiff_t len = list_length (ranges);
+ TSRange *ts_ranges = malloc (sizeof(TSRange) * len);
+ struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
+
+ for (int idx=0; !NILP (ranges); idx++, ranges = XCDR (ranges))
+ {
+ Lisp_Object range = XCAR (ranges);
+ EMACS_INT beg_byte = buf_charpos_to_bytepos
+ (buffer, XFIXNUM (XCAR (range)));
+ EMACS_INT end_byte = buf_charpos_to_bytepos
+ (buffer, XFIXNUM (XCDR (range)));
+ /* We don't care about start and end points, put in dummy
+ value. */
+ TSRange rg = {{0,0}, {0,0},
+ (uint32_t) beg_byte - BUF_BEGV_BYTE (buffer),
+ (uint32_t) end_byte - BUF_BEGV_BYTE (buffer)};
+ ts_ranges[idx] = rg;
+ }
+ success = ts_parser_set_included_ranges
+ (XTS_PARSER (parser)->parser, ts_ranges, (uint32_t) len);
+ /* Although XFIXNUM could signal, it should be impossible
+ because we have checked the input by ts_check_range_argument.
+ So there is no need for unwind-protect. */
+ free (ts_ranges);
+ }
+
+ if (!success)
+ xsignal2 (Qtreesit_range_invalid,
+ build_pure_c_string
+ ("Something went wrong when setting ranges"),
+ ranges);
+
+ XTS_PARSER (parser)->need_reparse = true;
+ return Qnil;
+}
+
+DEFUN ("treesit-parser-included-ranges",
+ Ftreesit_parser_included_ranges,
+ Streesit_parser_included_ranges,
+ 1, 1, 0,
+ doc: /* Return the ranges set for PARSER.
+See `treesit-parser-set-ranges'. If no range is set, return
+nil. */)
+ (Lisp_Object parser)
+{
+ ts_check_parser (parser);
+ uint32_t len;
+ const TSRange *ranges = ts_parser_included_ranges
+ (XTS_PARSER (parser)->parser, &len);
+ if (len == 0)
+ return Qnil;
+
+ /* Our return value depends on the buffer state (BUF_BEGV_BYTE,
+ etc), so we need to sync up. */
+ ts_ensure_position_synced (parser);
+
+ struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
+
+ Lisp_Object list = Qnil;
+ for (int idx=0; idx < len; idx++)
+ {
+ TSRange range = ranges[idx];
+ uint32_t beg_byte = range.start_byte + BUF_BEGV_BYTE (buffer);
+ uint32_t end_byte = range.end_byte + BUF_BEGV_BYTE (buffer);
+ eassert (BUF_BEGV_BYTE (buffer) <= beg_byte);
+ eassert (beg_byte <= end_byte);
+ eassert (end_byte <= BUF_ZV_BYTE (buffer));
+
+ Lisp_Object lisp_range =
+ Fcons (make_fixnum (buf_bytepos_to_charpos (buffer, beg_byte)) ,
+ make_fixnum (buf_bytepos_to_charpos (buffer, end_byte)));
+ list = Fcons (lisp_range, list);
+ }
+ return Fnreverse (list);
+}
+
+/*** Node API */
+
+/* Check that OBJ is a positive integer and signal an error if
+ otherwise. */
+static void
+ts_check_positive_integer (Lisp_Object obj)
+{
+ CHECK_INTEGER (obj);
+ if (XFIXNUM (obj) < 0)
+ xsignal1 (Qargs_out_of_range, obj);
+}
+
+static void
+ts_check_node (Lisp_Object obj)
+{
+ CHECK_TS_NODE (obj);
+ Lisp_Object lisp_parser = XTS_NODE (obj)->parser;
+ if (XTS_NODE (obj)->timestamp !=
+ XTS_PARSER (lisp_parser)->timestamp)
+ xsignal1 (Qtreesit_node_outdated, obj);
+}
+
+DEFUN ("treesit-node-type",
+ Ftreesit_node_type, Streesit_node_type, 1, 1, 0,
+ doc: /* Return the NODE's type as a string.
+If NODE is nil, return nil. */)
+ (Lisp_Object node)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ TSNode ts_node = XTS_NODE (node)->node;
+ const char *type = ts_node_type (ts_node);
+ return build_string (type);
+}
+
+DEFUN ("treesit-node-start",
+ Ftreesit_node_start, Streesit_node_start, 1, 1, 0,
+ doc: /* Return the NODE's start position.
+If NODE is nil, return nil. */)
+ (Lisp_Object node)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ TSNode ts_node = XTS_NODE (node)->node;
+ ptrdiff_t visible_beg =
+ XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
+ uint32_t start_byte_offset = ts_node_start_byte (ts_node);
+ struct buffer *buffer =
+ XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
+ ptrdiff_t start_pos = buf_bytepos_to_charpos
+ (buffer, start_byte_offset + visible_beg);
+ return make_fixnum (start_pos);
+}
+
+DEFUN ("treesit-node-end",
+ Ftreesit_node_end, Streesit_node_end, 1, 1, 0,
+ doc: /* Return the NODE's end position.
+If NODE is nil, return nil. */)
+ (Lisp_Object node)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ TSNode ts_node = XTS_NODE (node)->node;
+ ptrdiff_t visible_beg =
+ XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
+ uint32_t end_byte_offset = ts_node_end_byte (ts_node);
+ struct buffer *buffer =
+ XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
+ ptrdiff_t end_pos = buf_bytepos_to_charpos
+ (buffer, end_byte_offset + visible_beg);
+ return make_fixnum (end_pos);
+}
+
+DEFUN ("treesit-node-string",
+ Ftreesit_node_string, Streesit_node_string, 1, 1, 0,
+ doc: /* Return the string representation of NODE.
+If NODE is nil, return nil. */)
+ (Lisp_Object node)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ TSNode ts_node = XTS_NODE (node)->node;
+ char *string = ts_node_string (ts_node);
+ return build_string (string);
+}
+
+DEFUN ("treesit-node-parent",
+ Ftreesit_node_parent, Streesit_node_parent, 1, 1, 0,
+ doc: /* Return the immediate parent of NODE.
+Return nil if there isn't any. If NODE is nil, return nil. */)
+ (Lisp_Object node)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ TSNode ts_node = XTS_NODE (node)->node;
+ TSNode parent = ts_node_parent (ts_node);
+
+ if (ts_node_is_null (parent))
+ return Qnil;
+
+ return make_ts_node (XTS_NODE (node)->parser, parent);
+}
+
+DEFUN ("treesit-node-child",
+ Ftreesit_node_child, Streesit_node_child, 2, 3, 0,
+ doc: /* Return the Nth child of NODE.
+
+Return nil if there isn't any. If NAMED is non-nil, look for named
+child only. NAMED defaults to nil. If NODE is nil, return nil. */)
+ (Lisp_Object node, Lisp_Object n, Lisp_Object named)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ ts_check_positive_integer (n);
+ EMACS_INT idx = XFIXNUM (n);
+ if (idx > UINT32_MAX) xsignal1 (Qargs_out_of_range, n);
+ TSNode ts_node = XTS_NODE (node)->node;
+ TSNode child;
+ if (NILP (named))
+ child = ts_node_child (ts_node, (uint32_t) idx);
+ else
+ child = ts_node_named_child (ts_node, (uint32_t) idx);
+
+ if (ts_node_is_null (child))
+ return Qnil;
+
+ return make_ts_node (XTS_NODE (node)->parser, child);
+}
+
+DEFUN ("treesit-node-check",
+ Ftreesit_node_check, Streesit_node_check, 2, 2, 0,
+ doc: /* Return non-nil if NODE has PROPERTY, nil otherwise.
+
+PROPERTY could be 'named, 'missing, 'extra, 'has-changes, 'has-error.
+Named nodes correspond to named rules in the language definition,
+whereas "anonymous" nodes correspond to string literals in the
+language definition.
+
+Missing nodes are inserted by the parser in order to recover from
+certain kinds of syntax errors, i.e., should be there but not there.
+
+Extra nodes represent things like comments, which are not required the
+language definition, but can appear anywhere.
+
+A node "has changes" if the buffer changed since the node is
+created. (Don't forget the "s" at the end of 'has-changes.)
+
+A node "has error" if itself is a syntax error or contains any syntax
+errors. */)
+ (Lisp_Object node, Lisp_Object property)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ CHECK_SYMBOL (property);
+ TSNode ts_node = XTS_NODE (node)->node;
+ bool result;
+ if (EQ (property, Qnamed))
+ result = ts_node_is_named (ts_node);
+ else if (EQ (property, Qmissing))
+ result = ts_node_is_missing (ts_node);
+ else if (EQ (property, Qextra))
+ result = ts_node_is_extra (ts_node);
+ else if (EQ (property, Qhas_error))
+ result = ts_node_has_error (ts_node);
+ else if (EQ (property, Qhas_changes))
+ result = ts_node_has_changes (ts_node);
+ else
+ signal_error ("Expecting 'named, 'missing, 'extra, 'has-changes or 'has-error, got",
+ property);
+ return result ? Qt : Qnil;
+}
+
+DEFUN ("treesit-node-field-name-for-child",
+ Ftreesit_node_field_name_for_child,
+ Streesit_node_field_name_for_child, 2, 2, 0,
+ doc: /* Return the field name of the Nth child of NODE.
+
+Return nil if not any child or no field is found.
+If NODE is nil, return nil. */)
+ (Lisp_Object node, Lisp_Object n)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ ts_check_positive_integer (n);
+ EMACS_INT idx = XFIXNUM (n);
+ if (idx > UINT32_MAX) xsignal1 (Qargs_out_of_range, n);
+ TSNode ts_node = XTS_NODE (node)->node;
+ const char *name
+ = ts_node_field_name_for_child (ts_node, (uint32_t) idx);
+
+ if (name == NULL)
+ return Qnil;
+
+ return build_string (name);
+}
+
+DEFUN ("treesit-node-child-count",
+ Ftreesit_node_child_count,
+ Streesit_node_child_count, 1, 2, 0,
+ doc: /* Return the number of children of NODE.
+
+If NAMED is non-nil, count named child only. NAMED defaults to
+nil. If NODE is nil, return nil. */)
+ (Lisp_Object node, Lisp_Object named)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ TSNode ts_node = XTS_NODE (node)->node;
+ uint32_t count;
+ if (NILP (named))
+ count = ts_node_child_count (ts_node);
+ else
+ count = ts_node_named_child_count (ts_node);
+ return make_fixnum (count);
+}
+
+DEFUN ("treesit-node-child-by-field-name",
+ Ftreesit_node_child_by_field_name,
+ Streesit_node_child_by_field_name, 2, 2, 0,
+ doc: /* Return the child of NODE with FIELD-NAME.
+Return nil if there isn't any. If NODE is nil, return nil. */)
+ (Lisp_Object node, Lisp_Object field_name)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ CHECK_STRING (field_name);
+ char *name_str = SSDATA (field_name);
+ TSNode ts_node = XTS_NODE (node)->node;
+ TSNode child
+ = ts_node_child_by_field_name (ts_node, name_str, strlen (name_str));
+
+ if (ts_node_is_null(child))
+ return Qnil;
+
+ return make_ts_node(XTS_NODE (node)->parser, child);
+}
+
+DEFUN ("treesit-node-next-sibling",
+ Ftreesit_node_next_sibling,
+ Streesit_node_next_sibling, 1, 2, 0,
+ doc: /* Return the next sibling of NODE.
+
+Return nil if there isn't any. If NAMED is non-nil, look for named
+child only. NAMED defaults to nil. If NODE is nil, return nil. */)
+ (Lisp_Object node, Lisp_Object named)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ TSNode ts_node = XTS_NODE (node)->node;
+ TSNode sibling;
+ if (NILP (named))
+ sibling = ts_node_next_sibling (ts_node);
+ else
+ sibling = ts_node_next_named_sibling (ts_node);
+
+ if (ts_node_is_null(sibling))
+ return Qnil;
+
+ return make_ts_node(XTS_NODE (node)->parser, sibling);
+}
+
+DEFUN ("treesit-node-prev-sibling",
+ Ftreesit_node_prev_sibling,
+ Streesit_node_prev_sibling, 1, 2, 0,
+ doc: /* Return the previous sibling of NODE.
+
+Return nil if there isn't any. If NAMED is non-nil, look for named
+child only. NAMED defaults to nil. If NODE is nil, return nil. */)
+ (Lisp_Object node, Lisp_Object named)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ TSNode ts_node = XTS_NODE (node)->node;
+ TSNode sibling;
+
+ if (NILP (named))
+ sibling = ts_node_prev_sibling (ts_node);
+ else
+ sibling = ts_node_prev_named_sibling (ts_node);
+
+ if (ts_node_is_null(sibling))
+ return Qnil;
+
+ return make_ts_node(XTS_NODE (node)->parser, sibling);
+}
+
+DEFUN ("treesit-node-first-child-for-pos",
+ Ftreesit_node_first_child_for_pos,
+ Streesit_node_first_child_for_pos, 2, 3, 0,
+ doc: /* Return the first child of NODE on POS.
+
+Specifically, return the first child that extends beyond POS. POS is
+a position in the buffer. Return nil if there isn't any. If NAMED is
+non-nil, look for named child only. NAMED defaults to nil. Note that
+this function returns an immediate child, not the smallest
+(grand)child. If NODE is nil, return nil. */)
+ (Lisp_Object node, Lisp_Object pos, Lisp_Object named)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ ts_check_positive_integer (pos);
+
+ struct buffer *buf =
+ XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
+ ptrdiff_t visible_beg =
+ XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
+ ptrdiff_t byte_pos = buf_charpos_to_bytepos (buf, XFIXNUM (pos));
+
+ if (byte_pos < BUF_BEGV_BYTE (buf) || byte_pos > BUF_ZV_BYTE (buf))
+ xsignal1 (Qargs_out_of_range, pos);
+
+ TSNode ts_node = XTS_NODE (node)->node;
+ TSNode child;
+ if (NILP (named))
+ child = ts_node_first_child_for_byte
+ (ts_node, byte_pos - visible_beg);
+ else
+ child = ts_node_first_named_child_for_byte
+ (ts_node, byte_pos - visible_beg);
+
+ if (ts_node_is_null (child))
+ return Qnil;
+
+ return make_ts_node (XTS_NODE (node)->parser, child);
+}
+
+DEFUN ("treesit-node-descendant-for-range",
+ Ftreesit_node_descendant_for_range,
+ Streesit_node_descendant_for_range, 3, 4, 0,
+ doc: /* Return the smallest node that covers BEG to END.
+
+The returned node is a descendant of NODE. POS is a position. Return
+nil if there isn't any. If NAMED is non-nil, look for named child
+only. NAMED defaults to nil. If NODE is nil, return nil. */)
+ (Lisp_Object node, Lisp_Object beg, Lisp_Object end, Lisp_Object named)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ CHECK_INTEGER (beg);
+ CHECK_INTEGER (end);
+
+ struct buffer *buf =
+ XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
+ ptrdiff_t visible_beg =
+ XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
+ ptrdiff_t byte_beg = buf_charpos_to_bytepos (buf, XFIXNUM (beg));
+ ptrdiff_t byte_end = buf_charpos_to_bytepos (buf, XFIXNUM (end));
+
+ /* Checks for BUFFER_BEG <= BEG <= END <= BUFFER_END. */
+ if (!(BUF_BEGV_BYTE (buf) <= byte_beg
+ && byte_beg <= byte_end
+ && byte_end <= BUF_ZV_BYTE (buf)))
+ xsignal2 (Qargs_out_of_range, beg, end);
+
+ TSNode ts_node = XTS_NODE (node)->node;
+ TSNode child;
+ if (NILP (named))
+ child = ts_node_descendant_for_byte_range
+ (ts_node, byte_beg - visible_beg , byte_end - visible_beg);
+ else
+ child = ts_node_named_descendant_for_byte_range
+ (ts_node, byte_beg - visible_beg, byte_end - visible_beg);
+
+ if (ts_node_is_null (child))
+ return Qnil;
+
+ return make_ts_node (XTS_NODE (node)->parser, child);
+}
+
+DEFUN ("treesit-node-eq",
+ Ftreesit_node_eq,
+ Streesit_node_eq, 2, 2, 0,
+ doc: /* Return non-nil if NODE1 and NODE2 are the same node.
+If any one of NODE1 and NODE2 is nil, return nil. */)
+ (Lisp_Object node1, Lisp_Object node2)
+{
+ if (NILP (node1) || NILP (node2))
+ return Qnil;
+ CHECK_TS_NODE (node1);
+ CHECK_TS_NODE (node2);
+
+ TSNode ts_node_1 = XTS_NODE (node1)->node;
+ TSNode ts_node_2 = XTS_NODE (node2)->node;
+
+ bool same_node = ts_node_eq (ts_node_1, ts_node_2);
+ return same_node ? Qt : Qnil;
+}
+
+/*** Query functions */
+
+DEFUN ("treesit-pattern-expand",
+ Ftreesit_pattern_expand,
+ Streesit_pattern_expand, 1, 1, 0,
+ doc: /* Expand PATTERN to its string form.
+
+PATTERN can be
+
+ :anchor
+ :?
+ :*
+ :+
+ :equal
+ :match
+ (TYPE PATTERN...)
+ [PATTERN...]
+ FIELD-NAME:
+ @CAPTURE-NAME
+ (_)
+ _
+ \"TYPE\"
+
+Consult Info node `(elisp)Pattern Matching' form detailed
+explanation. */)
+ (Lisp_Object pattern)
+{
+ if (EQ (pattern, intern_c_string (":anchor")))
+ return build_pure_c_string(".");
+ if (EQ (pattern, intern_c_string (":?")))
+ return build_pure_c_string("?");
+ if (EQ (pattern, intern_c_string (":*")))
+ return build_pure_c_string("*");
+ if (EQ (pattern, intern_c_string (":+")))
+ return build_pure_c_string("+");
+ if (EQ (pattern, intern_c_string (":equal")))
+ return build_pure_c_string("#equal");
+ if (EQ (pattern, intern_c_string (":match")))
+ return build_pure_c_string("#match");
+ Lisp_Object opening_delimeter =
+ build_pure_c_string (VECTORP (pattern) ? "[" : "(");
+ Lisp_Object closing_delimiter =
+ build_pure_c_string (VECTORP (pattern) ? "]" : ")");
+ if (VECTORP (pattern) || CONSP (pattern))
+ return concat3 (opening_delimeter,
+ Fmapconcat (intern_c_string
+ ("treesit-pattern-expand"),
+ pattern,
+ build_pure_c_string (" ")),
+ closing_delimiter);
+ return CALLN (Fformat, build_pure_c_string("%S"), pattern);
+}
+
+DEFUN ("treesit-query-expand",
+ Ftreesit_query_expand,
+ Streesit_query_expand, 1, 1, 0,
+ doc: /* Expand sexp QUERY to its string form.
+
+A PATTERN in QUERY can be
+
+ :anchor
+ :?
+ :*
+ :+
+ :equal
+ :match
+ (TYPE PATTERN...)
+ [PATTERN...]
+ FIELD-NAME:
+ @CAPTURE-NAME
+ (_)
+ _
+ \"TYPE\"
+
+Consult Info node `(elisp)Pattern Matching' form detailed
+explanation. */)
+ (Lisp_Object query)
+{
+ return Fmapconcat (intern_c_string ("treesit-pattern-expand"),
+ query, build_pure_c_string (" "));
+}
+
+static const char*
+ts_query_error_to_string (TSQueryError error)
+{
+ switch (error)
+ {
+ case TSQueryErrorNone:
+ return "None";
+ case TSQueryErrorSyntax:
+ return "Syntax error at";
+ case TSQueryErrorNodeType:
+ return "Node type error at";
+ case TSQueryErrorField:
+ return "Field error at";
+ case TSQueryErrorCapture:
+ return "Capture error at";
+ case TSQueryErrorStructure:
+ return "Structure error at";
+ default:
+ return "Unknown error";
+ }
+}
+
+/* This struct is used for passing captures to be check against
+ predicates. Captures we check for are the ones in START before
+ END. For example, if START and END are
+
+ START END
+ v v
+ (1 . (2 . (3 . (4 . (5 . (6 . nil))))))
+
+ We only look at captures 1 2 3. */
+struct capture_range
+{
+ Lisp_Object start;
+ Lisp_Object end;
+};
+
+/* Collect predicates for this match and return them in a list. Each
+ predicate is a list of strings and symbols. */
+static Lisp_Object
+ts_predicates_for_pattern
+(TSQuery *query, uint32_t pattern_index)
+{
+ uint32_t len;
+ const TSQueryPredicateStep *predicate_list =
+ ts_query_predicates_for_pattern (query, pattern_index, &len);
+ Lisp_Object result = Qnil;
+ Lisp_Object predicate = Qnil;
+ for (int idx=0; idx < len; idx++)
+ {
+ TSQueryPredicateStep step = predicate_list[idx];
+ switch (step.type)
+ {
+ case TSQueryPredicateStepTypeCapture:
+ {
+ uint32_t str_len;
+ const char *str = ts_query_capture_name_for_id
+ (query, step.value_id, &str_len);
+ predicate = Fcons (intern_c_string_1 (str, str_len),
+ predicate);
+ break;
+ }
+ case TSQueryPredicateStepTypeString:
+ {
+ uint32_t str_len;
+ const char *str = ts_query_string_value_for_id
+ (query, step.value_id, &str_len);
+ predicate = Fcons (make_string (str, str_len), predicate);
+ break;
+ }
+ case TSQueryPredicateStepTypeDone:
+ result = Fcons (Fnreverse (predicate), result);
+ predicate = Qnil;
+ break;
+ }
+ }
+ return Fnreverse (result);
+}
+
+/* Translate a capture NAME (symbol) to the text of the captured node.
+ Signals treesit-query-error if such node is not captured. */
+static Lisp_Object
+ts_predicate_capture_name_to_text
+(Lisp_Object name, struct capture_range captures)
+{
+ Lisp_Object node = Qnil;
+ for (Lisp_Object tail = captures.start;
+ !EQ (tail, captures.end); tail = XCDR (tail))
+ {
+ if (EQ (XCAR (XCAR (tail)), name))
+ {
+ node = XCDR (XCAR (tail));
+ break;
+ }
+ }
+
+ if (NILP (node))
+ xsignal3 (Qtreesit_query_error,
+ build_pure_c_string ("Cannot find captured node"),
+ name, build_pure_c_string ("A predicate can only refer to captured nodes in the same pattern"));
+
+ struct buffer *old_buffer = current_buffer;
+ set_buffer_internal
+ (XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer));
+ Lisp_Object text = Fbuffer_substring
+ (Ftreesit_node_start (node), Ftreesit_node_end (node));
+ set_buffer_internal (old_buffer);
+ return text;
+}
+
+/* Handles predicate (#equal A B). Return true if A equals B; return
+ false otherwise. A and B can be either string, or a capture name.
+ The capture name evaluates to the text its captured node spans in
+ the buffer. */
+static bool
+ts_predicate_equal
+(Lisp_Object args, struct capture_range captures)
+{
+ if (XFIXNUM (Flength (args)) != 2)
+ xsignal2 (Qtreesit_query_error, build_pure_c_string ("Predicate `equal' requires two arguments but only given"), Flength (args));
+
+ Lisp_Object arg1 = XCAR (args);
+ Lisp_Object arg2 = XCAR (XCDR (args));
+ Lisp_Object text1 = STRINGP (arg1) ? arg1 :
+ ts_predicate_capture_name_to_text (arg1, captures);
+ Lisp_Object text2 = STRINGP (arg2) ? arg2 :
+ ts_predicate_capture_name_to_text (arg2, captures);
+
+ if (NILP (Fstring_equal (text1, text2)))
+ return false;
+ else
+ return true;
+}
+
+/* Handles predicate (#match "regexp" @node). Return true if "regexp"
+ matches the text spanned by @node; return false otherwise. Matching
+ is case-sensitive. */
+static bool
+ts_predicate_match
+(Lisp_Object args, struct capture_range captures)
+{
+ if (XFIXNUM (Flength (args)) != 2)
+ xsignal2 (Qtreesit_query_error, build_pure_c_string ("Predicate `equal' requires two arguments but only given"), Flength (args));
+
+ Lisp_Object regexp = XCAR (args);
+ Lisp_Object capture_name = XCAR (XCDR (args));
+ Lisp_Object text = ts_predicate_capture_name_to_text
+ (capture_name, captures);
+
+ /* It's probably common to get the argument order backwards. Catch
+ this mistake early and show helpful explanation, because Emacs
+ loves you. (We put the regexp first because that's what
+ string-match does.) */
+ if (!STRINGP (regexp))
+ xsignal1 (Qtreesit_query_error, build_pure_c_string ("The first argument to `match' should be a regexp string, not a capture name"));
+ if (!SYMBOLP (capture_name))
+ xsignal1 (Qtreesit_query_error, build_pure_c_string ("The second argument to `match' should be a capture name, not a string"));
+
+ if (fast_string_match (regexp, text) >= 0)
+ return true;
+ else
+ return false;
+}
+
+/* About predicates: I decide to hard-code predicates in C instead of
+ implementing an extensible system where predicates are translated
+ to Lisp functions, and new predicates can be added by extending a
+ list of functions, because I really couldn't imagine any useful
+ predicates besides equal and match. If we later found out that
+ such system is indeed useful and necessary, it can be easily
+ added. */
+
+/* If all predicates in PREDICATES passes, return true; otherwise
+ return false. */
+static bool
+ts_eval_predicates
+(struct capture_range captures, Lisp_Object predicates)
+{
+ bool pass = true;
+ /* Evaluate each predicates. */
+ for (Lisp_Object tail = predicates;
+ !NILP (tail); tail = XCDR (tail))
+ {
+ Lisp_Object predicate = XCAR (tail);
+ Lisp_Object fn = XCAR (predicate);
+ Lisp_Object args = XCDR (predicate);
+ if (!NILP (Fstring_equal (fn, build_pure_c_string("equal"))))
+ pass = ts_predicate_equal (args, captures);
+ else if (!NILP (Fstring_equal
+ (fn, build_pure_c_string("match"))))
+ pass = ts_predicate_match (args, captures);
+ else
+ xsignal3 (Qtreesit_query_error,
+ build_pure_c_string ("Invalid predicate"),
+ fn, build_pure_c_string ("Currently Emacs only supports equal and match predicate"));
+ }
+ /* If all predicates passed, add captures to result list. */
+ return pass;
+}
+
+DEFUN ("treesit-query-compile",
+ Ftreesit_query_compile,
+ Streesit_query_compile, 2, 2, 0,
+ doc: /* Compile QUERY to a compiled query.
+
+Querying a compiled query is much faster than an uncompiled one.
+LANGUAGE is the language this query is for.
+
+Signals treesit-query-error if QUERY is malformed or something else
+goes wrong. You can use `treesit-query-validate' to debug the
+query. */)
+ (Lisp_Object language, Lisp_Object query)
+{
+ if (NILP (Ftreesit_query_p (query)))
+ wrong_type_argument (Qtreesit_query_p, query);
+ CHECK_SYMBOL (language);
+ if (TS_COMPILED_QUERY_P (query))
+ return query;
+
+ TSLanguage *ts_lang = ts_load_language (language, true);
+ uint32_t error_offset;
+ TSQueryError error_type;
+
+ struct Lisp_TS_Query *lisp_query
+ = make_ts_query (query, ts_lang, &error_offset, &error_type);
+
+ if (lisp_query == NULL)
+ xsignal2 (Qtreesit_query_error,
+ build_string (ts_query_error_to_string (error_type)),
+ make_fixnum (error_offset + 1));
+
+ return make_lisp_ptr (lisp_query, Lisp_Vectorlike);
+}
+
+DEFUN ("treesit-query-capture",
+ Ftreesit_query_capture,
+ Streesit_query_capture, 2, 5, 0,
+ doc: /* Query NODE with patterns in QUERY.
+
+Return a list of (CAPTURE_NAME . NODE). CAPTURE_NAME is the name
+assigned to the node in PATTERN. NODE is the captured node.
+
+QUERY is either a string query, a sexp query, or a compiled query.
+See Info node `(elisp)Pattern Matching' for how to write a query in
+either string or s-expression form. When using repeatedly, a compiled
+query is much faster than a string or sexp one, so it is recommend to
+compile your queries if it will be used over and over.
+
+BEG and END, if both non-nil, specifies the range in which the query
+is executed. If NODE-ONLY is non-nil, return a list of nodes.
+
+Besides a node, NODE can also be a parser, then the root node of that
+parser is used; NODE can be a language symbol, then the root node of a
+parser for that language is used. If such a parser doesn't exist, it
+is created.
+
+Signals treesit-query-error if QUERY is malformed or something else
+goes wrong. You can use `treesit-query-validate' to debug the
+query. */)
+ (Lisp_Object node, Lisp_Object query,
+ Lisp_Object beg, Lisp_Object end, Lisp_Object node_only)
+{
+ if (!NILP (beg))
+ CHECK_INTEGER (beg);
+ if (!NILP (end))
+ CHECK_INTEGER (end);
+
+ if (!(TS_COMPILED_QUERY_P (query)
+ || CONSP (query) || STRINGP (query)))
+ wrong_type_argument (Qtreesit_query_p, query);
+
+
+ Lisp_Object lisp_node;
+ if (TS_NODEP (node))
+ lisp_node = node;
+ else if (TS_PARSERP (node))
+ lisp_node = Ftreesit_parser_root_node (node);
+ else if (SYMBOLP (node))
+ {
+ Lisp_Object parser
+ = Ftreesit_parser_create (node, Fcurrent_buffer (), Qnil);
+ lisp_node = Ftreesit_parser_root_node (parser);
+ }
+ else
+ xsignal2 (Qwrong_type_argument,
+ list4 (Qor, Qtreesit_node_p,
+ Qtreesit_parser_p, Qsymbolp),
+ node);
+
+ /* Extract C values from Lisp objects. */
+ TSNode ts_node = XTS_NODE (lisp_node)->node;
+ Lisp_Object lisp_parser = XTS_NODE (lisp_node)->parser;
+ ptrdiff_t visible_beg =
+ XTS_PARSER (XTS_NODE (lisp_node)->parser)->visible_beg;
+ const TSLanguage *lang = ts_parser_language
+ (XTS_PARSER (lisp_parser)->parser);
+
+ /* Initialize query objects, and execute query. */
+ struct Lisp_TS_Query *lisp_query;
+ if (TS_COMPILED_QUERY_P (query))
+ lisp_query = XTS_COMPILED_QUERY (query);
+ else
+ {
+ uint32_t error_offset;
+ TSQueryError error_type;
+ lisp_query = make_ts_query (query, lang,
+ &error_offset, &error_type);
+ if (lisp_query == NULL)
+ {
+ xsignal3 (Qtreesit_query_error,
+ build_string
+ (ts_query_error_to_string (error_type)),
+ make_fixnum (error_offset + 1),
+ build_pure_c_string("Debug the query with `treesit-query-validate'"));
+ }
+ /* We don't need need to free TS_QUERY and CURSOR, they are stored
+ in a lisp object, which is tracked by gc. */
+ }
+ TSQuery *ts_query = lisp_query->query;
+ TSQueryCursor *cursor = lisp_query->cursor;
+
+ if (!NILP (beg) && !NILP (end))
+ {
+ EMACS_INT beg_byte = XFIXNUM (beg);
+ EMACS_INT end_byte = XFIXNUM (end);
+ ts_query_cursor_set_byte_range
+ (cursor, (uint32_t) beg_byte - visible_beg,
+ (uint32_t) end_byte - visible_beg);
+ }
+
+ ts_query_cursor_exec (cursor, ts_query, ts_node);
+ TSQueryMatch match;
+
+ /* Go over each match, collect captures and predicates. Include the
+ captures in the RESULT list unconditionally as we get them, then
+ test for predicates. If predicates pass, then all good, if
+ predicates don't pass, revert the result back to the result
+ before this loop (PREV_RESULT). (Predicates control the entire
+ match.) This way we don't need to create a list of captures in
+ every for loop and nconc it to RESULT every time. That is indeed
+ the initial implementation in which Yoav found nconc being the
+ bottleneck (98.4% of the running time spent on nconc). */
+ Lisp_Object result = Qnil;
+ Lisp_Object prev_result = result;
+ while (ts_query_cursor_next_match (cursor, &match))
+ {
+ /* Record the checkpoint that we may roll back to. */
+ prev_result = result;
+ /* Get captured nodes. */
+ const TSQueryCapture *captures = match.captures;
+ for (int idx=0; idx < match.capture_count; idx++)
+ {
+ uint32_t capture_name_len;
+ TSQueryCapture capture = captures[idx];
+ Lisp_Object captured_node =
+ make_ts_node(lisp_parser, capture.node);
+
+ Lisp_Object cap;
+ if (NILP (node_only))
+ {
+ const char *capture_name = ts_query_capture_name_for_id
+ (ts_query, capture.index, &capture_name_len);
+ cap =
+ Fcons (intern_c_string_1 (capture_name, capture_name_len),
+ captured_node);
+ }
+ else
+ {
+ cap = captured_node;
+ }
+ result = Fcons (cap, result);
+ }
+ /* Get predicates. */
+ Lisp_Object predicates =
+ ts_predicates_for_pattern (ts_query, match.pattern_index);
+
+ /* captures_lisp = Fnreverse (captures_lisp); */
+ struct capture_range captures_range = { result, prev_result };
+ if (!ts_eval_predicates (captures_range, predicates))
+ {
+ /* Predicates didn't pass, roll back. */
+ result = prev_result;
+ }
+ }
+ return Fnreverse (result);
+}
+
+/*** Navigation */
+
+/* Return the next/previous named/unnamed sibling of NODE. FORWARD
+ controls the direction and NAMED controls the nameness.
+ */
+static TSNode
+ts_traverse_sibling_helper (TSNode node, bool forward, bool named)
+{
+ if (forward)
+ {
+ if (named)
+ return ts_node_next_named_sibling (node);
+ else
+ return ts_node_next_sibling (node);
+ }
+ else
+ {
+ if (named)
+ return ts_node_prev_named_sibling (node);
+ else
+ return ts_node_prev_sibling (node);
+ }
+}
+
+/* Return true if NODE matches PRED. PRED can be a string or a
+ function. This function doesn't check for PRED's type. */
+static bool
+ts_traverse_match_predicate
+(TSNode node, Lisp_Object pred, Lisp_Object parser)
+{
+ if (STRINGP (pred))
+ {
+ const char *type = ts_node_type (node);
+ return (fast_c_string_match_ignore_case
+ (pred, type, strlen (type)) >= 0);
+ }
+ else
+ {
+ Lisp_Object lisp_node = make_ts_node (parser, node);
+ return !NILP (CALLN (Ffuncall, pred, lisp_node));
+ }
+
+}
+
+/* Traverse the parse tree starting from ROOT (but ROOT is not
+ matches against PRED). PRED can be a function (takes a node and
+ returns nil/non-nil),or a string (treated as regexp matching the
+ node's type, ignores case, must be all single byte characters). If
+ the node satisfies PRED , terminate, set ROOT to that node, and
+ return true. If no node satisfies PRED, return FALSE. PARSER is
+ the parser of ROOT.
+
+ LIMIT is the number of levels we descend in the tree. If NO_LIMIT
+ is true, LIMIT is ignored. FORWARD controls the direction in which
+ we traverse the tree, true means forward, false backward. If NAMED
+ is true, only traverse named nodes, if false, all nodes. If
+ SKIP_ROOT is true, don't match ROOT. */
+static bool
+ts_search_dfs
+(TSNode *root, Lisp_Object pred, Lisp_Object parser,
+ bool named, bool forward, ptrdiff_t limit, bool no_limit,
+ bool skip_root)
+{
+ /* TSTreeCursor doesn't allow us to move backward, so we can't use
+ it. We could use limit == -1 to indicate no_limit == true, but
+ separating them is safer. */
+ TSNode node = *root;
+
+ if (!skip_root && ts_traverse_match_predicate (node, pred, parser))
+ {
+ *root = node;
+ return true;
+ }
+
+ if (!no_limit && limit <= 0)
+ return false;
+ else
+ {
+ int count = named ?
+ ts_node_named_child_count( node)
+ : ts_node_child_count (node);
+ for (int offset=0; offset < count; offset++)
+ {
+ uint32_t idx = forward ? offset
+ : count - offset - 1;
+ TSNode child = ts_node_child (node, idx);
+
+ if (!ts_node_is_null (child)
+ && ts_search_dfs (&child, pred, parser, named,
+ forward, limit - 1, no_limit, false))
+ {
+ *root = child;
+ return true;
+ }
+ }
+ return false;
+ }
+}
+
+/* Go thought the whole tree linearly depth first, starting from
+ START. PRED, PARSER, NAMED, FORWARD are the same as in
+ ts_search_subtre. If UP_ONLY is true, never go to children, only
+ sibling and parents. If SKIP_START is true, don'tt match
+ START. */
+static bool
+ts_search_forward
+(TSNode *start, Lisp_Object pred, Lisp_Object parser,
+ bool named, bool forward, bool up_only, bool skip_start)
+{
+ TSNode node = *start;
+
+ if (!up_only && ts_search_dfs
+ (start, pred, parser, named, forward, 0, true, skip_start))
+ return true;
+
+ TSNode next = ts_traverse_sibling_helper (node, forward, named);
+ while (ts_node_is_null (next))
+ {
+ node = ts_node_parent (node);
+ if (ts_node_is_null (node))
+ return false;
+
+ if (ts_traverse_match_predicate (node, pred, parser))
+ {
+ *start = node;
+ return true;
+ }
+ next = ts_traverse_sibling_helper (node, forward, named);
+ }
+ if (ts_search_forward
+ (&next, pred, parser, named, forward, up_only, false))
+ {
+ *start = next;
+ return true;
+ }
+ else
+ return false;
+}
+
+DEFUN ("treesit-search-subtree",
+ Ftreesit_search_subtree,
+ Streesit_search_subtree, 2, 5, 0,
+ doc: /* Traverse the parse tree depth-first.
+
+Traverse the subtree of NODE, and match PREDICATE with each node along
+the way. PREDICATE is a regexp string that matches against each
+node's type case-insensitively, or a function that takes a node and
+returns nil/non-nil.
+
+By default, only traverse named nodes, if ALL is non-nil, traverse all
+nodes. If BACKWARD is non-nil, traverse backwards. If LIMIT is
+non-nil, we only traverse that number of levels down in the tree.
+
+Return the first matched node, or nil if none matches. */)
+ (Lisp_Object node, Lisp_Object predicate, Lisp_Object all,
+ Lisp_Object backward, Lisp_Object limit)
+{
+ CHECK_TS_NODE (node);
+ CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
+ list3 (Qor, Qstringp, Qfunctionp), predicate);
+ CHECK_SYMBOL (all);
+ CHECK_SYMBOL (backward);
+
+ ptrdiff_t the_limit = 0;
+ bool no_limit = false;
+ if (NILP (limit))
+ no_limit = true;
+ else
+ {
+ CHECK_FIXNUM (limit);
+ the_limit = XFIXNUM (limit);
+ }
+
+ TSNode ts_node = XTS_NODE (node)->node;
+ Lisp_Object parser = XTS_NODE (node)->parser;
+ if (ts_search_dfs
+ (&ts_node, predicate, parser, NILP (all),
+ NILP (backward), the_limit, no_limit, false))
+ {
+ return make_ts_node (parser, ts_node);
+ }
+ else
+ return Qnil;
+}
+
+DEFUN ("treesit-search-forward",
+ Ftreesit_search_forward,
+ Streesit_search_forward, 2, 5, 0,
+ doc: /* Search for node in the parse tree.
+
+Start traversing the tree from node START, and match PREDICATE with
+each node along the way (except START). PREDICATE is a regexp string
+that matches against each node's type case-insensitively, or a
+function that takes a node and returns nil/non-nil.
+
+By default, only search for named nodes, if ALL is non-nil, search for
+all nodes. If BACKWARD is non-nil, search backwards.
+
+Return the first matched node, or nil if none matches.
+
+For a tree like the below where START is marked 1, traverse as
+numbered:
+ 16
+ |
+ 3--------4-----------8
+ | | |
+ o--o-+--1 5--+--6 9---+-----12
+ | | | | | |
+ o o 2 7 +-+-+ +--+--+
+ | | | | |
+ 10 11 13 14 15
+
+If UP is non-nil, only traverse to siblings and parents. In that
+case, only 1 3 4 8 16 would be traversed. */)
+ (Lisp_Object start, Lisp_Object predicate, Lisp_Object all,
+ Lisp_Object backward, Lisp_Object up)
+{
+ CHECK_TS_NODE (start);
+ CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
+ list3 (Qor, Qstringp, Qfunctionp), predicate);
+ CHECK_SYMBOL (all);
+ CHECK_SYMBOL (backward);
+ CHECK_SYMBOL (up);
+
+ TSNode ts_start = XTS_NODE (start)->node;
+ Lisp_Object parser = XTS_NODE (start)->parser;
+ if (ts_search_forward
+ (&ts_start, predicate, parser, NILP (all),
+ NILP (backward), !NILP (up), true))
+ {
+ return make_ts_node (parser, ts_start);
+ }
+ else
+ return Qnil;
+}
+
+/* Recursively traverse the tree under CURSOR, and append the result
+ subtree to PARENT's cdr. See more in Ftreesit_induce_sparse_tree.
+ Note that the top-level children list is reversed, because
+ reasons. */
+static void
+ts_build_sparse_tree
+(TSTreeCursor *cursor, Lisp_Object parent, Lisp_Object pred,
+ Lisp_Object process_fn, ptrdiff_t limit,
+ bool no_limit, Lisp_Object parser)
+{
+
+ TSNode node = ts_tree_cursor_current_node (cursor);
+ bool match = ts_traverse_match_predicate (node, pred, parser);
+ if (match)
+ {
+ /* If this node matches pred, add a new node to the parent's
+ children list. */
+ Lisp_Object lisp_node = make_ts_node (parser, node);
+ if (!NILP (process_fn))
+ {
+ lisp_node = CALLN (Ffuncall, process_fn, lisp_node);
+ }
+ Lisp_Object this = Fcons (lisp_node, Qnil);
+ Fsetcdr (parent, Fcons (this, Fcdr (parent)));
+ /* Now for children nodes, this is the new parent. */
+ parent = this;
+ }
+ /* Go through each child. */
+ if ((no_limit || limit > 0)
+ && ts_tree_cursor_goto_first_child (cursor))
+ {
+ do
+ {
+ /* Make sure not to use node after the recursive funcall.
+ Then C compilers should be smart enough not to copy NODE
+ to stack. */
+ ts_build_sparse_tree
+ (cursor, parent, pred, process_fn,
+ limit - 1, no_limit, parser);
+ }
+ while (ts_tree_cursor_goto_next_sibling (cursor));
+ /* Don't forget to come back to this node. */
+ ts_tree_cursor_goto_parent (cursor);
+ }
+ /* Before we go, reverse children in the sparse tree. */
+ if (match)
+ {
+ /* When match == true, "parent" is actually the node we added in
+ this layer (parent = this). */
+ Fsetcdr (parent, Fnreverse (Fcdr (parent)));
+ }
+}
+
+DEFUN ("treesit-induce-sparse-tree",
+ Ftreesit_induce_sparse_tree,
+ Streesit_induce_sparse_tree, 2, 4, 0,
+ doc: /* Create a sparse tree of ROOT's subtree.
+
+Basically, take the subtree under ROOT, and comb it so only the nodes
+that match PREDICATE are left, like picking out grapes on the vine.
+PREDICATE is a regexp string that matches against each node's type
+case-insensitively.
+
+For a subtree on the left that consist of both numbers and letters, if
+PREDICATE is "is letter", the returned tree is the one on the right.
+
+ a a a
+ | | |
+ +---+---+ +---+---+ +---+---+
+ | | | | | | | | |
+ b 1 2 b | | b c d
+ | | => | | => |
+ c +--+ c + e
+ | | | | |
+ +--+ d 4 +--+ d
+ | | |
+ e 5 e
+
+If PROCESS-FN is non-nil, instead of returning the matched nodes, pass
+each node to PROCESS-FN use the return value instead. If non-nil,
+LIMIT is the number of levels to go down from ROOT.
+
+Each node in the returned tree looks like (NODE . (CHILD ...)). The
+root of this tree might be nil, if ROOT doesn't match PREDICATE. If
+no node matches PRED, return nil.
+
+PREDICATE can also be a function that takes a node and returns
+nil/non-nil, but it is slower and more memory consuming than
+regexp. */)
+ (Lisp_Object root, Lisp_Object predicate, Lisp_Object process_fn,
+ Lisp_Object limit)
+{
+ CHECK_TS_NODE (root);
+ CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
+ list3 (Qor, Qstringp, Qfunctionp), predicate);
+
+ if (!NILP (process_fn))
+ CHECK_TYPE (FUNCTIONP (process_fn), Qfunctionp, process_fn);
+ ptrdiff_t the_limit = 0;
+ bool no_limit = false;
+ if (NILP (limit))
+ no_limit = true;
+ else
+ {
+ CHECK_FIXNUM (limit);
+ the_limit = XFIXNUM (limit);
+ }
+
+ TSTreeCursor cursor = ts_tree_cursor_new (XTS_NODE (root)->node);
+ Lisp_Object parser = XTS_NODE (root)->parser;
+ Lisp_Object parent = Fcons (Qnil, Qnil);
+ ts_build_sparse_tree
+ (&cursor, parent, predicate, process_fn,
+ the_limit, no_limit, parser);
+ Fsetcdr (parent, Fnreverse (Fcdr (parent)));
+ if (NILP (Fcdr (parent)))
+ return Qnil;
+ else
+ return parent;
+}
+
+/*** Initialization */
+
+/* Initialize the tree-sitter routines. */
+void
+syms_of_treesit (void)
+{
+ DEFSYM (Qtreesit_parser_p, "treesit-parser-p");
+ DEFSYM (Qtreesit_node_p, "treesit-node-p");
+ DEFSYM (Qtreesit_compiled_query_p, "treesit-compiled-query-p");
+ DEFSYM (Qtreesit_query_p, "treesit-query-p");
+ DEFSYM (Qnamed, "named");
+ DEFSYM (Qmissing, "missing");
+ DEFSYM (Qextra, "extra");
+ DEFSYM (Qhas_changes, "has-changes");
+ DEFSYM (Qhas_error, "has-error");
+
+ DEFSYM (Qtreesit_error, "treesit-error");
+ DEFSYM (Qtreesit_query_error, "treesit-query-error");
+ DEFSYM (Qtreesit_parse_error, "treesit-parse-error");
+ DEFSYM (Qtreesit_range_invalid, "treesit-range-invalid");
+ DEFSYM (Qtreesit_buffer_too_large,
+ "treesit-buffer-too-large");
+ DEFSYM (Qtreesit_load_language_error,
+ "treesit-load-language-error");
+ DEFSYM (Qtreesit_node_outdated,
+ "treesit-node-outdated");
+ DEFSYM (Quser_emacs_directory,
+ "user-emacs-directory");
+ DEFSYM (Qtreesit_parser_deleted, "treesit-parser-deleted");
+
+ DEFSYM (Qor, "or");
+
+ define_error (Qtreesit_error, "Generic tree-sitter error", Qerror);
+ define_error (Qtreesit_query_error, "Query pattern is malformed",
+ Qtreesit_error);
+ /* Should be impossible, no need to document this error. */
+ define_error (Qtreesit_parse_error, "Parse failed",
+ Qtreesit_error);
+ define_error (Qtreesit_range_invalid,
+ "RANGES are invalid, they have to be ordered and not overlapping",
+ Qtreesit_error);
+ define_error (Qtreesit_buffer_too_large, "Buffer too large (> 4GB)",
+ Qtreesit_error);
+ define_error (Qtreesit_load_language_error,
+ "Cannot load language definition",
+ Qtreesit_error);
+ define_error (Qtreesit_node_outdated,
+ "This node is outdated, please retrieve a new one",
+ Qtreesit_error);
+ define_error (Qtreesit_parser_deleted,
+ "This parser is deleted and cannot be used",
+ Qtreesit_error);
+
+ DEFVAR_LISP ("treesit-load-name-override-list",
+ Vtreesit_load_name_override_list,
+ doc:
+ /* An override list for unconventional tree-sitter libraries.
+
+By default, Emacs assumes the dynamic library for LANG is
+libtree-sitter-LANG.EXT, where EXT is the OS specific extension for
+dynamic libraries. Emacs also assumes that the name of the C function
+the library provides is tree_sitter_LANG. If that is not the case,
+add an entry
+
+ (LANG LIBRARY-BASE-NAME FUNCTION-NAME)
+
+to this list, where LIBRARY-BASE-NAME is the filename of the dynamic
+library without extension, FUNCTION-NAME is the function provided by
+the library. */);
+ Vtreesit_load_name_override_list = Qnil;
+
+ DEFVAR_LISP ("treesit-extra-load-path",
+ Vtreesit_extra_load_path,
+ doc:
+ /* Extra load paths of tree-sitter language definitions.
+When trying to load a tree-sitter language definition,
+Emacs looks at directories in this variable,
+`user-emacs-directory'/tree-sitter, and system default locations for
+dynamic libraries, in that order. */);
+ Vtreesit_extra_load_path = Qnil;
+
+ defsubr (&Streesit_language_available_p);
+
+ defsubr (&Streesit_parser_p);
+ defsubr (&Streesit_node_p);
+ defsubr (&Streesit_compiled_query_p);
+ defsubr (&Streesit_query_p);
+
+ defsubr (&Streesit_node_parser);
+
+ defsubr (&Streesit_parser_create);
+ defsubr (&Streesit_parser_delete);
+ defsubr (&Streesit_parser_list);
+ defsubr (&Streesit_parser_buffer);
+ defsubr (&Streesit_parser_language);
+
+ defsubr (&Streesit_parser_root_node);
+ /* defsubr (&Streesit_parse_string); */
+
+ defsubr (&Streesit_parser_set_included_ranges);
+ defsubr (&Streesit_parser_included_ranges);
+
+ defsubr (&Streesit_node_type);
+ defsubr (&Streesit_node_start);
+ defsubr (&Streesit_node_end);
+ defsubr (&Streesit_node_string);
+ defsubr (&Streesit_node_parent);
+ defsubr (&Streesit_node_child);
+ defsubr (&Streesit_node_check);
+ defsubr (&Streesit_node_field_name_for_child);
+ defsubr (&Streesit_node_child_count);
+ defsubr (&Streesit_node_child_by_field_name);
+ defsubr (&Streesit_node_next_sibling);
+ defsubr (&Streesit_node_prev_sibling);
+ defsubr (&Streesit_node_first_child_for_pos);
+ defsubr (&Streesit_node_descendant_for_range);
+ defsubr (&Streesit_node_eq);
+
+ defsubr (&Streesit_pattern_expand);
+ defsubr (&Streesit_query_expand);
+ defsubr (&Streesit_query_compile);
+ defsubr (&Streesit_query_capture);
+
+ defsubr (&Streesit_search_subtree);
+ defsubr (&Streesit_search_forward);
+ defsubr (&Streesit_induce_sparse_tree);
+}