diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.in | 10 | ||||
-rw-r--r-- | src/alloc.c | 13 | ||||
-rw-r--r-- | src/casefiddle.c | 12 | ||||
-rw-r--r-- | src/data.c | 6 | ||||
-rw-r--r-- | src/emacs.c | 7 | ||||
-rw-r--r-- | src/eval.c | 13 | ||||
-rw-r--r-- | src/insdel.c | 47 | ||||
-rw-r--r-- | src/json.c | 16 | ||||
-rw-r--r-- | src/lisp.h | 9 | ||||
-rw-r--r-- | src/lread.c | 8 | ||||
-rw-r--r-- | src/print.c | 28 | ||||
-rw-r--r-- | src/treesit.c | 1657 | ||||
-rw-r--r-- | src/treesit.h | 137 |
13 files changed, 1943 insertions, 20 deletions
diff --git a/src/Makefile.in b/src/Makefile.in index 7d15b7afd51..a21af42c0b9 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -343,6 +343,10 @@ JSON_LIBS = @JSON_LIBS@ JSON_CFLAGS = @JSON_CFLAGS@ JSON_OBJ = @JSON_OBJ@ +TREE_SITTER_LIBS = @TREE_SITTER_LIBS@ +TREE_SITTER_FLAGS = @TREE_SITTER_FLAGS@ +TREE_SITTER_OBJ = @TREE_SITTER_OBJ@ + INTERVALS_H = dispextern.h intervals.h composite.h GETLOADAVG_LIBS = @GETLOADAVG_LIBS@ @@ -406,7 +410,7 @@ EMACS_CFLAGS=-Demacs $(MYCPPFLAGS) -I. -I$(srcdir) \ $(XINPUT_CFLAGS) $(WEBP_CFLAGS) $(WEBKIT_CFLAGS) $(LCMS2_CFLAGS) \ $(SETTINGS_CFLAGS) $(FREETYPE_CFLAGS) $(FONTCONFIG_CFLAGS) \ $(HARFBUZZ_CFLAGS) $(LIBOTF_CFLAGS) $(M17N_FLT_CFLAGS) $(DEPFLAGS) \ - $(LIBSYSTEMD_CFLAGS) $(JSON_CFLAGS) $(XSYNC_CFLAGS) \ + $(LIBSYSTEMD_CFLAGS) $(JSON_CFLAGS) $(XSYNC_CFLAGS) $(TREE_SITTER_CFLAGS) \ $(LIBGNUTLS_CFLAGS) $(NOTIFY_CFLAGS) $(CAIRO_CFLAGS) \ $(WERROR_CFLAGS) $(HAIKU_CFLAGS) $(XCOMPOSITE_CFLAGS) $(XSHAPE_CFLAGS) ALL_CFLAGS = $(EMACS_CFLAGS) $(WARN_CFLAGS) $(CFLAGS) @@ -445,7 +449,7 @@ base_obj = dispnew.o frame.o scroll.o xdisp.o menu.o $(XMENU_OBJ) window.o \ $(if $(HYBRID_MALLOC),sheap.o) \ $(MSDOS_OBJ) $(MSDOS_X_OBJ) $(NS_OBJ) $(CYGWIN_OBJ) $(FONT_OBJ) \ $(W32_OBJ) $(WINDOW_SYSTEM_OBJ) $(XGSELOBJ) $(JSON_OBJ) \ - $(HAIKU_OBJ) $(PGTK_OBJ) + $(TREE_SITTER_OBJ) $(HAIKU_OBJ) $(PGTK_OBJ) doc_obj = $(base_obj) $(NS_OBJC_OBJ) obj = $(doc_obj) $(HAIKU_CXX_OBJ) @@ -565,7 +569,7 @@ LIBES = $(LIBS) $(W32_LIBS) $(LIBS_GNUSTEP) $(PGTK_LIBS) $(LIBX_BASE) $(LIBIMAGE $(LIBGNUTLS_LIBS) $(LIB_PTHREAD) $(GETADDRINFO_A_LIBS) $(LCMS2_LIBS) \ $(NOTIFY_LIBS) $(LIB_MATH) $(LIBZ) $(LIBMODULES) $(LIBSYSTEMD_LIBS) \ $(JSON_LIBS) $(LIBGMP) $(LIBGCCJIT_LIBS) $(XINPUT_LIBS) $(HAIKU_LIBS) \ - $(SQLITE3_LIBS) $(XCOMPOSITE_LIBS) $(XSHAPE_LIBS) + $(TREE_SITTER_LIBS) $(SQLITE3_LIBS) $(XCOMPOSITE_LIBS) $(XSHAPE_LIBS) ## FORCE it so that admin/unidata can decide whether this file is ## up-to-date. Although since charprop depends on bootstrap-emacs, diff --git a/src/alloc.c b/src/alloc.c index 43fbbb79bed..40a3e235eab 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -50,6 +50,10 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ #include TERM_HEADER #endif /* HAVE_WINDOW_SYSTEM */ +#ifdef HAVE_TREE_SITTER +#include "treesit.h" +#endif + #include <flexmember.h> #include <verify.h> #include <execinfo.h> /* For backtrace. */ @@ -3162,6 +3166,15 @@ cleanup_vector (struct Lisp_Vector *vector) if (uptr->finalizer) uptr->finalizer (uptr->p); } +#ifdef HAVE_TREE_SITTER + else if (PSEUDOVECTOR_TYPEP (&vector->header, PVEC_TS_PARSER)) + { + struct Lisp_TS_Parser *lisp_parser + = PSEUDOVEC_STRUCT (vector, Lisp_TS_Parser); + ts_tree_delete(lisp_parser->tree); + ts_parser_delete(lisp_parser->parser); + } +#endif #ifdef HAVE_MODULES else if (PSEUDOVECTOR_TYPEP (&vector->header, PVEC_MODULE_FUNCTION)) { diff --git a/src/casefiddle.c b/src/casefiddle.c index 2ea5f09b4c5..3022c5cc7d6 100644 --- a/src/casefiddle.c +++ b/src/casefiddle.c @@ -30,6 +30,10 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ #include "composite.h" #include "keymap.h" +#ifdef HAVE_TREE_SITTER +#include "treesit.h" +#endif + enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP}; /* State for casing individual characters. */ @@ -530,6 +534,11 @@ casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e) modify_text (start, end); prepare_casing_context (&ctx, flag, true); +#ifdef HAVE_TREE_SITTER + ptrdiff_t start_byte = CHAR_TO_BYTE (start); + ptrdiff_t old_end_byte = CHAR_TO_BYTE (end); +#endif + ptrdiff_t orig_end = end; record_delete (start, make_buffer_string (start, end, true), false); if (NILP (BVAR (current_buffer, enable_multibyte_characters))) @@ -548,6 +557,9 @@ casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e) { signal_after_change (start, end - start - added, end - start); update_compositions (start, end, CHECK_ALL); +#ifdef HAVE_TREE_SITTER + ts_record_change (start_byte, old_end_byte, CHAR_TO_BYTE (end)); +#endif } return orig_end + added; diff --git a/src/data.c b/src/data.c index 72dcf6f878d..a28bf414147 100644 --- a/src/data.c +++ b/src/data.c @@ -261,6 +261,10 @@ for example, (type-of 1) returns `integer'. */) return Qxwidget; case PVEC_XWIDGET_VIEW: return Qxwidget_view; + case PVEC_TS_PARSER: + return Qtreesit_parser; + case PVEC_TS_NODE: + return Qtreesit_node; case PVEC_SQLITE: return Qsqlite; /* "Impossible" cases. */ @@ -4258,6 +4262,8 @@ syms_of_data (void) DEFSYM (Qterminal, "terminal"); DEFSYM (Qxwidget, "xwidget"); DEFSYM (Qxwidget_view, "xwidget-view"); + DEFSYM (Qtreesit_parser, "treesit-parser"); + DEFSYM (Qtreesit_node, "treesit-node"); DEFSYM (Qdefun, "defun"); diff --git a/src/emacs.c b/src/emacs.c index ca99a8c787d..9f20a1597c9 100644 --- a/src/emacs.c +++ b/src/emacs.c @@ -136,6 +136,10 @@ extern char etext; #include <sys/resource.h> #endif +#ifdef HAVE_TREE_SITTER +#include "treesit.h" +#endif + #include "pdumper.h" #include "fingerprint.h" #include "epaths.h" @@ -2225,6 +2229,9 @@ Using an Emacs configured with --with-x-toolkit=lucid does not have this problem syms_of_module (); #endif +#ifdef HAVE_TREE_SITTER + syms_of_treesit (); +#endif #ifdef HAVE_SOUND syms_of_sound (); #endif diff --git a/src/eval.c b/src/eval.c index 77ec47e2b79..3ec03de1376 100644 --- a/src/eval.c +++ b/src/eval.c @@ -1835,6 +1835,19 @@ signal_error (const char *s, Lisp_Object arg) xsignal (Qerror, Fcons (build_string (s), arg)); } +void +define_error (Lisp_Object name, const char *message, Lisp_Object parent) +{ + eassert (SYMBOLP (name)); + eassert (SYMBOLP (parent)); + Lisp_Object parent_conditions = Fget (parent, Qerror_conditions); + eassert (CONSP (parent_conditions)); + eassert (!NILP (Fmemq (parent, parent_conditions))); + eassert (NILP (Fmemq (name, parent_conditions))); + Fput (name, Qerror_conditions, pure_cons (name, parent_conditions)); + Fput (name, Qerror_message, build_pure_c_string (message)); +} + /* Use this for arithmetic overflow, e.g., when an integer result is too large even for a bignum. */ void diff --git a/src/insdel.c b/src/insdel.c index 6f180ac5800..4676330cb79 100644 --- a/src/insdel.c +++ b/src/insdel.c @@ -31,6 +31,10 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ #include "region-cache.h" #include "pdumper.h" +#ifdef HAVE_TREE_SITTER +#include "treesit.h" +#endif + static void insert_from_string_1 (Lisp_Object, ptrdiff_t, ptrdiff_t, ptrdiff_t, ptrdiff_t, bool, bool); static void insert_from_buffer_1 (struct buffer *, ptrdiff_t, ptrdiff_t, bool); @@ -940,6 +944,12 @@ insert_1_both (const char *string, set_text_properties (make_fixnum (PT), make_fixnum (PT + nchars), Qnil, Qnil, Qnil); +#ifdef HAVE_TREE_SITTER + eassert (nbytes >= 0); + eassert (PT_BYTE >= 0); + ts_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes); +#endif + adjust_point (nchars, nbytes); check_markers (); @@ -1071,6 +1081,12 @@ insert_from_string_1 (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, graft_intervals_into_buffer (intervals, PT, nchars, current_buffer, inherit); +#ifdef HAVE_TREE_SITTER + eassert (nbytes >= 0); + eassert (PT_BYTE >= 0); + ts_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes); +#endif + adjust_point (nchars, outgoing_nbytes); check_markers (); @@ -1137,6 +1153,12 @@ insert_from_gap (ptrdiff_t nchars, ptrdiff_t nbytes, bool text_at_gap_tail) current_buffer, 0); } +#ifdef HAVE_TREE_SITTER + eassert (nbytes >= 0); + eassert (ins_bytepos >= 0); + ts_record_change (ins_bytepos, ins_bytepos, ins_bytepos + nbytes); +#endif + if (ins_charpos < PT) adjust_point (nchars, nbytes); @@ -1287,6 +1309,12 @@ insert_from_buffer_1 (struct buffer *buf, /* Insert those intervals. */ graft_intervals_into_buffer (intervals, PT, nchars, current_buffer, inherit); +#ifdef HAVE_TREE_SITTER + eassert (outgoing_nbytes >= 0); + eassert (PT_BYTE >= 0); + ts_record_change (PT_BYTE, PT_BYTE, PT_BYTE + outgoing_nbytes); +#endif + adjust_point (nchars, outgoing_nbytes); } @@ -1535,6 +1563,13 @@ replace_range (ptrdiff_t from, ptrdiff_t to, Lisp_Object new, graft_intervals_into_buffer (intervals, from, inschars, current_buffer, inherit); +#ifdef HAVE_TREE_SITTER + eassert (to_byte >= from_byte); + eassert (outgoing_insbytes >= 0); + eassert (from_byte >= 0); + ts_record_change (from_byte, to_byte, from_byte + outgoing_insbytes); +#endif + /* Relocate point as if it were a marker. */ if (from < PT) adjust_point ((from + inschars - (PT < to ? PT : to)), @@ -1569,7 +1604,11 @@ replace_range (ptrdiff_t from, ptrdiff_t to, Lisp_Object new, If MARKERS, relocate markers. Unlike most functions at this level, never call - prepare_to_modify_buffer and never call signal_after_change. */ + prepare_to_modify_buffer and never call signal_after_change. + Because this function is called in a loop, one character at a time. + The caller of 'replace_range_2' calls these hooks for the entire + region once. Apart from signal_after_change, any caller of this + function should also call ts_record_change. */ void replace_range_2 (ptrdiff_t from, ptrdiff_t from_byte, @@ -1892,6 +1931,12 @@ del_range_2 (ptrdiff_t from, ptrdiff_t from_byte, evaporate_overlays (from); +#ifdef HAVE_TREE_SITTER + eassert (from_byte <= to_byte); + eassert (from_byte >= 0); + ts_record_change (from_byte, to_byte, from_byte); +#endif + return deletion; } diff --git a/src/json.c b/src/json.c index db1be07f196..957f91b46bb 100644 --- a/src/json.c +++ b/src/json.c @@ -1090,22 +1090,6 @@ usage: (json-parse-buffer &rest args) */) return unbind_to (count, lisp); } -/* Simplified version of 'define-error' that works with pure - objects. */ - -static void -define_error (Lisp_Object name, const char *message, Lisp_Object parent) -{ - eassert (SYMBOLP (name)); - eassert (SYMBOLP (parent)); - Lisp_Object parent_conditions = Fget (parent, Qerror_conditions); - eassert (CONSP (parent_conditions)); - eassert (!NILP (Fmemq (parent, parent_conditions))); - eassert (NILP (Fmemq (name, parent_conditions))); - Fput (name, Qerror_conditions, pure_cons (name, parent_conditions)); - Fput (name, Qerror_message, build_pure_c_string (message)); -} - void syms_of_json (void) { diff --git a/src/lisp.h b/src/lisp.h index 1ad89fc4689..eb1f1ec2c21 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -577,6 +577,8 @@ enum Lisp_Fwd_Type your object -- this way, the same object could be used to represent several disparate C structures. + In addition, you need to add switch branches in data.c for Ftype_of. + You also need to add the new type to the constant `cl--typeof-types' in lisp/emacs-lisp/cl-preloaded.el. */ @@ -1056,6 +1058,8 @@ enum pvec_type PVEC_CONDVAR, PVEC_MODULE_FUNCTION, PVEC_NATIVE_COMP_UNIT, + PVEC_TS_PARSER, + PVEC_TS_NODE, PVEC_SQLITE, /* These should be last, for internal_equal and sxhash_obj. */ @@ -5534,6 +5538,11 @@ maybe_gc (void) maybe_garbage_collect (); } +/* Simplified version of 'define-error' that works with pure + objects. */ +void +define_error (Lisp_Object name, const char *message, Lisp_Object parent); + INLINE_HEADER_END #endif /* EMACS_LISP_H */ diff --git a/src/lread.c b/src/lread.c index 2538851bac6..f1ffdef04e4 100644 --- a/src/lread.c +++ b/src/lread.c @@ -5218,6 +5218,14 @@ to the specified file name if a suffix is allowed or required. */); Fcons (build_pure_c_string (MODULES_SECONDARY_SUFFIX), Vload_suffixes); #endif + DEFVAR_LISP ("dynamic-library-suffixes", Vdynamic_library_suffixes, + doc: /* A list of suffixes for loadable dynamic libraries. */); + Vdynamic_library_suffixes = + Fcons (build_pure_c_string (DYNAMIC_LIB_SECONDARY_SUFFIX), Qnil); + Vdynamic_library_suffixes = + Fcons (build_pure_c_string (DYNAMIC_LIB_SUFFIX), + Vdynamic_library_suffixes); + #endif DEFVAR_LISP ("module-file-suffix", Vmodule_file_suffix, doc: /* Suffix of loadable module file, or nil if modules are not supported. */); diff --git a/src/print.c b/src/print.c index d7583282b69..d8b8513f311 100644 --- a/src/print.c +++ b/src/print.c @@ -48,6 +48,10 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ # include <sys/socket.h> /* for F_DUPFD_CLOEXEC */ #endif +#ifdef HAVE_TREE_SITTER +#include "treesit.h" +#endif + struct terminal; /* Avoid actual stack overflow in print. */ @@ -1956,6 +1960,30 @@ print_vectorlike (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag, } break; #endif + +#ifdef HAVE_TREE_SITTER + case PVEC_TS_PARSER: + print_c_string ("#<treesit-parser for ", printcharfun); + Lisp_Object language = XTS_PARSER (obj)->language_symbol; + print_string (Fsymbol_name (language), printcharfun); + print_c_string (" in ", printcharfun); + print_object (XTS_PARSER (obj)->buffer, printcharfun, escapeflag); + printchar ('>', printcharfun); + break; + case PVEC_TS_NODE: + print_c_string ("#<treesit-node from ", printcharfun); + print_object (Ftreesit_node_start (obj), + printcharfun, escapeflag); + print_c_string (" to ", printcharfun); + print_object (Ftreesit_node_end (obj), + printcharfun, escapeflag); + print_c_string (" in ", printcharfun); + print_object (XTS_PARSER (XTS_NODE (obj)->parser)->buffer, + printcharfun, escapeflag); + printchar ('>', printcharfun); + break; +#endif + case PVEC_SQLITE: { print_c_string ("#<sqlite ", printcharfun); diff --git a/src/treesit.c b/src/treesit.c new file mode 100644 index 00000000000..e127fc2d87c --- /dev/null +++ b/src/treesit.c @@ -0,0 +1,1657 @@ +/* Tree-sitter integration for GNU Emacs. + +Copyright (C) 2021-2022 Free Software Foundation, Inc. + +This file is part of GNU Emacs. + +GNU Emacs is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or (at +your option) any later version. + +GNU Emacs is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ + +#include <config.h> +#include "lisp.h" +#include "buffer.h" +#include "treesit.h" + +/* Commentary + + The Emacs wrapper of tree-sitter does not expose everything the C + API provides, most notably: + + - It doesn't expose a syntax tree, we put the syntax tree in the + parser object, and updating the tree is handled in the C level. + + - We don't expose tree cursor either. I think Lisp is slow enough + to nullify any performance advantage of using a cursor, though I + don't have evidence. Also I want to minimize the number of new + types we introduce, currently we only add parser and node type. + + - Because updating the change is handled in the C level as each + change is made in the buffer, there is no way for Lisp to update + a node. But since we can just retrieve a new node, it shouldn't + be a limitation. + + - I didn't expose setting timeout and cancellation flag for a + parser, mainly because I don't think they are really necessary + in Emacs' use cases. + + - Many tree-sitter functions asks for a TSPoint, basically a (row, + column) location. Emacs uses a gap buffer and keeps no + information about row and column position. According to the + author of tree-sitter, tree-sitter only asks for (row, column) + position to carry it around and return back to the user later; + and the real position used is the byte position. He also said + that he _think_ that it will work to use byte position only. + That's why whenever a TSPoint is asked, we pass a dummy one to + it. Judging by the nature of parsing algorithms, I think it is + safe to use only byte position, and I don't think this will + change in the future. + + REF: https://github.com/tree-sitter/tree-sitter/issues/445 + + treesit.h has some commentary on the two main data structure + for the parser and node. ts_ensure_position_synced has some + commentary on how do we make tree-sitter play well with narrowing + (tree-sitter parser only sees the visible region, so we need to + translate positions back and forth). Most action happens in + ts_ensure_parsed, ts_read_buffer and ts_record_change. + + A complete correspondence list between tree-sitter functions and + exposed Lisp functions can be found in the manual (elisp)API + Correspondence. + + Placement of CHECK_xxx functions: call CHECK_xxx before using any + unchecked Lisp values; these include argument of Lisp functions, + return value of Fsymbol_value, car of a cons. + + Initializing tree-sitter: there are two entry points to tree-sitter + functions: 'treesit-parser-create' and + 'treesit-language-available-p'. Therefore we only need to call + initialization function in those two functions. + + Tree-sitter offset (0-based) and buffer position (1-based): + tree-sitter offset + buffer position = buffer position + buffer position - buffer position = tree-sitter offset + + Tree-sitter-related code in other files: + - src/alloc.c for gc for parser and node + - src/casefiddle.c & src/insdel.c for notifying tree-sitter + parser of buffer changes. + - lisp/emacs-lisp/cl-preloaded.el & data.c & lisp.h for parser and + node type. + */ + +/*** Initialization */ + +bool ts_initialized = false; + +static void * +ts_calloc_wrapper (size_t n, size_t size) +{ + return xzalloc (n * size); +} + +void +ts_initialize () +{ + if (!ts_initialized) + { + ts_set_allocator (xmalloc, ts_calloc_wrapper, xrealloc, xfree); + ts_initialized = true; + } +} + +/*** Loading language library */ + +/* Translates a symbol treesit-<lang> to a C name + treesit_<lang>. */ +void +ts_symbol_to_c_name (char *symbol_name) +{ + for (int idx=0; idx < strlen (symbol_name); idx++) + { + if (symbol_name[idx] == '-') + symbol_name[idx] = '_'; + } +} + +bool +ts_find_override_name +(Lisp_Object language_symbol, Lisp_Object *name, Lisp_Object *c_symbol) +{ + for (Lisp_Object list = Vtreesit_load_name_override_list; + !NILP (list); list = XCDR (list)) + { + Lisp_Object lang = XCAR (XCAR (list)); + CHECK_SYMBOL (lang); + if (EQ (lang, language_symbol)) + { + *name = Fnth (make_fixnum (1), XCAR (list)); + CHECK_STRING (*name); + *c_symbol = Fnth (make_fixnum (2), XCAR (list)); + CHECK_STRING (*c_symbol); + return true; + } + } + return false; +} + +/* For example, if Vdynamic_library_suffixes is (".so", ".dylib"), + thsi function pushes "lib_base_name.so" and "lib_base_name.dylib" + into *path_candidates. Obiviously path_candidates should be a Lisp + list of Lisp strings. */ +void +ts_load_language_push_for_each_suffix +(Lisp_Object lib_base_name, Lisp_Object *path_candidates) +{ + for (Lisp_Object suffixes = Vdynamic_library_suffixes; + !NILP (suffixes); suffixes = XCDR (suffixes)) { + *path_candidates = Fcons (concat2 (lib_base_name, XCAR (suffixes)), + *path_candidates); + } +} + +/* Load the dynamic library of LANGUAGE_SYMBOL and return the pointer + to the language definition. Signals + Qtreesit_load_language_error if something goes wrong. + Qtreesit_load_language_error carries the error message from + trying to load the library with each extension. + + If SIGNAL is true, signal an error when failed to load LANGUAGE; if + false, return NULL when failed. */ +TSLanguage * +ts_load_language (Lisp_Object language_symbol, bool signal) +{ + Lisp_Object symbol_name = Fsymbol_name (language_symbol); + + /* Figure out the library name and C name. */ + Lisp_Object lib_base_name = + (concat2 (build_pure_c_string ("libtree-sitter-"), symbol_name)); + Lisp_Object base_name = + (concat2 (build_pure_c_string ("tree-sitter-"), symbol_name)); + char *c_name = strdup (SSDATA (base_name)); + ts_symbol_to_c_name (c_name); + + /* Override the library name and C name, if appropriate. */ + Lisp_Object override_name; + Lisp_Object override_c_name; + bool found_override = ts_find_override_name + (language_symbol, &override_name, &override_c_name); + if (found_override) + { + lib_base_name = override_name; + c_name = SSDATA (override_c_name); + } + + /* Now we generate a list of possible library paths. */ + Lisp_Object path_candidates = Qnil; + /* First push just the filenames to the candidate list, which will + make dynlib_open look under standard system load paths. */ + ts_load_language_push_for_each_suffix + (lib_base_name, &path_candidates); + /* Then push ~/.emacs.d/tree-sitter paths. */ + ts_load_language_push_for_each_suffix + (Fexpand_file_name + (concat2 (build_string ("tree-sitter/"), lib_base_name), + Fsymbol_value (Quser_emacs_directory)), + &path_candidates); + /* Then push paths from treesit-extra-load-path. */ + for (Lisp_Object tail = Freverse (Vtreesit_extra_load_path); + !NILP (tail); tail = XCDR (tail)) + { + ts_load_language_push_for_each_suffix + (Fexpand_file_name (lib_base_name, XCAR (tail)), + &path_candidates); + } + + /* Try loading the dynamic library by each path candidate. Stop + when succeed, record the error message and try the next one when + fail. */ + dynlib_handle_ptr handle; + char const *error; + Lisp_Object error_list = Qnil; + for (Lisp_Object tail = path_candidates; + !NILP (tail); tail = XCDR (tail)) + { + char *library_name = SSDATA (XCAR (tail)); + dynlib_error (); + handle = dynlib_open (library_name); + error = dynlib_error (); + if (error == NULL) + break; + else + error_list = Fcons (build_string (error), error_list); + } + if (error != NULL) + { + if (signal) + xsignal2 (Qtreesit_load_language_error, + symbol_name, Fnreverse (error_list)); + else + return NULL; + } + + /* Load TSLanguage. */ + dynlib_error (); + TSLanguage *(*langfn) (); + langfn = dynlib_sym (handle, c_name); + error = dynlib_error (); + if (error != NULL) + { + if (signal) + xsignal1 (Qtreesit_load_language_error, + build_string (error)); + else + return NULL; + } + TSLanguage *lang = (*langfn) (); + + /* Check if language version matches tree-sitter version. */ + TSParser *parser = ts_parser_new (); + bool success = ts_parser_set_language (parser, lang); + ts_parser_delete (parser); + if (!success) + { + if (signal) + xsignal2 (Qtreesit_load_language_error, + build_pure_c_string ("Language version doesn't match tree-sitter version, language version:"), + make_fixnum (ts_language_version (lang))); + else + return NULL; + } + return lang; +} + +DEFUN ("treesit-language-available-p", + Ftreesit_langauge_available_p, + Streesit_language_available_p, + 1, 1, 0, + doc: /* Return non-nil if LANGUAGE exists and is loadable. */) + (Lisp_Object language) +{ + CHECK_SYMBOL (language); + ts_initialize (); + if (ts_load_language(language, false) == NULL) + return Qnil; + else + return Qt; +} + +/*** Parsing functions */ + +/* An auxiliary function that saves a few lines of code. Assumes TREE + is not NULL. */ +static inline void +ts_tree_edit_1 (TSTree *tree, ptrdiff_t start_byte, + ptrdiff_t old_end_byte, ptrdiff_t new_end_byte) +{ + TSPoint dummy_point = {0, 0}; + TSInputEdit edit = {(uint32_t) start_byte, + (uint32_t) old_end_byte, + (uint32_t) new_end_byte, + dummy_point, dummy_point, dummy_point}; + ts_tree_edit (tree, &edit); +} + +/* Update each parser's tree after the user made an edit. This +function does not parse the buffer and only updates the tree. (So it +should be very fast.) */ +void +ts_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte, + ptrdiff_t new_end_byte) +{ + for (Lisp_Object parser_list = + Fsymbol_value (Qtreesit_parser_list); + !NILP (parser_list); + parser_list = XCDR (parser_list)) + { + CHECK_CONS (parser_list); + Lisp_Object lisp_parser = XCAR (parser_list); + CHECK_TS_PARSER (lisp_parser); + TSTree *tree = XTS_PARSER (lisp_parser)->tree; + if (tree != NULL) + { + eassert (start_byte <= old_end_byte); + eassert (start_byte <= new_end_byte); + /* Think the recorded change as a delete followed by an + insert, and think of them as moving unchanged text back + and forth. After all, the whole point of updating the + tree is to update the position of unchanged text. */ + ptrdiff_t bytes_del = old_end_byte - start_byte; + ptrdiff_t bytes_ins = new_end_byte - start_byte; + + ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg; + ptrdiff_t visible_end = XTS_PARSER (lisp_parser)->visible_end; + + ptrdiff_t affected_start = + max (visible_beg, start_byte) - visible_beg; + ptrdiff_t affected_old_end = + min (visible_end, affected_start + bytes_del); + ptrdiff_t affected_new_end = + affected_start + bytes_ins; + + ts_tree_edit_1 (tree, affected_start, affected_old_end, + affected_new_end); + XTS_PARSER (lisp_parser)->visible_end = affected_new_end; + XTS_PARSER (lisp_parser)->need_reparse = true; + XTS_PARSER (lisp_parser)->timestamp++; + } + } +} + +void +ts_ensure_position_synced (Lisp_Object parser) +{ + TSParser *ts_parser = XTS_PARSER (parser)->parser; + TSTree *tree = XTS_PARSER (parser)->tree; + + if (tree == NULL) + return; + + struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); + ptrdiff_t visible_beg = XTS_PARSER (parser)->visible_beg; + ptrdiff_t visible_end = XTS_PARSER (parser)->visible_end; + /* Before we parse or set ranges, catch up with the narrowing + situation. We change visible_beg and visible_end to match + BUF_BEGV_BYTE and BUF_ZV_BYTE, and inform tree-sitter of the + change. We want to move the visible range of tree-sitter to + match the narrowed range. For example, + from ________|xxxx|__ + to |xxxx|__________ */ + + /* 1. Make sure visible_beg <= BUF_BEGV_BYTE. */ + if (visible_beg > BUF_BEGV_BYTE (buffer)) + { + /* Tree-sitter sees: insert at the beginning. */ + ts_tree_edit_1 (tree, 0, 0, visible_beg - BUF_BEGV_BYTE (buffer)); + visible_beg = BUF_BEGV_BYTE (buffer); + } + /* 2. Make sure visible_end = BUF_ZV_BYTE. */ + if (visible_end < BUF_ZV_BYTE (buffer)) + { + /* Tree-sitter sees: insert at the end. */ + ts_tree_edit_1 (tree, visible_end - visible_beg, + visible_end - visible_beg, + BUF_ZV_BYTE (buffer) - visible_beg); + visible_end = BUF_ZV_BYTE (buffer); + } + else if (visible_end > BUF_ZV_BYTE (buffer)) + { + /* Tree-sitter sees: delete at the end. */ + ts_tree_edit_1 (tree, BUF_ZV_BYTE (buffer) - visible_beg, + visible_end - visible_beg, + BUF_ZV_BYTE (buffer) - visible_beg); + visible_end = BUF_ZV_BYTE (buffer); + } + /* 3. Make sure visible_beg = BUF_BEGV_BYTE. */ + if (visible_beg < BUF_BEGV_BYTE (buffer)) + { + /* Tree-sitter sees: delete at the beginning. */ + ts_tree_edit_1 (tree, 0, BUF_BEGV_BYTE (buffer) - visible_beg, 0); + visible_beg = BUF_BEGV_BYTE (buffer); + } + eassert (0 <= visible_beg); + eassert (visible_beg <= visible_end); + + XTS_PARSER (parser)->visible_beg = visible_beg; + XTS_PARSER (parser)->visible_end = visible_end; +} + +void +ts_check_buffer_size (struct buffer *buffer) +{ + ptrdiff_t buffer_size = + (BUF_Z (buffer) - BUF_BEG (buffer)); + if (buffer_size > UINT32_MAX) + xsignal2 (Qtreesit_buffer_too_large, + build_pure_c_string ("Buffer size too large, size:"), + make_fixnum (buffer_size)); +} + +/* Parse the buffer. We don't parse until we have to. When we have +to, we call this function to parse and update the tree. */ +void +ts_ensure_parsed (Lisp_Object parser) +{ + if (!XTS_PARSER (parser)->need_reparse) + return; + TSParser *ts_parser = XTS_PARSER (parser)->parser; + TSTree *tree = XTS_PARSER(parser)->tree; + TSInput input = XTS_PARSER (parser)->input; + struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); + ts_check_buffer_size (buffer); + + /* Before we parse, catch up with the narrowing situation. */ + ts_ensure_position_synced (parser); + + TSTree *new_tree = ts_parser_parse(ts_parser, tree, input); + /* This should be very rare (impossible, really): it only happens + when 1) language is not set (impossible in Emacs because the user + has to supply a language to create a parser), 2) parse canceled + due to timeout (impossible because we don't set a timeout), 3) + parse canceled due to cancellation flag (impossible because we + don't set the flag). (See comments for ts_parser_parse in + tree_sitter/api.h.) */ + if (new_tree == NULL) + { + Lisp_Object buf; + XSETBUFFER (buf, buffer); + xsignal1 (Qtreesit_parse_error, buf); + } + + ts_tree_delete (tree); + XTS_PARSER (parser)->tree = new_tree; + XTS_PARSER (parser)->need_reparse = false; +} + +/* This is the read function provided to tree-sitter to read from a + buffer. It reads one character at a time and automatically skips + the gap. */ +const char* +ts_read_buffer (void *parser, uint32_t byte_index, + TSPoint position, uint32_t *bytes_read) +{ + struct buffer *buffer = + XBUFFER (((struct Lisp_TS_Parser *) parser)->buffer); + ptrdiff_t visible_beg = ((struct Lisp_TS_Parser *) parser)->visible_beg; + ptrdiff_t visible_end = ((struct Lisp_TS_Parser *) parser)->visible_end; + ptrdiff_t byte_pos = byte_index + visible_beg; + /* We will make sure visible_beg = BUF_BEGV_BYTE before re-parse (in + ts_ensure_parsed), so byte_pos will never be smaller than + BUF_BEG_BYTE. */ + eassert (visible_beg = BUF_BEGV_BYTE (buffer)); + eassert (visible_end = BUF_ZV_BYTE (buffer)); + + /* Read one character. Tree-sitter wants us to set bytes_read to 0 + if it reads to the end of buffer. It doesn't say what it wants + for the return value in that case, so we just give it an empty + string. */ + char *beg; + int len; + /* This function could run from a user command, so it is better to + do nothing instead of raising an error. (It was a pain in the a** + to decrypt mega-if-conditions in Emacs source, so I wrote the two + branches separately.) */ + if (!BUFFER_LIVE_P (buffer)) + { + beg = NULL; + len = 0; + } + /* Reached visible end-of-buffer, tell tree-sitter to read no more. */ + else if (byte_pos >= visible_end) + { + beg = NULL; + len = 0; + } + /* Normal case, read a character. */ + else + { + beg = (char *) BUF_BYTE_ADDRESS (buffer, byte_pos); + len = BYTES_BY_CHAR_HEAD ((int) *beg); + } + *bytes_read = (uint32_t) len; + return beg; +} + +/*** Functions for parser and node object*/ + +/* Wrap the parser in a Lisp_Object to be used in the Lisp machine. */ +Lisp_Object +make_ts_parser (Lisp_Object buffer, TSParser *parser, + TSTree *tree, Lisp_Object language_symbol) +{ + struct Lisp_TS_Parser *lisp_parser + = ALLOCATE_PSEUDOVECTOR + (struct Lisp_TS_Parser, buffer, PVEC_TS_PARSER); + + lisp_parser->language_symbol = language_symbol; + lisp_parser->buffer = buffer; + lisp_parser->parser = parser; + lisp_parser->tree = tree; + TSInput input = {lisp_parser, ts_read_buffer, TSInputEncodingUTF8}; + lisp_parser->input = input; + lisp_parser->need_reparse = true; + lisp_parser->visible_beg = BUF_BEGV (XBUFFER (buffer)); + lisp_parser->visible_end = BUF_ZV (XBUFFER (buffer)); + return make_lisp_ptr (lisp_parser, Lisp_Vectorlike); +} + +/* Wrap the node in a Lisp_Object to be used in the Lisp machine. */ +Lisp_Object +make_ts_node (Lisp_Object parser, TSNode node) +{ + struct Lisp_TS_Node *lisp_node + = ALLOCATE_PSEUDOVECTOR (struct Lisp_TS_Node, parser, PVEC_TS_NODE); + lisp_node->parser = parser; + lisp_node->node = node; + lisp_node->timestamp = XTS_PARSER (parser)->timestamp; + return make_lisp_ptr (lisp_node, Lisp_Vectorlike); +} + +DEFUN ("treesit-parser-p", + Ftreesit_parser_p, Streesit_parser_p, 1, 1, 0, + doc: /* Return t if OBJECT is a tree-sitter parser. */) + (Lisp_Object object) +{ + if (TS_PARSERP (object)) + return Qt; + else + return Qnil; +} + +DEFUN ("treesit-node-p", + Ftreesit_node_p, Streesit_node_p, 1, 1, 0, + doc: /* Return t if OBJECT is a tree-sitter node. */) + (Lisp_Object object) +{ + if (TS_NODEP (object)) + return Qt; + else + return Qnil; +} + +DEFUN ("treesit-node-parser", + Ftreesit_node_parser, Streesit_node_parser, + 1, 1, 0, + doc: /* Return the parser to which NODE belongs. */) + (Lisp_Object node) +{ + CHECK_TS_NODE (node); + return XTS_NODE (node)->parser; +} + +DEFUN ("treesit-parser-create", + Ftreesit_parser_create, Streesit_parser_create, + 2, 2, 0, + doc: /* Create and return a parser in BUFFER for LANGUAGE. + +The parser is automatically added to BUFFER's +`treesit-parser-list'. LANGUAGE should be the symbol of a +function provided by a tree-sitter language dynamic module, e.g., +'treesit-json. If BUFFER is nil, use the current buffer. */) + (Lisp_Object buffer, Lisp_Object language) +{ + if (NILP (buffer)) + buffer = Fcurrent_buffer (); + + CHECK_BUFFER (buffer); + CHECK_SYMBOL (language); + ts_check_buffer_size (XBUFFER (buffer)); + + ts_initialize (); + + TSParser *parser = ts_parser_new (); + TSLanguage *lang = ts_load_language (language, true); + /* We check language version when loading a language, so this should + always succeed. */ + ts_parser_set_language (parser, lang); + + Lisp_Object lisp_parser + = make_ts_parser (buffer, parser, NULL, language); + + struct buffer *old_buffer = current_buffer; + set_buffer_internal (XBUFFER (buffer)); + + Fset (Qtreesit_parser_list, + Fcons (lisp_parser, Fsymbol_value (Qtreesit_parser_list))); + + set_buffer_internal (old_buffer); + return lisp_parser; +} + +DEFUN ("treesit-parser-buffer", + Ftreesit_parser_buffer, Streesit_parser_buffer, + 1, 1, 0, + doc: /* Return the buffer of PARSER. */) + (Lisp_Object parser) +{ + CHECK_TS_PARSER (parser); + Lisp_Object buf; + XSETBUFFER (buf, XBUFFER (XTS_PARSER (parser)->buffer)); + return buf; +} + +DEFUN ("treesit-parser-language", + Ftreesit_parser_language, Streesit_parser_language, + 1, 1, 0, + doc: /* Return parser's language symbol. +This symbol is the one used to create the parser. */) + (Lisp_Object parser) +{ + CHECK_TS_PARSER (parser); + return XTS_PARSER (parser)->language_symbol; +} + +/*** Parser API */ + +DEFUN ("treesit-parser-root-node", + Ftreesit_parser_root_node, Streesit_parser_root_node, + 1, 1, 0, + doc: /* Return the root node of PARSER. */) + (Lisp_Object parser) +{ + CHECK_TS_PARSER (parser); + ts_ensure_parsed (parser); + TSNode root_node = ts_tree_root_node (XTS_PARSER (parser)->tree); + return make_ts_node (parser, root_node); +} + +/* Checks that the RANGES argument of + treesit-parser-set-included-ranges is valid. */ +void +ts_check_range_argument (Lisp_Object ranges) +{ + EMACS_INT last_point = 1; + for (Lisp_Object tail = ranges; + !NILP (tail); tail = XCDR (tail)) + { + CHECK_CONS (tail); + Lisp_Object range = XCAR (tail); + CHECK_CONS (range); + CHECK_FIXNUM (XCAR (range)); + CHECK_FIXNUM (XCDR (range)); + EMACS_INT beg = XFIXNUM (XCAR (range)); + EMACS_INT end = XFIXNUM (XCDR (range)); + /* TODO: Maybe we should check for point-min/max, too? */ + if (!(last_point <= beg && beg <= end)) + xsignal2 (Qtreesit_range_invalid, + build_pure_c_string + ("RANGE is either overlapping or out-of-order"), + ranges); + last_point = end; + } +} + +DEFUN ("treesit-parser-set-included-ranges", + Ftreesit_parser_set_included_ranges, + Streesit_parser_set_included_ranges, + 2, 2, 0, + doc: /* Limit PARSER to RANGES. + +RANGES is a list of (BEG . END), each (BEG . END) confines a range in +which the parser should operate in. Each range must not overlap, and +each range should come in order. Signal `treesit-set-range-error' +if the argument is invalid, or something else went wrong. If RANGES +is nil, set PARSER to parse the whole buffer. */) + (Lisp_Object parser, Lisp_Object ranges) +{ + CHECK_TS_PARSER (parser); + CHECK_CONS (ranges); + ts_check_range_argument (ranges); + + /* Before we parse, catch up with narrowing/widening. */ + ts_ensure_position_synced (parser); + + bool success; + if (NILP (ranges)) + { + /* If RANGES is nil, make parser to parse the whole document. + To do that we give tree-sitter a 0 length, the range is a + dummy. */ + TSRange ts_range = {0, 0, 0, 0}; + success = ts_parser_set_included_ranges + (XTS_PARSER (parser)->parser, &ts_range , 0); + } + else + { + /* Set ranges for PARSER. */ + ptrdiff_t len = list_length (ranges); + TSRange *ts_ranges = malloc (sizeof(TSRange) * len); + struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); + + for (int idx=0; !NILP (ranges); idx++, ranges = XCDR (ranges)) + { + Lisp_Object range = XCAR (ranges); + struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); + + EMACS_INT beg_byte = buf_charpos_to_bytepos + (buffer, XFIXNUM (XCAR (range))); + EMACS_INT end_byte = buf_charpos_to_bytepos + (buffer, XFIXNUM (XCDR (range))); + /* We don't care about start and end points, put in dummy + value. */ + TSRange rg = {{0,0}, {0,0}, + (uint32_t) beg_byte - BUF_BEGV_BYTE (buffer), + (uint32_t) end_byte - BUF_BEGV_BYTE (buffer)}; + ts_ranges[idx] = rg; + } + success = ts_parser_set_included_ranges + (XTS_PARSER (parser)->parser, ts_ranges, (uint32_t) len); + /* Although XFIXNUM could signal, it should be impossible + because we have checked the input by ts_check_range_argument. + So there is no need for unwind-protect. */ + free (ts_ranges); + } + + if (!success) + xsignal2 (Qtreesit_range_invalid, + build_pure_c_string + ("Something went wrong when setting ranges"), + ranges); + + XTS_PARSER (parser)->need_reparse = true; + return Qnil; +} + +DEFUN ("treesit-parser-included-ranges", + Ftreesit_parser_included_ranges, + Streesit_parser_included_ranges, + 1, 1, 0, + doc: /* Return the ranges set for PARSER. +See `treesit-parser-set-ranges'. If no range is set, return +nil. */) + (Lisp_Object parser) +{ + CHECK_TS_PARSER (parser); + uint32_t len; + const TSRange *ranges = ts_parser_included_ranges + (XTS_PARSER (parser)->parser, &len); + if (len == 0) + return Qnil; + struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); + + Lisp_Object list = Qnil; + for (int idx=0; idx < len; idx++) + { + TSRange range = ranges[idx]; + uint32_t beg_byte = range.start_byte + BUF_BEGV_BYTE (buffer); + uint32_t end_byte = range.end_byte + BUF_BEGV_BYTE (buffer); + + Lisp_Object lisp_range = + Fcons (make_fixnum (buf_bytepos_to_charpos (buffer, beg_byte)) , + make_fixnum (buf_bytepos_to_charpos (buffer, end_byte))); + list = Fcons (lisp_range, list); + } + return Fnreverse (list); +} + +/*** Node API */ + +/* Check that OBJ is a positive integer and signal an error if + otherwise. */ +static void +ts_check_positive_integer (Lisp_Object obj) +{ + CHECK_INTEGER (obj); + if (XFIXNUM (obj) < 0) + xsignal1 (Qargs_out_of_range, obj); +} + +static void +ts_check_node (Lisp_Object obj) +{ + CHECK_TS_NODE (obj); + Lisp_Object lisp_parser = XTS_NODE (obj)->parser; + if (XTS_NODE (obj)->timestamp != + XTS_PARSER (lisp_parser)->timestamp) + xsignal1 (Qtreesit_node_outdated, obj); +} + +DEFUN ("treesit-node-type", + Ftreesit_node_type, Streesit_node_type, 1, 1, 0, + doc: /* Return the NODE's type as a string. +If NODE is nil, return nil. */) + (Lisp_Object node) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + TSNode ts_node = XTS_NODE (node)->node; + const char *type = ts_node_type (ts_node); + return build_string (type); +} + +DEFUN ("treesit-node-start", + Ftreesit_node_start, Streesit_node_start, 1, 1, 0, + doc: /* Return the NODE's start position. +If NODE is nil, return nil. */) + (Lisp_Object node) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + TSNode ts_node = XTS_NODE (node)->node; + ptrdiff_t visible_beg = + XTS_PARSER (XTS_NODE (node)->parser)->visible_beg; + uint32_t start_byte_offset = ts_node_start_byte (ts_node); + struct buffer *buffer = + XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer); + ptrdiff_t start_pos = buf_bytepos_to_charpos + (buffer, start_byte_offset + visible_beg); + return make_fixnum (start_pos); +} + +DEFUN ("treesit-node-end", + Ftreesit_node_end, Streesit_node_end, 1, 1, 0, + doc: /* Return the NODE's end position. +If NODE is nil, return nil. */) + (Lisp_Object node) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + TSNode ts_node = XTS_NODE (node)->node; + ptrdiff_t visible_beg = + XTS_PARSER (XTS_NODE (node)->parser)->visible_beg; + uint32_t end_byte_offset = ts_node_end_byte (ts_node); + struct buffer *buffer = + XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer); + ptrdiff_t end_pos = buf_bytepos_to_charpos + (buffer, end_byte_offset + visible_beg); + return make_fixnum (end_pos); +} + +DEFUN ("treesit-node-string", + Ftreesit_node_string, Streesit_node_string, 1, 1, 0, + doc: /* Return the string representation of NODE. +If NODE is nil, return nil. */) + (Lisp_Object node) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + TSNode ts_node = XTS_NODE (node)->node; + char *string = ts_node_string (ts_node); + return make_string (string, strlen (string)); +} + +DEFUN ("treesit-node-parent", + Ftreesit_node_parent, Streesit_node_parent, 1, 1, 0, + doc: /* Return the immediate parent of NODE. +Return nil if there isn't any. If NODE is nil, return nil. */) + (Lisp_Object node) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + TSNode ts_node = XTS_NODE (node)->node; + TSNode parent = ts_node_parent (ts_node); + + if (ts_node_is_null (parent)) + return Qnil; + + return make_ts_node (XTS_NODE (node)->parser, parent); +} + +DEFUN ("treesit-node-child", + Ftreesit_node_child, Streesit_node_child, 2, 3, 0, + doc: /* Return the Nth child of NODE. + +Return nil if there isn't any. If NAMED is non-nil, look for named +child only. NAMED defaults to nil. If NODE is nil, return nil. */) + (Lisp_Object node, Lisp_Object n, Lisp_Object named) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + ts_check_positive_integer (n); + EMACS_INT idx = XFIXNUM (n); + if (idx > UINT32_MAX) xsignal1 (Qargs_out_of_range, n); + TSNode ts_node = XTS_NODE (node)->node; + TSNode child; + if (NILP (named)) + child = ts_node_child (ts_node, (uint32_t) idx); + else + child = ts_node_named_child (ts_node, (uint32_t) idx); + + if (ts_node_is_null (child)) + return Qnil; + + return make_ts_node (XTS_NODE (node)->parser, child); +} + +DEFUN ("treesit-node-check", + Ftreesit_node_check, Streesit_node_check, 2, 2, 0, + doc: /* Return non-nil if NODE has PROPERTY, nil otherwise. + +PROPERTY could be 'named, 'missing, 'extra, 'has-changes, 'has-error. +Named nodes correspond to named rules in the language definition, +whereas "anonymous" nodes correspond to string literals in the +language definition. + +Missing nodes are inserted by the parser in order to recover from +certain kinds of syntax errors, i.e., should be there but not there. + +Extra nodes represent things like comments, which are not required the +language definition, but can appear anywhere. + +A node "has changes" if the buffer changed since the node is +created. (Don't forget the "s" at the end of 'has-changes.) + +A node "has error" if itself is a syntax error or contains any syntax +errors. */) + (Lisp_Object node, Lisp_Object property) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + CHECK_SYMBOL (property); + TSNode ts_node = XTS_NODE (node)->node; + bool result; + if (EQ (property, Qnamed)) + result = ts_node_is_named (ts_node); + else if (EQ (property, Qmissing)) + result = ts_node_is_missing (ts_node); + else if (EQ (property, Qextra)) + result = ts_node_is_extra (ts_node); + else if (EQ (property, Qhas_error)) + result = ts_node_has_error (ts_node); + else if (EQ (property, Qhas_changes)) + result = ts_node_has_changes (ts_node); + else + signal_error ("Expecting 'named, 'missing, 'extra, 'has-changes or 'has-error, got", + property); + return result ? Qt : Qnil; +} + +DEFUN ("treesit-node-field-name-for-child", + Ftreesit_node_field_name_for_child, + Streesit_node_field_name_for_child, 2, 2, 0, + doc: /* Return the field name of the Nth child of NODE. + +Return nil if there isn't any child or no field is found. +If NODE is nil, return nil. */) + (Lisp_Object node, Lisp_Object n) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + ts_check_positive_integer (n); + EMACS_INT idx = XFIXNUM (n); + if (idx > UINT32_MAX) xsignal1 (Qargs_out_of_range, n); + TSNode ts_node = XTS_NODE (node)->node; + const char *name + = ts_node_field_name_for_child (ts_node, (uint32_t) idx); + + if (name == NULL) + return Qnil; + + return make_string (name, strlen (name)); +} + +DEFUN ("treesit-node-child-count", + Ftreesit_node_child_count, + Streesit_node_child_count, 1, 2, 0, + doc: /* Return the number of children of NODE. + +If NAMED is non-nil, count named child only. NAMED defaults to +nil. If NODE is nil, return nil. */) + (Lisp_Object node, Lisp_Object named) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + TSNode ts_node = XTS_NODE (node)->node; + uint32_t count; + if (NILP (named)) + count = ts_node_child_count (ts_node); + else + count = ts_node_named_child_count (ts_node); + return make_fixnum (count); +} + +DEFUN ("treesit-node-child-by-field-name", + Ftreesit_node_child_by_field_name, + Streesit_node_child_by_field_name, 2, 2, 0, + doc: /* Return the child of NODE with FIELD-NAME. +Return nil if there isn't any. If NODE is nil, return nil. */) + (Lisp_Object node, Lisp_Object field_name) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + CHECK_STRING (field_name); + char *name_str = SSDATA (field_name); + TSNode ts_node = XTS_NODE (node)->node; + TSNode child + = ts_node_child_by_field_name (ts_node, name_str, strlen (name_str)); + + if (ts_node_is_null(child)) + return Qnil; + + return make_ts_node(XTS_NODE (node)->parser, child); +} + +DEFUN ("treesit-node-next-sibling", + Ftreesit_node_next_sibling, + Streesit_node_next_sibling, 1, 2, 0, + doc: /* Return the next sibling of NODE. + +Return nil if there isn't any. If NAMED is non-nil, look for named +child only. NAMED defaults to nil. If NODE is nil, return nil. */) + (Lisp_Object node, Lisp_Object named) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + TSNode ts_node = XTS_NODE (node)->node; + TSNode sibling; + if (NILP (named)) + sibling = ts_node_next_sibling (ts_node); + else + sibling = ts_node_next_named_sibling (ts_node); + + if (ts_node_is_null(sibling)) + return Qnil; + + return make_ts_node(XTS_NODE (node)->parser, sibling); +} + +DEFUN ("treesit-node-prev-sibling", + Ftreesit_node_prev_sibling, + Streesit_node_prev_sibling, 1, 2, 0, + doc: /* Return the previous sibling of NODE. + +Return nil if there isn't any. If NAMED is non-nil, look for named +child only. NAMED defaults to nil. If NODE is nil, return nil. */) + (Lisp_Object node, Lisp_Object named) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + TSNode ts_node = XTS_NODE (node)->node; + TSNode sibling; + + if (NILP (named)) + sibling = ts_node_prev_sibling (ts_node); + else + sibling = ts_node_prev_named_sibling (ts_node); + + if (ts_node_is_null(sibling)) + return Qnil; + + return make_ts_node(XTS_NODE (node)->parser, sibling); +} + +DEFUN ("treesit-node-first-child-for-pos", + Ftreesit_node_first_child_for_pos, + Streesit_node_first_child_for_pos, 2, 3, 0, + doc: /* Return the first child of NODE on POS. + +Specifically, return the first child that extends beyond POS. POS is +a position in the buffer. Return nil if there isn't any. If NAMED is +non-nil, look for named child only. NAMED defaults to nil. Note that +this function returns an immediate child, not the smallest +(grand)child. If NODE is nil, return nil. */) + (Lisp_Object node, Lisp_Object pos, Lisp_Object named) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + ts_check_positive_integer (pos); + + struct buffer *buf = + XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer); + ptrdiff_t visible_beg = + XTS_PARSER (XTS_NODE (node)->parser)->visible_beg; + ptrdiff_t byte_pos = buf_charpos_to_bytepos (buf, XFIXNUM (pos)); + + if (byte_pos < BUF_BEGV_BYTE (buf) || byte_pos > BUF_ZV_BYTE (buf)) + xsignal1 (Qargs_out_of_range, pos); + + TSNode ts_node = XTS_NODE (node)->node; + TSNode child; + if (NILP (named)) + child = ts_node_first_child_for_byte + (ts_node, byte_pos - visible_beg); + else + child = ts_node_first_named_child_for_byte + (ts_node, byte_pos - visible_beg); + + if (ts_node_is_null (child)) + return Qnil; + + return make_ts_node (XTS_NODE (node)->parser, child); +} + +DEFUN ("treesit-node-descendant-for-range", + Ftreesit_node_descendant_for_range, + Streesit_node_descendant_for_range, 3, 4, 0, + doc: /* Return the smallest node that covers BEG to END. + +The returned node is a descendant of NODE. POS is a position. Return +nil if there isn't any. If NAMED is non-nil, look for named child +only. NAMED defaults to nil. If NODE is nil, return nil. */) + (Lisp_Object node, Lisp_Object beg, Lisp_Object end, Lisp_Object named) +{ + if (NILP (node)) return Qnil; + ts_check_node (node); + CHECK_INTEGER (beg); + CHECK_INTEGER (end); + + struct buffer *buf = + XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer); + ptrdiff_t visible_beg = + XTS_PARSER (XTS_NODE (node)->parser)->visible_beg; + ptrdiff_t byte_beg = buf_charpos_to_bytepos (buf, XFIXNUM (beg)); + ptrdiff_t byte_end = buf_charpos_to_bytepos (buf, XFIXNUM (end)); + + /* Checks for BUFFER_BEG <= BEG <= END <= BUFFER_END. */ + if (!(BUF_BEGV_BYTE (buf) <= byte_beg + && byte_beg <= byte_end + && byte_end <= BUF_ZV_BYTE (buf))) + xsignal2 (Qargs_out_of_range, beg, end); + + TSNode ts_node = XTS_NODE (node)->node; + TSNode child; + if (NILP (named)) + child = ts_node_descendant_for_byte_range + (ts_node, byte_beg - visible_beg , byte_end - visible_beg); + else + child = ts_node_named_descendant_for_byte_range + (ts_node, byte_beg - visible_beg, byte_end - visible_beg); + + if (ts_node_is_null (child)) + return Qnil; + + return make_ts_node (XTS_NODE (node)->parser, child); +} + +DEFUN ("treesit-node-eq", + Ftreesit_node_eq, + Streesit_node_eq, 2, 2, 0, + doc: /* Return non-nil if NODE1 and NODE2 are the same node. +If any one of NODE1 and NODE2 is nil, return nil. */) + (Lisp_Object node1, Lisp_Object node2) +{ + if (NILP (node1) || NILP (node2)) + return Qnil; + CHECK_TS_NODE (node1); + CHECK_TS_NODE (node2); + + TSNode ts_node_1 = XTS_NODE (node1)->node; + TSNode ts_node_2 = XTS_NODE (node2)->node; + + bool same_node = ts_node_eq (ts_node_1, ts_node_2); + return same_node ? Qt : Qnil; +} + +/*** Query functions */ + +/* If we decide to pre-load tree-sitter.el, maybe we can implement + this function in Lisp. */ +DEFUN ("treesit-expand-pattern", + Ftreesit_expand_pattern, + Streesit_expand_pattern, 1, 1, 0, + doc: /* Expand PATTERN to its string form. + +PATTERN can be + + :anchor + :? + :* + :+ + :equal + :match + (TYPE PATTERN...) + [PATTERN...] + FIELD-NAME: + @CAPTURE-NAME + (_) + _ + \"TYPE\" + +Consult Info node `(elisp)Pattern Matching' form detailed +explanation. */) + (Lisp_Object pattern) +{ + if (EQ (pattern, intern_c_string (":anchor"))) + return build_pure_c_string("."); + if (EQ (pattern, intern_c_string (":?"))) + return build_pure_c_string("?"); + if (EQ (pattern, intern_c_string (":*"))) + return build_pure_c_string("*"); + if (EQ (pattern, intern_c_string (":+"))) + return build_pure_c_string("+"); + if (EQ (pattern, intern_c_string (":equal"))) + return build_pure_c_string("#equal"); + if (EQ (pattern, intern_c_string (":match"))) + return build_pure_c_string("#match"); + Lisp_Object opening_delimeter = + build_pure_c_string (VECTORP (pattern) ? "[" : "("); + Lisp_Object closing_delimiter = + build_pure_c_string (VECTORP (pattern) ? "]" : ")"); + if (VECTORP (pattern) || CONSP (pattern)) + return concat3 (opening_delimeter, + Fmapconcat (intern_c_string + ("treesit-expand-pattern"), + pattern, + build_pure_c_string (" ")), + closing_delimiter); + return CALLN (Fformat, build_pure_c_string("%S"), pattern); +} + +DEFUN ("treesit-expand-query", + Ftreesit_expand_query, + Streesit_expand_query, 1, 1, 0, + doc: /* Expand sexp QUERY to its string form. + +A PATTERN in QUERY can be + + :anchor + :? + :* + :+ + :equal + :match + (TYPE PATTERN...) + [PATTERN...] + FIELD-NAME: + @CAPTURE-NAME + (_) + _ + \"TYPE\" + +Consult Info node `(elisp)Pattern Matching' form detailed +explanation. */) + (Lisp_Object query) +{ + return Fmapconcat (intern_c_string ("treesit-expand-pattern"), + query, build_pure_c_string (" ")); +} + +char* +ts_query_error_to_string (TSQueryError error) +{ + switch (error) + { + case TSQueryErrorNone: + return "None"; + case TSQueryErrorSyntax: + return "Syntax error at"; + case TSQueryErrorNodeType: + return "Node type error at"; + case TSQueryErrorField: + return "Field error at"; + case TSQueryErrorCapture: + return "Capture error at"; + case TSQueryErrorStructure: + return "Structure error at"; + default: + return "Unknown error"; + } +} + +/* Collect predicates for this match and return them in a list. Each + predicate is a list of strings and symbols. */ +Lisp_Object +ts_predicates_for_pattern +(TSQuery *query, uint32_t pattern_index) +{ + uint32_t len; + const TSQueryPredicateStep *predicate_list = + ts_query_predicates_for_pattern (query, pattern_index, &len); + Lisp_Object result = Qnil; + Lisp_Object predicate = Qnil; + for (int idx=0; idx < len; idx++) + { + TSQueryPredicateStep step = predicate_list[idx]; + switch (step.type) + { + case TSQueryPredicateStepTypeCapture: + { + uint32_t str_len; + const char *str = ts_query_capture_name_for_id + (query, step.value_id, &str_len); + predicate = Fcons (intern_c_string_1 (str, str_len), + predicate); + break; + } + case TSQueryPredicateStepTypeString: + { + uint32_t str_len; + const char *str = ts_query_string_value_for_id + (query, step.value_id, &str_len); + predicate = Fcons (make_string (str, str_len), predicate); + break; + } + case TSQueryPredicateStepTypeDone: + result = Fcons (Fnreverse (predicate), result); + predicate = Qnil; + break; + } + } + return Fnreverse (result); +} + +/* Translate a capture NAME (symbol) to the text of the captured node. + Signals treesit-query-error if such node is not captured. */ +Lisp_Object +ts_predicate_capture_name_to_text (Lisp_Object name, Lisp_Object captures) +{ + Lisp_Object node = Qnil; + for (Lisp_Object tail = captures; !NILP (tail); tail = XCDR (tail)) + { + if (EQ (XCAR (XCAR (tail)), name)) + { + node = XCDR (XCAR (tail)); + break; + } + } + + if (NILP (node)) + xsignal3 (Qtreesit_query_error, + build_pure_c_string ("Cannot find captured node"), + name, build_pure_c_string ("A predicate can only refer to captured nodes in the same pattern")); + + struct buffer *old_buffer = current_buffer; + set_buffer_internal + (XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer)); + Lisp_Object text = Fbuffer_substring + (Ftreesit_node_start (node), Ftreesit_node_end (node)); + set_buffer_internal (old_buffer); + return text; +} + +/* Handles predicate (#equal A B). Return true if A equals B; return + false otherwise. A and B can be either string, or a capture name. + The capture name evaluates to the text its captured node spans in + the buffer. */ +bool +ts_predicate_equal (Lisp_Object args, Lisp_Object captures) +{ + if (XFIXNUM (Flength (args)) != 2) + xsignal2 (Qtreesit_query_error, build_pure_c_string ("Predicate `equal' requires two arguments but only given"), Flength (args)); + + Lisp_Object arg1 = XCAR (args); + Lisp_Object arg2 = XCAR (XCDR (args)); + Lisp_Object tail = captures; + Lisp_Object text1 = STRINGP (arg1) ? arg1 : + ts_predicate_capture_name_to_text (arg1, captures); + Lisp_Object text2 = STRINGP (arg2) ? arg2 : + ts_predicate_capture_name_to_text (arg2, captures); + + if (NILP (Fstring_equal (text1, text2))) + return false; + else + return true; +} + +/* Handles predicate (#match "regexp" @node). Return true if "regexp" + matches the text spanned by @node; return false otherwise. Matching + is case-sensitive. */ +bool +ts_predicate_match (Lisp_Object args, Lisp_Object captures) +{ + if (XFIXNUM (Flength (args)) != 2) + xsignal2 (Qtreesit_query_error, build_pure_c_string ("Predicate `equal' requires two arguments but only given"), Flength (args)); + + Lisp_Object regexp = XCAR (args); + Lisp_Object capture_name = XCAR (XCDR (args)); + Lisp_Object tail = captures; + Lisp_Object text = ts_predicate_capture_name_to_text + (capture_name, captures); + + /* It's probably common to get the argument order backwards. Catch + this mistake early and show helpful explanation, because Emacs + loves you. (We put the regexp first because that's what + string-match does.) */ + if (!STRINGP (regexp)) + xsignal1 (Qtreesit_query_error, build_pure_c_string ("The first argument to `match' should be a regexp string, not a capture name")); + if (!SYMBOLP (capture_name)) + xsignal1 (Qtreesit_query_error, build_pure_c_string ("The second argument to `match' should be a capture name, not a string")); + + if (fast_string_match (regexp, text) >= 0) + return true; + else + return false; +} + +/* About predicates: I decide to hard-code predicates in C instead of + implementing an extensible system where predicates are translated + to Lisp functions, and new predicates can be added by extending a + list of functions, because I really couldn't imagine any useful + predicates besides equal and match. If we later found out that + such system is indeed useful and necessary, it can be easily + added. */ + +/* If all predicates in PREDICATES passes, return true; otherwise + return false. */ +bool +ts_eval_predicates (Lisp_Object captures, Lisp_Object predicates) +{ + bool pass = true; + /* Evaluate each predicates. */ + for (Lisp_Object tail = predicates; + !NILP (tail); tail = XCDR (tail)) + { + Lisp_Object predicate = XCAR (tail); + Lisp_Object fn = XCAR (predicate); + Lisp_Object args = XCDR (predicate); + if (!NILP (Fstring_equal (fn, build_pure_c_string("equal")))) + pass = ts_predicate_equal (args, captures); + else if (!NILP (Fstring_equal + (fn, build_pure_c_string("match")))) + pass = ts_predicate_match (args, captures); + else + xsignal3 (Qtreesit_query_error, + build_pure_c_string ("Invalid predicate"), + fn, build_pure_c_string ("Currently Emacs only supports equal and match predicate")); + } + /* If all predicates passed, add captures to result list. */ + return pass; +} + +DEFUN ("treesit-query-capture", + Ftreesit_query_capture, + Streesit_query_capture, 2, 4, 0, + doc: /* Query NODE with patterns in QUERY. + +Return a list of (CAPTURE_NAME . NODE). CAPTURE_NAME is the name +assigned to the node in PATTERN. NODE is the captured node. + +QUERY is either a string query or a sexp query. See Info node +`(elisp)Pattern Matching' for how to write a query in either string or +s-expression form. + +BEG and END, if both non-nil, specifies the range in which the query +is executed. + +Raise an treesit-query-error if QUERY is malformed, or something +else goes wrong. */) + (Lisp_Object node, Lisp_Object query, + Lisp_Object beg, Lisp_Object end) +{ + ts_check_node (node); + if (!NILP (beg)) + CHECK_INTEGER (beg); + if (!NILP (end)) + CHECK_INTEGER (end); + + if (CONSP (query)) + query = Ftreesit_expand_query (query); + else + CHECK_STRING (query); + + /* Extract C values from Lisp objects. */ + TSNode ts_node = XTS_NODE (node)->node; + Lisp_Object lisp_parser = XTS_NODE (node)->parser; + ptrdiff_t visible_beg = + XTS_PARSER (XTS_NODE (node)->parser)->visible_beg; + const TSLanguage *lang = ts_parser_language + (XTS_PARSER (lisp_parser)->parser); + char *source = SSDATA (query); + + /* Initialize query objects, and execute query. */ + uint32_t error_offset; + TSQueryError error_type; + /* TODO: We could cache the query object, so that repeatedly + querying with the same query can reuse the query object. It also + saves us from expanding the sexp query into a string. I don't + know how much time that could save though. */ + TSQuery *ts_query = ts_query_new (lang, source, strlen (source), + &error_offset, &error_type); + TSQueryCursor *cursor = ts_query_cursor_new (); + + if (ts_query == NULL) + { + xsignal2 (Qtreesit_query_error, + build_string (ts_query_error_to_string (error_type)), + make_fixnum (error_offset + 1)); + } + if (!NILP (beg) && !NILP (end)) + { + EMACS_INT beg_byte = XFIXNUM (beg); + EMACS_INT end_byte = XFIXNUM (end); + ts_query_cursor_set_byte_range + (cursor, (uint32_t) beg_byte - visible_beg, + (uint32_t) end_byte - visible_beg); + } + + ts_query_cursor_exec (cursor, ts_query, ts_node); + TSQueryMatch match; + + /* Go over each match, collect captures and predicates. Include the + captures in the return list if all predicates in that match + passes. */ + Lisp_Object result = Qnil; + while (ts_query_cursor_next_match (cursor, &match)) + { + /* Get captured nodes. */ + Lisp_Object captures_lisp = Qnil; + const TSQueryCapture *captures = match.captures; + for (int idx=0; idx < match.capture_count; idx++) + { + uint32_t capture_name_len; + TSQueryCapture capture = captures[idx]; + Lisp_Object captured_node = + make_ts_node(lisp_parser, capture.node); + const char *capture_name = ts_query_capture_name_for_id + (ts_query, capture.index, &capture_name_len); + Lisp_Object cap = + Fcons (intern_c_string_1 (capture_name, capture_name_len), + captured_node); + captures_lisp = Fcons (cap, captures_lisp); + } + /* Get predicates. */ + Lisp_Object predicates = + ts_predicates_for_pattern (ts_query, match.pattern_index); + + captures_lisp = Fnreverse (captures_lisp); + if (ts_eval_predicates (captures_lisp, predicates)) + { + result = CALLN (Fnconc, result, captures_lisp); + } + } + ts_query_delete (ts_query); + ts_query_cursor_delete (cursor); + return result; +} + +/*** Initialization */ + +/* Initialize the tree-sitter routines. */ +void +syms_of_treesit (void) +{ + DEFSYM (Qtreesit_parser_p, "treesit-parser-p"); + DEFSYM (Qtreesit_node_p, "treesit-node-p"); + DEFSYM (Qnamed, "named"); + DEFSYM (Qmissing, "missing"); + DEFSYM (Qextra, "extra"); + DEFSYM (Qhas_changes, "has-changes"); + DEFSYM (Qhas_error, "has-error"); + + DEFSYM (Qtreesit_error, "treesit-error"); + DEFSYM (Qtreesit_query_error, "treesit-query-error"); + DEFSYM (Qtreesit_parse_error, "treesit-parse-error"); + DEFSYM (Qtreesit_range_invalid, "treesit-range-invalid"); + DEFSYM (Qtreesit_buffer_too_large, + "treesit-buffer-too-large"); + DEFSYM (Qtreesit_load_language_error, + "treesit-load-language-error"); + DEFSYM (Qtreesit_node_outdated, + "treesit-node-outdated"); + DEFSYM (Quser_emacs_directory, + "user-emacs-directory"); + + define_error (Qtreesit_error, "Generic tree-sitter error", Qerror); + define_error (Qtreesit_query_error, "Query pattern is malformed", + Qtreesit_error); + /* Should be impossible, no need to document this error. */ + define_error (Qtreesit_parse_error, "Parse failed", + Qtreesit_error); + define_error (Qtreesit_range_invalid, + "RANGES are invalid, they have to be ordered and not overlapping", + Qtreesit_error); + define_error (Qtreesit_buffer_too_large, "Buffer too large (> 4GB)", + Qtreesit_error); + define_error (Qtreesit_load_language_error, + "Cannot load language definition", + Qtreesit_error); + define_error (Qtreesit_node_outdated, + "This node is outdated, please retrieve a new one", + Qtreesit_error); + + DEFSYM (Qtreesit_parser_list, "treesit-parser-list"); + DEFVAR_LISP ("treesit-parser-list", Vtreesit_parser_list, + doc: /* A list of tree-sitter parsers. + +If you removed a parser from this list, do not put it back in. Emacs +keeps the parser in this list updated with any change in the buffer. +If removed and put back in, there is no guarantee that the parser is in +sync with the buffer's content. */); + Vtreesit_parser_list = Qnil; + Fmake_variable_buffer_local (Qtreesit_parser_list); + + DEFVAR_LISP ("treesit-load-name-override-list", + Vtreesit_load_name_override_list, + doc: + /* An override list for unconventional tree-sitter libraries. + +By default, Emacs assumes the dynamic library for LANG is +libtree-sitter-LANG.EXT, where EXT is the OS specific extension for +dynamic libraries. Emacs also assumes that the name of the C function +the library provides is tree_sitter_LANG. If that is not the case, +add an entry + + (LANG LIBRARY-BASE-NAME FUNCTION-NAME) + +to this list, where LIBRARY-BASE-NAME is the filename of the dynamic +library without extension, FUNCTION-NAME is the function provided by +the library. */); + Vtreesit_load_name_override_list = Qnil; + + DEFVAR_LISP ("treesit-extra-load-path", + Vtreesit_extra_load_path, + doc: + /* Extra load paths of tree-sitter language definitions. +When trying to load a tree-sitter language definition, +Emacs looks at directories in this variable, +`user-emacs-directory'/tree-sitter, and system default locations for +dynamic libraries, in that order. */); + Vtreesit_extra_load_path = Qnil; + + defsubr (&Streesit_language_available_p); + + defsubr (&Streesit_parser_p); + defsubr (&Streesit_node_p); + + defsubr (&Streesit_node_parser); + + defsubr (&Streesit_parser_create); + defsubr (&Streesit_parser_buffer); + defsubr (&Streesit_parser_language); + + defsubr (&Streesit_parser_root_node); + /* defsubr (&Streesit_parse_string); */ + + defsubr (&Streesit_parser_set_included_ranges); + defsubr (&Streesit_parser_included_ranges); + + defsubr (&Streesit_node_type); + defsubr (&Streesit_node_start); + defsubr (&Streesit_node_end); + defsubr (&Streesit_node_string); + defsubr (&Streesit_node_parent); + defsubr (&Streesit_node_child); + defsubr (&Streesit_node_check); + defsubr (&Streesit_node_field_name_for_child); + defsubr (&Streesit_node_child_count); + defsubr (&Streesit_node_child_by_field_name); + defsubr (&Streesit_node_next_sibling); + defsubr (&Streesit_node_prev_sibling); + defsubr (&Streesit_node_first_child_for_pos); + defsubr (&Streesit_node_descendant_for_range); + defsubr (&Streesit_node_eq); + + defsubr (&Streesit_expand_pattern); + defsubr (&Streesit_expand_query); + defsubr (&Streesit_query_capture); +} diff --git a/src/treesit.h b/src/treesit.h new file mode 100644 index 00000000000..639c4eedc55 --- /dev/null +++ b/src/treesit.h @@ -0,0 +1,137 @@ +/* Header file for the tree-sitter integration. + +Copyright (C) 2021 Free Software Foundation, Inc. + +This file is part of GNU Emacs. + +GNU Emacs is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or (at +your option) any later version. + +GNU Emacs is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ + +#ifndef EMACS_TREESIT_H +#define EMACS_TREESIT_H + +#include <tree_sitter/api.h> +#include "lisp.h" + +INLINE_HEADER_BEGIN + +/* A wrapper for a tree-sitter parser, but also contains a parse tree + and other goodies for convenience. */ +struct Lisp_TS_Parser +{ + union vectorlike_header header; + /* A symbol represents the language this parser uses. See the + manual for more explanation. */ + Lisp_Object language_symbol; + /* The buffer associated with this parser. */ + Lisp_Object buffer; + /* The pointer to the tree-sitter parser. Never NULL. */ + TSParser *parser; + /* Pointer to the syntax tree. Initially is NULL, so check for NULL + before use. */ + TSTree *tree; + /* Teaches tree-sitter how to read an Emacs buffer. */ + TSInput input; + /* Re-parsing an unchanged buffer is not free for tree-sitter, so we + only make it re-parse when need_reparse == true. That usually + means some change is made in the buffer. But others could set + this field to true to force tree-sitter to re-parse. */ + bool need_reparse; + /* These two positions record the buffer byte position (1-based) of + the "visible region" that tree-sitter sees. Unlike markers, + These two positions do not change as the user inserts and deletes + text around them. Before re-parse, we move these positions to + match BUF_BEGV_BYTE and BUF_ZV_BYTE. Note that we don't need to + synchronize these positions when retrieving them in a function + that involves a node: if the node is not outdated, these + positions are synchronized. */ + ptrdiff_t visible_beg; + ptrdiff_t visible_end; + /* This counter is incremented every time a change is made to the + buffer in ts_record_change. The node retrieved from this parser + inherits this timestamp. This way we can make sure the node is + not outdated when we access its information. */ + ptrdiff_t timestamp; +}; + +/* A wrapper around a tree-sitter node. */ +struct Lisp_TS_Node +{ + union vectorlike_header header; + /* This prevents gc from collecting the tree before the node is done + with it. TSNode contains a pointer to the tree it belongs to, + and the parser object, when collected by gc, will free that + tree. */ + Lisp_Object parser; + TSNode node; + /* A node inherits its parser's timestamp at creation time. The + parser's timestamp increments as the buffer changes. This way we + can make sure the node is not outdated when we access its + information. */ + ptrdiff_t timestamp; +}; + +INLINE bool +TS_PARSERP (Lisp_Object x) +{ + return PSEUDOVECTORP (x, PVEC_TS_PARSER); +} + +INLINE struct Lisp_TS_Parser * +XTS_PARSER (Lisp_Object a) +{ + eassert (TS_PARSERP (a)); + return XUNTAG (a, Lisp_Vectorlike, struct Lisp_TS_Parser); +} + +INLINE bool +TS_NODEP (Lisp_Object x) +{ + return PSEUDOVECTORP (x, PVEC_TS_NODE); +} + +INLINE struct Lisp_TS_Node * +XTS_NODE (Lisp_Object a) +{ + eassert (TS_NODEP (a)); + return XUNTAG (a, Lisp_Vectorlike, struct Lisp_TS_Node); +} + +INLINE void +CHECK_TS_PARSER (Lisp_Object parser) +{ + CHECK_TYPE (TS_PARSERP (parser), Qtreesit_parser_p, parser); +} + +INLINE void +CHECK_TS_NODE (Lisp_Object node) +{ + CHECK_TYPE (TS_NODEP (node), Qtreesit_node_p, node); +} + +void +ts_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte, + ptrdiff_t new_end_byte); + +Lisp_Object +make_ts_parser (Lisp_Object buffer, TSParser *parser, + TSTree *tree, Lisp_Object language_symbol); + +Lisp_Object +make_ts_node (Lisp_Object parser, TSNode node); + +extern void syms_of_treesit (void); + +INLINE_HEADER_END + +#endif /* EMACS_TREESIT_H */ |