summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.in10
-rw-r--r--src/alloc.c13
-rw-r--r--src/casefiddle.c12
-rw-r--r--src/data.c6
-rw-r--r--src/emacs.c7
-rw-r--r--src/eval.c13
-rw-r--r--src/insdel.c47
-rw-r--r--src/json.c16
-rw-r--r--src/lisp.h9
-rw-r--r--src/lread.c8
-rw-r--r--src/print.c28
-rw-r--r--src/treesit.c1657
-rw-r--r--src/treesit.h137
13 files changed, 1943 insertions, 20 deletions
diff --git a/src/Makefile.in b/src/Makefile.in
index 7d15b7afd51..a21af42c0b9 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -343,6 +343,10 @@ JSON_LIBS = @JSON_LIBS@
JSON_CFLAGS = @JSON_CFLAGS@
JSON_OBJ = @JSON_OBJ@
+TREE_SITTER_LIBS = @TREE_SITTER_LIBS@
+TREE_SITTER_FLAGS = @TREE_SITTER_FLAGS@
+TREE_SITTER_OBJ = @TREE_SITTER_OBJ@
+
INTERVALS_H = dispextern.h intervals.h composite.h
GETLOADAVG_LIBS = @GETLOADAVG_LIBS@
@@ -406,7 +410,7 @@ EMACS_CFLAGS=-Demacs $(MYCPPFLAGS) -I. -I$(srcdir) \
$(XINPUT_CFLAGS) $(WEBP_CFLAGS) $(WEBKIT_CFLAGS) $(LCMS2_CFLAGS) \
$(SETTINGS_CFLAGS) $(FREETYPE_CFLAGS) $(FONTCONFIG_CFLAGS) \
$(HARFBUZZ_CFLAGS) $(LIBOTF_CFLAGS) $(M17N_FLT_CFLAGS) $(DEPFLAGS) \
- $(LIBSYSTEMD_CFLAGS) $(JSON_CFLAGS) $(XSYNC_CFLAGS) \
+ $(LIBSYSTEMD_CFLAGS) $(JSON_CFLAGS) $(XSYNC_CFLAGS) $(TREE_SITTER_CFLAGS) \
$(LIBGNUTLS_CFLAGS) $(NOTIFY_CFLAGS) $(CAIRO_CFLAGS) \
$(WERROR_CFLAGS) $(HAIKU_CFLAGS) $(XCOMPOSITE_CFLAGS) $(XSHAPE_CFLAGS)
ALL_CFLAGS = $(EMACS_CFLAGS) $(WARN_CFLAGS) $(CFLAGS)
@@ -445,7 +449,7 @@ base_obj = dispnew.o frame.o scroll.o xdisp.o menu.o $(XMENU_OBJ) window.o \
$(if $(HYBRID_MALLOC),sheap.o) \
$(MSDOS_OBJ) $(MSDOS_X_OBJ) $(NS_OBJ) $(CYGWIN_OBJ) $(FONT_OBJ) \
$(W32_OBJ) $(WINDOW_SYSTEM_OBJ) $(XGSELOBJ) $(JSON_OBJ) \
- $(HAIKU_OBJ) $(PGTK_OBJ)
+ $(TREE_SITTER_OBJ) $(HAIKU_OBJ) $(PGTK_OBJ)
doc_obj = $(base_obj) $(NS_OBJC_OBJ)
obj = $(doc_obj) $(HAIKU_CXX_OBJ)
@@ -565,7 +569,7 @@ LIBES = $(LIBS) $(W32_LIBS) $(LIBS_GNUSTEP) $(PGTK_LIBS) $(LIBX_BASE) $(LIBIMAGE
$(LIBGNUTLS_LIBS) $(LIB_PTHREAD) $(GETADDRINFO_A_LIBS) $(LCMS2_LIBS) \
$(NOTIFY_LIBS) $(LIB_MATH) $(LIBZ) $(LIBMODULES) $(LIBSYSTEMD_LIBS) \
$(JSON_LIBS) $(LIBGMP) $(LIBGCCJIT_LIBS) $(XINPUT_LIBS) $(HAIKU_LIBS) \
- $(SQLITE3_LIBS) $(XCOMPOSITE_LIBS) $(XSHAPE_LIBS)
+ $(TREE_SITTER_LIBS) $(SQLITE3_LIBS) $(XCOMPOSITE_LIBS) $(XSHAPE_LIBS)
## FORCE it so that admin/unidata can decide whether this file is
## up-to-date. Although since charprop depends on bootstrap-emacs,
diff --git a/src/alloc.c b/src/alloc.c
index 43fbbb79bed..40a3e235eab 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -50,6 +50,10 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
#include TERM_HEADER
#endif /* HAVE_WINDOW_SYSTEM */
+#ifdef HAVE_TREE_SITTER
+#include "treesit.h"
+#endif
+
#include <flexmember.h>
#include <verify.h>
#include <execinfo.h> /* For backtrace. */
@@ -3162,6 +3166,15 @@ cleanup_vector (struct Lisp_Vector *vector)
if (uptr->finalizer)
uptr->finalizer (uptr->p);
}
+#ifdef HAVE_TREE_SITTER
+ else if (PSEUDOVECTOR_TYPEP (&vector->header, PVEC_TS_PARSER))
+ {
+ struct Lisp_TS_Parser *lisp_parser
+ = PSEUDOVEC_STRUCT (vector, Lisp_TS_Parser);
+ ts_tree_delete(lisp_parser->tree);
+ ts_parser_delete(lisp_parser->parser);
+ }
+#endif
#ifdef HAVE_MODULES
else if (PSEUDOVECTOR_TYPEP (&vector->header, PVEC_MODULE_FUNCTION))
{
diff --git a/src/casefiddle.c b/src/casefiddle.c
index 2ea5f09b4c5..3022c5cc7d6 100644
--- a/src/casefiddle.c
+++ b/src/casefiddle.c
@@ -30,6 +30,10 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
#include "composite.h"
#include "keymap.h"
+#ifdef HAVE_TREE_SITTER
+#include "treesit.h"
+#endif
+
enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP};
/* State for casing individual characters. */
@@ -530,6 +534,11 @@ casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
modify_text (start, end);
prepare_casing_context (&ctx, flag, true);
+#ifdef HAVE_TREE_SITTER
+ ptrdiff_t start_byte = CHAR_TO_BYTE (start);
+ ptrdiff_t old_end_byte = CHAR_TO_BYTE (end);
+#endif
+
ptrdiff_t orig_end = end;
record_delete (start, make_buffer_string (start, end, true), false);
if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
@@ -548,6 +557,9 @@ casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
{
signal_after_change (start, end - start - added, end - start);
update_compositions (start, end, CHECK_ALL);
+#ifdef HAVE_TREE_SITTER
+ ts_record_change (start_byte, old_end_byte, CHAR_TO_BYTE (end));
+#endif
}
return orig_end + added;
diff --git a/src/data.c b/src/data.c
index 72dcf6f878d..a28bf414147 100644
--- a/src/data.c
+++ b/src/data.c
@@ -261,6 +261,10 @@ for example, (type-of 1) returns `integer'. */)
return Qxwidget;
case PVEC_XWIDGET_VIEW:
return Qxwidget_view;
+ case PVEC_TS_PARSER:
+ return Qtreesit_parser;
+ case PVEC_TS_NODE:
+ return Qtreesit_node;
case PVEC_SQLITE:
return Qsqlite;
/* "Impossible" cases. */
@@ -4258,6 +4262,8 @@ syms_of_data (void)
DEFSYM (Qterminal, "terminal");
DEFSYM (Qxwidget, "xwidget");
DEFSYM (Qxwidget_view, "xwidget-view");
+ DEFSYM (Qtreesit_parser, "treesit-parser");
+ DEFSYM (Qtreesit_node, "treesit-node");
DEFSYM (Qdefun, "defun");
diff --git a/src/emacs.c b/src/emacs.c
index ca99a8c787d..9f20a1597c9 100644
--- a/src/emacs.c
+++ b/src/emacs.c
@@ -136,6 +136,10 @@ extern char etext;
#include <sys/resource.h>
#endif
+#ifdef HAVE_TREE_SITTER
+#include "treesit.h"
+#endif
+
#include "pdumper.h"
#include "fingerprint.h"
#include "epaths.h"
@@ -2225,6 +2229,9 @@ Using an Emacs configured with --with-x-toolkit=lucid does not have this problem
syms_of_module ();
#endif
+#ifdef HAVE_TREE_SITTER
+ syms_of_treesit ();
+#endif
#ifdef HAVE_SOUND
syms_of_sound ();
#endif
diff --git a/src/eval.c b/src/eval.c
index 77ec47e2b79..3ec03de1376 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -1835,6 +1835,19 @@ signal_error (const char *s, Lisp_Object arg)
xsignal (Qerror, Fcons (build_string (s), arg));
}
+void
+define_error (Lisp_Object name, const char *message, Lisp_Object parent)
+{
+ eassert (SYMBOLP (name));
+ eassert (SYMBOLP (parent));
+ Lisp_Object parent_conditions = Fget (parent, Qerror_conditions);
+ eassert (CONSP (parent_conditions));
+ eassert (!NILP (Fmemq (parent, parent_conditions)));
+ eassert (NILP (Fmemq (name, parent_conditions)));
+ Fput (name, Qerror_conditions, pure_cons (name, parent_conditions));
+ Fput (name, Qerror_message, build_pure_c_string (message));
+}
+
/* Use this for arithmetic overflow, e.g., when an integer result is
too large even for a bignum. */
void
diff --git a/src/insdel.c b/src/insdel.c
index 6f180ac5800..4676330cb79 100644
--- a/src/insdel.c
+++ b/src/insdel.c
@@ -31,6 +31,10 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
#include "region-cache.h"
#include "pdumper.h"
+#ifdef HAVE_TREE_SITTER
+#include "treesit.h"
+#endif
+
static void insert_from_string_1 (Lisp_Object, ptrdiff_t, ptrdiff_t, ptrdiff_t,
ptrdiff_t, bool, bool);
static void insert_from_buffer_1 (struct buffer *, ptrdiff_t, ptrdiff_t, bool);
@@ -940,6 +944,12 @@ insert_1_both (const char *string,
set_text_properties (make_fixnum (PT), make_fixnum (PT + nchars),
Qnil, Qnil, Qnil);
+#ifdef HAVE_TREE_SITTER
+ eassert (nbytes >= 0);
+ eassert (PT_BYTE >= 0);
+ ts_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes);
+#endif
+
adjust_point (nchars, nbytes);
check_markers ();
@@ -1071,6 +1081,12 @@ insert_from_string_1 (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
graft_intervals_into_buffer (intervals, PT, nchars,
current_buffer, inherit);
+#ifdef HAVE_TREE_SITTER
+ eassert (nbytes >= 0);
+ eassert (PT_BYTE >= 0);
+ ts_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes);
+#endif
+
adjust_point (nchars, outgoing_nbytes);
check_markers ();
@@ -1137,6 +1153,12 @@ insert_from_gap (ptrdiff_t nchars, ptrdiff_t nbytes, bool text_at_gap_tail)
current_buffer, 0);
}
+#ifdef HAVE_TREE_SITTER
+ eassert (nbytes >= 0);
+ eassert (ins_bytepos >= 0);
+ ts_record_change (ins_bytepos, ins_bytepos, ins_bytepos + nbytes);
+#endif
+
if (ins_charpos < PT)
adjust_point (nchars, nbytes);
@@ -1287,6 +1309,12 @@ insert_from_buffer_1 (struct buffer *buf,
/* Insert those intervals. */
graft_intervals_into_buffer (intervals, PT, nchars, current_buffer, inherit);
+#ifdef HAVE_TREE_SITTER
+ eassert (outgoing_nbytes >= 0);
+ eassert (PT_BYTE >= 0);
+ ts_record_change (PT_BYTE, PT_BYTE, PT_BYTE + outgoing_nbytes);
+#endif
+
adjust_point (nchars, outgoing_nbytes);
}
@@ -1535,6 +1563,13 @@ replace_range (ptrdiff_t from, ptrdiff_t to, Lisp_Object new,
graft_intervals_into_buffer (intervals, from, inschars,
current_buffer, inherit);
+#ifdef HAVE_TREE_SITTER
+ eassert (to_byte >= from_byte);
+ eassert (outgoing_insbytes >= 0);
+ eassert (from_byte >= 0);
+ ts_record_change (from_byte, to_byte, from_byte + outgoing_insbytes);
+#endif
+
/* Relocate point as if it were a marker. */
if (from < PT)
adjust_point ((from + inschars - (PT < to ? PT : to)),
@@ -1569,7 +1604,11 @@ replace_range (ptrdiff_t from, ptrdiff_t to, Lisp_Object new,
If MARKERS, relocate markers.
Unlike most functions at this level, never call
- prepare_to_modify_buffer and never call signal_after_change. */
+ prepare_to_modify_buffer and never call signal_after_change.
+ Because this function is called in a loop, one character at a time.
+ The caller of 'replace_range_2' calls these hooks for the entire
+ region once. Apart from signal_after_change, any caller of this
+ function should also call ts_record_change. */
void
replace_range_2 (ptrdiff_t from, ptrdiff_t from_byte,
@@ -1892,6 +1931,12 @@ del_range_2 (ptrdiff_t from, ptrdiff_t from_byte,
evaporate_overlays (from);
+#ifdef HAVE_TREE_SITTER
+ eassert (from_byte <= to_byte);
+ eassert (from_byte >= 0);
+ ts_record_change (from_byte, to_byte, from_byte);
+#endif
+
return deletion;
}
diff --git a/src/json.c b/src/json.c
index db1be07f196..957f91b46bb 100644
--- a/src/json.c
+++ b/src/json.c
@@ -1090,22 +1090,6 @@ usage: (json-parse-buffer &rest args) */)
return unbind_to (count, lisp);
}
-/* Simplified version of 'define-error' that works with pure
- objects. */
-
-static void
-define_error (Lisp_Object name, const char *message, Lisp_Object parent)
-{
- eassert (SYMBOLP (name));
- eassert (SYMBOLP (parent));
- Lisp_Object parent_conditions = Fget (parent, Qerror_conditions);
- eassert (CONSP (parent_conditions));
- eassert (!NILP (Fmemq (parent, parent_conditions)));
- eassert (NILP (Fmemq (name, parent_conditions)));
- Fput (name, Qerror_conditions, pure_cons (name, parent_conditions));
- Fput (name, Qerror_message, build_pure_c_string (message));
-}
-
void
syms_of_json (void)
{
diff --git a/src/lisp.h b/src/lisp.h
index 1ad89fc4689..eb1f1ec2c21 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -577,6 +577,8 @@ enum Lisp_Fwd_Type
your object -- this way, the same object could be used to represent
several disparate C structures.
+ In addition, you need to add switch branches in data.c for Ftype_of.
+
You also need to add the new type to the constant
`cl--typeof-types' in lisp/emacs-lisp/cl-preloaded.el. */
@@ -1056,6 +1058,8 @@ enum pvec_type
PVEC_CONDVAR,
PVEC_MODULE_FUNCTION,
PVEC_NATIVE_COMP_UNIT,
+ PVEC_TS_PARSER,
+ PVEC_TS_NODE,
PVEC_SQLITE,
/* These should be last, for internal_equal and sxhash_obj. */
@@ -5534,6 +5538,11 @@ maybe_gc (void)
maybe_garbage_collect ();
}
+/* Simplified version of 'define-error' that works with pure
+ objects. */
+void
+define_error (Lisp_Object name, const char *message, Lisp_Object parent);
+
INLINE_HEADER_END
#endif /* EMACS_LISP_H */
diff --git a/src/lread.c b/src/lread.c
index 2538851bac6..f1ffdef04e4 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -5218,6 +5218,14 @@ to the specified file name if a suffix is allowed or required. */);
Fcons (build_pure_c_string (MODULES_SECONDARY_SUFFIX), Vload_suffixes);
#endif
+ DEFVAR_LISP ("dynamic-library-suffixes", Vdynamic_library_suffixes,
+ doc: /* A list of suffixes for loadable dynamic libraries. */);
+ Vdynamic_library_suffixes =
+ Fcons (build_pure_c_string (DYNAMIC_LIB_SECONDARY_SUFFIX), Qnil);
+ Vdynamic_library_suffixes =
+ Fcons (build_pure_c_string (DYNAMIC_LIB_SUFFIX),
+ Vdynamic_library_suffixes);
+
#endif
DEFVAR_LISP ("module-file-suffix", Vmodule_file_suffix,
doc: /* Suffix of loadable module file, or nil if modules are not supported. */);
diff --git a/src/print.c b/src/print.c
index d7583282b69..d8b8513f311 100644
--- a/src/print.c
+++ b/src/print.c
@@ -48,6 +48,10 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
# include <sys/socket.h> /* for F_DUPFD_CLOEXEC */
#endif
+#ifdef HAVE_TREE_SITTER
+#include "treesit.h"
+#endif
+
struct terminal;
/* Avoid actual stack overflow in print. */
@@ -1956,6 +1960,30 @@ print_vectorlike (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag,
}
break;
#endif
+
+#ifdef HAVE_TREE_SITTER
+ case PVEC_TS_PARSER:
+ print_c_string ("#<treesit-parser for ", printcharfun);
+ Lisp_Object language = XTS_PARSER (obj)->language_symbol;
+ print_string (Fsymbol_name (language), printcharfun);
+ print_c_string (" in ", printcharfun);
+ print_object (XTS_PARSER (obj)->buffer, printcharfun, escapeflag);
+ printchar ('>', printcharfun);
+ break;
+ case PVEC_TS_NODE:
+ print_c_string ("#<treesit-node from ", printcharfun);
+ print_object (Ftreesit_node_start (obj),
+ printcharfun, escapeflag);
+ print_c_string (" to ", printcharfun);
+ print_object (Ftreesit_node_end (obj),
+ printcharfun, escapeflag);
+ print_c_string (" in ", printcharfun);
+ print_object (XTS_PARSER (XTS_NODE (obj)->parser)->buffer,
+ printcharfun, escapeflag);
+ printchar ('>', printcharfun);
+ break;
+#endif
+
case PVEC_SQLITE:
{
print_c_string ("#<sqlite ", printcharfun);
diff --git a/src/treesit.c b/src/treesit.c
new file mode 100644
index 00000000000..e127fc2d87c
--- /dev/null
+++ b/src/treesit.c
@@ -0,0 +1,1657 @@
+/* Tree-sitter integration for GNU Emacs.
+
+Copyright (C) 2021-2022 Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
+
+#include <config.h>
+#include "lisp.h"
+#include "buffer.h"
+#include "treesit.h"
+
+/* Commentary
+
+ The Emacs wrapper of tree-sitter does not expose everything the C
+ API provides, most notably:
+
+ - It doesn't expose a syntax tree, we put the syntax tree in the
+ parser object, and updating the tree is handled in the C level.
+
+ - We don't expose tree cursor either. I think Lisp is slow enough
+ to nullify any performance advantage of using a cursor, though I
+ don't have evidence. Also I want to minimize the number of new
+ types we introduce, currently we only add parser and node type.
+
+ - Because updating the change is handled in the C level as each
+ change is made in the buffer, there is no way for Lisp to update
+ a node. But since we can just retrieve a new node, it shouldn't
+ be a limitation.
+
+ - I didn't expose setting timeout and cancellation flag for a
+ parser, mainly because I don't think they are really necessary
+ in Emacs' use cases.
+
+ - Many tree-sitter functions asks for a TSPoint, basically a (row,
+ column) location. Emacs uses a gap buffer and keeps no
+ information about row and column position. According to the
+ author of tree-sitter, tree-sitter only asks for (row, column)
+ position to carry it around and return back to the user later;
+ and the real position used is the byte position. He also said
+ that he _think_ that it will work to use byte position only.
+ That's why whenever a TSPoint is asked, we pass a dummy one to
+ it. Judging by the nature of parsing algorithms, I think it is
+ safe to use only byte position, and I don't think this will
+ change in the future.
+
+ REF: https://github.com/tree-sitter/tree-sitter/issues/445
+
+ treesit.h has some commentary on the two main data structure
+ for the parser and node. ts_ensure_position_synced has some
+ commentary on how do we make tree-sitter play well with narrowing
+ (tree-sitter parser only sees the visible region, so we need to
+ translate positions back and forth). Most action happens in
+ ts_ensure_parsed, ts_read_buffer and ts_record_change.
+
+ A complete correspondence list between tree-sitter functions and
+ exposed Lisp functions can be found in the manual (elisp)API
+ Correspondence.
+
+ Placement of CHECK_xxx functions: call CHECK_xxx before using any
+ unchecked Lisp values; these include argument of Lisp functions,
+ return value of Fsymbol_value, car of a cons.
+
+ Initializing tree-sitter: there are two entry points to tree-sitter
+ functions: 'treesit-parser-create' and
+ 'treesit-language-available-p'. Therefore we only need to call
+ initialization function in those two functions.
+
+ Tree-sitter offset (0-based) and buffer position (1-based):
+ tree-sitter offset + buffer position = buffer position
+ buffer position - buffer position = tree-sitter offset
+
+ Tree-sitter-related code in other files:
+ - src/alloc.c for gc for parser and node
+ - src/casefiddle.c & src/insdel.c for notifying tree-sitter
+ parser of buffer changes.
+ - lisp/emacs-lisp/cl-preloaded.el & data.c & lisp.h for parser and
+ node type.
+ */
+
+/*** Initialization */
+
+bool ts_initialized = false;
+
+static void *
+ts_calloc_wrapper (size_t n, size_t size)
+{
+ return xzalloc (n * size);
+}
+
+void
+ts_initialize ()
+{
+ if (!ts_initialized)
+ {
+ ts_set_allocator (xmalloc, ts_calloc_wrapper, xrealloc, xfree);
+ ts_initialized = true;
+ }
+}
+
+/*** Loading language library */
+
+/* Translates a symbol treesit-<lang> to a C name
+ treesit_<lang>. */
+void
+ts_symbol_to_c_name (char *symbol_name)
+{
+ for (int idx=0; idx < strlen (symbol_name); idx++)
+ {
+ if (symbol_name[idx] == '-')
+ symbol_name[idx] = '_';
+ }
+}
+
+bool
+ts_find_override_name
+(Lisp_Object language_symbol, Lisp_Object *name, Lisp_Object *c_symbol)
+{
+ for (Lisp_Object list = Vtreesit_load_name_override_list;
+ !NILP (list); list = XCDR (list))
+ {
+ Lisp_Object lang = XCAR (XCAR (list));
+ CHECK_SYMBOL (lang);
+ if (EQ (lang, language_symbol))
+ {
+ *name = Fnth (make_fixnum (1), XCAR (list));
+ CHECK_STRING (*name);
+ *c_symbol = Fnth (make_fixnum (2), XCAR (list));
+ CHECK_STRING (*c_symbol);
+ return true;
+ }
+ }
+ return false;
+}
+
+/* For example, if Vdynamic_library_suffixes is (".so", ".dylib"),
+ thsi function pushes "lib_base_name.so" and "lib_base_name.dylib"
+ into *path_candidates. Obiviously path_candidates should be a Lisp
+ list of Lisp strings. */
+void
+ts_load_language_push_for_each_suffix
+(Lisp_Object lib_base_name, Lisp_Object *path_candidates)
+{
+ for (Lisp_Object suffixes = Vdynamic_library_suffixes;
+ !NILP (suffixes); suffixes = XCDR (suffixes)) {
+ *path_candidates = Fcons (concat2 (lib_base_name, XCAR (suffixes)),
+ *path_candidates);
+ }
+}
+
+/* Load the dynamic library of LANGUAGE_SYMBOL and return the pointer
+ to the language definition. Signals
+ Qtreesit_load_language_error if something goes wrong.
+ Qtreesit_load_language_error carries the error message from
+ trying to load the library with each extension.
+
+ If SIGNAL is true, signal an error when failed to load LANGUAGE; if
+ false, return NULL when failed. */
+TSLanguage *
+ts_load_language (Lisp_Object language_symbol, bool signal)
+{
+ Lisp_Object symbol_name = Fsymbol_name (language_symbol);
+
+ /* Figure out the library name and C name. */
+ Lisp_Object lib_base_name =
+ (concat2 (build_pure_c_string ("libtree-sitter-"), symbol_name));
+ Lisp_Object base_name =
+ (concat2 (build_pure_c_string ("tree-sitter-"), symbol_name));
+ char *c_name = strdup (SSDATA (base_name));
+ ts_symbol_to_c_name (c_name);
+
+ /* Override the library name and C name, if appropriate. */
+ Lisp_Object override_name;
+ Lisp_Object override_c_name;
+ bool found_override = ts_find_override_name
+ (language_symbol, &override_name, &override_c_name);
+ if (found_override)
+ {
+ lib_base_name = override_name;
+ c_name = SSDATA (override_c_name);
+ }
+
+ /* Now we generate a list of possible library paths. */
+ Lisp_Object path_candidates = Qnil;
+ /* First push just the filenames to the candidate list, which will
+ make dynlib_open look under standard system load paths. */
+ ts_load_language_push_for_each_suffix
+ (lib_base_name, &path_candidates);
+ /* Then push ~/.emacs.d/tree-sitter paths. */
+ ts_load_language_push_for_each_suffix
+ (Fexpand_file_name
+ (concat2 (build_string ("tree-sitter/"), lib_base_name),
+ Fsymbol_value (Quser_emacs_directory)),
+ &path_candidates);
+ /* Then push paths from treesit-extra-load-path. */
+ for (Lisp_Object tail = Freverse (Vtreesit_extra_load_path);
+ !NILP (tail); tail = XCDR (tail))
+ {
+ ts_load_language_push_for_each_suffix
+ (Fexpand_file_name (lib_base_name, XCAR (tail)),
+ &path_candidates);
+ }
+
+ /* Try loading the dynamic library by each path candidate. Stop
+ when succeed, record the error message and try the next one when
+ fail. */
+ dynlib_handle_ptr handle;
+ char const *error;
+ Lisp_Object error_list = Qnil;
+ for (Lisp_Object tail = path_candidates;
+ !NILP (tail); tail = XCDR (tail))
+ {
+ char *library_name = SSDATA (XCAR (tail));
+ dynlib_error ();
+ handle = dynlib_open (library_name);
+ error = dynlib_error ();
+ if (error == NULL)
+ break;
+ else
+ error_list = Fcons (build_string (error), error_list);
+ }
+ if (error != NULL)
+ {
+ if (signal)
+ xsignal2 (Qtreesit_load_language_error,
+ symbol_name, Fnreverse (error_list));
+ else
+ return NULL;
+ }
+
+ /* Load TSLanguage. */
+ dynlib_error ();
+ TSLanguage *(*langfn) ();
+ langfn = dynlib_sym (handle, c_name);
+ error = dynlib_error ();
+ if (error != NULL)
+ {
+ if (signal)
+ xsignal1 (Qtreesit_load_language_error,
+ build_string (error));
+ else
+ return NULL;
+ }
+ TSLanguage *lang = (*langfn) ();
+
+ /* Check if language version matches tree-sitter version. */
+ TSParser *parser = ts_parser_new ();
+ bool success = ts_parser_set_language (parser, lang);
+ ts_parser_delete (parser);
+ if (!success)
+ {
+ if (signal)
+ xsignal2 (Qtreesit_load_language_error,
+ build_pure_c_string ("Language version doesn't match tree-sitter version, language version:"),
+ make_fixnum (ts_language_version (lang)));
+ else
+ return NULL;
+ }
+ return lang;
+}
+
+DEFUN ("treesit-language-available-p",
+ Ftreesit_langauge_available_p,
+ Streesit_language_available_p,
+ 1, 1, 0,
+ doc: /* Return non-nil if LANGUAGE exists and is loadable. */)
+ (Lisp_Object language)
+{
+ CHECK_SYMBOL (language);
+ ts_initialize ();
+ if (ts_load_language(language, false) == NULL)
+ return Qnil;
+ else
+ return Qt;
+}
+
+/*** Parsing functions */
+
+/* An auxiliary function that saves a few lines of code. Assumes TREE
+ is not NULL. */
+static inline void
+ts_tree_edit_1 (TSTree *tree, ptrdiff_t start_byte,
+ ptrdiff_t old_end_byte, ptrdiff_t new_end_byte)
+{
+ TSPoint dummy_point = {0, 0};
+ TSInputEdit edit = {(uint32_t) start_byte,
+ (uint32_t) old_end_byte,
+ (uint32_t) new_end_byte,
+ dummy_point, dummy_point, dummy_point};
+ ts_tree_edit (tree, &edit);
+}
+
+/* Update each parser's tree after the user made an edit. This
+function does not parse the buffer and only updates the tree. (So it
+should be very fast.) */
+void
+ts_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
+ ptrdiff_t new_end_byte)
+{
+ for (Lisp_Object parser_list =
+ Fsymbol_value (Qtreesit_parser_list);
+ !NILP (parser_list);
+ parser_list = XCDR (parser_list))
+ {
+ CHECK_CONS (parser_list);
+ Lisp_Object lisp_parser = XCAR (parser_list);
+ CHECK_TS_PARSER (lisp_parser);
+ TSTree *tree = XTS_PARSER (lisp_parser)->tree;
+ if (tree != NULL)
+ {
+ eassert (start_byte <= old_end_byte);
+ eassert (start_byte <= new_end_byte);
+ /* Think the recorded change as a delete followed by an
+ insert, and think of them as moving unchanged text back
+ and forth. After all, the whole point of updating the
+ tree is to update the position of unchanged text. */
+ ptrdiff_t bytes_del = old_end_byte - start_byte;
+ ptrdiff_t bytes_ins = new_end_byte - start_byte;
+
+ ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg;
+ ptrdiff_t visible_end = XTS_PARSER (lisp_parser)->visible_end;
+
+ ptrdiff_t affected_start =
+ max (visible_beg, start_byte) - visible_beg;
+ ptrdiff_t affected_old_end =
+ min (visible_end, affected_start + bytes_del);
+ ptrdiff_t affected_new_end =
+ affected_start + bytes_ins;
+
+ ts_tree_edit_1 (tree, affected_start, affected_old_end,
+ affected_new_end);
+ XTS_PARSER (lisp_parser)->visible_end = affected_new_end;
+ XTS_PARSER (lisp_parser)->need_reparse = true;
+ XTS_PARSER (lisp_parser)->timestamp++;
+ }
+ }
+}
+
+void
+ts_ensure_position_synced (Lisp_Object parser)
+{
+ TSParser *ts_parser = XTS_PARSER (parser)->parser;
+ TSTree *tree = XTS_PARSER (parser)->tree;
+
+ if (tree == NULL)
+ return;
+
+ struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
+ ptrdiff_t visible_beg = XTS_PARSER (parser)->visible_beg;
+ ptrdiff_t visible_end = XTS_PARSER (parser)->visible_end;
+ /* Before we parse or set ranges, catch up with the narrowing
+ situation. We change visible_beg and visible_end to match
+ BUF_BEGV_BYTE and BUF_ZV_BYTE, and inform tree-sitter of the
+ change. We want to move the visible range of tree-sitter to
+ match the narrowed range. For example,
+ from ________|xxxx|__
+ to |xxxx|__________ */
+
+ /* 1. Make sure visible_beg <= BUF_BEGV_BYTE. */
+ if (visible_beg > BUF_BEGV_BYTE (buffer))
+ {
+ /* Tree-sitter sees: insert at the beginning. */
+ ts_tree_edit_1 (tree, 0, 0, visible_beg - BUF_BEGV_BYTE (buffer));
+ visible_beg = BUF_BEGV_BYTE (buffer);
+ }
+ /* 2. Make sure visible_end = BUF_ZV_BYTE. */
+ if (visible_end < BUF_ZV_BYTE (buffer))
+ {
+ /* Tree-sitter sees: insert at the end. */
+ ts_tree_edit_1 (tree, visible_end - visible_beg,
+ visible_end - visible_beg,
+ BUF_ZV_BYTE (buffer) - visible_beg);
+ visible_end = BUF_ZV_BYTE (buffer);
+ }
+ else if (visible_end > BUF_ZV_BYTE (buffer))
+ {
+ /* Tree-sitter sees: delete at the end. */
+ ts_tree_edit_1 (tree, BUF_ZV_BYTE (buffer) - visible_beg,
+ visible_end - visible_beg,
+ BUF_ZV_BYTE (buffer) - visible_beg);
+ visible_end = BUF_ZV_BYTE (buffer);
+ }
+ /* 3. Make sure visible_beg = BUF_BEGV_BYTE. */
+ if (visible_beg < BUF_BEGV_BYTE (buffer))
+ {
+ /* Tree-sitter sees: delete at the beginning. */
+ ts_tree_edit_1 (tree, 0, BUF_BEGV_BYTE (buffer) - visible_beg, 0);
+ visible_beg = BUF_BEGV_BYTE (buffer);
+ }
+ eassert (0 <= visible_beg);
+ eassert (visible_beg <= visible_end);
+
+ XTS_PARSER (parser)->visible_beg = visible_beg;
+ XTS_PARSER (parser)->visible_end = visible_end;
+}
+
+void
+ts_check_buffer_size (struct buffer *buffer)
+{
+ ptrdiff_t buffer_size =
+ (BUF_Z (buffer) - BUF_BEG (buffer));
+ if (buffer_size > UINT32_MAX)
+ xsignal2 (Qtreesit_buffer_too_large,
+ build_pure_c_string ("Buffer size too large, size:"),
+ make_fixnum (buffer_size));
+}
+
+/* Parse the buffer. We don't parse until we have to. When we have
+to, we call this function to parse and update the tree. */
+void
+ts_ensure_parsed (Lisp_Object parser)
+{
+ if (!XTS_PARSER (parser)->need_reparse)
+ return;
+ TSParser *ts_parser = XTS_PARSER (parser)->parser;
+ TSTree *tree = XTS_PARSER(parser)->tree;
+ TSInput input = XTS_PARSER (parser)->input;
+ struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
+ ts_check_buffer_size (buffer);
+
+ /* Before we parse, catch up with the narrowing situation. */
+ ts_ensure_position_synced (parser);
+
+ TSTree *new_tree = ts_parser_parse(ts_parser, tree, input);
+ /* This should be very rare (impossible, really): it only happens
+ when 1) language is not set (impossible in Emacs because the user
+ has to supply a language to create a parser), 2) parse canceled
+ due to timeout (impossible because we don't set a timeout), 3)
+ parse canceled due to cancellation flag (impossible because we
+ don't set the flag). (See comments for ts_parser_parse in
+ tree_sitter/api.h.) */
+ if (new_tree == NULL)
+ {
+ Lisp_Object buf;
+ XSETBUFFER (buf, buffer);
+ xsignal1 (Qtreesit_parse_error, buf);
+ }
+
+ ts_tree_delete (tree);
+ XTS_PARSER (parser)->tree = new_tree;
+ XTS_PARSER (parser)->need_reparse = false;
+}
+
+/* This is the read function provided to tree-sitter to read from a
+ buffer. It reads one character at a time and automatically skips
+ the gap. */
+const char*
+ts_read_buffer (void *parser, uint32_t byte_index,
+ TSPoint position, uint32_t *bytes_read)
+{
+ struct buffer *buffer =
+ XBUFFER (((struct Lisp_TS_Parser *) parser)->buffer);
+ ptrdiff_t visible_beg = ((struct Lisp_TS_Parser *) parser)->visible_beg;
+ ptrdiff_t visible_end = ((struct Lisp_TS_Parser *) parser)->visible_end;
+ ptrdiff_t byte_pos = byte_index + visible_beg;
+ /* We will make sure visible_beg = BUF_BEGV_BYTE before re-parse (in
+ ts_ensure_parsed), so byte_pos will never be smaller than
+ BUF_BEG_BYTE. */
+ eassert (visible_beg = BUF_BEGV_BYTE (buffer));
+ eassert (visible_end = BUF_ZV_BYTE (buffer));
+
+ /* Read one character. Tree-sitter wants us to set bytes_read to 0
+ if it reads to the end of buffer. It doesn't say what it wants
+ for the return value in that case, so we just give it an empty
+ string. */
+ char *beg;
+ int len;
+ /* This function could run from a user command, so it is better to
+ do nothing instead of raising an error. (It was a pain in the a**
+ to decrypt mega-if-conditions in Emacs source, so I wrote the two
+ branches separately.) */
+ if (!BUFFER_LIVE_P (buffer))
+ {
+ beg = NULL;
+ len = 0;
+ }
+ /* Reached visible end-of-buffer, tell tree-sitter to read no more. */
+ else if (byte_pos >= visible_end)
+ {
+ beg = NULL;
+ len = 0;
+ }
+ /* Normal case, read a character. */
+ else
+ {
+ beg = (char *) BUF_BYTE_ADDRESS (buffer, byte_pos);
+ len = BYTES_BY_CHAR_HEAD ((int) *beg);
+ }
+ *bytes_read = (uint32_t) len;
+ return beg;
+}
+
+/*** Functions for parser and node object*/
+
+/* Wrap the parser in a Lisp_Object to be used in the Lisp machine. */
+Lisp_Object
+make_ts_parser (Lisp_Object buffer, TSParser *parser,
+ TSTree *tree, Lisp_Object language_symbol)
+{
+ struct Lisp_TS_Parser *lisp_parser
+ = ALLOCATE_PSEUDOVECTOR
+ (struct Lisp_TS_Parser, buffer, PVEC_TS_PARSER);
+
+ lisp_parser->language_symbol = language_symbol;
+ lisp_parser->buffer = buffer;
+ lisp_parser->parser = parser;
+ lisp_parser->tree = tree;
+ TSInput input = {lisp_parser, ts_read_buffer, TSInputEncodingUTF8};
+ lisp_parser->input = input;
+ lisp_parser->need_reparse = true;
+ lisp_parser->visible_beg = BUF_BEGV (XBUFFER (buffer));
+ lisp_parser->visible_end = BUF_ZV (XBUFFER (buffer));
+ return make_lisp_ptr (lisp_parser, Lisp_Vectorlike);
+}
+
+/* Wrap the node in a Lisp_Object to be used in the Lisp machine. */
+Lisp_Object
+make_ts_node (Lisp_Object parser, TSNode node)
+{
+ struct Lisp_TS_Node *lisp_node
+ = ALLOCATE_PSEUDOVECTOR (struct Lisp_TS_Node, parser, PVEC_TS_NODE);
+ lisp_node->parser = parser;
+ lisp_node->node = node;
+ lisp_node->timestamp = XTS_PARSER (parser)->timestamp;
+ return make_lisp_ptr (lisp_node, Lisp_Vectorlike);
+}
+
+DEFUN ("treesit-parser-p",
+ Ftreesit_parser_p, Streesit_parser_p, 1, 1, 0,
+ doc: /* Return t if OBJECT is a tree-sitter parser. */)
+ (Lisp_Object object)
+{
+ if (TS_PARSERP (object))
+ return Qt;
+ else
+ return Qnil;
+}
+
+DEFUN ("treesit-node-p",
+ Ftreesit_node_p, Streesit_node_p, 1, 1, 0,
+ doc: /* Return t if OBJECT is a tree-sitter node. */)
+ (Lisp_Object object)
+{
+ if (TS_NODEP (object))
+ return Qt;
+ else
+ return Qnil;
+}
+
+DEFUN ("treesit-node-parser",
+ Ftreesit_node_parser, Streesit_node_parser,
+ 1, 1, 0,
+ doc: /* Return the parser to which NODE belongs. */)
+ (Lisp_Object node)
+{
+ CHECK_TS_NODE (node);
+ return XTS_NODE (node)->parser;
+}
+
+DEFUN ("treesit-parser-create",
+ Ftreesit_parser_create, Streesit_parser_create,
+ 2, 2, 0,
+ doc: /* Create and return a parser in BUFFER for LANGUAGE.
+
+The parser is automatically added to BUFFER's
+`treesit-parser-list'. LANGUAGE should be the symbol of a
+function provided by a tree-sitter language dynamic module, e.g.,
+'treesit-json. If BUFFER is nil, use the current buffer. */)
+ (Lisp_Object buffer, Lisp_Object language)
+{
+ if (NILP (buffer))
+ buffer = Fcurrent_buffer ();
+
+ CHECK_BUFFER (buffer);
+ CHECK_SYMBOL (language);
+ ts_check_buffer_size (XBUFFER (buffer));
+
+ ts_initialize ();
+
+ TSParser *parser = ts_parser_new ();
+ TSLanguage *lang = ts_load_language (language, true);
+ /* We check language version when loading a language, so this should
+ always succeed. */
+ ts_parser_set_language (parser, lang);
+
+ Lisp_Object lisp_parser
+ = make_ts_parser (buffer, parser, NULL, language);
+
+ struct buffer *old_buffer = current_buffer;
+ set_buffer_internal (XBUFFER (buffer));
+
+ Fset (Qtreesit_parser_list,
+ Fcons (lisp_parser, Fsymbol_value (Qtreesit_parser_list)));
+
+ set_buffer_internal (old_buffer);
+ return lisp_parser;
+}
+
+DEFUN ("treesit-parser-buffer",
+ Ftreesit_parser_buffer, Streesit_parser_buffer,
+ 1, 1, 0,
+ doc: /* Return the buffer of PARSER. */)
+ (Lisp_Object parser)
+{
+ CHECK_TS_PARSER (parser);
+ Lisp_Object buf;
+ XSETBUFFER (buf, XBUFFER (XTS_PARSER (parser)->buffer));
+ return buf;
+}
+
+DEFUN ("treesit-parser-language",
+ Ftreesit_parser_language, Streesit_parser_language,
+ 1, 1, 0,
+ doc: /* Return parser's language symbol.
+This symbol is the one used to create the parser. */)
+ (Lisp_Object parser)
+{
+ CHECK_TS_PARSER (parser);
+ return XTS_PARSER (parser)->language_symbol;
+}
+
+/*** Parser API */
+
+DEFUN ("treesit-parser-root-node",
+ Ftreesit_parser_root_node, Streesit_parser_root_node,
+ 1, 1, 0,
+ doc: /* Return the root node of PARSER. */)
+ (Lisp_Object parser)
+{
+ CHECK_TS_PARSER (parser);
+ ts_ensure_parsed (parser);
+ TSNode root_node = ts_tree_root_node (XTS_PARSER (parser)->tree);
+ return make_ts_node (parser, root_node);
+}
+
+/* Checks that the RANGES argument of
+ treesit-parser-set-included-ranges is valid. */
+void
+ts_check_range_argument (Lisp_Object ranges)
+{
+ EMACS_INT last_point = 1;
+ for (Lisp_Object tail = ranges;
+ !NILP (tail); tail = XCDR (tail))
+ {
+ CHECK_CONS (tail);
+ Lisp_Object range = XCAR (tail);
+ CHECK_CONS (range);
+ CHECK_FIXNUM (XCAR (range));
+ CHECK_FIXNUM (XCDR (range));
+ EMACS_INT beg = XFIXNUM (XCAR (range));
+ EMACS_INT end = XFIXNUM (XCDR (range));
+ /* TODO: Maybe we should check for point-min/max, too? */
+ if (!(last_point <= beg && beg <= end))
+ xsignal2 (Qtreesit_range_invalid,
+ build_pure_c_string
+ ("RANGE is either overlapping or out-of-order"),
+ ranges);
+ last_point = end;
+ }
+}
+
+DEFUN ("treesit-parser-set-included-ranges",
+ Ftreesit_parser_set_included_ranges,
+ Streesit_parser_set_included_ranges,
+ 2, 2, 0,
+ doc: /* Limit PARSER to RANGES.
+
+RANGES is a list of (BEG . END), each (BEG . END) confines a range in
+which the parser should operate in. Each range must not overlap, and
+each range should come in order. Signal `treesit-set-range-error'
+if the argument is invalid, or something else went wrong. If RANGES
+is nil, set PARSER to parse the whole buffer. */)
+ (Lisp_Object parser, Lisp_Object ranges)
+{
+ CHECK_TS_PARSER (parser);
+ CHECK_CONS (ranges);
+ ts_check_range_argument (ranges);
+
+ /* Before we parse, catch up with narrowing/widening. */
+ ts_ensure_position_synced (parser);
+
+ bool success;
+ if (NILP (ranges))
+ {
+ /* If RANGES is nil, make parser to parse the whole document.
+ To do that we give tree-sitter a 0 length, the range is a
+ dummy. */
+ TSRange ts_range = {0, 0, 0, 0};
+ success = ts_parser_set_included_ranges
+ (XTS_PARSER (parser)->parser, &ts_range , 0);
+ }
+ else
+ {
+ /* Set ranges for PARSER. */
+ ptrdiff_t len = list_length (ranges);
+ TSRange *ts_ranges = malloc (sizeof(TSRange) * len);
+ struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
+
+ for (int idx=0; !NILP (ranges); idx++, ranges = XCDR (ranges))
+ {
+ Lisp_Object range = XCAR (ranges);
+ struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
+
+ EMACS_INT beg_byte = buf_charpos_to_bytepos
+ (buffer, XFIXNUM (XCAR (range)));
+ EMACS_INT end_byte = buf_charpos_to_bytepos
+ (buffer, XFIXNUM (XCDR (range)));
+ /* We don't care about start and end points, put in dummy
+ value. */
+ TSRange rg = {{0,0}, {0,0},
+ (uint32_t) beg_byte - BUF_BEGV_BYTE (buffer),
+ (uint32_t) end_byte - BUF_BEGV_BYTE (buffer)};
+ ts_ranges[idx] = rg;
+ }
+ success = ts_parser_set_included_ranges
+ (XTS_PARSER (parser)->parser, ts_ranges, (uint32_t) len);
+ /* Although XFIXNUM could signal, it should be impossible
+ because we have checked the input by ts_check_range_argument.
+ So there is no need for unwind-protect. */
+ free (ts_ranges);
+ }
+
+ if (!success)
+ xsignal2 (Qtreesit_range_invalid,
+ build_pure_c_string
+ ("Something went wrong when setting ranges"),
+ ranges);
+
+ XTS_PARSER (parser)->need_reparse = true;
+ return Qnil;
+}
+
+DEFUN ("treesit-parser-included-ranges",
+ Ftreesit_parser_included_ranges,
+ Streesit_parser_included_ranges,
+ 1, 1, 0,
+ doc: /* Return the ranges set for PARSER.
+See `treesit-parser-set-ranges'. If no range is set, return
+nil. */)
+ (Lisp_Object parser)
+{
+ CHECK_TS_PARSER (parser);
+ uint32_t len;
+ const TSRange *ranges = ts_parser_included_ranges
+ (XTS_PARSER (parser)->parser, &len);
+ if (len == 0)
+ return Qnil;
+ struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
+
+ Lisp_Object list = Qnil;
+ for (int idx=0; idx < len; idx++)
+ {
+ TSRange range = ranges[idx];
+ uint32_t beg_byte = range.start_byte + BUF_BEGV_BYTE (buffer);
+ uint32_t end_byte = range.end_byte + BUF_BEGV_BYTE (buffer);
+
+ Lisp_Object lisp_range =
+ Fcons (make_fixnum (buf_bytepos_to_charpos (buffer, beg_byte)) ,
+ make_fixnum (buf_bytepos_to_charpos (buffer, end_byte)));
+ list = Fcons (lisp_range, list);
+ }
+ return Fnreverse (list);
+}
+
+/*** Node API */
+
+/* Check that OBJ is a positive integer and signal an error if
+ otherwise. */
+static void
+ts_check_positive_integer (Lisp_Object obj)
+{
+ CHECK_INTEGER (obj);
+ if (XFIXNUM (obj) < 0)
+ xsignal1 (Qargs_out_of_range, obj);
+}
+
+static void
+ts_check_node (Lisp_Object obj)
+{
+ CHECK_TS_NODE (obj);
+ Lisp_Object lisp_parser = XTS_NODE (obj)->parser;
+ if (XTS_NODE (obj)->timestamp !=
+ XTS_PARSER (lisp_parser)->timestamp)
+ xsignal1 (Qtreesit_node_outdated, obj);
+}
+
+DEFUN ("treesit-node-type",
+ Ftreesit_node_type, Streesit_node_type, 1, 1, 0,
+ doc: /* Return the NODE's type as a string.
+If NODE is nil, return nil. */)
+ (Lisp_Object node)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ TSNode ts_node = XTS_NODE (node)->node;
+ const char *type = ts_node_type (ts_node);
+ return build_string (type);
+}
+
+DEFUN ("treesit-node-start",
+ Ftreesit_node_start, Streesit_node_start, 1, 1, 0,
+ doc: /* Return the NODE's start position.
+If NODE is nil, return nil. */)
+ (Lisp_Object node)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ TSNode ts_node = XTS_NODE (node)->node;
+ ptrdiff_t visible_beg =
+ XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
+ uint32_t start_byte_offset = ts_node_start_byte (ts_node);
+ struct buffer *buffer =
+ XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
+ ptrdiff_t start_pos = buf_bytepos_to_charpos
+ (buffer, start_byte_offset + visible_beg);
+ return make_fixnum (start_pos);
+}
+
+DEFUN ("treesit-node-end",
+ Ftreesit_node_end, Streesit_node_end, 1, 1, 0,
+ doc: /* Return the NODE's end position.
+If NODE is nil, return nil. */)
+ (Lisp_Object node)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ TSNode ts_node = XTS_NODE (node)->node;
+ ptrdiff_t visible_beg =
+ XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
+ uint32_t end_byte_offset = ts_node_end_byte (ts_node);
+ struct buffer *buffer =
+ XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
+ ptrdiff_t end_pos = buf_bytepos_to_charpos
+ (buffer, end_byte_offset + visible_beg);
+ return make_fixnum (end_pos);
+}
+
+DEFUN ("treesit-node-string",
+ Ftreesit_node_string, Streesit_node_string, 1, 1, 0,
+ doc: /* Return the string representation of NODE.
+If NODE is nil, return nil. */)
+ (Lisp_Object node)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ TSNode ts_node = XTS_NODE (node)->node;
+ char *string = ts_node_string (ts_node);
+ return make_string (string, strlen (string));
+}
+
+DEFUN ("treesit-node-parent",
+ Ftreesit_node_parent, Streesit_node_parent, 1, 1, 0,
+ doc: /* Return the immediate parent of NODE.
+Return nil if there isn't any. If NODE is nil, return nil. */)
+ (Lisp_Object node)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ TSNode ts_node = XTS_NODE (node)->node;
+ TSNode parent = ts_node_parent (ts_node);
+
+ if (ts_node_is_null (parent))
+ return Qnil;
+
+ return make_ts_node (XTS_NODE (node)->parser, parent);
+}
+
+DEFUN ("treesit-node-child",
+ Ftreesit_node_child, Streesit_node_child, 2, 3, 0,
+ doc: /* Return the Nth child of NODE.
+
+Return nil if there isn't any. If NAMED is non-nil, look for named
+child only. NAMED defaults to nil. If NODE is nil, return nil. */)
+ (Lisp_Object node, Lisp_Object n, Lisp_Object named)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ ts_check_positive_integer (n);
+ EMACS_INT idx = XFIXNUM (n);
+ if (idx > UINT32_MAX) xsignal1 (Qargs_out_of_range, n);
+ TSNode ts_node = XTS_NODE (node)->node;
+ TSNode child;
+ if (NILP (named))
+ child = ts_node_child (ts_node, (uint32_t) idx);
+ else
+ child = ts_node_named_child (ts_node, (uint32_t) idx);
+
+ if (ts_node_is_null (child))
+ return Qnil;
+
+ return make_ts_node (XTS_NODE (node)->parser, child);
+}
+
+DEFUN ("treesit-node-check",
+ Ftreesit_node_check, Streesit_node_check, 2, 2, 0,
+ doc: /* Return non-nil if NODE has PROPERTY, nil otherwise.
+
+PROPERTY could be 'named, 'missing, 'extra, 'has-changes, 'has-error.
+Named nodes correspond to named rules in the language definition,
+whereas "anonymous" nodes correspond to string literals in the
+language definition.
+
+Missing nodes are inserted by the parser in order to recover from
+certain kinds of syntax errors, i.e., should be there but not there.
+
+Extra nodes represent things like comments, which are not required the
+language definition, but can appear anywhere.
+
+A node "has changes" if the buffer changed since the node is
+created. (Don't forget the "s" at the end of 'has-changes.)
+
+A node "has error" if itself is a syntax error or contains any syntax
+errors. */)
+ (Lisp_Object node, Lisp_Object property)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ CHECK_SYMBOL (property);
+ TSNode ts_node = XTS_NODE (node)->node;
+ bool result;
+ if (EQ (property, Qnamed))
+ result = ts_node_is_named (ts_node);
+ else if (EQ (property, Qmissing))
+ result = ts_node_is_missing (ts_node);
+ else if (EQ (property, Qextra))
+ result = ts_node_is_extra (ts_node);
+ else if (EQ (property, Qhas_error))
+ result = ts_node_has_error (ts_node);
+ else if (EQ (property, Qhas_changes))
+ result = ts_node_has_changes (ts_node);
+ else
+ signal_error ("Expecting 'named, 'missing, 'extra, 'has-changes or 'has-error, got",
+ property);
+ return result ? Qt : Qnil;
+}
+
+DEFUN ("treesit-node-field-name-for-child",
+ Ftreesit_node_field_name_for_child,
+ Streesit_node_field_name_for_child, 2, 2, 0,
+ doc: /* Return the field name of the Nth child of NODE.
+
+Return nil if there isn't any child or no field is found.
+If NODE is nil, return nil. */)
+ (Lisp_Object node, Lisp_Object n)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ ts_check_positive_integer (n);
+ EMACS_INT idx = XFIXNUM (n);
+ if (idx > UINT32_MAX) xsignal1 (Qargs_out_of_range, n);
+ TSNode ts_node = XTS_NODE (node)->node;
+ const char *name
+ = ts_node_field_name_for_child (ts_node, (uint32_t) idx);
+
+ if (name == NULL)
+ return Qnil;
+
+ return make_string (name, strlen (name));
+}
+
+DEFUN ("treesit-node-child-count",
+ Ftreesit_node_child_count,
+ Streesit_node_child_count, 1, 2, 0,
+ doc: /* Return the number of children of NODE.
+
+If NAMED is non-nil, count named child only. NAMED defaults to
+nil. If NODE is nil, return nil. */)
+ (Lisp_Object node, Lisp_Object named)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ TSNode ts_node = XTS_NODE (node)->node;
+ uint32_t count;
+ if (NILP (named))
+ count = ts_node_child_count (ts_node);
+ else
+ count = ts_node_named_child_count (ts_node);
+ return make_fixnum (count);
+}
+
+DEFUN ("treesit-node-child-by-field-name",
+ Ftreesit_node_child_by_field_name,
+ Streesit_node_child_by_field_name, 2, 2, 0,
+ doc: /* Return the child of NODE with FIELD-NAME.
+Return nil if there isn't any. If NODE is nil, return nil. */)
+ (Lisp_Object node, Lisp_Object field_name)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ CHECK_STRING (field_name);
+ char *name_str = SSDATA (field_name);
+ TSNode ts_node = XTS_NODE (node)->node;
+ TSNode child
+ = ts_node_child_by_field_name (ts_node, name_str, strlen (name_str));
+
+ if (ts_node_is_null(child))
+ return Qnil;
+
+ return make_ts_node(XTS_NODE (node)->parser, child);
+}
+
+DEFUN ("treesit-node-next-sibling",
+ Ftreesit_node_next_sibling,
+ Streesit_node_next_sibling, 1, 2, 0,
+ doc: /* Return the next sibling of NODE.
+
+Return nil if there isn't any. If NAMED is non-nil, look for named
+child only. NAMED defaults to nil. If NODE is nil, return nil. */)
+ (Lisp_Object node, Lisp_Object named)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ TSNode ts_node = XTS_NODE (node)->node;
+ TSNode sibling;
+ if (NILP (named))
+ sibling = ts_node_next_sibling (ts_node);
+ else
+ sibling = ts_node_next_named_sibling (ts_node);
+
+ if (ts_node_is_null(sibling))
+ return Qnil;
+
+ return make_ts_node(XTS_NODE (node)->parser, sibling);
+}
+
+DEFUN ("treesit-node-prev-sibling",
+ Ftreesit_node_prev_sibling,
+ Streesit_node_prev_sibling, 1, 2, 0,
+ doc: /* Return the previous sibling of NODE.
+
+Return nil if there isn't any. If NAMED is non-nil, look for named
+child only. NAMED defaults to nil. If NODE is nil, return nil. */)
+ (Lisp_Object node, Lisp_Object named)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ TSNode ts_node = XTS_NODE (node)->node;
+ TSNode sibling;
+
+ if (NILP (named))
+ sibling = ts_node_prev_sibling (ts_node);
+ else
+ sibling = ts_node_prev_named_sibling (ts_node);
+
+ if (ts_node_is_null(sibling))
+ return Qnil;
+
+ return make_ts_node(XTS_NODE (node)->parser, sibling);
+}
+
+DEFUN ("treesit-node-first-child-for-pos",
+ Ftreesit_node_first_child_for_pos,
+ Streesit_node_first_child_for_pos, 2, 3, 0,
+ doc: /* Return the first child of NODE on POS.
+
+Specifically, return the first child that extends beyond POS. POS is
+a position in the buffer. Return nil if there isn't any. If NAMED is
+non-nil, look for named child only. NAMED defaults to nil. Note that
+this function returns an immediate child, not the smallest
+(grand)child. If NODE is nil, return nil. */)
+ (Lisp_Object node, Lisp_Object pos, Lisp_Object named)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ ts_check_positive_integer (pos);
+
+ struct buffer *buf =
+ XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
+ ptrdiff_t visible_beg =
+ XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
+ ptrdiff_t byte_pos = buf_charpos_to_bytepos (buf, XFIXNUM (pos));
+
+ if (byte_pos < BUF_BEGV_BYTE (buf) || byte_pos > BUF_ZV_BYTE (buf))
+ xsignal1 (Qargs_out_of_range, pos);
+
+ TSNode ts_node = XTS_NODE (node)->node;
+ TSNode child;
+ if (NILP (named))
+ child = ts_node_first_child_for_byte
+ (ts_node, byte_pos - visible_beg);
+ else
+ child = ts_node_first_named_child_for_byte
+ (ts_node, byte_pos - visible_beg);
+
+ if (ts_node_is_null (child))
+ return Qnil;
+
+ return make_ts_node (XTS_NODE (node)->parser, child);
+}
+
+DEFUN ("treesit-node-descendant-for-range",
+ Ftreesit_node_descendant_for_range,
+ Streesit_node_descendant_for_range, 3, 4, 0,
+ doc: /* Return the smallest node that covers BEG to END.
+
+The returned node is a descendant of NODE. POS is a position. Return
+nil if there isn't any. If NAMED is non-nil, look for named child
+only. NAMED defaults to nil. If NODE is nil, return nil. */)
+ (Lisp_Object node, Lisp_Object beg, Lisp_Object end, Lisp_Object named)
+{
+ if (NILP (node)) return Qnil;
+ ts_check_node (node);
+ CHECK_INTEGER (beg);
+ CHECK_INTEGER (end);
+
+ struct buffer *buf =
+ XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
+ ptrdiff_t visible_beg =
+ XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
+ ptrdiff_t byte_beg = buf_charpos_to_bytepos (buf, XFIXNUM (beg));
+ ptrdiff_t byte_end = buf_charpos_to_bytepos (buf, XFIXNUM (end));
+
+ /* Checks for BUFFER_BEG <= BEG <= END <= BUFFER_END. */
+ if (!(BUF_BEGV_BYTE (buf) <= byte_beg
+ && byte_beg <= byte_end
+ && byte_end <= BUF_ZV_BYTE (buf)))
+ xsignal2 (Qargs_out_of_range, beg, end);
+
+ TSNode ts_node = XTS_NODE (node)->node;
+ TSNode child;
+ if (NILP (named))
+ child = ts_node_descendant_for_byte_range
+ (ts_node, byte_beg - visible_beg , byte_end - visible_beg);
+ else
+ child = ts_node_named_descendant_for_byte_range
+ (ts_node, byte_beg - visible_beg, byte_end - visible_beg);
+
+ if (ts_node_is_null (child))
+ return Qnil;
+
+ return make_ts_node (XTS_NODE (node)->parser, child);
+}
+
+DEFUN ("treesit-node-eq",
+ Ftreesit_node_eq,
+ Streesit_node_eq, 2, 2, 0,
+ doc: /* Return non-nil if NODE1 and NODE2 are the same node.
+If any one of NODE1 and NODE2 is nil, return nil. */)
+ (Lisp_Object node1, Lisp_Object node2)
+{
+ if (NILP (node1) || NILP (node2))
+ return Qnil;
+ CHECK_TS_NODE (node1);
+ CHECK_TS_NODE (node2);
+
+ TSNode ts_node_1 = XTS_NODE (node1)->node;
+ TSNode ts_node_2 = XTS_NODE (node2)->node;
+
+ bool same_node = ts_node_eq (ts_node_1, ts_node_2);
+ return same_node ? Qt : Qnil;
+}
+
+/*** Query functions */
+
+/* If we decide to pre-load tree-sitter.el, maybe we can implement
+ this function in Lisp. */
+DEFUN ("treesit-expand-pattern",
+ Ftreesit_expand_pattern,
+ Streesit_expand_pattern, 1, 1, 0,
+ doc: /* Expand PATTERN to its string form.
+
+PATTERN can be
+
+ :anchor
+ :?
+ :*
+ :+
+ :equal
+ :match
+ (TYPE PATTERN...)
+ [PATTERN...]
+ FIELD-NAME:
+ @CAPTURE-NAME
+ (_)
+ _
+ \"TYPE\"
+
+Consult Info node `(elisp)Pattern Matching' form detailed
+explanation. */)
+ (Lisp_Object pattern)
+{
+ if (EQ (pattern, intern_c_string (":anchor")))
+ return build_pure_c_string(".");
+ if (EQ (pattern, intern_c_string (":?")))
+ return build_pure_c_string("?");
+ if (EQ (pattern, intern_c_string (":*")))
+ return build_pure_c_string("*");
+ if (EQ (pattern, intern_c_string (":+")))
+ return build_pure_c_string("+");
+ if (EQ (pattern, intern_c_string (":equal")))
+ return build_pure_c_string("#equal");
+ if (EQ (pattern, intern_c_string (":match")))
+ return build_pure_c_string("#match");
+ Lisp_Object opening_delimeter =
+ build_pure_c_string (VECTORP (pattern) ? "[" : "(");
+ Lisp_Object closing_delimiter =
+ build_pure_c_string (VECTORP (pattern) ? "]" : ")");
+ if (VECTORP (pattern) || CONSP (pattern))
+ return concat3 (opening_delimeter,
+ Fmapconcat (intern_c_string
+ ("treesit-expand-pattern"),
+ pattern,
+ build_pure_c_string (" ")),
+ closing_delimiter);
+ return CALLN (Fformat, build_pure_c_string("%S"), pattern);
+}
+
+DEFUN ("treesit-expand-query",
+ Ftreesit_expand_query,
+ Streesit_expand_query, 1, 1, 0,
+ doc: /* Expand sexp QUERY to its string form.
+
+A PATTERN in QUERY can be
+
+ :anchor
+ :?
+ :*
+ :+
+ :equal
+ :match
+ (TYPE PATTERN...)
+ [PATTERN...]
+ FIELD-NAME:
+ @CAPTURE-NAME
+ (_)
+ _
+ \"TYPE\"
+
+Consult Info node `(elisp)Pattern Matching' form detailed
+explanation. */)
+ (Lisp_Object query)
+{
+ return Fmapconcat (intern_c_string ("treesit-expand-pattern"),
+ query, build_pure_c_string (" "));
+}
+
+char*
+ts_query_error_to_string (TSQueryError error)
+{
+ switch (error)
+ {
+ case TSQueryErrorNone:
+ return "None";
+ case TSQueryErrorSyntax:
+ return "Syntax error at";
+ case TSQueryErrorNodeType:
+ return "Node type error at";
+ case TSQueryErrorField:
+ return "Field error at";
+ case TSQueryErrorCapture:
+ return "Capture error at";
+ case TSQueryErrorStructure:
+ return "Structure error at";
+ default:
+ return "Unknown error";
+ }
+}
+
+/* Collect predicates for this match and return them in a list. Each
+ predicate is a list of strings and symbols. */
+Lisp_Object
+ts_predicates_for_pattern
+(TSQuery *query, uint32_t pattern_index)
+{
+ uint32_t len;
+ const TSQueryPredicateStep *predicate_list =
+ ts_query_predicates_for_pattern (query, pattern_index, &len);
+ Lisp_Object result = Qnil;
+ Lisp_Object predicate = Qnil;
+ for (int idx=0; idx < len; idx++)
+ {
+ TSQueryPredicateStep step = predicate_list[idx];
+ switch (step.type)
+ {
+ case TSQueryPredicateStepTypeCapture:
+ {
+ uint32_t str_len;
+ const char *str = ts_query_capture_name_for_id
+ (query, step.value_id, &str_len);
+ predicate = Fcons (intern_c_string_1 (str, str_len),
+ predicate);
+ break;
+ }
+ case TSQueryPredicateStepTypeString:
+ {
+ uint32_t str_len;
+ const char *str = ts_query_string_value_for_id
+ (query, step.value_id, &str_len);
+ predicate = Fcons (make_string (str, str_len), predicate);
+ break;
+ }
+ case TSQueryPredicateStepTypeDone:
+ result = Fcons (Fnreverse (predicate), result);
+ predicate = Qnil;
+ break;
+ }
+ }
+ return Fnreverse (result);
+}
+
+/* Translate a capture NAME (symbol) to the text of the captured node.
+ Signals treesit-query-error if such node is not captured. */
+Lisp_Object
+ts_predicate_capture_name_to_text (Lisp_Object name, Lisp_Object captures)
+{
+ Lisp_Object node = Qnil;
+ for (Lisp_Object tail = captures; !NILP (tail); tail = XCDR (tail))
+ {
+ if (EQ (XCAR (XCAR (tail)), name))
+ {
+ node = XCDR (XCAR (tail));
+ break;
+ }
+ }
+
+ if (NILP (node))
+ xsignal3 (Qtreesit_query_error,
+ build_pure_c_string ("Cannot find captured node"),
+ name, build_pure_c_string ("A predicate can only refer to captured nodes in the same pattern"));
+
+ struct buffer *old_buffer = current_buffer;
+ set_buffer_internal
+ (XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer));
+ Lisp_Object text = Fbuffer_substring
+ (Ftreesit_node_start (node), Ftreesit_node_end (node));
+ set_buffer_internal (old_buffer);
+ return text;
+}
+
+/* Handles predicate (#equal A B). Return true if A equals B; return
+ false otherwise. A and B can be either string, or a capture name.
+ The capture name evaluates to the text its captured node spans in
+ the buffer. */
+bool
+ts_predicate_equal (Lisp_Object args, Lisp_Object captures)
+{
+ if (XFIXNUM (Flength (args)) != 2)
+ xsignal2 (Qtreesit_query_error, build_pure_c_string ("Predicate `equal' requires two arguments but only given"), Flength (args));
+
+ Lisp_Object arg1 = XCAR (args);
+ Lisp_Object arg2 = XCAR (XCDR (args));
+ Lisp_Object tail = captures;
+ Lisp_Object text1 = STRINGP (arg1) ? arg1 :
+ ts_predicate_capture_name_to_text (arg1, captures);
+ Lisp_Object text2 = STRINGP (arg2) ? arg2 :
+ ts_predicate_capture_name_to_text (arg2, captures);
+
+ if (NILP (Fstring_equal (text1, text2)))
+ return false;
+ else
+ return true;
+}
+
+/* Handles predicate (#match "regexp" @node). Return true if "regexp"
+ matches the text spanned by @node; return false otherwise. Matching
+ is case-sensitive. */
+bool
+ts_predicate_match (Lisp_Object args, Lisp_Object captures)
+{
+ if (XFIXNUM (Flength (args)) != 2)
+ xsignal2 (Qtreesit_query_error, build_pure_c_string ("Predicate `equal' requires two arguments but only given"), Flength (args));
+
+ Lisp_Object regexp = XCAR (args);
+ Lisp_Object capture_name = XCAR (XCDR (args));
+ Lisp_Object tail = captures;
+ Lisp_Object text = ts_predicate_capture_name_to_text
+ (capture_name, captures);
+
+ /* It's probably common to get the argument order backwards. Catch
+ this mistake early and show helpful explanation, because Emacs
+ loves you. (We put the regexp first because that's what
+ string-match does.) */
+ if (!STRINGP (regexp))
+ xsignal1 (Qtreesit_query_error, build_pure_c_string ("The first argument to `match' should be a regexp string, not a capture name"));
+ if (!SYMBOLP (capture_name))
+ xsignal1 (Qtreesit_query_error, build_pure_c_string ("The second argument to `match' should be a capture name, not a string"));
+
+ if (fast_string_match (regexp, text) >= 0)
+ return true;
+ else
+ return false;
+}
+
+/* About predicates: I decide to hard-code predicates in C instead of
+ implementing an extensible system where predicates are translated
+ to Lisp functions, and new predicates can be added by extending a
+ list of functions, because I really couldn't imagine any useful
+ predicates besides equal and match. If we later found out that
+ such system is indeed useful and necessary, it can be easily
+ added. */
+
+/* If all predicates in PREDICATES passes, return true; otherwise
+ return false. */
+bool
+ts_eval_predicates (Lisp_Object captures, Lisp_Object predicates)
+{
+ bool pass = true;
+ /* Evaluate each predicates. */
+ for (Lisp_Object tail = predicates;
+ !NILP (tail); tail = XCDR (tail))
+ {
+ Lisp_Object predicate = XCAR (tail);
+ Lisp_Object fn = XCAR (predicate);
+ Lisp_Object args = XCDR (predicate);
+ if (!NILP (Fstring_equal (fn, build_pure_c_string("equal"))))
+ pass = ts_predicate_equal (args, captures);
+ else if (!NILP (Fstring_equal
+ (fn, build_pure_c_string("match"))))
+ pass = ts_predicate_match (args, captures);
+ else
+ xsignal3 (Qtreesit_query_error,
+ build_pure_c_string ("Invalid predicate"),
+ fn, build_pure_c_string ("Currently Emacs only supports equal and match predicate"));
+ }
+ /* If all predicates passed, add captures to result list. */
+ return pass;
+}
+
+DEFUN ("treesit-query-capture",
+ Ftreesit_query_capture,
+ Streesit_query_capture, 2, 4, 0,
+ doc: /* Query NODE with patterns in QUERY.
+
+Return a list of (CAPTURE_NAME . NODE). CAPTURE_NAME is the name
+assigned to the node in PATTERN. NODE is the captured node.
+
+QUERY is either a string query or a sexp query. See Info node
+`(elisp)Pattern Matching' for how to write a query in either string or
+s-expression form.
+
+BEG and END, if both non-nil, specifies the range in which the query
+is executed.
+
+Raise an treesit-query-error if QUERY is malformed, or something
+else goes wrong. */)
+ (Lisp_Object node, Lisp_Object query,
+ Lisp_Object beg, Lisp_Object end)
+{
+ ts_check_node (node);
+ if (!NILP (beg))
+ CHECK_INTEGER (beg);
+ if (!NILP (end))
+ CHECK_INTEGER (end);
+
+ if (CONSP (query))
+ query = Ftreesit_expand_query (query);
+ else
+ CHECK_STRING (query);
+
+ /* Extract C values from Lisp objects. */
+ TSNode ts_node = XTS_NODE (node)->node;
+ Lisp_Object lisp_parser = XTS_NODE (node)->parser;
+ ptrdiff_t visible_beg =
+ XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
+ const TSLanguage *lang = ts_parser_language
+ (XTS_PARSER (lisp_parser)->parser);
+ char *source = SSDATA (query);
+
+ /* Initialize query objects, and execute query. */
+ uint32_t error_offset;
+ TSQueryError error_type;
+ /* TODO: We could cache the query object, so that repeatedly
+ querying with the same query can reuse the query object. It also
+ saves us from expanding the sexp query into a string. I don't
+ know how much time that could save though. */
+ TSQuery *ts_query = ts_query_new (lang, source, strlen (source),
+ &error_offset, &error_type);
+ TSQueryCursor *cursor = ts_query_cursor_new ();
+
+ if (ts_query == NULL)
+ {
+ xsignal2 (Qtreesit_query_error,
+ build_string (ts_query_error_to_string (error_type)),
+ make_fixnum (error_offset + 1));
+ }
+ if (!NILP (beg) && !NILP (end))
+ {
+ EMACS_INT beg_byte = XFIXNUM (beg);
+ EMACS_INT end_byte = XFIXNUM (end);
+ ts_query_cursor_set_byte_range
+ (cursor, (uint32_t) beg_byte - visible_beg,
+ (uint32_t) end_byte - visible_beg);
+ }
+
+ ts_query_cursor_exec (cursor, ts_query, ts_node);
+ TSQueryMatch match;
+
+ /* Go over each match, collect captures and predicates. Include the
+ captures in the return list if all predicates in that match
+ passes. */
+ Lisp_Object result = Qnil;
+ while (ts_query_cursor_next_match (cursor, &match))
+ {
+ /* Get captured nodes. */
+ Lisp_Object captures_lisp = Qnil;
+ const TSQueryCapture *captures = match.captures;
+ for (int idx=0; idx < match.capture_count; idx++)
+ {
+ uint32_t capture_name_len;
+ TSQueryCapture capture = captures[idx];
+ Lisp_Object captured_node =
+ make_ts_node(lisp_parser, capture.node);
+ const char *capture_name = ts_query_capture_name_for_id
+ (ts_query, capture.index, &capture_name_len);
+ Lisp_Object cap =
+ Fcons (intern_c_string_1 (capture_name, capture_name_len),
+ captured_node);
+ captures_lisp = Fcons (cap, captures_lisp);
+ }
+ /* Get predicates. */
+ Lisp_Object predicates =
+ ts_predicates_for_pattern (ts_query, match.pattern_index);
+
+ captures_lisp = Fnreverse (captures_lisp);
+ if (ts_eval_predicates (captures_lisp, predicates))
+ {
+ result = CALLN (Fnconc, result, captures_lisp);
+ }
+ }
+ ts_query_delete (ts_query);
+ ts_query_cursor_delete (cursor);
+ return result;
+}
+
+/*** Initialization */
+
+/* Initialize the tree-sitter routines. */
+void
+syms_of_treesit (void)
+{
+ DEFSYM (Qtreesit_parser_p, "treesit-parser-p");
+ DEFSYM (Qtreesit_node_p, "treesit-node-p");
+ DEFSYM (Qnamed, "named");
+ DEFSYM (Qmissing, "missing");
+ DEFSYM (Qextra, "extra");
+ DEFSYM (Qhas_changes, "has-changes");
+ DEFSYM (Qhas_error, "has-error");
+
+ DEFSYM (Qtreesit_error, "treesit-error");
+ DEFSYM (Qtreesit_query_error, "treesit-query-error");
+ DEFSYM (Qtreesit_parse_error, "treesit-parse-error");
+ DEFSYM (Qtreesit_range_invalid, "treesit-range-invalid");
+ DEFSYM (Qtreesit_buffer_too_large,
+ "treesit-buffer-too-large");
+ DEFSYM (Qtreesit_load_language_error,
+ "treesit-load-language-error");
+ DEFSYM (Qtreesit_node_outdated,
+ "treesit-node-outdated");
+ DEFSYM (Quser_emacs_directory,
+ "user-emacs-directory");
+
+ define_error (Qtreesit_error, "Generic tree-sitter error", Qerror);
+ define_error (Qtreesit_query_error, "Query pattern is malformed",
+ Qtreesit_error);
+ /* Should be impossible, no need to document this error. */
+ define_error (Qtreesit_parse_error, "Parse failed",
+ Qtreesit_error);
+ define_error (Qtreesit_range_invalid,
+ "RANGES are invalid, they have to be ordered and not overlapping",
+ Qtreesit_error);
+ define_error (Qtreesit_buffer_too_large, "Buffer too large (> 4GB)",
+ Qtreesit_error);
+ define_error (Qtreesit_load_language_error,
+ "Cannot load language definition",
+ Qtreesit_error);
+ define_error (Qtreesit_node_outdated,
+ "This node is outdated, please retrieve a new one",
+ Qtreesit_error);
+
+ DEFSYM (Qtreesit_parser_list, "treesit-parser-list");
+ DEFVAR_LISP ("treesit-parser-list", Vtreesit_parser_list,
+ doc: /* A list of tree-sitter parsers.
+
+If you removed a parser from this list, do not put it back in. Emacs
+keeps the parser in this list updated with any change in the buffer.
+If removed and put back in, there is no guarantee that the parser is in
+sync with the buffer's content. */);
+ Vtreesit_parser_list = Qnil;
+ Fmake_variable_buffer_local (Qtreesit_parser_list);
+
+ DEFVAR_LISP ("treesit-load-name-override-list",
+ Vtreesit_load_name_override_list,
+ doc:
+ /* An override list for unconventional tree-sitter libraries.
+
+By default, Emacs assumes the dynamic library for LANG is
+libtree-sitter-LANG.EXT, where EXT is the OS specific extension for
+dynamic libraries. Emacs also assumes that the name of the C function
+the library provides is tree_sitter_LANG. If that is not the case,
+add an entry
+
+ (LANG LIBRARY-BASE-NAME FUNCTION-NAME)
+
+to this list, where LIBRARY-BASE-NAME is the filename of the dynamic
+library without extension, FUNCTION-NAME is the function provided by
+the library. */);
+ Vtreesit_load_name_override_list = Qnil;
+
+ DEFVAR_LISP ("treesit-extra-load-path",
+ Vtreesit_extra_load_path,
+ doc:
+ /* Extra load paths of tree-sitter language definitions.
+When trying to load a tree-sitter language definition,
+Emacs looks at directories in this variable,
+`user-emacs-directory'/tree-sitter, and system default locations for
+dynamic libraries, in that order. */);
+ Vtreesit_extra_load_path = Qnil;
+
+ defsubr (&Streesit_language_available_p);
+
+ defsubr (&Streesit_parser_p);
+ defsubr (&Streesit_node_p);
+
+ defsubr (&Streesit_node_parser);
+
+ defsubr (&Streesit_parser_create);
+ defsubr (&Streesit_parser_buffer);
+ defsubr (&Streesit_parser_language);
+
+ defsubr (&Streesit_parser_root_node);
+ /* defsubr (&Streesit_parse_string); */
+
+ defsubr (&Streesit_parser_set_included_ranges);
+ defsubr (&Streesit_parser_included_ranges);
+
+ defsubr (&Streesit_node_type);
+ defsubr (&Streesit_node_start);
+ defsubr (&Streesit_node_end);
+ defsubr (&Streesit_node_string);
+ defsubr (&Streesit_node_parent);
+ defsubr (&Streesit_node_child);
+ defsubr (&Streesit_node_check);
+ defsubr (&Streesit_node_field_name_for_child);
+ defsubr (&Streesit_node_child_count);
+ defsubr (&Streesit_node_child_by_field_name);
+ defsubr (&Streesit_node_next_sibling);
+ defsubr (&Streesit_node_prev_sibling);
+ defsubr (&Streesit_node_first_child_for_pos);
+ defsubr (&Streesit_node_descendant_for_range);
+ defsubr (&Streesit_node_eq);
+
+ defsubr (&Streesit_expand_pattern);
+ defsubr (&Streesit_expand_query);
+ defsubr (&Streesit_query_capture);
+}
diff --git a/src/treesit.h b/src/treesit.h
new file mode 100644
index 00000000000..639c4eedc55
--- /dev/null
+++ b/src/treesit.h
@@ -0,0 +1,137 @@
+/* Header file for the tree-sitter integration.
+
+Copyright (C) 2021 Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
+
+#ifndef EMACS_TREESIT_H
+#define EMACS_TREESIT_H
+
+#include <tree_sitter/api.h>
+#include "lisp.h"
+
+INLINE_HEADER_BEGIN
+
+/* A wrapper for a tree-sitter parser, but also contains a parse tree
+ and other goodies for convenience. */
+struct Lisp_TS_Parser
+{
+ union vectorlike_header header;
+ /* A symbol represents the language this parser uses. See the
+ manual for more explanation. */
+ Lisp_Object language_symbol;
+ /* The buffer associated with this parser. */
+ Lisp_Object buffer;
+ /* The pointer to the tree-sitter parser. Never NULL. */
+ TSParser *parser;
+ /* Pointer to the syntax tree. Initially is NULL, so check for NULL
+ before use. */
+ TSTree *tree;
+ /* Teaches tree-sitter how to read an Emacs buffer. */
+ TSInput input;
+ /* Re-parsing an unchanged buffer is not free for tree-sitter, so we
+ only make it re-parse when need_reparse == true. That usually
+ means some change is made in the buffer. But others could set
+ this field to true to force tree-sitter to re-parse. */
+ bool need_reparse;
+ /* These two positions record the buffer byte position (1-based) of
+ the "visible region" that tree-sitter sees. Unlike markers,
+ These two positions do not change as the user inserts and deletes
+ text around them. Before re-parse, we move these positions to
+ match BUF_BEGV_BYTE and BUF_ZV_BYTE. Note that we don't need to
+ synchronize these positions when retrieving them in a function
+ that involves a node: if the node is not outdated, these
+ positions are synchronized. */
+ ptrdiff_t visible_beg;
+ ptrdiff_t visible_end;
+ /* This counter is incremented every time a change is made to the
+ buffer in ts_record_change. The node retrieved from this parser
+ inherits this timestamp. This way we can make sure the node is
+ not outdated when we access its information. */
+ ptrdiff_t timestamp;
+};
+
+/* A wrapper around a tree-sitter node. */
+struct Lisp_TS_Node
+{
+ union vectorlike_header header;
+ /* This prevents gc from collecting the tree before the node is done
+ with it. TSNode contains a pointer to the tree it belongs to,
+ and the parser object, when collected by gc, will free that
+ tree. */
+ Lisp_Object parser;
+ TSNode node;
+ /* A node inherits its parser's timestamp at creation time. The
+ parser's timestamp increments as the buffer changes. This way we
+ can make sure the node is not outdated when we access its
+ information. */
+ ptrdiff_t timestamp;
+};
+
+INLINE bool
+TS_PARSERP (Lisp_Object x)
+{
+ return PSEUDOVECTORP (x, PVEC_TS_PARSER);
+}
+
+INLINE struct Lisp_TS_Parser *
+XTS_PARSER (Lisp_Object a)
+{
+ eassert (TS_PARSERP (a));
+ return XUNTAG (a, Lisp_Vectorlike, struct Lisp_TS_Parser);
+}
+
+INLINE bool
+TS_NODEP (Lisp_Object x)
+{
+ return PSEUDOVECTORP (x, PVEC_TS_NODE);
+}
+
+INLINE struct Lisp_TS_Node *
+XTS_NODE (Lisp_Object a)
+{
+ eassert (TS_NODEP (a));
+ return XUNTAG (a, Lisp_Vectorlike, struct Lisp_TS_Node);
+}
+
+INLINE void
+CHECK_TS_PARSER (Lisp_Object parser)
+{
+ CHECK_TYPE (TS_PARSERP (parser), Qtreesit_parser_p, parser);
+}
+
+INLINE void
+CHECK_TS_NODE (Lisp_Object node)
+{
+ CHECK_TYPE (TS_NODEP (node), Qtreesit_node_p, node);
+}
+
+void
+ts_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
+ ptrdiff_t new_end_byte);
+
+Lisp_Object
+make_ts_parser (Lisp_Object buffer, TSParser *parser,
+ TSTree *tree, Lisp_Object language_symbol);
+
+Lisp_Object
+make_ts_node (Lisp_Object parser, TSNode node);
+
+extern void syms_of_treesit (void);
+
+INLINE_HEADER_END
+
+#endif /* EMACS_TREESIT_H */