summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--admin/notes/tree-sitter/build-module/README17
-rwxr-xr-xadmin/notes/tree-sitter/build-module/batch.sh20
-rwxr-xr-xadmin/notes/tree-sitter/build-module/build.sh62
-rw-r--r--admin/notes/tree-sitter/html-manual/Accessing-Node.html206
-rw-r--r--admin/notes/tree-sitter/html-manual/Language-Definitions.html326
-rw-r--r--admin/notes/tree-sitter/html-manual/Multiple-Languages.html255
-rw-r--r--admin/notes/tree-sitter/html-manual/Parser_002dbased-Font-Lock.html160
-rw-r--r--admin/notes/tree-sitter/html-manual/Parser_002dbased-Indentation.html244
-rw-r--r--admin/notes/tree-sitter/html-manual/Parsing-Program-Source.html125
-rw-r--r--admin/notes/tree-sitter/html-manual/Pattern-Matching.html430
-rw-r--r--admin/notes/tree-sitter/html-manual/Retrieving-Node.html362
-rw-r--r--admin/notes/tree-sitter/html-manual/Tree_002dsitter-C-API.html212
-rw-r--r--admin/notes/tree-sitter/html-manual/Using-Parser.html186
-rwxr-xr-xadmin/notes/tree-sitter/html-manual/build-manual.sh23
-rw-r--r--admin/notes/tree-sitter/html-manual/manual.css374
-rw-r--r--admin/notes/tree-sitter/starter-guide442
-rw-r--r--configure.ac59
-rw-r--r--doc/lispref/elisp.texi12
-rw-r--r--doc/lispref/modes.texi271
-rw-r--r--doc/lispref/parsing.texi1515
-rw-r--r--lisp/emacs-lisp/cl-preloaded.el3
-rw-r--r--lisp/progmodes/python.el285
-rw-r--r--lisp/treesit.el935
-rw-r--r--src/Makefile.in10
-rw-r--r--src/alloc.c20
-rw-r--r--src/buffer.c16
-rw-r--r--src/buffer.h4
-rw-r--r--src/casefiddle.c12
-rw-r--r--src/data.c9
-rw-r--r--src/emacs.c7
-rw-r--r--src/eval.c13
-rw-r--r--src/insdel.c47
-rw-r--r--src/json.c16
-rw-r--r--src/lisp.h10
-rw-r--r--src/lread.c8
-rw-r--r--src/print.c34
-rw-r--r--src/treesit.c2327
-rw-r--r--src/treesit.h171
-rw-r--r--test/src/treesit-tests.el450
39 files changed, 9636 insertions, 42 deletions
diff --git a/admin/notes/tree-sitter/build-module/README b/admin/notes/tree-sitter/build-module/README
new file mode 100644
index 00000000000..ee6076c119c
--- /dev/null
+++ b/admin/notes/tree-sitter/build-module/README
@@ -0,0 +1,17 @@
+To build the language definition for a particular language, run
+
+ ./build.sh <language>
+
+e.g.,
+
+ ./build.sh html
+
+The dynamic module will be placed in the dist/ directory.
+
+To build all modules at once, run
+
+ ./batch.sh
+
+This gives you C, JSON, Go, HTML, Javascript, CSS, Python, Typescript,
+C#, C++, Rust. More can be added to batch.sh, provided the grammar's
+repository uses the standard directory structure. \ No newline at end of file
diff --git a/admin/notes/tree-sitter/build-module/batch.sh b/admin/notes/tree-sitter/build-module/batch.sh
new file mode 100755
index 00000000000..deed18978a1
--- /dev/null
+++ b/admin/notes/tree-sitter/build-module/batch.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Build every grammar listed below by running ./build.sh once per language.
+languages=(
+    'c'
+    'cpp'
+    'css'
+    'c-sharp'
+    'go'
+    'html'
+    'javascript'
+    'json'
+    'python'
+    'rust'
+    'typescript'
+)
+
+for language in "${languages[@]}"
+do
+    ./build.sh "$language"  # Quoted so each name is passed as exactly one argument.
+done
diff --git a/admin/notes/tree-sitter/build-module/build.sh b/admin/notes/tree-sitter/build-module/build.sh
new file mode 100755
index 00000000000..16792d05cbb
--- /dev/null
+++ b/admin/notes/tree-sitter/build-module/build.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+# Usage: ./build.sh <language> -- fetch one tree-sitter grammar, compile it, copy the library to dist/.
+lang=${1:?"usage: ./build.sh <language>"}
+
+if [ "$(uname)" == "Darwin" ]
+then
+    soext="dylib"
+else
+    soext="so"
+fi
+
+echo "Building ${lang}"
+
+# Retrieve sources.  Stop if the clone fails so nothing below runs against a missing checkout.
+git clone "https://github.com/tree-sitter/tree-sitter-${lang}.git" \
+    --depth 1 --quiet || exit 1
+if [ "${lang}" == "typescript" ]
+then
+    lang="typescript/tsx"  # For typescript, the sources are taken from the tsx/ subdirectory.
+fi
+cp tree-sitter-lang.in "tree-sitter-${lang}/src"
+cp emacs-module.h "tree-sitter-${lang}/src"
+cp "tree-sitter-${lang}/grammar.js" "tree-sitter-${lang}/src"
+cd "tree-sitter-${lang}/src" || exit 1
+# Restore the plain name: it is used for the library file name and the cleanup below.
+if [ "${lang}" == "typescript/tsx" ]
+then
+    lang="typescript"
+fi
+
+# Build.  Every object is compiled with -fPIC because it is linked into a shared library.
+cc -fPIC -c -I. parser.c
+# Compile scanner.c.
+if test -f scanner.c
+then
+    cc -fPIC -c -I. scanner.c
+fi
+# Compile scanner.cc.
+if test -f scanner.cc
+then
+    c++ -fPIC -I. -c scanner.cc
+fi
+# Link.  Use the C++ driver when a C++ scanner is present.
+if test -f scanner.cc
+then
+    c++ -fPIC -shared *.o -o "libtree-sitter-${lang}.${soext}"
+else
+    cc -fPIC -shared *.o -o "libtree-sitter-${lang}.${soext}"
+fi
+
+# Copy out.
+# typescript was built in tsx/src; step up one level so that ../../ is the starting directory.
+if [ "${lang}" == "typescript" ]
+then
+    cp "libtree-sitter-${lang}.${soext}" ..
+    cd ..
+fi
+
+mkdir -p ../../dist
+cp "libtree-sitter-${lang}.${soext}" ../../dist
+cd ../../ || exit 1  # Never run the rm -rf below from an unexpected directory.
+rm -rf "tree-sitter-${lang}"
diff --git a/admin/notes/tree-sitter/html-manual/Accessing-Node.html b/admin/notes/tree-sitter/html-manual/Accessing-Node.html
new file mode 100644
index 00000000000..00ac63b8339
--- /dev/null
+++ b/admin/notes/tree-sitter/html-manual/Accessing-Node.html
@@ -0,0 +1,206 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 6.8, https://www.gnu.org/software/texinfo/ -->
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<!-- This is the GNU Emacs Lisp Reference Manual
+corresponding to Emacs version 29.0.50.
+
+Copyright © 1990-1996, 1998-2022 Free Software Foundation,
+Inc.
+
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.3 or
+any later version published by the Free Software Foundation; with the
+Invariant Sections being "GNU General Public License," with the
+Front-Cover Texts being "A GNU Manual," and with the Back-Cover
+Texts as in (a) below. A copy of the license is included in the
+section entitled "GNU Free Documentation License."
+
+(a) The FSF's Back-Cover Text is: "You have the freedom to copy and
+modify this GNU manual. Buying copies from the FSF supports it in
+developing GNU and promoting software freedom." -->
+<title>Accessing Node (GNU Emacs Lisp Reference Manual)</title>
+
+<meta name="description" content="Accessing Node (GNU Emacs Lisp Reference Manual)">
+<meta name="keywords" content="Accessing Node (GNU Emacs Lisp Reference Manual)">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+
+<link href="index.html" rel="start" title="Top">
+<link href="Index.html" rel="index" title="Index">
+<link href="index.html#SEC_Contents" rel="contents" title="Table of Contents">
+<link href="Parsing-Program-Source.html" rel="up" title="Parsing Program Source">
+<link href="Pattern-Matching.html" rel="next" title="Pattern Matching">
+<link href="Retrieving-Node.html" rel="prev" title="Retrieving Node">
+<style type="text/css">
+<!--
+a.copiable-anchor {visibility: hidden; text-decoration: none; line-height: 0em}
+a.summary-letter {text-decoration: none}
+blockquote.indentedblock {margin-right: 0em}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+kbd {font-style: oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+span.nolinebreak {white-space: nowrap}
+span.roman {font-family: initial; font-weight: normal}
+span.sansserif {font-family: sans-serif; font-weight: normal}
+span:hover a.copiable-anchor {visibility: visible}
+ul.no-bullet {list-style: none}
+-->
+</style>
+<link rel="stylesheet" type="text/css" href="./manual.css">
+
+
+</head>
+
+<body lang="en">
+<div class="section" id="Accessing-Node">
+<div class="header">
+<p>
+Next: <a href="Pattern-Matching.html" accesskey="n" rel="next">Pattern Matching Tree-sitter Nodes</a>, Previous: <a href="Retrieving-Node.html" accesskey="p" rel="prev">Retrieving Node</a>, Up: <a href="Parsing-Program-Source.html" accesskey="u" rel="up">Parsing Program Source</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p>
+</div>
+<hr>
+<span id="Accessing-Node-Information"></span><h3 class="section">37.4 Accessing Node Information</h3>
+
+<p>Before going further, make sure you have read the basic conventions
+about tree-sitter nodes in the previous node.
+</p>
+<span id="Basic-information"></span><h3 class="heading">Basic information</h3>
+
+<p>Every node is associated with a parser, and that parser is associated
+with a buffer. The following functions let you retrieve them.
+</p>
+<dl class="def">
+<dt id="index-treesit_002dnode_002dparser"><span class="category">Function: </span><span><strong>treesit-node-parser</strong> <em>node</em><a href='#index-treesit_002dnode_002dparser' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function returns <var>node</var>&rsquo;s associated parser.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dnode_002dbuffer"><span class="category">Function: </span><span><strong>treesit-node-buffer</strong> <em>node</em><a href='#index-treesit_002dnode_002dbuffer' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function returns <var>node</var>&rsquo;s parser&rsquo;s associated buffer.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dnode_002dlanguage"><span class="category">Function: </span><span><strong>treesit-node-language</strong> <em>node</em><a href='#index-treesit_002dnode_002dlanguage' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function returns <var>node</var>&rsquo;s parser&rsquo;s associated language.
+</p></dd></dl>
+
+<p>Each node represents a piece of text in the buffer. The functions
+below find relevant information about that text.
+</p>
+<dl class="def">
+<dt id="index-treesit_002dnode_002dstart"><span class="category">Function: </span><span><strong>treesit-node-start</strong> <em>node</em><a href='#index-treesit_002dnode_002dstart' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>Return the start position of <var>node</var>.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dnode_002dend"><span class="category">Function: </span><span><strong>treesit-node-end</strong> <em>node</em><a href='#index-treesit_002dnode_002dend' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>Return the end position of <var>node</var>.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dnode_002dtext"><span class="category">Function: </span><span><strong>treesit-node-text</strong> <em>node &amp;optional object</em><a href='#index-treesit_002dnode_002dtext' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>Returns the buffer text that <var>node</var> represents. (If <var>node</var> is
+retrieved from parsing a string, it will be text from that string.)
+</p></dd></dl>
+
+<p>Here are some basic checks on tree-sitter nodes.
+</p>
+<dl class="def">
+<dt id="index-treesit_002dnode_002dp"><span class="category">Function: </span><span><strong>treesit-node-p</strong> <em>object</em><a href='#index-treesit_002dnode_002dp' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>Checks if <var>object</var> is a tree-sitter syntax node.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dnode_002deq"><span class="category">Function: </span><span><strong>treesit-node-eq</strong> <em>node1 node2</em><a href='#index-treesit_002dnode_002deq' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>Checks if <var>node1</var> and <var>node2</var> are the same node in a syntax
+tree.
+</p></dd></dl>
+
+<span id="Property-information"></span><h3 class="heading">Property information</h3>
+
+<p>In general, nodes in a concrete syntax tree fall into two categories:
+<em>named nodes</em> and <em>anonymous nodes</em>. Whether a node is named
+or anonymous is determined by the language definition
+(see <a href="Language-Definitions.html#tree_002dsitter-named-node">named node</a>).
+</p>
+<span id="index-tree_002dsitter-missing-node"></span>
+<p>Apart from being named/anonymous, a node can have other properties. A
+node can be &ldquo;missing&rdquo;: missing nodes are inserted by the parser in
+order to recover from certain kinds of syntax errors, i.e., something
+should probably be there according to the grammar, but is not there.
+</p>
+<span id="index-tree_002dsitter-extra-node"></span>
+<p>A node can be &ldquo;extra&rdquo;: extra nodes represent things like comments,
+which can appear anywhere in the text.
+</p>
+<span id="index-tree_002dsitter-node-that-has-changes"></span>
+<p>A node &ldquo;has changes&rdquo; if the buffer has changed since the node was
+retrieved, i.e., the node is outdated.
+</p>
+<span id="index-tree_002dsitter-node-that-has-error"></span>
+<p>A node &ldquo;has error&rdquo; if the text it spans contains a syntax error.
+Either the node itself has an error, or one of its descendants
+(children, grandchildren, and so on) has an error.
+</p>
+<dl class="def">
+<dt id="index-treesit_002dnode_002dcheck"><span class="category">Function: </span><span><strong>treesit-node-check</strong> <em>node property</em><a href='#index-treesit_002dnode_002dcheck' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function checks if <var>node</var> has <var>property</var>. <var>property</var>
+can be <code>'named</code>, <code>'missing</code>, <code>'extra</code>,
+<code>'has-changes</code>, or <code>'has-error</code>.
+</p></dd></dl>
+
+
+<dl class="def">
+<dt id="index-treesit_002dnode_002dtype"><span class="category">Function: </span><span><strong>treesit-node-type</strong> <em>node</em><a href='#index-treesit_002dnode_002dtype' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>Named nodes have &ldquo;types&rdquo; (see <a href="Language-Definitions.html#tree_002dsitter-node-type">node type</a>).
+For example, a named node can be a <code>string_literal</code> node, where
+<code>string_literal</code> is its type.
+</p>
+<p>This function returns <var>node</var>&rsquo;s type as a string.
+</p></dd></dl>
+
+<span id="Information-as-a-child-or-parent"></span><h3 class="heading">Information as a child or parent</h3>
+
+<dl class="def">
+<dt id="index-treesit_002dnode_002dindex"><span class="category">Function: </span><span><strong>treesit-node-index</strong> <em>node &amp;optional named</em><a href='#index-treesit_002dnode_002dindex' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function returns the index of <var>node</var> as a child node of its
+parent. If <var>named</var> is non-nil, it only counts named nodes
+(see <a href="Language-Definitions.html#tree_002dsitter-named-node">named node</a>).
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dnode_002dfield_002dname"><span class="category">Function: </span><span><strong>treesit-node-field-name</strong> <em>node</em><a href='#index-treesit_002dnode_002dfield_002dname' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>A child of a parent node could have a field name (see <a href="Language-Definitions.html#tree_002dsitter-node-field-name">field name</a>). This function returns the field name
+of <var>node</var> as a child of its parent.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dnode_002dfield_002dname_002dfor_002dchild"><span class="category">Function: </span><span><strong>treesit-node-field-name-for-child</strong> <em>node n</em><a href='#index-treesit_002dnode_002dfield_002dname_002dfor_002dchild' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function returns the field name of the <var>n</var>&rsquo;th child of
+<var>node</var>.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dchild_002dcount"><span class="category">Function: </span><span><strong>treesit-child-count</strong> <em>node &amp;optional named</em><a href='#index-treesit_002dchild_002dcount' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function finds the number of children of <var>node</var>. If
+<var>named</var> is non-nil, it only counts named children (see <a href="Language-Definitions.html#tree_002dsitter-named-node">named node</a>).
+</p></dd></dl>
+
+</div>
+<hr>
+<div class="header">
+<p>
+Next: <a href="Pattern-Matching.html">Pattern Matching Tree-sitter Nodes</a>, Previous: <a href="Retrieving-Node.html">Retrieving Node</a>, Up: <a href="Parsing-Program-Source.html">Parsing Program Source</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p>
+</div>
+
+
+
+</body>
+</html>
diff --git a/admin/notes/tree-sitter/html-manual/Language-Definitions.html b/admin/notes/tree-sitter/html-manual/Language-Definitions.html
new file mode 100644
index 00000000000..ba3eeb9eeb9
--- /dev/null
+++ b/admin/notes/tree-sitter/html-manual/Language-Definitions.html
@@ -0,0 +1,326 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 6.8, https://www.gnu.org/software/texinfo/ -->
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<!-- This is the GNU Emacs Lisp Reference Manual
+corresponding to Emacs version 29.0.50.
+
+Copyright © 1990-1996, 1998-2022 Free Software Foundation,
+Inc.
+
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.3 or
+any later version published by the Free Software Foundation; with the
+Invariant Sections being "GNU General Public License," with the
+Front-Cover Texts being "A GNU Manual," and with the Back-Cover
+Texts as in (a) below. A copy of the license is included in the
+section entitled "GNU Free Documentation License."
+
+(a) The FSF's Back-Cover Text is: "You have the freedom to copy and
+modify this GNU manual. Buying copies from the FSF supports it in
+developing GNU and promoting software freedom." -->
+<title>Language Definitions (GNU Emacs Lisp Reference Manual)</title>
+
+<meta name="description" content="Language Definitions (GNU Emacs Lisp Reference Manual)">
+<meta name="keywords" content="Language Definitions (GNU Emacs Lisp Reference Manual)">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+
+<link href="index.html" rel="start" title="Top">
+<link href="Index.html" rel="index" title="Index">
+<link href="index.html#SEC_Contents" rel="contents" title="Table of Contents">
+<link href="Parsing-Program-Source.html" rel="up" title="Parsing Program Source">
+<link href="Using-Parser.html" rel="next" title="Using Parser">
+<style type="text/css">
+<!--
+a.copiable-anchor {visibility: hidden; text-decoration: none; line-height: 0em}
+a.summary-letter {text-decoration: none}
+blockquote.indentedblock {margin-right: 0em}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+kbd {font-style: oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+span.nolinebreak {white-space: nowrap}
+span.roman {font-family: initial; font-weight: normal}
+span.sansserif {font-family: sans-serif; font-weight: normal}
+span:hover a.copiable-anchor {visibility: visible}
+ul.no-bullet {list-style: none}
+-->
+</style>
+<link rel="stylesheet" type="text/css" href="./manual.css">
+
+
+</head>
+
+<body lang="en">
+<div class="section" id="Language-Definitions">
+<div class="header">
+<p>
+Next: <a href="Using-Parser.html" accesskey="n" rel="next">Using Tree-sitter Parser</a>, Up: <a href="Parsing-Program-Source.html" accesskey="u" rel="up">Parsing Program Source</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p>
+</div>
+<hr>
+<span id="Tree_002dsitter-Language-Definitions"></span><h3 class="section">37.1 Tree-sitter Language Definitions</h3>
+
+<span id="Loading-a-language-definition"></span><h3 class="heading">Loading a language definition</h3>
+
+<p>Tree-sitter relies on language definitions to parse text in that
+language. In Emacs, a language definition is represented by a symbol.
+For example, C language definition is represented as <code>c</code>, and
+<code>c</code> can be passed to tree-sitter functions as the <var>language</var>
+argument.
+</p>
+<span id="index-treesit_002dextra_002dload_002dpath"></span>
+<span id="index-treesit_002dload_002dlanguage_002derror"></span>
+<span id="index-treesit_002dload_002dsuffixes"></span>
+<p>Tree-sitter language definitions are distributed as dynamic libraries.
+In order to use a language definition in Emacs, you need to make sure
+that the dynamic library is installed on the system. Emacs looks for
+language definitions under load paths in
+<code>treesit-extra-load-path</code>, <code>user-emacs-directory</code>/tree-sitter,
+and system default locations for dynamic libraries, in that order.
+Emacs tries each extension in <code>treesit-load-suffixes</code>. If Emacs
+cannot find the library or has a problem loading it, Emacs signals
+<code>treesit-load-language-error</code>. The signal data is a list of
+specific error messages.
+</p>
+<dl class="def">
+<dt id="index-treesit_002dlanguage_002davailable_002dp"><span class="category">Function: </span><span><strong>treesit-language-available-p</strong> <em>language</em><a href='#index-treesit_002dlanguage_002davailable_002dp' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function checks whether the dynamic library for <var>language</var> is
+present on the system, and returns non-nil if it is.
+</p></dd></dl>
+
+<span id="index-treesit_002dload_002dname_002doverride_002dlist"></span>
+<p>By convention, the dynamic library for <var>language</var> is
+<code>libtree-sitter-<var>language</var>.<var>ext</var></code>, where <var>ext</var> is the
+system-specific extension for dynamic libraries. Also by convention,
+the function provided by that library is named
+<code>tree_sitter_<var>language</var></code>. If a language definition doesn&rsquo;t
+follow this convention, you should add an entry
+</p>
+<div class="example">
+<pre class="example">(<var>language</var> <var>library-base-name</var> <var>function-name</var>)
+</pre></div>
+
+<p>to <code>treesit-load-name-override-list</code>, where
+<var>library-base-name</var> is the base filename for the dynamic library
+(conventionally <code>libtree-sitter-<var>language</var></code>), and
+<var>function-name</var> is the function provided by the library
+(conventionally <code>tree_sitter_<var>language</var></code>). For example,
+</p>
+<div class="example">
+<pre class="example">(cool-lang &quot;libtree-sitter-coool&quot; &quot;tree_sitter_cooool&quot;)
+</pre></div>
+
+<p>for a language too cool to abide by conventions.
+</p>
+<dl class="def">
+<dt id="index-treesit_002dlanguage_002dversion"><span class="category">Function: </span><span><strong>treesit-language-version</strong> <em>&amp;optional min-compatible</em><a href='#index-treesit_002dlanguage_002dversion' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>The tree-sitter library has a <em>language version</em>; a language
+definition&rsquo;s version needs to match this version to be compatible.
+</p>
+<p>This function returns tree-sitter library’s language version. If
+<var>min-compatible</var> is non-nil, it returns the minimal compatible
+version.
+</p></dd></dl>
+
+<span id="Concrete-syntax-tree"></span><h3 class="heading">Concrete syntax tree</h3>
+
+<p>A syntax tree is what a parser generates. In a syntax tree, each node
+represents a piece of text, and is connected to each other by a
+parent-child relationship. For example, if the source text is
+</p>
+<div class="example">
+<pre class="example">1 + 2
+</pre></div>
+
+<p>its syntax tree could be
+</p>
+<div class="example">
+<pre class="example"> +--------------+
+ | root &quot;1 + 2&quot; |
+ +--------------+
+ |
+ +--------------------------------+
+ | expression &quot;1 + 2&quot; |
+ +--------------------------------+
+ | | |
++------------+ +--------------+ +------------+
+| number &quot;1&quot; | | operator &quot;+&quot; | | number &quot;2&quot; |
++------------+ +--------------+ +------------+
+</pre></div>
+
+<p>We can also represent it in s-expression:
+</p>
+<div class="example">
+<pre class="example">(root (expression (number) (operator) (number)))
+</pre></div>
+
+<span id="Node-types"></span><h4 class="subheading">Node types</h4>
+
+<span id="index-tree_002dsitter-node-type"></span>
+<span id="tree_002dsitter-node-type"></span><span id="index-tree_002dsitter-named-node"></span>
+<span id="tree_002dsitter-named-node"></span><span id="index-tree_002dsitter-anonymous-node"></span>
+<p>Names like <code>root</code>, <code>expression</code>, <code>number</code>,
+<code>operator</code> are nodes&rsquo; <em>type</em>. However, not all nodes in a
+syntax tree have a type. Nodes that don&rsquo;t are <em>anonymous nodes</em>,
+and nodes with a type are <em>named nodes</em>. Anonymous nodes are
+tokens with fixed spellings, including punctuation characters like
+bracket &lsquo;<samp>]</samp>&rsquo;, and keywords like <code>return</code>.
+</p>
+<span id="Field-names"></span><h4 class="subheading">Field names</h4>
+
+<span id="index-tree_002dsitter-node-field-name"></span>
+<span id="tree_002dsitter-node-field-name"></span><p>To make the syntax tree easier to
+analyze, many language definitions assign <em>field names</em> to child
+nodes. For example, a <code>function_definition</code> node could have a
+<code>declarator</code> and a <code>body</code>:
+</p>
+<div class="example">
+<pre class="example">(function_definition
+ declarator: (declaration)
+ body: (compound_statement))
+</pre></div>
+
+<dl class="def">
+<dt id="index-treesit_002dinspect_002dmode"><span class="category">Command: </span><span><strong>treesit-inspect-mode</strong><a href='#index-treesit_002dinspect_002dmode' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This minor mode displays the node that <em>starts</em> at point in
+mode-line. The mode-line will display
+</p>
+<div class="example">
+<pre class="example"><var>parent</var> <var>field-name</var>: (<var>child</var> (<var>grand-child</var> (...)))
+</pre></div>
+
+<p><var>child</var>, <var>grand-child</var>, and <var>grand-grand-child</var>, etc, are
+nodes that have their beginning at point. And <var>parent</var> is the
+parent of <var>child</var>.
+</p>
+<p>If there is no node that starts at point, i.e., point is in the middle
+of a node, then the mode-line only displays the smallest node that
+spans point, and its immediate parent.
+</p>
+<p>This minor mode doesn&rsquo;t create parsers on its own. It simply uses the
+first parser in <code>(treesit-parser-list)</code> (see <a href="Using-Parser.html">Using Tree-sitter Parser</a>).
+</p></dd></dl>
+
+<span id="Reading-the-grammar-definition"></span><h3 class="heading">Reading the grammar definition</h3>
+
+<p>Authors of language definitions define the <em>grammar</em> of a
+language, and this grammar determines how a parser constructs a
+concrete syntax tree out of the text. In order to use the syntax
+tree effectively, we need to read the <em>grammar file</em>.
+</p>
+<p>The grammar file is usually <code>grammar.js</code> in a language
+definition’s project repository. The link to a language definition’s
+home page can be found in tree-sitter’s homepage
+(<a href="https://tree-sitter.github.io/tree-sitter">https://tree-sitter.github.io/tree-sitter</a>).
+</p>
+<p>The grammar is written in JavaScript syntax. For example, the rule
+matching a <code>function_definition</code> node looks like
+</p>
+<div class="example">
+<pre class="example">function_definition: $ =&gt; seq(
+ $.declaration_specifiers,
+ field('declarator', $.declaration),
+ field('body', $.compound_statement)
+)
+</pre></div>
+
+<p>The rule is represented by a function that takes a single argument
+<var>$</var>, representing the whole grammar. The function itself is
+constructed by other functions: the <code>seq</code> function puts together a
+sequence of children; the <code>field</code> function annotates a child with
+a field name. If we write the above definition in BNF syntax, it
+would look like
+</p>
+<div class="example">
+<pre class="example">function_definition :=
+ &lt;declaration_specifiers&gt; &lt;declaration&gt; &lt;compound_statement&gt;
+</pre></div>
+
+<p>and the node returned by the parser would look like
+</p>
+<div class="example">
+<pre class="example">(function_definition
+ (declaration_specifier)
+ declarator: (declaration)
+ body: (compound_statement))
+</pre></div>
+
+<p>Below is a list of functions that one will see in a grammar
+definition. Each function takes other rules as arguments and returns
+a new rule.
+</p>
+<ul>
+<li> <code>seq(rule1, rule2, ...)</code> matches each rule one after another.
+
+</li><li> <code>choice(rule1, rule2, ...)</code> matches one of the rules in its
+arguments.
+
+</li><li> <code>repeat(rule)</code> matches <var>rule</var> for <em>zero or more</em> times.
+This is like the &lsquo;<samp>*</samp>&rsquo; operator in regular expressions.
+
+</li><li> <code>repeat1(rule)</code> matches <var>rule</var> for <em>one or more</em> times.
+This is like the &lsquo;<samp>+</samp>&rsquo; operator in regular expressions.
+
+</li><li> <code>optional(rule)</code> matches <var>rule</var> for <em>zero or one</em> time.
+This is like the &lsquo;<samp>?</samp>&rsquo; operator in regular expressions.
+
+</li><li> <code>field(name, rule)</code> assigns field name <var>name</var> to the child
+node matched by <var>rule</var>.
+
+</li><li> <code>alias(rule, alias)</code> makes nodes matched by <var>rule</var> appear as
+<var>alias</var> in the syntax tree generated by the parser. For example,
+
+<div class="example">
+<pre class="example">alias(preprocessor_call_exp, call_expression)
+</pre></div>
+
+<p>makes any node matched by <code>preprocessor_call_exp</code> appear as
+<code>call_expression</code>.
+</p></li></ul>
+
+<p>Below are grammar functions less interesting for a reader of a
+language definition.
+</p>
+<ul>
+<li> <code>token(rule)</code> marks <var>rule</var> to produce a single leaf node.
+That is, instead of generating a parent node with individual child
+nodes under it, everything is combined into a single leaf node.
+
+</li><li> Normally, grammar rules ignore preceding whitespace;
+<code>token.immediate(rule)</code> changes <var>rule</var> to match only when
+there is no preceding whitespace.
+
+</li><li> <code>prec(n, rule)</code> gives <var>rule</var> a level <var>n</var> precedence.
+
+</li><li> <code>prec.left([n,] rule)</code> marks <var>rule</var> as left-associative,
+optionally with level <var>n</var>.
+
+</li><li> <code>prec.right([n,] rule)</code> marks <var>rule</var> as right-associative,
+optionally with level <var>n</var>.
+
+</li><li> <code>prec.dynamic(n, rule)</code> is like <code>prec</code>, but the precedence
+is applied at runtime instead.
+</li></ul>
+
+<p>The tree-sitter project talks about writing a grammar in more detail:
+<a href="https://tree-sitter.github.io/tree-sitter/creating-parsers">https://tree-sitter.github.io/tree-sitter/creating-parsers</a>.
+Read especially &ldquo;The Grammar DSL&rdquo; section.
+</p>
+</div>
+<hr>
+<div class="header">
+<p>
+Next: <a href="Using-Parser.html">Using Tree-sitter Parser</a>, Up: <a href="Parsing-Program-Source.html">Parsing Program Source</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p>
+</div>
+
+
+
+</body>
+</html>
diff --git a/admin/notes/tree-sitter/html-manual/Multiple-Languages.html b/admin/notes/tree-sitter/html-manual/Multiple-Languages.html
new file mode 100644
index 00000000000..1ee2df7f442
--- /dev/null
+++ b/admin/notes/tree-sitter/html-manual/Multiple-Languages.html
@@ -0,0 +1,255 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 6.8, https://www.gnu.org/software/texinfo/ -->
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<!-- This is the GNU Emacs Lisp Reference Manual
+corresponding to Emacs version 29.0.50.
+
+Copyright © 1990-1996, 1998-2022 Free Software Foundation,
+Inc.
+
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.3 or
+any later version published by the Free Software Foundation; with the
+Invariant Sections being "GNU General Public License," with the
+Front-Cover Texts being "A GNU Manual," and with the Back-Cover
+Texts as in (a) below. A copy of the license is included in the
+section entitled "GNU Free Documentation License."
+
+(a) The FSF's Back-Cover Text is: "You have the freedom to copy and
+modify this GNU manual. Buying copies from the FSF supports it in
+developing GNU and promoting software freedom." -->
+<title>Multiple Languages (GNU Emacs Lisp Reference Manual)</title>
+
+<meta name="description" content="Multiple Languages (GNU Emacs Lisp Reference Manual)">
+<meta name="keywords" content="Multiple Languages (GNU Emacs Lisp Reference Manual)">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+
+<link href="index.html" rel="start" title="Top">
+<link href="Index.html" rel="index" title="Index">
+<link href="index.html#SEC_Contents" rel="contents" title="Table of Contents">
+<link href="Parsing-Program-Source.html" rel="up" title="Parsing Program Source">
+<link href="Tree_002dsitter-C-API.html" rel="next" title="Tree-sitter C API">
+<link href="Pattern-Matching.html" rel="prev" title="Pattern Matching">
+<style type="text/css">
+<!--
+a.copiable-anchor {visibility: hidden; text-decoration: none; line-height: 0em}
+a.summary-letter {text-decoration: none}
+blockquote.indentedblock {margin-right: 0em}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+kbd {font-style: oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+span.nolinebreak {white-space: nowrap}
+span.roman {font-family: initial; font-weight: normal}
+span.sansserif {font-family: sans-serif; font-weight: normal}
+span:hover a.copiable-anchor {visibility: visible}
+ul.no-bullet {list-style: none}
+-->
+</style>
+<link rel="stylesheet" type="text/css" href="./manual.css">
+
+
+</head>
+
+<body lang="en">
+<div class="section" id="Multiple-Languages">
+<div class="header">
+<p>
+Next: <a href="Tree_002dsitter-C-API.html" accesskey="n" rel="next">Tree-sitter C API Correspondence</a>, Previous: <a href="Pattern-Matching.html" accesskey="p" rel="prev">Pattern Matching Tree-sitter Nodes</a>, Up: <a href="Parsing-Program-Source.html" accesskey="u" rel="up">Parsing Program Source</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p>
+</div>
+<hr>
+<span id="Parsing-Text-in-Multiple-Languages"></span><h3 class="section">37.6 Parsing Text in Multiple Languages</h3>
+
+<p>Sometimes, the source of a programming language could contain sources
+of other languages; HTML + CSS + JavaScript is one example. In that
+case, we need to assign individual parsers to text segments written in
+different languages. Traditionally this is achieved by using
+narrowing. While tree-sitter works with narrowing (see <a href="Using-Parser.html#tree_002dsitter-narrowing">narrowing</a>), the recommended way is to set ranges in which
+a parser will operate.
+</p>
+<dl class="def">
+<dt id="index-treesit_002dparser_002dset_002dincluded_002dranges"><span class="category">Function: </span><span><strong>treesit-parser-set-included-ranges</strong> <em>parser ranges</em><a href='#index-treesit_002dparser_002dset_002dincluded_002dranges' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function sets the range of <var>parser</var> to <var>ranges</var>. Then
+<var>parser</var> will only read the text covered in each range. Each
+range in <var>ranges</var> is a cons of the form <code>(<var>beg</var>
+. <var>end</var>)</code>.
+</p>
+<p>Each range in <var>ranges</var> must come in order and not overlap. That
+is, in pseudo code:
+</p>
+<div class="example">
+<pre class="example">(cl-loop for idx from 1 to (1- (length ranges))
+ for prev = (nth (1- idx) ranges)
+ for next = (nth idx ranges)
+ should (&lt;= (car prev) (cdr prev)
+ (car next) (cdr next)))
+</pre></div>
+
+<span id="index-treesit_002drange_002dinvalid"></span>
+<p>If <var>ranges</var> violates this constraint, or something else went
+wrong, this function signals a <code>treesit-range-invalid</code> error. The
+signal data contains a specific error message and the ranges we are
+trying to set.
+</p>
+<p>This function can also be used for disabling ranges. If <var>ranges</var>
+is nil, the parser is set to parse the whole buffer.
+</p>
+<p>Example:
+</p>
+<div class="example">
+<pre class="example">(treesit-parser-set-included-ranges
+ parser '((1 . 9) (16 . 24) (24 . 25)))
+</pre></div>
+</dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dparser_002dincluded_002dranges"><span class="category">Function: </span><span><strong>treesit-parser-included-ranges</strong> <em>parser</em><a href='#index-treesit_002dparser_002dincluded_002dranges' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function returns the ranges set for <var>parser</var>. The return
+value is the same as the <var>ranges</var> argument of
+<code>treesit-parser-set-included-ranges</code>: a list of cons
+<code>(<var>beg</var> . <var>end</var>)</code>. And if <var>parser</var> doesn&rsquo;t have any
+ranges, the return value is nil.
+</p>
+<div class="example">
+<pre class="example">(treesit-parser-included-ranges parser)
+ &rArr; ((1 . 9) (16 . 24) (24 . 25))
+</pre></div>
+</dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dset_002dranges"><span class="category">Function: </span><span><strong>treesit-set-ranges</strong> <em>parser-or-lang ranges</em><a href='#index-treesit_002dset_002dranges' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>Like <code>treesit-parser-set-included-ranges</code>, this function sets
+the ranges of <var>parser-or-lang</var> to <var>ranges</var>. Conveniently,
+<var>parser-or-lang</var> could be either a parser or a language. If it is
+a language, this function looks for the first parser in
+<code>(treesit-parser-list)</code> for that language in the current buffer,
+and sets the ranges for it.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dget_002dranges"><span class="category">Function: </span><span><strong>treesit-get-ranges</strong> <em>parser-or-lang</em><a href='#index-treesit_002dget_002dranges' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function returns the ranges of <var>parser-or-lang</var>, like
+<code>treesit-parser-included-ranges</code>. And like
+<code>treesit-set-ranges</code>, <var>parser-or-lang</var> can be a parser or
+a language symbol.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dquery_002drange"><span class="category">Function: </span><span><strong>treesit-query-range</strong> <em>source query &amp;optional beg end</em><a href='#index-treesit_002dquery_002drange' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function matches <var>source</var> with <var>query</var> and returns the
+ranges of captured nodes. The return value has the same shape as that of
+the other range functions: a list of <code>(<var>beg</var> . <var>end</var>)</code>.
+</p>
+<p>For convenience, <var>source</var> can be a language symbol, a parser, or a
+node. If a language symbol, this function matches in the root node of
+the first parser using that language; if a parser, this function
+matches in the root node of that parser; if a node, this function
+matches in that node.
+</p>
+<p>Parameter <var>query</var> is the query used to capture nodes
+(see <a href="Pattern-Matching.html">Pattern Matching Tree-sitter Nodes</a>). The capture names don&rsquo;t matter. Parameters
+<var>beg</var> and <var>end</var>, if both non-nil, limit the range in which
+this function queries.
+</p>
+<p>Like other query functions, this function raises a
+<code>treesit-query-error</code> if <var>query</var> is malformed.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dlanguage_002dat"><span class="category">Function: </span><span><strong>treesit-language-at</strong> <em>point</em><a href='#index-treesit_002dlanguage_002dat' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function tries to figure out which language is responsible for
+the text at <var>point</var>. It goes over each parser in
+<code>(treesit-parser-list)</code> and checks whether that parser&rsquo;s range covers
+<var>point</var>.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002drange_002dfunctions"><span class="category">Variable: </span><span><strong>treesit-range-functions</strong><a href='#index-treesit_002drange_002dfunctions' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>A list of range functions. Font-locking and indenting code uses
+functions in this list to set correct ranges for a language parser
+before using it.
+</p>
+<p>The signature of each function should be
+</p>
+<div class="example">
+<pre class="example">(<var>start</var> <var>end</var> &amp;rest <var>_</var>)
+</pre></div>
+
+<p>where <var>start</var> and <var>end</var> mark the region that is about to be
+used. A range function only needs to (but is not limited to) update
+ranges in that region.
+</p>
+<p>Each function in the list is called in-order.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dupdate_002dranges"><span class="category">Function: </span><span><strong>treesit-update-ranges</strong> <em>&amp;optional start end</em><a href='#index-treesit_002dupdate_002dranges' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function is used by font-lock and indent to update ranges before
+using any parser. Each range function in
+<var>treesit-range-functions</var> is called in-order. Arguments
+<var>start</var> and <var>end</var> are passed to each range function.
+</p></dd></dl>
+
+<span id="An-example"></span><h3 class="heading">An example</h3>
+
+<p>Normally, in a set of languages that can be mixed together, there is a
+major language and several embedded languages. We first parse the
+whole document with the major language’s parser, set ranges for the
+embedded languages, then parse the embedded languages.
+</p>
+<p>Suppose we want to parse a very simple document that mixes HTML, CSS
+and JavaScript:
+</p>
+<div class="example">
+<pre class="example">&lt;html&gt;
+ &lt;script&gt;1 + 2&lt;/script&gt;
+ &lt;style&gt;body { color: &quot;blue&quot;; }&lt;/style&gt;
+&lt;/html&gt;
+</pre></div>
+
+<p>We first parse with HTML, then set ranges for CSS and JavaScript:
+</p>
+<div class="example">
+<pre class="example">;; Create parsers.
+(setq html (treesit-get-parser-create 'html))
+(setq css (treesit-get-parser-create 'css))
+(setq js (treesit-get-parser-create 'javascript))
+
+;; Set CSS ranges.
+(setq css-range
+ (treesit-query-range
+ 'html
+ &quot;(style_element (raw_text) @capture)&quot;))
+(treesit-parser-set-included-ranges css css-range)
+
+;; Set JavaScript ranges.
+(setq js-range
+ (treesit-query-range
+ 'html
+ &quot;(script_element (raw_text) @capture)&quot;))
+(treesit-parser-set-included-ranges js js-range)
+</pre></div>
+
+<p>We use a query pattern <code>(style_element (raw_text) @capture)</code> to
+find CSS nodes in the HTML parse tree. For how to write query
+patterns, see <a href="Pattern-Matching.html">Pattern Matching Tree-sitter Nodes</a>.
+</p>
+</div>
+<hr>
+<div class="header">
+<p>
+Next: <a href="Tree_002dsitter-C-API.html">Tree-sitter C API Correspondence</a>, Previous: <a href="Pattern-Matching.html">Pattern Matching Tree-sitter Nodes</a>, Up: <a href="Parsing-Program-Source.html">Parsing Program Source</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p>
+</div>
+
+
+
+</body>
+</html>
diff --git a/admin/notes/tree-sitter/html-manual/Parser_002dbased-Font-Lock.html b/admin/notes/tree-sitter/html-manual/Parser_002dbased-Font-Lock.html
new file mode 100644
index 00000000000..ec89b7749c8
--- /dev/null
+++ b/admin/notes/tree-sitter/html-manual/Parser_002dbased-Font-Lock.html
@@ -0,0 +1,160 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 6.8, https://www.gnu.org/software/texinfo/ -->
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<!-- This is the GNU Emacs Lisp Reference Manual
+corresponding to Emacs version 29.0.50.
+
+Copyright © 1990-1996, 1998-2022 Free Software Foundation,
+Inc.
+
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.3 or
+any later version published by the Free Software Foundation; with the
+Invariant Sections being "GNU General Public License," with the
+Front-Cover Texts being "A GNU Manual," and with the Back-Cover
+Texts as in (a) below. A copy of the license is included in the
+section entitled "GNU Free Documentation License."
+
+(a) The FSF's Back-Cover Text is: "You have the freedom to copy and
+modify this GNU manual. Buying copies from the FSF supports it in
+developing GNU and promoting software freedom." -->
+<title>Parser-based Font Lock (GNU Emacs Lisp Reference Manual)</title>
+
+<meta name="description" content="Parser-based Font Lock (GNU Emacs Lisp Reference Manual)">
+<meta name="keywords" content="Parser-based Font Lock (GNU Emacs Lisp Reference Manual)">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+
+<link href="index.html" rel="start" title="Top">
+<link href="Index.html" rel="index" title="Index">
+<link href="index.html#SEC_Contents" rel="contents" title="Table of Contents">
+<link href="Font-Lock-Mode.html" rel="up" title="Font Lock Mode">
+<link href="Multiline-Font-Lock.html" rel="prev" title="Multiline Font Lock">
+<style type="text/css">
+<!--
+a.copiable-anchor {visibility: hidden; text-decoration: none; line-height: 0em}
+a.summary-letter {text-decoration: none}
+blockquote.indentedblock {margin-right: 0em}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+kbd {font-style: oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+span.nolinebreak {white-space: nowrap}
+span.roman {font-family: initial; font-weight: normal}
+span.sansserif {font-family: sans-serif; font-weight: normal}
+span:hover a.copiable-anchor {visibility: visible}
+ul.no-bullet {list-style: none}
+-->
+</style>
+<link rel="stylesheet" type="text/css" href="./manual.css">
+
+
+</head>
+
+<body lang="en">
+<div class="subsection" id="Parser_002dbased-Font-Lock">
+<div class="header">
+<p>
+Previous: <a href="Multiline-Font-Lock.html" accesskey="p" rel="prev">Multiline Font Lock Constructs</a>, Up: <a href="Font-Lock-Mode.html" accesskey="u" rel="up">Font Lock Mode</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p>
+</div>
+<hr>
+<span id="Parser_002dbased-Font-Lock-1"></span><h4 class="subsection">24.6.10 Parser-based Font Lock</h4>
+
+
+<p>Besides simple syntactic font lock and regexp-based font lock, Emacs
+also provides complete syntactic font lock with the help of a parser,
+currently provided by the tree-sitter library (see <a href="Parsing-Program-Source.html">Parsing Program Source</a>).
+</p>
+<dl class="def">
+<dt id="index-treesit_002dfont_002dlock_002denable"><span class="category">Function: </span><span><strong>treesit-font-lock-enable</strong><a href='#index-treesit_002dfont_002dlock_002denable' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function enables parser-based font lock in the current buffer.
+</p></dd></dl>
+
+<p>Parser-based font lock and other font lock mechanisms are not mutually
+exclusive. By default, if enabled, parser-based font lock runs first,
+then the simple syntactic font lock (if enabled), then regexp-based
+font lock.
+</p>
+<p>Although parser-based font lock doesn&rsquo;t share the same customization
+variables with regexp-based font lock, parser-based font lock uses
+similar customization schemes. The tree-sitter counterpart of
+<var>font-lock-keywords</var> is <var>treesit-font-lock-settings</var>.
+</p>
+<dl class="def">
+<dt id="index-treesit_002dfont_002dlock_002drules"><span class="category">Function: </span><span><strong>treesit-font-lock-rules</strong> <em>:keyword value query...</em><a href='#index-treesit_002dfont_002dlock_002drules' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function is used to set <var>treesit-font-lock-settings</var>. It
+takes care of compiling queries and other post-processing and outputs
+a value that <var>treesit-font-lock-settings</var> accepts. An example:
+</p>
+<div class="example">
+<pre class="example">(treesit-font-lock-rules
+ :language 'javascript
+ :override t
+ '((true) @font-lock-constant-face
+ (false) @font-lock-constant-face)
+ :language 'html
+ &quot;(script_element) @font-lock-builtin-face&quot;)
+</pre></div>
+
+<p>This function takes a list of text or s-exp queries. Before each
+query, there are <var>:keyword</var> and <var>value</var> pairs that configure
+that query. The <code>:language</code> keyword sets the query’s language and
+every query must specify the language. Other keywords are optional:
+</p>
+<table>
+<thead><tr><th width="15%">Keyword</th><th width="15%">Value</th><th width="60%">Description</th></tr></thead>
+<tr><td width="15%"><code>:override</code></td><td width="15%">nil</td><td width="60%">If the region already has a face, discard the new face</td></tr>
+<tr><td width="15%"></td><td width="15%">t</td><td width="60%">Always apply the new face</td></tr>
+<tr><td width="15%"></td><td width="15%"><code>append</code></td><td width="60%">Append the new face to existing ones</td></tr>
+<tr><td width="15%"></td><td width="15%"><code>prepend</code></td><td width="60%">Prepend the new face to existing ones</td></tr>
+<tr><td width="15%"></td><td width="15%"><code>keep</code></td><td width="60%">Fill-in regions without an existing face</td></tr>
+</table>
+
+<p>Capture names in <var>query</var> should be face names like
+<code>font-lock-keyword-face</code>. The captured node will be fontified
+with that face. Capture names can also be function names, in which
+case the function is called with (<var>start</var> <var>end</var> <var>node</var>),
+where <var>start</var> and <var>end</var> are the start and end position of the
+node in buffer, and <var>node</var> is the node itself. If a capture name
+is both a face and a function, the face takes priority. If a capture
+name is not a face name nor a function name, it is ignored.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dfont_002dlock_002dsettings"><span class="category">Variable: </span><span><strong>treesit-font-lock-settings</strong><a href='#index-treesit_002dfont_002dlock_002dsettings' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>A list of <var>setting</var>s for tree-sitter font lock. The exact format
+of this variable is considered internal. One should always use
+<code>treesit-font-lock-rules</code> to set this variable.
+</p>
+<p>Each <var>setting</var> is of form
+</p>
+<div class="example">
+<pre class="example">(<var>language</var> <var>query</var>)
+</pre></div>
+
+<p>Each <var>setting</var> controls one parser (often of different language).
+And <var>language</var> is the language symbol (see <a href="Language-Definitions.html">Tree-sitter Language Definitions</a>); <var>query</var> is the query (see <a href="Pattern-Matching.html">Pattern Matching Tree-sitter Nodes</a>).
+</p></dd></dl>
+
+<p>Multi-language major modes should provide range functions in
+<code>treesit-range-functions</code>, and Emacs will set the ranges
+accordingly before fontifying a region (see <a href="Multiple-Languages.html">Parsing Text in Multiple Languages</a>).
+</p>
+</div>
+<hr>
+<div class="header">
+<p>
+Previous: <a href="Multiline-Font-Lock.html">Multiline Font Lock Constructs</a>, Up: <a href="Font-Lock-Mode.html">Font Lock Mode</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p>
+</div>
+
+
+
+</body>
+</html>
diff --git a/admin/notes/tree-sitter/html-manual/Parser_002dbased-Indentation.html b/admin/notes/tree-sitter/html-manual/Parser_002dbased-Indentation.html
new file mode 100644
index 00000000000..691c8fba8c7
--- /dev/null
+++ b/admin/notes/tree-sitter/html-manual/Parser_002dbased-Indentation.html
@@ -0,0 +1,244 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 6.8, https://www.gnu.org/software/texinfo/ -->
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<!-- This is the GNU Emacs Lisp Reference Manual
+corresponding to Emacs version 29.0.50.
+
+Copyright © 1990-1996, 1998-2022 Free Software Foundation,
+Inc.
+
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.3 or
+any later version published by the Free Software Foundation; with the
+Invariant Sections being "GNU General Public License," with the
+Front-Cover Texts being "A GNU Manual," and with the Back-Cover
+Texts as in (a) below. A copy of the license is included in the
+section entitled "GNU Free Documentation License."
+
+(a) The FSF's Back-Cover Text is: "You have the freedom to copy and
+modify this GNU manual. Buying copies from the FSF supports it in
+developing GNU and promoting software freedom." -->
+<title>Parser-based Indentation (GNU Emacs Lisp Reference Manual)</title>
+
+<meta name="description" content="Parser-based Indentation (GNU Emacs Lisp Reference Manual)">
+<meta name="keywords" content="Parser-based Indentation (GNU Emacs Lisp Reference Manual)">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+
+<link href="index.html" rel="start" title="Top">
+<link href="Index.html" rel="index" title="Index">
+<link href="index.html#SEC_Contents" rel="contents" title="Table of Contents">
+<link href="Auto_002dIndentation.html" rel="up" title="Auto-Indentation">
+<link href="SMIE.html" rel="prev" title="SMIE">
+<style type="text/css">
+<!--
+a.copiable-anchor {visibility: hidden; text-decoration: none; line-height: 0em}
+a.summary-letter {text-decoration: none}
+blockquote.indentedblock {margin-right: 0em}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+kbd {font-style: oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+span.nolinebreak {white-space: nowrap}
+span.roman {font-family: initial; font-weight: normal}
+span.sansserif {font-family: sans-serif; font-weight: normal}
+span:hover a.copiable-anchor {visibility: visible}
+ul.no-bullet {list-style: none}
+-->
+</style>
+<link rel="stylesheet" type="text/css" href="./manual.css">
+
+
+</head>
+
+<body lang="en">
+<div class="subsection" id="Parser_002dbased-Indentation">
+<div class="header">
+<p>
+Previous: <a href="SMIE.html" accesskey="p" rel="prev">Simple Minded Indentation Engine</a>, Up: <a href="Auto_002dIndentation.html" accesskey="u" rel="up">Automatic Indentation of code</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p>
+</div>
+<hr>
+<span id="Parser_002dbased-Indentation-1"></span><h4 class="subsection">24.7.2 Parser-based Indentation</h4>
+
+
+<p>When built with the tree-sitter library (see <a href="Parsing-Program-Source.html">Parsing Program Source</a>), Emacs could parse program source and produce a syntax tree.
+And this syntax tree can be used for indentation. For maximum
+flexibility, we could write a custom indent function that queries the
+syntax tree and indents accordingly for each language, but that would
+be a lot of work. It is more convenient to use the simple indentation
+engine described below: we only need to write some indentation rules
+and the engine takes care of the rest.
+</p>
+<p>To enable the indentation engine, set the value of
+<code>indent-line-function</code> to <code>treesit-indent</code>.
+</p>
+<dl class="def">
+<dt id="index-treesit_002dindent_002dfunction"><span class="category">Variable: </span><span><strong>treesit-indent-function</strong><a href='#index-treesit_002dindent_002dfunction' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This variable stores the actual function called by
+<code>treesit-indent</code>. By default, its value is
+<code>treesit-simple-indent</code>. In the future we might add other
+more complex indentation engines.
+</p></dd></dl>
+
+<span id="Writing-indentation-rules"></span><h3 class="heading">Writing indentation rules</h3>
+
+<dl class="def">
+<dt id="index-treesit_002dsimple_002dindent_002drules"><span class="category">Variable: </span><span><strong>treesit-simple-indent-rules</strong><a href='#index-treesit_002dsimple_002dindent_002drules' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This local variable stores indentation rules for every language. It is
+a list of
+</p>
+<div class="example">
+<pre class="example">(<var>language</var> . <var>rules</var>)
+</pre></div>
+
+<p>where <var>language</var> is a language symbol, and <var>rules</var> is a list
+of
+</p>
+<div class="example">
+<pre class="example">(<var>matcher</var> <var>anchor</var> <var>offset</var>)
+</pre></div>
+
+<p>First, Emacs passes the node at point to <var>matcher</var>; if it returns
+non-nil, this rule applies. Then Emacs passes the node to
+<var>anchor</var>, which returns a point. Emacs takes the column number of
+that point, adds <var>offset</var> to it, and the result is the indent for
+the current line.
+</p>
+<p>The <var>matcher</var> and <var>anchor</var> are functions, and Emacs provides
+convenient presets for them. You can skip over to
+<code>treesit-simple-indent-presets</code> below; those presets should be
+more than enough.
+</p>
+<p>A <var>matcher</var> or an <var>anchor</var> is a function that takes three
+arguments (<var>node</var> <var>parent</var> <var>bol</var>). Argument <var>bol</var> is
+the point at where we are indenting: the position of the first
+non-whitespace character from the beginning of line; <var>node</var> is the
+largest (highest-in-tree) node that starts at that point; <var>parent</var>
+is the parent of <var>node</var>. A <var>matcher</var> returns nil/non-nil, and
+<var>anchor</var> returns a point.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dsimple_002dindent_002dpresets"><span class="category">Variable: </span><span><strong>treesit-simple-indent-presets</strong><a href='#index-treesit_002dsimple_002dindent_002dpresets' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This is a list of presets for <var>matcher</var>s and <var>anchor</var>s in
+<code>treesit-simple-indent-rules</code>. Each of them represents a function
+that takes <var>node</var>, <var>parent</var> and <var>bol</var> as arguments.
+</p>
+<div class="example">
+<pre class="example">no-node
+</pre></div>
+
+<p>This matcher matches the case where <var>node</var> is nil, i.e., there is
+no node that starts at <var>bol</var>. This is the case when <var>bol</var> is
+at an empty line or inside a multi-line string, etc.
+</p>
+<div class="example">
+<pre class="example">(parent-is <var>type</var>)
+</pre></div>
+
+<p>This matcher matches if <var>parent</var>&rsquo;s type is <var>type</var>.
+</p>
+<div class="example">
+<pre class="example">(node-is <var>type</var>)
+</pre></div>
+
+<p>This matcher matches if <var>node</var>&rsquo;s type is <var>type</var>.
+</p>
+<div class="example">
+<pre class="example">(query <var>query</var>)
+</pre></div>
+
+<p>This matcher matches if querying <var>parent</var> with <var>query</var>
+captures <var>node</var>. The capture name does not matter.
+</p>
+<div class="example">
+<pre class="example">(match <var>node-type</var> <var>parent-type</var>
+ <var>node-field</var> <var>node-index-min</var> <var>node-index-max</var>)
+</pre></div>
+
+<p>This matcher checks if <var>node</var>&rsquo;s type is <var>node-type</var>,
+<var>parent</var>&rsquo;s type is <var>parent-type</var>, <var>node</var>&rsquo;s field name in
+<var>parent</var> is <var>node-field</var>, and <var>node</var>&rsquo;s index among its
+siblings is between <var>node-index-min</var> and <var>node-index-max</var>. If
+the value of a constraint is nil, this matcher doesn&rsquo;t check for that
+constraint. For example, to match the first child where parent is
+<code>argument_list</code>, use
+</p>
+<div class="example">
+<pre class="example">(match nil &quot;argument_list&quot; nil 0 0)
+</pre></div>
+
+<div class="example">
+<pre class="example">first-sibling
+</pre></div>
+
+<p>This anchor returns the start of the first child of <var>parent</var>.
+</p>
+<div class="example">
+<pre class="example">parent
+</pre></div>
+
+<p>This anchor returns the start of <var>parent</var>.
+</p>
+<div class="example">
+<pre class="example">parent-bol
+</pre></div>
+
+<p>This anchor returns the beginning of non-space characters on the line
+where <var>parent</var> is on.
+</p>
+<div class="example">
+<pre class="example">prev-sibling
+</pre></div>
+
+<p>This anchor returns the start of the previous sibling of <var>node</var>.
+</p>
+<div class="example">
+<pre class="example">no-indent
+</pre></div>
+
+<p>This anchor returns the start of <var>node</var>, i.e., no indent.
+</p>
+<div class="example">
+<pre class="example">prev-line
+</pre></div>
+
+<p>This anchor returns the first non-whitespace character on the previous
+line.
+</p></dd></dl>
+
+<span id="Indentation-utilities"></span><h3 class="heading">Indentation utilities</h3>
+
+<p>Here are some utility functions that can help writing indentation
+rules.
+</p>
+<dl class="def">
+<dt id="index-treesit_002dcheck_002dindent"><span class="category">Function: </span><span><strong>treesit-check-indent</strong> <em>mode</em><a href='#index-treesit_002dcheck_002dindent' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function checks current buffer&rsquo;s indentation against major mode
+<var>mode</var>. It indents the current buffer in <var>mode</var> and compares
+the indentation with the current indentation. Then it pops up a diff
+buffer showing the difference. Correct indentation (target) is in
+green, current indentation is in red.
+</p></dd></dl>
+
+<p>It is also helpful to use <code>treesit-inspect-mode</code> when writing
+indentation rules.
+</p>
+</div>
+<hr>
+<div class="header">
+<p>
+Previous: <a href="SMIE.html">Simple Minded Indentation Engine</a>, Up: <a href="Auto_002dIndentation.html">Automatic Indentation of code</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p>
+</div>
+
+
+
+</body>
+</html>
diff --git a/admin/notes/tree-sitter/html-manual/Parsing-Program-Source.html b/admin/notes/tree-sitter/html-manual/Parsing-Program-Source.html
new file mode 100644
index 00000000000..7b6e51468a6
--- /dev/null
+++ b/admin/notes/tree-sitter/html-manual/Parsing-Program-Source.html
@@ -0,0 +1,125 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 6.8, https://www.gnu.org/software/texinfo/ -->
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<!-- This is the GNU Emacs Lisp Reference Manual
+corresponding to Emacs version 29.0.50.
+
+Copyright © 1990-1996, 1998-2022 Free Software Foundation,
+Inc.
+
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.3 or
+any later version published by the Free Software Foundation; with the
+Invariant Sections being "GNU General Public License," with the
+Front-Cover Texts being "A GNU Manual," and with the Back-Cover
+Texts as in (a) below. A copy of the license is included in the
+section entitled "GNU Free Documentation License."
+
+(a) The FSF's Back-Cover Text is: "You have the freedom to copy and
+modify this GNU manual. Buying copies from the FSF supports it in
+developing GNU and promoting software freedom." -->
+<title>Parsing Program Source (GNU Emacs Lisp Reference Manual)</title>
+
+<meta name="description" content="Parsing Program Source (GNU Emacs Lisp Reference Manual)">
+<meta name="keywords" content="Parsing Program Source (GNU Emacs Lisp Reference Manual)">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+
+<link href="index.html" rel="start" title="Top">
+<link href="Index.html" rel="index" title="Index">
+<link href="index.html#SEC_Contents" rel="contents" title="Table of Contents">
+<link href="index.html" rel="up" title="Top">
+<link href="Abbrevs.html" rel="next" title="Abbrevs">
+<link href="Syntax-Tables.html" rel="prev" title="Syntax Tables">
+<style type="text/css">
+<!--
+a.copiable-anchor {visibility: hidden; text-decoration: none; line-height: 0em}
+a.summary-letter {text-decoration: none}
+blockquote.indentedblock {margin-right: 0em}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+kbd {font-style: oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+span.nolinebreak {white-space: nowrap}
+span.roman {font-family: initial; font-weight: normal}
+span.sansserif {font-family: sans-serif; font-weight: normal}
+span:hover a.copiable-anchor {visibility: visible}
+ul.no-bullet {list-style: none}
+-->
+</style>
+<link rel="stylesheet" type="text/css" href="./manual.css">
+
+
+</head>
+
+<body lang="en">
+<div class="chapter" id="Parsing-Program-Source">
+<div class="header">
+<p>
+Next: <a href="Abbrevs.html" accesskey="n" rel="next">Abbrevs and Abbrev Expansion</a>, Previous: <a href="Syntax-Tables.html" accesskey="p" rel="prev">Syntax Tables</a>, Up: <a href="index.html" accesskey="u" rel="up">Emacs Lisp</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p>
+</div>
+<hr>
+<span id="Parsing-Program-Source-1"></span><h2 class="chapter">37 Parsing Program Source</h2>
+
+<p>Emacs provides various ways to parse program source text and produce a
+<em>syntax tree</em>. In a syntax tree, text is no longer a
+one-dimensional stream but a structured tree of nodes, where each node
+represents a piece of text. Thus a syntax tree can enable
+interesting features like precise fontification, indentation,
+navigation, structured editing, etc.
+</p>
+<p>Emacs has a simple facility for parsing balanced expressions
+(see <a href="Parsing-Expressions.html">Parsing Expressions</a>). There is also the SMIE library for generic
+navigation and indentation (see <a href="SMIE.html">Simple Minded Indentation Engine</a>).
+</p>
+<p>Emacs also provides integration with the tree-sitter library
+(<a href="https://tree-sitter.github.io/tree-sitter">https://tree-sitter.github.io/tree-sitter</a>) if compiled with
+it. The tree-sitter library implements an incremental parser and has
+support from a wide range of programming languages.
+</p>
+<dl class="def">
+<dt id="index-treesit_002davailable_002dp"><span class="category">Function: </span><span><strong>treesit-available-p</strong><a href='#index-treesit_002davailable_002dp' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function returns non-nil if tree-sitter features are available
+for this Emacs instance.
+</p></dd></dl>
+
+<p>For tree-sitter integration with existing Emacs features,
+see <a href="Parser_002dbased-Font-Lock.html">Parser-based Font Lock</a>, <a href="Parser_002dbased-Indentation.html">Parser-based Indentation</a>, and
+<a href="List-Motion.html">Moving over Balanced Expressions</a>.
+</p>
+<p>To access the syntax tree of the text in a buffer, we need to first
+load a language definition and create a parser with it. Next, we can
+query the parser for specific nodes in the syntax tree. Then, we can
+access various information about the node, and we can pattern-match a
+node with a powerful syntax. Finally, we explain how to work with
+source files that mix multiple languages. The following sections
+explain how to do each of the tasks in detail.
+</p>
+
+<ul class="section-toc">
+<li><a href="Language-Definitions.html" accesskey="1">Tree-sitter Language Definitions</a></li>
+<li><a href="Using-Parser.html" accesskey="2">Using Tree-sitter Parser</a></li>
+<li><a href="Retrieving-Node.html" accesskey="3">Retrieving Node</a></li>
+<li><a href="Accessing-Node.html" accesskey="4">Accessing Node Information</a></li>
+<li><a href="Pattern-Matching.html" accesskey="5">Pattern Matching Tree-sitter Nodes</a></li>
+<li><a href="Multiple-Languages.html" accesskey="6">Parsing Text in Multiple Languages</a></li>
+<li><a href="Tree_002dsitter-C-API.html" accesskey="7">Tree-sitter C API Correspondence</a></li>
+</ul>
+</div>
+<hr>
+<div class="header">
+<p>
+Next: <a href="Abbrevs.html">Abbrevs and Abbrev Expansion</a>, Previous: <a href="Syntax-Tables.html">Syntax Tables</a>, Up: <a href="index.html">Emacs Lisp</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p>
+</div>
+
+
+
+</body>
+</html>
diff --git a/admin/notes/tree-sitter/html-manual/Pattern-Matching.html b/admin/notes/tree-sitter/html-manual/Pattern-Matching.html
new file mode 100644
index 00000000000..e14efe71629
--- /dev/null
+++ b/admin/notes/tree-sitter/html-manual/Pattern-Matching.html
@@ -0,0 +1,430 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 6.8, https://www.gnu.org/software/texinfo/ -->
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<!-- This is the GNU Emacs Lisp Reference Manual
+corresponding to Emacs version 29.0.50.
+
+Copyright © 1990-1996, 1998-2022 Free Software Foundation,
+Inc.
+
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.3 or
+any later version published by the Free Software Foundation; with the
+Invariant Sections being "GNU General Public License," with the
+Front-Cover Texts being "A GNU Manual," and with the Back-Cover
+Texts as in (a) below. A copy of the license is included in the
+section entitled "GNU Free Documentation License."
+
+(a) The FSF's Back-Cover Text is: "You have the freedom to copy and
+modify this GNU manual. Buying copies from the FSF supports it in
+developing GNU and promoting software freedom." -->
+<title>Pattern Matching (GNU Emacs Lisp Reference Manual)</title>
+
+<meta name="description" content="Pattern Matching (GNU Emacs Lisp Reference Manual)">
+<meta name="keywords" content="Pattern Matching (GNU Emacs Lisp Reference Manual)">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+
+<link href="index.html" rel="start" title="Top">
+<link href="Index.html" rel="index" title="Index">
+<link href="index.html#SEC_Contents" rel="contents" title="Table of Contents">
+<link href="Parsing-Program-Source.html" rel="up" title="Parsing Program Source">
+<link href="Multiple-Languages.html" rel="next" title="Multiple Languages">
+<link href="Accessing-Node.html" rel="prev" title="Accessing Node">
+<style type="text/css">
+<!--
+a.copiable-anchor {visibility: hidden; text-decoration: none; line-height: 0em}
+a.summary-letter {text-decoration: none}
+blockquote.indentedblock {margin-right: 0em}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+kbd {font-style: oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+span.nolinebreak {white-space: nowrap}
+span.roman {font-family: initial; font-weight: normal}
+span.sansserif {font-family: sans-serif; font-weight: normal}
+span:hover a.copiable-anchor {visibility: visible}
+ul.no-bullet {list-style: none}
+-->
+</style>
+<link rel="stylesheet" type="text/css" href="./manual.css">
+
+
+</head>
+
+<body lang="en">
+<div class="section" id="Pattern-Matching">
+<div class="header">
+<p>
+Next: <a href="Multiple-Languages.html" accesskey="n" rel="next">Parsing Text in Multiple Languages</a>, Previous: <a href="Accessing-Node.html" accesskey="p" rel="prev">Accessing Node Information</a>, Up: <a href="Parsing-Program-Source.html" accesskey="u" rel="up">Parsing Program Source</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p>
+</div>
+<hr>
+<span id="Pattern-Matching-Tree_002dsitter-Nodes"></span><h3 class="section">37.5 Pattern Matching Tree-sitter Nodes</h3>
+
+<p>Tree-sitter lets us pattern match with a small declarative language.
+Pattern matching consists of two steps: first tree-sitter matches a
+<em>pattern</em> against nodes in the syntax tree, then it <em>captures</em>
+specific nodes in that pattern and returns the captured nodes.
+</p>
+<p>We describe first how to write the most basic query pattern and how to
+capture nodes in a pattern, then the pattern-match function, and finally
+more advanced pattern syntax.
+</p>
+<span id="Basic-query-syntax"></span><h3 class="heading">Basic query syntax</h3>
+
+<span id="index-Tree_002dsitter-query-syntax"></span>
+<span id="index-Tree_002dsitter-query-pattern"></span>
+<p>A <em>query</em> consists of multiple <em>patterns</em>. Each pattern is an
+s-expression that matches a certain node in the syntax tree. A
+pattern has the following shape:
+</p>
+<div class="example">
+<pre class="example">(<var>type</var> <var>child</var>...)
+</pre></div>
+
+<p>For example, a pattern that matches a <code>binary_expression</code> node that
+contains <code>number_literal</code> child nodes would look like
+</p>
+<div class="example">
+<pre class="example">(binary_expression (number_literal))
+</pre></div>
+
+<p>To <em>capture</em> a node in the query pattern above, append
+<code>@capture-name</code> after the node pattern you want to capture. For
+example,
+</p>
+<div class="example">
+<pre class="example">(binary_expression (number_literal) @number-in-exp)
+</pre></div>
+
+<p>captures <code>number_literal</code> nodes that are inside a
+<code>binary_expression</code> node with capture name <code>number-in-exp</code>.
+</p>
+<p>We can capture the <code>binary_expression</code> node too, with capture
+name <code>biexp</code>:
+</p>
+<div class="example">
+<pre class="example">(binary_expression
+ (number_literal) @number-in-exp) @biexp
+</pre></div>
+
+<span id="Query-function"></span><h3 class="heading">Query function</h3>
+
+<p>Now we can introduce the query functions.
+</p>
+<dl class="def">
+<dt id="index-treesit_002dquery_002dcapture"><span class="category">Function: </span><span><strong>treesit-query-capture</strong> <em>node query &amp;optional beg end node-only</em><a href='#index-treesit_002dquery_002dcapture' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function matches patterns in <var>query</var> in <var>node</var>.
+Parameter <var>query</var> can be either a string, an s-expression, or a
+compiled query object. For now, we focus on the string syntax;
+s-expression syntax and compiled query are described at the end of the
+section.
+</p>
+<p>Parameter <var>node</var> can also be a parser or a language symbol. A
+parser means using its root node, a language symbol means find or
+create a parser for that language in the current buffer, and use the
+root node.
+</p>
+<p>The function returns all captured nodes in a list of
+<code>(<var>capture_name</var> . <var>node</var>)</code>. If <var>node-only</var> is
+non-nil, a list of nodes is returned instead. If <var>beg</var> and
+<var>end</var> are both non-nil, this function only pattern matches nodes
+in that range.
+</p>
+<span id="index-treesit_002dquery_002derror"></span>
+<p>This function raises a <var>treesit-query-error</var> if <var>query</var> is
+malformed. The signal data contains a description of the specific
+error. You can use <code>treesit-query-validate</code> to debug the query.
+</p></dd></dl>
+
+<p>For example, suppose <var>node</var>&rsquo;s content is <code>1 + 2</code>, and
+<var>query</var> is
+</p>
+<div class="example">
+<pre class="example">(setq query
+ &quot;(binary_expression
+ (number_literal) @number-in-exp) @biexp&quot;)
+</pre></div>
+
+<p>Querying that query would return
+</p>
+<div class="example">
+<pre class="example">(treesit-query-capture node query)
+ &rArr; ((biexp . <var>&lt;node for &quot;1 + 2&quot;&gt;</var>)
+ (number-in-exp . <var>&lt;node for &quot;1&quot;&gt;</var>)
+ (number-in-exp . <var>&lt;node for &quot;2&quot;&gt;</var>))
+</pre></div>
+
+<p>As we mentioned earlier, a <var>query</var> could contain multiple
+patterns. For example, it could have two top-level patterns:
+</p>
+<div class="example">
+<pre class="example">(setq query
+ &quot;(binary_expression) @biexp
+ (number_literal) @number @biexp&quot;)
+</pre></div>
+
+<dl class="def">
+<dt id="index-treesit_002dquery_002dstring"><span class="category">Function: </span><span><strong>treesit-query-string</strong> <em>string query language</em><a href='#index-treesit_002dquery_002dstring' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function parses <var>string</var> with <var>language</var>, pattern matches
+its root node with <var>query</var>, and returns the result.
+</p></dd></dl>
+
+<span id="More-query-syntax"></span><h3 class="heading">More query syntax</h3>
+
+<p>Besides node type and capture, tree-sitter&rsquo;s query syntax can express
+anonymous node, field name, wildcard, quantification, grouping,
+alternation, anchor, and predicate.
+</p>
+<span id="Anonymous-node"></span><h4 class="subheading">Anonymous node</h4>
+
+<p>An anonymous node is written verbatim, surrounded by quotes. A
+pattern matching (and capturing) keyword <code>return</code> would be
+</p>
+<div class="example">
+<pre class="example">&quot;return&quot; @keyword
+</pre></div>
+
+<span id="Wild-card"></span><h4 class="subheading">Wild card</h4>
+
+<p>In a query pattern, &lsquo;<samp>(_)</samp>&rsquo; matches any named node, and &lsquo;<samp>_</samp>&rsquo;
+matches any named or anonymous node. For example, to capture any
+named child of a <code>binary_expression</code> node, the pattern would be
+</p>
+<div class="example">
+<pre class="example">(binary_expression (_) @in_biexp)
+</pre></div>
+
+<span id="Field-name"></span><h4 class="subheading">Field name</h4>
+
+<p>We can capture child nodes that have specific field names:
+</p>
+<div class="example">
+<pre class="example">(function_definition
+ declarator: (_) @func-declarator
+ body: (_) @func-body)
+</pre></div>
+
+<p>We can also capture a node that doesn&rsquo;t have a certain field, say, a
+<code>function_definition</code> without a <code>body</code> field.
+</p>
+<div class="example">
+<pre class="example">(function_definition !body) @func-no-body
+</pre></div>
+
+<span id="Quantify-node"></span><h4 class="subheading">Quantify node</h4>
+
+<p>Tree-sitter recognizes quantification operators &lsquo;<samp>*</samp>&rsquo;, &lsquo;<samp>+</samp>&rsquo; and
+&lsquo;<samp>?</samp>&rsquo;. Their meanings are the same as in regular expressions:
+&lsquo;<samp>*</samp>&rsquo; matches the preceding pattern zero or more times, &lsquo;<samp>+</samp>&rsquo;
+matches one or more times, and &lsquo;<samp>?</samp>&rsquo; matches zero or one time.
+</p>
+<p>For example, this pattern matches <code>type_declaration</code> nodes
+that have <em>zero or more</em> <code>long</code> keywords.
+</p>
+<div class="example">
+<pre class="example">(type_declaration &quot;long&quot;*) @long-type
+</pre></div>
+
+<p>And this pattern matches a type declaration that has zero or one
+<code>long</code> keyword:
+</p>
+<div class="example">
+<pre class="example">(type_declaration &quot;long&quot;?) @long-type
+</pre></div>
+
+<span id="Grouping"></span><h4 class="subheading">Grouping</h4>
+
+<p>Similar to groups in regular expressions, we can bundle patterns into a
+group and apply quantification operators to it. For example, to
+express a comma separated list of identifiers, one could write
+</p>
+<div class="example">
+<pre class="example">(identifier) (&quot;,&quot; (identifier))*
+</pre></div>
+
+<span id="Alternation"></span><h4 class="subheading">Alternation</h4>
+
+<p>Again, similar to regular expressions, we can express &ldquo;match any one
+from this group of patterns&rdquo; in the query pattern. The syntax is a
+list of patterns enclosed in square brackets. For example, to capture
+some keywords in C, the query pattern would be
+</p>
+<div class="example">
+<pre class="example">[
+ &quot;return&quot;
+ &quot;break&quot;
+ &quot;if&quot;
+ &quot;else&quot;
+] @keyword
+</pre></div>
+
+<span id="Anchor"></span><h4 class="subheading">Anchor</h4>
+
+<p>The anchor operator &lsquo;<samp>.</samp>&rsquo; can be used to enforce juxtaposition,
+i.e., to enforce two things to be directly next to each other. The
+two &ldquo;things&rdquo; can be two nodes, or a child and the end of its parent.
+For example, to capture the first child, the last child, or two
+adjacent children:
+</p>
+<div class="example">
+<pre class="example">;; Anchor the child with the end of its parent.
+(compound_expression (_) @last-child .)
+
+;; Anchor the child with the beginning of its parent.
+(compound_expression . (_) @first-child)
+
+;; Anchor two adjacent children.
+(compound_expression
+ (_) @prev-child
+ .
+ (_) @next-child)
+</pre></div>
+
+<p>Note that the enforcement of juxtaposition ignores any anonymous
+nodes.
+</p>
+<span id="Predicate"></span><h4 class="subheading">Predicate</h4>
+
+<p>We can add predicate constraints to a pattern. For example, if we use
+the following query pattern
+</p>
+<div class="example">
+<pre class="example">(
+ (array . (_) @first (_) @last .)
+ (#equal @first @last)
+)
+</pre></div>
+
+<p>Then tree-sitter only matches arrays where the first element equals
+the last element. To attach a predicate to a pattern, we need to
+group them together. A predicate always starts with a &lsquo;<samp>#</samp>&rsquo;.
+Currently there are two predicates, <code>#equal</code> and <code>#match</code>.
+</p>
+<dl class="def">
+<dt id="index-equal-1"><span class="category">Predicate: </span><span><strong>equal</strong> <em>arg1 arg2</em><a href='#index-equal-1' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>Matches if <var>arg1</var> equals <var>arg2</var>. Arguments can be either a
+string or a capture name. Capture names represent the text that the
+captured node spans in the buffer.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-match"><span class="category">Predicate: </span><span><strong>match</strong> <em>regexp capture-name</em><a href='#index-match' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>Matches if the text that <var>capture-name</var>’s node spans in the buffer
+matches regular expression <var>regexp</var>. Matching is case-sensitive.
+</p></dd></dl>
+
+<p>Note that a predicate can only refer to capture names that appear in the
+same pattern. Indeed, it makes little sense to refer to capture names
+in other patterns anyway.
+</p>
+<span id="S_002dexpression-patterns"></span><h3 class="heading">S-expression patterns</h3>
+
+<p>Besides strings, Emacs provides an s-expression based syntax for query
+patterns. It largely resembles the string-based syntax. For example,
+the following pattern
+</p>
+<div class="example">
+<pre class="example">(treesit-query-capture
+ node &quot;(addition_expression
+ left: (_) @left
+ \&quot;+\&quot; @plus-sign
+ right: (_) @right) @addition
+
+ [\&quot;return\&quot; \&quot;break\&quot;] @keyword&quot;)
+</pre></div>
+
+<p>is equivalent to
+</p>
+<div class="example">
+<pre class="example">(treesit-query-capture
+ node '((addition_expression
+ left: (_) @left
+ &quot;+&quot; @plus-sign
+ right: (_) @right) @addition
+
+ [&quot;return&quot; &quot;break&quot;] @keyword))
+</pre></div>
+
+<p>Most pattern syntax can be written directly as strange but
+nevertheless valid s-expressions. Only a few of them need
+modification:
+</p>
+<ul>
+<li> Anchor &lsquo;<samp>.</samp>&rsquo; is written as <code>:anchor</code>.
+</li><li> &lsquo;<samp>?</samp>&rsquo; is written as &lsquo;<samp>:?</samp>&rsquo;.
+</li><li> &lsquo;<samp>*</samp>&rsquo; is written as &lsquo;<samp>:*</samp>&rsquo;.
+</li><li> &lsquo;<samp>+</samp>&rsquo; is written as &lsquo;<samp>:+</samp>&rsquo;.
+</li><li> <code>#equal</code> is written as <code>:equal</code>. In general, predicates
+change their &lsquo;<samp>#</samp>&rsquo; to &lsquo;<samp>:</samp>&rsquo;.
+</li></ul>
+
+<p>For example,
+</p>
+<div class="example">
+<pre class="example">&quot;(
+ (compound_expression . (_) @first (_)* @rest)
+ (#match \&quot;love\&quot; @first)
+ )&quot;
+</pre></div>
+
+<p>is written in s-expression as
+</p>
+<div class="example">
+<pre class="example">'((
+ (compound_expression :anchor (_) @first (_) :* @rest)
+ (:match &quot;love&quot; @first)
+ ))
+</pre></div>
+
+<span id="Compiling-queries"></span><h3 class="heading">Compiling queries</h3>
+
+<p>If a query will be used repeatedly, especially in tight loops, it is
+important to compile that query, because a compiled query is much
+faster than an uncompiled one. A compiled query can be used anywhere
+a query is accepted.
+</p>
+<dl class="def">
+<dt id="index-treesit_002dquery_002dcompile"><span class="category">Function: </span><span><strong>treesit-query-compile</strong> <em>language query</em><a href='#index-treesit_002dquery_002dcompile' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function compiles <var>query</var> for <var>language</var> into a compiled
+query object and returns it.
+</p>
+<p>This function raises a <var>treesit-query-error</var> if <var>query</var> is
+malformed. The signal data contains a description of the specific
+error. You can use <code>treesit-query-validate</code> to debug the query.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dquery_002dexpand"><span class="category">Function: </span><span><strong>treesit-query-expand</strong> <em>query</em><a href='#index-treesit_002dquery_002dexpand' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function expands the s-expression <var>query</var> into a string
+query.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dpattern_002dexpand"><span class="category">Function: </span><span><strong>treesit-pattern-expand</strong> <em>pattern</em><a href='#index-treesit_002dpattern_002dexpand' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function expands the s-expression <var>pattern</var> into a string
+pattern.
+</p></dd></dl>
+
+<p>Finally, the tree-sitter project&rsquo;s documentation about
+pattern-matching can be found at
+<a href="https://tree-sitter.github.io/tree-sitter/using-parsers#pattern-matching-with-queries">https://tree-sitter.github.io/tree-sitter/using-parsers#pattern-matching-with-queries</a>.
+</p>
+</div>
+<hr>
+<div class="header">
+<p>
+Next: <a href="Multiple-Languages.html">Parsing Text in Multiple Languages</a>, Previous: <a href="Accessing-Node.html">Accessing Node Information</a>, Up: <a href="Parsing-Program-Source.html">Parsing Program Source</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p>
+</div>
+
+
+
+</body>
+</html>
diff --git a/admin/notes/tree-sitter/html-manual/Retrieving-Node.html b/admin/notes/tree-sitter/html-manual/Retrieving-Node.html
new file mode 100644
index 00000000000..1bea0dde76b
--- /dev/null
+++ b/admin/notes/tree-sitter/html-manual/Retrieving-Node.html
@@ -0,0 +1,362 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 6.8, https://www.gnu.org/software/texinfo/ -->
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<!-- This is the GNU Emacs Lisp Reference Manual
+corresponding to Emacs version 29.0.50.
+
+Copyright © 1990-1996, 1998-2022 Free Software Foundation,
+Inc.
+
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.3 or
+any later version published by the Free Software Foundation; with the
+Invariant Sections being "GNU General Public License," with the
+Front-Cover Texts being "A GNU Manual," and with the Back-Cover
+Texts as in (a) below. A copy of the license is included in the
+section entitled "GNU Free Documentation License."
+
+(a) The FSF's Back-Cover Text is: "You have the freedom to copy and
+modify this GNU manual. Buying copies from the FSF supports it in
+developing GNU and promoting software freedom." -->
+<title>Retrieving Node (GNU Emacs Lisp Reference Manual)</title>
+
+<meta name="description" content="Retrieving Node (GNU Emacs Lisp Reference Manual)">
+<meta name="keywords" content="Retrieving Node (GNU Emacs Lisp Reference Manual)">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+
+<link href="index.html" rel="start" title="Top">
+<link href="Index.html" rel="index" title="Index">
+<link href="index.html#SEC_Contents" rel="contents" title="Table of Contents">
+<link href="Parsing-Program-Source.html" rel="up" title="Parsing Program Source">
+<link href="Accessing-Node.html" rel="next" title="Accessing Node">
+<link href="Using-Parser.html" rel="prev" title="Using Parser">
+<style type="text/css">
+<!--
+a.copiable-anchor {visibility: hidden; text-decoration: none; line-height: 0em}
+a.summary-letter {text-decoration: none}
+blockquote.indentedblock {margin-right: 0em}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+kbd {font-style: oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+span.nolinebreak {white-space: nowrap}
+span.roman {font-family: initial; font-weight: normal}
+span.sansserif {font-family: sans-serif; font-weight: normal}
+span:hover a.copiable-anchor {visibility: visible}
+ul.no-bullet {list-style: none}
+-->
+</style>
+<link rel="stylesheet" type="text/css" href="./manual.css">
+
+
+</head>
+
+<body lang="en">
+<div class="section" id="Retrieving-Node">
+<div class="header">
+<p>
+Next: <a href="Accessing-Node.html" accesskey="n" rel="next">Accessing Node Information</a>, Previous: <a href="Using-Parser.html" accesskey="p" rel="prev">Using Tree-sitter Parser</a>, Up: <a href="Parsing-Program-Source.html" accesskey="u" rel="up">Parsing Program Source</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p>
+</div>
+<hr>
+<span id="Retrieving-Node-1"></span><h3 class="section">37.3 Retrieving Node</h3>
+
+<span id="index-tree_002dsitter-find-node"></span>
+<span id="index-tree_002dsitter-get-node"></span>
+<p>Before we continue, let&rsquo;s go over some conventions of tree-sitter
+functions.
+</p>
+<p>We talk about a node being &ldquo;smaller&rdquo; or &ldquo;larger&rdquo;, and &ldquo;lower&rdquo; or
+&ldquo;higher&rdquo;. A smaller and lower node is lower in the syntax tree and
+therefore spans a smaller piece of text; a larger and higher node is
+higher up in the syntax tree, containing many smaller nodes as its
+children, and therefore spans a larger piece of text.
+</p>
+<p>When a function cannot find a node, it returns nil. And for the
+convenience of function chaining, all the functions that take a node
+as an argument and return a node accept nil as the node; in that
+case, the function just returns nil.
+</p>
+<span id="index-treesit_002dnode_002doutdated"></span>
+<p>Nodes are not automatically updated when the associated buffer is
+modified. And there is no way to update a node once it is retrieved.
+Using an outdated node signals a <code>treesit-node-outdated</code> error.
+</p>
+<span id="Retrieving-node-from-syntax-tree"></span><h3 class="heading">Retrieving node from syntax tree</h3>
+
+<dl class="def">
+<dt id="index-treesit_002dnode_002dat"><span class="category">Function: </span><span><strong>treesit-node-at</strong> <em>pos &amp;optional parser-or-lang named</em><a href='#index-treesit_002dnode_002dat' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function returns the <em>smallest</em> node that starts at or after
+the position <var>pos</var>. In other words, the start of the node is equal to or
+greater than <var>pos</var>.
+</p>
+<p>When <var>parser-or-lang</var> is nil, this function uses the first parser
+in <code>(treesit-parser-list)</code> in the current buffer. If
+<var>parser-or-lang</var> is a parser object, it uses that parser; if
+<var>parser-or-lang</var> is a language, it finds the first parser using
+that language in <code>(treesit-parser-list)</code> and uses that.
+</p>
+<p>If <var>named</var> is non-nil, this function looks for a named node
+only (see <a href="Language-Definitions.html#tree_002dsitter-named-node">named node</a>).
+</p>
+<p>Example:
+</p><div class="example">
+<pre class="example">;; Find the node at point in a C parser's syntax tree.
+(treesit-node-at (point) 'c)
+ </pre></div>
+</dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dnode_002don"><span class="category">Function: </span><span><strong>treesit-node-on</strong> <em>beg end &amp;optional parser-or-lang named</em><a href='#index-treesit_002dnode_002don' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function returns the <em>smallest</em> node that covers the span
+from <var>beg</var> to <var>end</var>. In other words, the start of the node is
+less than or equal to <var>beg</var>, and the end of the node is greater than or
+equal to <var>end</var>.
+</p>
+<p><em>Beware</em> that calling this function on an empty line that is not
+inside any top-level construct (function definition, etc) most
+probably will give you the root node, because the root node is the
+smallest node that covers that empty line. Most of the time, you want
+to use <code>treesit-node-at</code>.
+</p>
+<p>When <var>parser-or-lang</var> is nil, this function uses the first parser
+in <code>(treesit-parser-list)</code> in the current buffer. If
+<var>parser-or-lang</var> is a parser object, it uses that parser; if
+<var>parser-or-lang</var> is a language, it finds the first parser using
+that language in <code>(treesit-parser-list)</code> and uses that.
+</p>
+<p>If <var>named</var> is non-nil, this function looks for a named node only
+(see <a href="Language-Definitions.html#tree_002dsitter-named-node">named node</a>).
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dparser_002droot_002dnode"><span class="category">Function: </span><span><strong>treesit-parser-root-node</strong> <em>parser</em><a href='#index-treesit_002dparser_002droot_002dnode' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function returns the root node of the syntax tree generated by
+<var>parser</var>.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dbuffer_002droot_002dnode"><span class="category">Function: </span><span><strong>treesit-buffer-root-node</strong> <em>&amp;optional language</em><a href='#index-treesit_002dbuffer_002droot_002dnode' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function finds the first parser that uses <var>language</var> in
+<code>(treesit-parser-list)</code> in the current buffer, and returns the
+root node of that parser&rsquo;s syntax tree. If it cannot find an appropriate
+parser, nil is returned.
+</p></dd></dl>
+
+<p>Once we have a node, we can retrieve other nodes from it, or query for
+information about this node.
+</p>
+<span id="Retrieving-node-from-other-nodes"></span><h3 class="heading">Retrieving node from other nodes</h3>
+
+<span id="By-kinship"></span><h4 class="subheading">By kinship</h4>
+
+<dl class="def">
+<dt id="index-treesit_002dnode_002dparent"><span class="category">Function: </span><span><strong>treesit-node-parent</strong> <em>node</em><a href='#index-treesit_002dnode_002dparent' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function returns the immediate parent of <var>node</var>.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dnode_002dchild"><span class="category">Function: </span><span><strong>treesit-node-child</strong> <em>node n &amp;optional named</em><a href='#index-treesit_002dnode_002dchild' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function returns the <var>n</var>&rsquo;th child of <var>node</var>. If
+<var>named</var> is non-nil, then it only counts named nodes
+(see <a href="Language-Definitions.html#tree_002dsitter-named-node">named node</a>). For example, in a node
+that represents a string: <code>&quot;text&quot;</code>, there are three children
+nodes: the opening quote <code>&quot;</code>, the string content <code>text</code>, and
+the closing quote <code>&quot;</code>. Among these nodes, the first child is
+the opening quote <code>&quot;</code>, the first named child is the string
+content <code>text</code>.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dnode_002dchildren"><span class="category">Function: </span><span><strong>treesit-node-children</strong> <em>node &amp;optional named</em><a href='#index-treesit_002dnode_002dchildren' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function returns all of <var>node</var>&rsquo;s children in a list. If
+<var>named</var> is non-nil, then it only retrieves named nodes.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dnext_002dsibling"><span class="category">Function: </span><span><strong>treesit-next-sibling</strong> <em>node &amp;optional named</em><a href='#index-treesit_002dnext_002dsibling' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function finds the next sibling of <var>node</var>. If <var>named</var> is
+non-nil, it finds the next named sibling.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dprev_002dsibling"><span class="category">Function: </span><span><strong>treesit-prev-sibling</strong> <em>node &amp;optional named</em><a href='#index-treesit_002dprev_002dsibling' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function finds the previous sibling of <var>node</var>. If
+<var>named</var> is non-nil, it finds the previous named sibling.
+</p></dd></dl>
+
+<span id="By-field-name"></span><h4 class="subheading">By field name</h4>
+
+<p>To make the syntax tree easier to analyze, many language definitions
+assign <em>field names</em> to child nodes (see <a href="Language-Definitions.html#tree_002dsitter-node-field-name">field name</a>). For example, a <code>function_definition</code> node
+could have a <code>declarator</code> and a <code>body</code>.
+</p>
+<dl class="def">
+<dt id="index-treesit_002dchild_002dby_002dfield_002dname"><span class="category">Function: </span><span><strong>treesit-child-by-field-name</strong> <em>node field-name</em><a href='#index-treesit_002dchild_002dby_002dfield_002dname' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function finds the child of <var>node</var> that has <var>field-name</var>
+as its field name.
+</p>
+<div class="example">
+<pre class="example">;; Get the child that has &quot;body&quot; as its field name.
+(treesit-child-by-field-name node &quot;body&quot;)
+ </pre></div>
+</dd></dl>
+
+<span id="By-position"></span><h4 class="subheading">By position</h4>
+
+<dl class="def">
+<dt id="index-treesit_002dfirst_002dchild_002dfor_002dpos"><span class="category">Function: </span><span><strong>treesit-first-child-for-pos</strong> <em>node pos &amp;optional named</em><a href='#index-treesit_002dfirst_002dchild_002dfor_002dpos' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function finds the first child of <var>node</var> that extends beyond
+<var>pos</var>. &ldquo;Extend beyond&rdquo; means the end of the child node &gt;=
+<var>pos</var>. This function only looks for immediate children of
+<var>node</var>, and doesn&rsquo;t look in its grandchildren. If <var>named</var> is
+non-nil, it only looks for named children (see <a href="Language-Definitions.html#tree_002dsitter-named-node">named node</a>).
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dnode_002ddescendant_002dfor_002drange"><span class="category">Function: </span><span><strong>treesit-node-descendant-for-range</strong> <em>node beg end &amp;optional named</em><a href='#index-treesit_002dnode_002ddescendant_002dfor_002drange' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function finds the <em>smallest</em> child/grandchild... of
+<var>node</var> that spans the range from <var>beg</var> to <var>end</var>. It is
+similar to <code>treesit-node-on</code>. If <var>named</var> is non-nil, it only
+looks for named nodes.
+</p></dd></dl>
+
+<span id="Searching-for-node"></span><h3 class="heading">Searching for node</h3>
+
+<dl class="def">
+<dt id="index-treesit_002dsearch_002dsubtree"><span class="category">Function: </span><span><strong>treesit-search-subtree</strong> <em>node predicate &amp;optional all backward limit</em><a href='#index-treesit_002dsearch_002dsubtree' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function traverses the subtree of <var>node</var> (including
+<var>node</var>), and matches <var>predicate</var> with each node along the way.
+Here <var>predicate</var> is a regexp that matches (case-insensitively)
+against each node&rsquo;s type, or a function that takes a node and returns
+nil/non-nil. If a node matches, that node is returned; if no node
+ever matches, nil is returned.
+</p>
+<p>By default, this function only traverses named nodes; if <var>all</var> is
+non-nil, it traverses all nodes. If <var>backward</var> is non-nil, it
+traverses backwards. If <var>limit</var> is non-nil, it only traverses
+that number of levels down in the tree.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dsearch_002dforward"><span class="category">Function: </span><span><strong>treesit-search-forward</strong> <em>start predicate &amp;optional all backward up</em><a href='#index-treesit_002dsearch_002dforward' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function is somewhat similar to <code>treesit-search-subtree</code>.
+It also traverses the parse tree and matches each node with
+<var>predicate</var> (except for <var>start</var>), where <var>predicate</var> can be
+a (case-insensitive) regexp or a function. For a tree like the below
+where <var>start</var> is marked 1, this function traverses as numbered:
+</p>
+<div class="example">
+<pre class="example">                 o
+                 |
+        3--------4-----------8
+        |        |           |
+o--o-+--1  5--+--6    9---+-----12
+|  |    |        |    |         |
+o  o    2        7  +-+-+    +--+--+
+                    |   |    |  |  |
+                    10  11   13 14 15
+</pre></div>
+
+<p>Same as in <code>treesit-search-subtree</code>, this function only searches
+for named nodes by default. But if <var>all</var> is non-nil, it searches
+for all nodes. If <var>backward</var> is non-nil, it searches backwards.
+</p>
+<p>If <var>up</var> is non-nil, this function will only traverse to siblings
+and parents. In that case, only 1 3 4 8 would be traversed.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dsearch_002dforward_002dgoto"><span class="category">Function: </span><span><strong>treesit-search-forward-goto</strong> <em>predicate side &amp;optional all backward up</em><a href='#index-treesit_002dsearch_002dforward_002dgoto' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function jumps to the start or end of the next node in the buffer
+that matches <var>predicate</var>. Parameters <var>predicate</var>, <var>all</var>,
+<var>backward</var>, and <var>up</var> are the same as in
+<code>treesit-search-forward</code>. And <var>side</var> controls which side of
+the matched node we stop at; it can be <code>start</code> or <code>end</code>.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dinduce_002dsparse_002dtree"><span class="category">Function: </span><span><strong>treesit-induce-sparse-tree</strong> <em>root predicate &amp;optional process-fn limit</em><a href='#index-treesit_002dinduce_002dsparse_002dtree' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function creates a sparse tree from <var>root</var>&rsquo;s subtree.
+</p>
+<p>Basically, it takes the subtree under <var>root</var>, and combs it so only
+the nodes that match <var>predicate</var> are left, like picking out grapes
+on the vine. Like previous functions, <var>predicate</var> can be a regexp
+string that matches against each node&rsquo;s type case-insensitively, or a
+function that takes a node and returns nil/non-nil.
+</p>
+<p>For example, for a subtree on the left that consists of both numbers
+and letters, if <var>predicate</var> is &ldquo;letter only&rdquo;, the returned tree
+is the one on the right.
+</p>
+<div class="example">
+<pre class="example">    a                 a                    a
+    |                 |                    |
++---+---+         +---+---+            +---+---+
+|   |   |         |   |   |            |   |   |
+b   1   2         b   |   |            b   c   d
+    |   |     =&gt;      |   |      =&gt;        |
+    c   +--+          c   +                e
+    |   |  |          |   |
+ +--+   d  4       +--+   d
+ |  |              |
+ e  5              e
+</pre></div>
+
+<p>If <var>process-fn</var> is non-nil, instead of returning the matched
+nodes, this function passes each node to <var>process-fn</var> and uses the
+returned value instead. If non-nil, <var>limit</var> is the number of
+levels to go down from <var>root</var>.
+</p>
+<p>Each node in the returned tree looks like <code>(<var>tree-sitter
+node</var> . (<var>child</var> ...))</code>. The <var>tree-sitter node</var> of the root
+of this tree will be nil if <var>root</var> doesn&rsquo;t match <var>predicate</var>. If
+no node matches <var>predicate</var>, return nil.
+</p></dd></dl>
+
+<span id="More-convenient-functions"></span><h3 class="heading">More convenient functions</h3>
+
+<dl class="def">
+<dt id="index-treesit_002dfilter_002dchild"><span class="category">Function: </span><span><strong>treesit-filter-child</strong> <em>node pred &amp;optional named</em><a href='#index-treesit_002dfilter_002dchild' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function finds immediate children of <var>node</var> that satisfy
+<var>pred</var>.
+</p>
+<p>Function <var>pred</var> takes the child node as the argument and should
+return non-nil to indicate that the child should be kept. If <var>named</var>
+is non-nil, this function only searches for named nodes.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dparent_002duntil"><span class="category">Function: </span><span><strong>treesit-parent-until</strong> <em>node pred</em><a href='#index-treesit_002dparent_002duntil' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function repeatedly finds the parent of <var>node</var>, and returns
+the parent if it satisfies <var>pred</var> (which takes the parent as the
+argument). If no parent satisfies <var>pred</var>, this function returns
+nil.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dparent_002dwhile"><span class="category">Function: </span><span><strong>treesit-parent-while</strong> <em>node pred</em><a href='#index-treesit_002dparent_002dwhile' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function repeatedly finds the parent of <var>node</var>, and keeps
+doing so as long as the parent satisfies <var>pred</var> (which takes the
+parent as the single argument). I.e., this function returns the
+farthest parent that still satisfies <var>pred</var>.
+</p></dd></dl>
+
+</div>
+<hr>
+<div class="header">
+<p>
+Next: <a href="Accessing-Node.html">Accessing Node Information</a>, Previous: <a href="Using-Parser.html">Using Tree-sitter Parser</a>, Up: <a href="Parsing-Program-Source.html">Parsing Program Source</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p>
+</div>
+
+
+
+</body>
+</html>
diff --git a/admin/notes/tree-sitter/html-manual/Tree_002dsitter-C-API.html b/admin/notes/tree-sitter/html-manual/Tree_002dsitter-C-API.html
new file mode 100644
index 00000000000..77cea6b3f95
--- /dev/null
+++ b/admin/notes/tree-sitter/html-manual/Tree_002dsitter-C-API.html
@@ -0,0 +1,212 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 6.8, https://www.gnu.org/software/texinfo/ -->
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<!-- This is the GNU Emacs Lisp Reference Manual
+corresponding to Emacs version 29.0.50.
+
+Copyright © 1990-1996, 1998-2022 Free Software Foundation,
+Inc.
+
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.3 or
+any later version published by the Free Software Foundation; with the
+Invariant Sections being "GNU General Public License," with the
+Front-Cover Texts being "A GNU Manual," and with the Back-Cover
+Texts as in (a) below. A copy of the license is included in the
+section entitled "GNU Free Documentation License."
+
+(a) The FSF's Back-Cover Text is: "You have the freedom to copy and
+modify this GNU manual. Buying copies from the FSF supports it in
+developing GNU and promoting software freedom." -->
+<title>Tree-sitter C API (GNU Emacs Lisp Reference Manual)</title>
+
+<meta name="description" content="Tree-sitter C API (GNU Emacs Lisp Reference Manual)">
+<meta name="keywords" content="Tree-sitter C API (GNU Emacs Lisp Reference Manual)">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+
+<link href="index.html" rel="start" title="Top">
+<link href="Index.html" rel="index" title="Index">
+<link href="index.html#SEC_Contents" rel="contents" title="Table of Contents">
+<link href="Parsing-Program-Source.html" rel="up" title="Parsing Program Source">
+<link href="Multiple-Languages.html" rel="prev" title="Multiple Languages">
+<style type="text/css">
+<!--
+a.copiable-anchor {visibility: hidden; text-decoration: none; line-height: 0em}
+a.summary-letter {text-decoration: none}
+blockquote.indentedblock {margin-right: 0em}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+kbd {font-style: oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+span.nolinebreak {white-space: nowrap}
+span.roman {font-family: initial; font-weight: normal}
+span.sansserif {font-family: sans-serif; font-weight: normal}
+span:hover a.copiable-anchor {visibility: visible}
+ul.no-bullet {list-style: none}
+-->
+</style>
+<link rel="stylesheet" type="text/css" href="./manual.css">
+
+
+</head>
+
+<body lang="en">
+<div class="section" id="Tree_002dsitter-C-API">
+<div class="header">
+<p>
+Previous: <a href="Multiple-Languages.html" accesskey="p" rel="prev">Parsing Text in Multiple Languages</a>, Up: <a href="Parsing-Program-Source.html" accesskey="u" rel="up">Parsing Program Source</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p>
+</div>
+<hr>
+<span id="Tree_002dsitter-C-API-Correspondence"></span><h3 class="section">37.7 Tree-sitter C API Correspondence</h3>
+
+<p>Emacs&rsquo; tree-sitter integration doesn&rsquo;t expose every feature
+tree-sitter&rsquo;s C API provides. Missing features include:
+</p>
+<ul>
+<li> Creating a tree cursor and navigating the syntax tree with it.
+</li><li> Setting timeout and cancellation flag for a parser.
+</li><li> Setting the logger for a parser.
+</li><li> Printing a DOT graph of the syntax tree to a file.
+</li><li> Copying and modifying a syntax tree. (Emacs doesn&rsquo;t expose a tree
+object.)
+</li><li> Using (row, column) coordinates as position.
+</li><li> Updating a node with changes. (In Emacs, retrieve a new node instead
+of updating the existing one.)
+</li><li> Querying statistics of a language definition.
+</li></ul>
+
+<p>In addition, Emacs makes some changes to the C API to make the API more
+convenient and idiomatic:
+</p>
+<ul>
+<li> Instead of using byte positions, the ELisp API uses character
+positions.
+</li><li> Null nodes are converted to nil.
+</li></ul>
+
+<p>Below is the correspondence between all C API functions and their
+ELisp counterparts. Sometimes one ELisp function corresponds to
+multiple C functions, and many C functions don&rsquo;t have an ELisp
+counterpart.
+</p>
+<div class="example">
+<pre class="example">ts_parser_new treesit-parser-create
+ts_parser_delete
+ts_parser_set_language
+ts_parser_language treesit-parser-language
+ts_parser_set_included_ranges treesit-parser-set-included-ranges
+ts_parser_included_ranges treesit-parser-included-ranges
+ts_parser_parse
+ts_parser_parse_string treesit-parse-string
+ts_parser_parse_string_encoding
+ts_parser_reset
+ts_parser_set_timeout_micros
+ts_parser_timeout_micros
+ts_parser_set_cancellation_flag
+ts_parser_cancellation_flag
+ts_parser_set_logger
+ts_parser_logger
+ts_parser_print_dot_graphs
+ts_tree_copy
+ts_tree_delete
+ts_tree_root_node
+ts_tree_language
+ts_tree_edit
+ts_tree_get_changed_ranges
+ts_tree_print_dot_graph
+ts_node_type treesit-node-type
+ts_node_symbol
+ts_node_start_byte treesit-node-start
+ts_node_start_point
+ts_node_end_byte treesit-node-end
+ts_node_end_point
+ts_node_string treesit-node-string
+ts_node_is_null
+ts_node_is_named treesit-node-check
+ts_node_is_missing treesit-node-check
+ts_node_is_extra treesit-node-check
+ts_node_has_changes treesit-node-check
+ts_node_has_error treesit-node-check
+ts_node_parent treesit-node-parent
+ts_node_child treesit-node-child
+ts_node_field_name_for_child treesit-node-field-name-for-child
+ts_node_child_count treesit-node-child-count
+ts_node_named_child treesit-node-child
+ts_node_named_child_count treesit-node-child-count
+ts_node_child_by_field_name             treesit-child-by-field-name
+ts_node_child_by_field_id
+ts_node_next_sibling treesit-next-sibling
+ts_node_prev_sibling treesit-prev-sibling
+ts_node_next_named_sibling treesit-next-sibling
+ts_node_prev_named_sibling treesit-prev-sibling
+ts_node_first_child_for_byte treesit-first-child-for-pos
+ts_node_first_named_child_for_byte treesit-first-child-for-pos
+ts_node_descendant_for_byte_range       treesit-node-descendant-for-range
+ts_node_descendant_for_point_range
+ts_node_named_descendant_for_byte_range treesit-node-descendant-for-range
+ts_node_named_descendant_for_point_range
+ts_node_edit
+ts_node_eq treesit-node-eq
+ts_tree_cursor_new
+ts_tree_cursor_delete
+ts_tree_cursor_reset
+ts_tree_cursor_current_node
+ts_tree_cursor_current_field_name
+ts_tree_cursor_current_field_id
+ts_tree_cursor_goto_parent
+ts_tree_cursor_goto_next_sibling
+ts_tree_cursor_goto_first_child
+ts_tree_cursor_goto_first_child_for_byte
+ts_tree_cursor_goto_first_child_for_point
+ts_tree_cursor_copy
+ts_query_new
+ts_query_delete
+ts_query_pattern_count
+ts_query_capture_count
+ts_query_string_count
+ts_query_start_byte_for_pattern
+ts_query_predicates_for_pattern
+ts_query_step_is_definite
+ts_query_capture_name_for_id
+ts_query_string_value_for_id
+ts_query_disable_capture
+ts_query_disable_pattern
+ts_query_cursor_new
+ts_query_cursor_delete
+ts_query_cursor_exec treesit-query-capture
+ts_query_cursor_did_exceed_match_limit
+ts_query_cursor_match_limit
+ts_query_cursor_set_match_limit
+ts_query_cursor_set_byte_range
+ts_query_cursor_set_point_range
+ts_query_cursor_next_match
+ts_query_cursor_remove_match
+ts_query_cursor_next_capture
+ts_language_symbol_count
+ts_language_symbol_name
+ts_language_symbol_for_name
+ts_language_field_count
+ts_language_field_name_for_id
+ts_language_field_id_for_name
+ts_language_symbol_type
+ts_language_version
+</pre></div>
+</div>
+<hr>
+<div class="header">
+<p>
+Previous: <a href="Multiple-Languages.html">Parsing Text in Multiple Languages</a>, Up: <a href="Parsing-Program-Source.html">Parsing Program Source</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p>
+</div>
+
+
+
+</body>
+</html>
diff --git a/admin/notes/tree-sitter/html-manual/Using-Parser.html b/admin/notes/tree-sitter/html-manual/Using-Parser.html
new file mode 100644
index 00000000000..438e3858f1b
--- /dev/null
+++ b/admin/notes/tree-sitter/html-manual/Using-Parser.html
@@ -0,0 +1,186 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 6.8, https://www.gnu.org/software/texinfo/ -->
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<!-- This is the GNU Emacs Lisp Reference Manual
+corresponding to Emacs version 29.0.50.
+
+Copyright © 1990-1996, 1998-2022 Free Software Foundation,
+Inc.
+
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.3 or
+any later version published by the Free Software Foundation; with the
+Invariant Sections being "GNU General Public License," with the
+Front-Cover Texts being "A GNU Manual," and with the Back-Cover
+Texts as in (a) below. A copy of the license is included in the
+section entitled "GNU Free Documentation License."
+
+(a) The FSF's Back-Cover Text is: "You have the freedom to copy and
+modify this GNU manual. Buying copies from the FSF supports it in
+developing GNU and promoting software freedom." -->
+<title>Using Parser (GNU Emacs Lisp Reference Manual)</title>
+
+<meta name="description" content="Using Parser (GNU Emacs Lisp Reference Manual)">
+<meta name="keywords" content="Using Parser (GNU Emacs Lisp Reference Manual)">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+
+<link href="index.html" rel="start" title="Top">
+<link href="Index.html" rel="index" title="Index">
+<link href="index.html#SEC_Contents" rel="contents" title="Table of Contents">
+<link href="Parsing-Program-Source.html" rel="up" title="Parsing Program Source">
+<link href="Retrieving-Node.html" rel="next" title="Retrieving Node">
+<link href="Language-Definitions.html" rel="prev" title="Language Definitions">
+<style type="text/css">
+<!--
+a.copiable-anchor {visibility: hidden; text-decoration: none; line-height: 0em}
+a.summary-letter {text-decoration: none}
+blockquote.indentedblock {margin-right: 0em}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+kbd {font-style: oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+span.nolinebreak {white-space: nowrap}
+span.roman {font-family: initial; font-weight: normal}
+span.sansserif {font-family: sans-serif; font-weight: normal}
+span:hover a.copiable-anchor {visibility: visible}
+ul.no-bullet {list-style: none}
+-->
+</style>
+<link rel="stylesheet" type="text/css" href="./manual.css">
+
+
+</head>
+
+<body lang="en">
+<div class="section" id="Using-Parser">
+<div class="header">
+<p>
+Next: <a href="Retrieving-Node.html" accesskey="n" rel="next">Retrieving Node</a>, Previous: <a href="Language-Definitions.html" accesskey="p" rel="prev">Tree-sitter Language Definitions</a>, Up: <a href="Parsing-Program-Source.html" accesskey="u" rel="up">Parsing Program Source</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p>
+</div>
+<hr>
+<span id="Using-Tree_002dsitter-Parser"></span><h3 class="section">37.2 Using Tree-sitter Parser</h3>
+<span id="index-Tree_002dsitter-parser"></span>
+
+<p>This section describes how to create and configure a tree-sitter
+parser. In Emacs, each tree-sitter parser is associated with a
+buffer. As we edit the buffer, the associated parser and the syntax
+tree are automatically kept up-to-date.
+</p>
+<dl class="def">
+<dt id="index-treesit_002dmax_002dbuffer_002dsize"><span class="category">Variable: </span><span><strong>treesit-max-buffer-size</strong><a href='#index-treesit_002dmax_002dbuffer_002dsize' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This variable contains the maximum size of buffers in which
+tree-sitter can be activated. Major modes should check this value
+when deciding whether to enable tree-sitter features.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dcan_002denable_002dp"><span class="category">Function: </span><span><strong>treesit-can-enable-p</strong><a href='#index-treesit_002dcan_002denable_002dp' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function checks whether the current buffer is suitable for
+activating tree-sitter features. It basically checks
+<code>treesit-available-p</code> and <code>treesit-max-buffer-size</code>.
+</p></dd></dl>
+
+<span id="index-Creating-tree_002dsitter-parsers"></span>
+<dl class="def">
+<dt id="index-treesit_002dparser_002dcreate"><span class="category">Function: </span><span><strong>treesit-parser-create</strong> <em>language &amp;optional buffer no-reuse</em><a href='#index-treesit_002dparser_002dcreate' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>To create a parser, we provide a <var>buffer</var> and the <var>language</var>
+to use (see <a href="Language-Definitions.html">Tree-sitter Language Definitions</a>). If <var>buffer</var> is nil, the
+current buffer is used.
+</p>
+<p>By default, this function reuses a parser if one already exists for
+<var>language</var> in <var>buffer</var>; if <var>no-reuse</var> is non-nil, this
+function always creates a new parser.
+</p></dd></dl>
+
+<p>Given a parser, we can query information about it:
+</p>
+<dl class="def">
+<dt id="index-treesit_002dparser_002dbuffer"><span class="category">Function: </span><span><strong>treesit-parser-buffer</strong> <em>parser</em><a href='#index-treesit_002dparser_002dbuffer' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>Returns the buffer associated with <var>parser</var>.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dparser_002dlanguage"><span class="category">Function: </span><span><strong>treesit-parser-language</strong> <em>parser</em><a href='#index-treesit_002dparser_002dlanguage' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>Returns the language that <var>parser</var> uses.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dparser_002dp"><span class="category">Function: </span><span><strong>treesit-parser-p</strong> <em>object</em><a href='#index-treesit_002dparser_002dp' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>Checks if <var>object</var> is a tree-sitter parser. Return non-nil if it
+is, return nil otherwise.
+</p></dd></dl>
+
+<p>There is no need to explicitly parse a buffer, because parsing is done
+automatically and lazily. A parser only parses when we query for a
+node in its syntax tree. Therefore, when a parser is first created,
+it doesn&rsquo;t parse the buffer; it waits until we query for a node for
+the first time. Similarly, when some change is made in the buffer, a
+parser doesn&rsquo;t re-parse immediately.
+</p>
+<span id="index-treesit_002dbuffer_002dtoo_002dlarge"></span>
+<p>When a parser does parse, it checks the size of the buffer.
+Tree-sitter can only handle buffers no larger than about 4GB. If the
+size exceeds that, Emacs signals <code>treesit-buffer-too-large</code>
+with signal data being the buffer size.
+</p>
+<p>Once a parser is created, Emacs automatically adds it to the
+internal parser list. Every time a change is made to the buffer,
+Emacs updates parsers in this list so they can update their syntax
+tree incrementally.
+</p>
+<dl class="def">
+<dt id="index-treesit_002dparser_002dlist"><span class="category">Function: </span><span><strong>treesit-parser-list</strong> <em>&amp;optional buffer</em><a href='#index-treesit_002dparser_002dlist' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function returns the parser list of <var>buffer</var>. And
+<var>buffer</var> defaults to the current buffer.
+</p></dd></dl>
+
+<dl class="def">
+<dt id="index-treesit_002dparser_002ddelete"><span class="category">Function: </span><span><strong>treesit-parser-delete</strong> <em>parser</em><a href='#index-treesit_002dparser_002ddelete' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>This function deletes <var>parser</var>.
+</p></dd></dl>
+
+<span id="index-tree_002dsitter-narrowing"></span>
+<span id="tree_002dsitter-narrowing"></span><p>Normally, a parser &ldquo;sees&rdquo; the whole
+buffer, but when the buffer is narrowed (see <a href="Narrowing.html">Narrowing</a>), the
+parser will only see the visible region. As far as the parser can
+tell, the hidden region is deleted. And when the buffer is later
+widened, the parser thinks text is inserted in the beginning and in
+the end. Although parsers respect narrowing, narrowing shouldn&rsquo;t be
+the means to handle a multi-language buffer; instead, set the ranges in
+which a parser should operate. See <a href="Multiple-Languages.html">Parsing Text in Multiple Languages</a>.
+</p>
+<p>Because a parser parses lazily, when we narrow the buffer, the parser
+is not affected immediately; as long as we don&rsquo;t query for a node
+while the buffer is narrowed, the parser is oblivious of the
+narrowing.
+</p>
+<span id="index-tree_002dsitter-parse-string"></span>
+<dl class="def">
+<dt id="index-treesit_002dparse_002dstring"><span class="category">Function: </span><span><strong>treesit-parse-string</strong> <em>string language</em><a href='#index-treesit_002dparse_002dstring' class='copiable-anchor'> &para;</a></span></dt>
+<dd><p>Besides creating a parser for a buffer, we can also just parse a
+string. Unlike a buffer, parsing a string is a one-time deal, and
+there is no way to update the result.
+</p>
+<p>This function parses <var>string</var> with <var>language</var>, and returns the
+root node of the generated syntax tree.
+</p></dd></dl>
+
+</div>
+<hr>
+<div class="header">
+<p>
+Next: <a href="Retrieving-Node.html">Retrieving Node</a>, Previous: <a href="Language-Definitions.html">Tree-sitter Language Definitions</a>, Up: <a href="Parsing-Program-Source.html">Parsing Program Source</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Index.html" title="Index" rel="index">Index</a>]</p>
+</div>
+
+
+
+</body>
+</html>
diff --git a/admin/notes/tree-sitter/html-manual/build-manual.sh b/admin/notes/tree-sitter/html-manual/build-manual.sh
new file mode 100755
index 00000000000..adde3f2a2af
--- /dev/null
+++ b/admin/notes/tree-sitter/html-manual/build-manual.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+MANUAL_DIR="../../../doc/lispref"
+THIS_DIR=$(pwd)
+
+echo "Build manual"
+cd "${MANUAL_DIR}"
+make elisp.html HTML_OPTS="--html --css-ref=./manual.css"
+
+cd "${THIS_DIR}"
+
+echo "Copy manual"
+cp -f "${MANUAL_DIR}/elisp.html/Parsing-Program-Source.html" .
+cp -f "${MANUAL_DIR}/elisp.html/Language-Definitions.html" .
+cp -f "${MANUAL_DIR}/elisp.html/Using-Parser.html" .
+cp -f "${MANUAL_DIR}/elisp.html/Retrieving-Node.html" .
+cp -f "${MANUAL_DIR}/elisp.html/Accessing-Node.html" .
+cp -f "${MANUAL_DIR}/elisp.html/Pattern-Matching.html" .
+cp -f "${MANUAL_DIR}/elisp.html/Multiple-Languages.html" .
+cp -f "${MANUAL_DIR}/elisp.html/Tree_002dsitter-C-API.html" .
+
+cp -f "${MANUAL_DIR}/elisp.html/Parser_002dbased-Font-Lock.html" .
+cp -f "${MANUAL_DIR}/elisp.html/Parser_002dbased-Indentation.html" .
diff --git a/admin/notes/tree-sitter/html-manual/manual.css b/admin/notes/tree-sitter/html-manual/manual.css
new file mode 100644
index 00000000000..5a6790a3458
--- /dev/null
+++ b/admin/notes/tree-sitter/html-manual/manual.css
@@ -0,0 +1,374 @@
+/* Style-sheet to use for Emacs manuals */
+
+/* Copyright (C) 2013-2014 Free Software Foundation, Inc.
+
+Copying and distribution of this file, with or without modification,
+are permitted in any medium without royalty provided the copyright
+notice and this notice are preserved. This file is offered as-is,
+without any warranty.
+*/
+
+/* style.css begins here */
+
+/* This stylesheet is used by manuals and a few older resources. */
+
+/* reset.css begins here */
+
+/*
+Software License Agreement (BSD License)
+
+Copyright (c) 2006, Yahoo! Inc.
+All rights reserved.
+
+Redistribution and use of this software in source and
+binary forms, with or without modification, are permitted
+provided that the following conditions are met:
+
+* Redistributions of source code must retain the above
+copyright notice, this list of conditions and the
+following disclaimer.
+
+* Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the
+following disclaimer in the documentation and/or other
+materials provided with the distribution.
+
+* Neither the name of Yahoo! Inc. nor the names of its
+contributors may be used to endorse or promote products
+derived from this software without specific prior
+written permission of Yahoo! Inc.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+*/
+
+html {
+ color: #000;
+ background: #FFF;
+}
+
+body, div, dl, dt, dd, ul, ol, li, h1, h2, h3, h4,
+h5, h6, pre, code, form, fieldset, legend, input,
+button, textarea, p, blockquote, th, td {
+ margin: 0;
+ padding: 0;
+}
+
+table {
+ border-collapse: collapse;
+ border-spacing: 0;
+}
+
+fieldset, img {
+ border: 0;
+}
+
+address, caption, cite, code, dfn, em, strong,
+th, var, optgroup {
+ font-style: inherit;
+ font-weight: inherit;
+}
+
+del, ins {
+ text-decoration: none;
+}
+
+li {
+ list-style:none;
+}
+
+caption, th {
+ text-align: left;
+}
+
+h1, h2, h3, h4, h5, h6 {
+ font-size: 100%;
+ font-weight: normal;
+}
+
+q:before, q:after {
+ content:'';
+}
+
+abbr, acronym {
+ border: 0;
+ font-variant: normal;
+}
+
+sup {
+ vertical-align: baseline;
+}
+sub {
+ vertical-align: baseline;
+}
+
+legend {
+ color: #000;
+}
+
+input, button, textarea, select, optgroup, option {
+ font-family: inherit;
+ font-size: inherit;
+ font-style: inherit;
+ font-weight: inherit;
+}
+
+input, button, textarea, select {
+ *font-size: 100%;
+}
+
+
+/* reset.css ends here */
+
+/*** PAGE LAYOUT ***/
+
+html, body {
+ font-size: 1em;
+ text-align: left;
+ text-decoration: none;
+}
+html { background-color: #e7e7e7; }
+
+body {
+ max-width: 74.92em;
+ margin: 0 auto;
+ padding: .5em 1em 1em 1em;
+ background-color: white;
+ border: .1em solid #c0c0c0;
+}
+
+
+/*** BASIC ELEMENTS ***/
+
+/* Size and positioning */
+
+p, pre, li, dt, dd, table, code, address { line-height: 1.3em; }
+
+h1 { font-size: 2em; margin: 1em 0 }
+h2 { font-size: 1.50em; margin: 1.0em 0 0.87em 0; }
+h3 { font-size: 1.30em; margin: 1.0em 0 0.87em 0; }
+h4 { font-size: 1.13em; margin: 1.0em 0 0.88em 0; }
+h5 { font-size: 1.00em; margin: 1.0em 0 1.00em 0; }
+
+p, pre { margin: 1em 0; }
+pre { overflow: auto; padding-bottom: .3em; }
+
+ul, ol, blockquote { margin-left: 1.5%; margin-right: 1.5%; }
+hr { margin: 1em 0; }
+/* Lists of underlined links are difficult to read. The top margin
+ gives a little more spacing between entries. */
+ul li { margin: .5em 1em; }
+ol li { margin: 1em; }
+ol ul li { margin: .5em 1em; }
+ul li p, ul ul li { margin-top: .3em; margin-bottom: .3em; }
+ul ul, ol ul { margin-top: 0; margin-bottom: 0; }
+
+/* Separate description lists from preceding text */
+dl { margin: 1em 0 0 0; }
+/* separate the "term" from subsequent "description" */
+dt { margin: .5em 0; }
+/* separate the "description" from subsequent list item
+ when the final <dd> child is an anonymous box */
+dd { margin: .5em 3% 1em 3%; }
+/* separate anonymous box (used to be the first element in <dd>)
+ from subsequent <p> */
+dd p { margin: .5em 0; }
+
+table {
+ display: block; overflow: auto;
+ margin-top: 1.5em; margin-bottom: 1.5em;
+}
+th { padding: .3em .5em; text-align: center; }
+td { padding: .2em .5em; }
+
+address { margin-bottom: 1em; }
+caption { margin-bottom: .5em; text-align: center; }
+sup { vertical-align: super; }
+sub { vertical-align: sub; }
+
+/* Style */
+
+h1, h2, h3, h4, h5, h6, strong, dt, th { font-weight: bold; }
+
+/* The default color (black) is too dark for large text in
+ bold font. */
+h1, h2, h3, h4 { color: #333; }
+h5, h6, dt { color: #222; }
+
+a[href] { color: #005090; }
+a[href]:visited { color: #100070; }
+a[href]:active, a[href]:hover {
+ color: #100070;
+ text-decoration: none;
+}
+
+h1 a[href]:visited, h2 a[href]:visited, h3 a[href]:visited,
+h4 a[href]:visited { color: #005090; }
+h1 a[href]:hover, h2 a[href]:hover, h3 a[href]:hover,
+h4 a[href]:hover { color: #100070; }
+
+ol { list-style: decimal outside;}
+ul { list-style: square outside; }
+ul ul, ol ul { list-style: circle; }
+li { list-style: inherit; }
+
+hr { background-color: #ede6d5; }
+table { border: 0; }
+
+abbr,acronym {
+ border-bottom:1px dotted #000;
+ text-decoration: none;
+ cursor:help;
+}
+del { text-decoration: line-through; }
+em { font-style: italic; }
+small { font-size: .9em; }
+
+img { max-width: 100%}
+
+
+/*** SIMPLE CLASSES ***/
+
+.center, .c { text-align: center; }
+.nocenter{ text-align: left; }
+
+.underline { text-decoration: underline; }
+.nounderline { text-decoration: none; }
+
+.no-bullet { list-style: none; }
+.inline-list li { display: inline }
+
+.netscape4, .no-display { display: none; }
+
+
+/*** MANUAL PAGES ***/
+
+/* This makes the very long tables of contents in Gnulib and other
+ manuals easier to read. */
+.contents ul, .shortcontents ul { font-weight: bold; }
+.contents ul ul, .shortcontents ul ul { font-weight: normal; }
+.contents ul { list-style: none; }
+
+/* For colored navigation bars (Emacs manual): make the bar extend
+ across the whole width of the page and give it a decent height. */
+.header, .node { margin: 0 -1em; padding: 0 1em; }
+.header p, .node p { line-height: 2em; }
+
+/* For navigation links */
+.node a, .header a { display: inline-block; line-height: 2em; }
+.node a:hover, .header a:hover { background: #f2efe4; }
+
+/* Inserts */
+table.cartouche td { padding: 1.5em; }
+
+div.display, div.lisp, div.smalldisplay,
+div.smallexample, div.smalllisp { margin-left: 3%; }
+
+div.example { padding: .8em 1.2em .4em; }
+pre.example { padding: .8em 1.2em; }
+div.example, pre.example {
+ margin: 1em 0 1em 3% ;
+ -webkit-border-radius: .3em;
+ -moz-border-radius: .3em;
+ border-radius: .3em;
+ border: 1px solid #d4cbb6;
+ background-color: #f2efe4;
+}
+div.example > pre.example {
+ padding: 0 0 .4em;
+ margin: 0;
+ border: none;
+}
+
+pre.menu-comment { padding-top: 1.3em; margin: 0; }
+
+
+/*** FOR WIDE SCREENS ***/
+
+@media (min-width: 40em) {
+ body { padding: .5em 3em 1em 3em; }
+ div.header, div.node { margin: 0 -3em; padding: 0 3em; }
+}
+
+/* style.css ends here */
+
+/* makeinfo convert @deffn and similar functions to something inside
+ <blockquote>. style.css uses italic for blockquote. This looks poor
+ in the Emacs manuals, which make extensive use of @defun (etc).
+ In particular, references to function arguments appear as <var>
+ inside <blockquote>. Since <var> is also italic, it makes it
+ impossible to distinguish variables. We could change <var> to
+ e.g. bold-italic, or normal, or a different color, but that does
+ not look as good IMO. So we just override blockquote to be non-italic.
+ */
+blockquote { font-style: normal; }
+
+var { font-style: italic; }
+
+div.header {
+ background-color: #DDDDFF;
+ padding-top: 0.2em;
+}
+
+
+/*** Customization ***/
+
+body {
+ font-family: Charter, serif;
+ font-size: 14pt;
+ line-height: 1.4;
+ background-color: #fefefc;
+ color: #202010;
+}
+
+pre.menu-comment {
+ font-family: Charter, serif;
+ font-size: 14pt;
+}
+
+body > *, body > div.display, body > div.lisp, body > div.smalldisplay,
+body > div.example, body > div.smallexample, body > div.smalllisp {
+ width: 700px;
+ margin-left: auto;
+ margin-right: auto;
+}
+
+div.header {
+ width: 100%;
+ min-height: 3em;
+ font-size: 13pt;
+}
+
+/* Documentation block for functions and variables. Make them
+ narrower. */
+dd {
+ margin: .5em 6% 1em 6%
+}
+
+code, pre, kbd, samp, tt {
+ font-size: 12pt;
+ font-family: monospace;
+}
+
+/* In each node we have index table to all sub-nodes. Make more space
+ for the first column, which is the name to each sub-node. */
+table.menu tbody tr td:nth-child(1) {
+ white-space: nowrap;
+}
+
+div.header p {
+ text-align: center;
+ margin: 0.5em auto 0.5em auto;
+}
diff --git a/admin/notes/tree-sitter/starter-guide b/admin/notes/tree-sitter/starter-guide
new file mode 100644
index 00000000000..6cf8cf8a236
--- /dev/null
+++ b/admin/notes/tree-sitter/starter-guide
@@ -0,0 +1,442 @@
+STARTER GUIDE ON WRITING MAJOR MODE WITH TREE-SITTER -*- org -*-
+
+This document guides you on adding tree-sitter support to a major
+mode.
+
+TOC:
+
+- Building Emacs with tree-sitter
+- Install language definitions
+- Setup
+- Font-lock
+- Indent
+- Imenu
+- Navigation
+- Which-func
+- More features?
+- Common tasks (code snippets)
+- Manual
+
+* Building Emacs with tree-sitter
+
+You can either install tree-sitter by your package manager, or from
+source:
+
+ git clone https://github.com/tree-sitter/tree-sitter.git
+ cd tree-sitter
+ make
+ make install
+
+Then pull the tree-sitter branch (or the master branch, if it has
+merged) and rebuild Emacs.
+
+* Install language definitions
+
+Tree-sitter by itself doesn’t know how to parse any particular
+language. We need to install language definitions (or “grammars”) for
+a language to be able to parse it. There are a couple of ways to get
+them.
+
+You can use this script that I put together here:
+
+ https://github.com/casouri/tree-sitter-module
+
+You can also find it under this directory in /build-module.
+
+This script automatically pulls and builds language definitions for C,
+C++, Rust, JSON, Go, HTML, Javascript, CSS, Python, Typescript,
+and C#. Better yet, I pre-built these language definitions for
+GNU/Linux and macOS, they can be downloaded here:
+
+ https://github.com/casouri/tree-sitter-module/releases/tag/v2.1
+
+To build them yourself, run
+
+ git clone https://github.com/casouri/tree-sitter-module.git
+ cd tree-sitter-module
+ ./batch.sh
+
+and language definitions will be in the /dist directory. You can
+either copy them to standard dynamic library locations of your system,
+eg, /usr/local/lib, or leave them in /dist and later tell Emacs where
+to find language definitions by setting ‘treesit-extra-load-path’.
+
+Language definition sources can be found on GitHub under
+tree-sitter/xxx, like tree-sitter/tree-sitter-python. The tree-sitter
+organization has all the "official" language definitions:
+
+ https://github.com/tree-sitter
+
+* Setting up for adding major mode features
+
+Start Emacs, and load tree-sitter with
+
+ (require 'treesit)
+
+Now check if Emacs is built with tree-sitter library
+
+ (treesit-available-p)
+
+For your major mode, first create a tree-sitter switch:
+
+#+begin_src elisp
+(defcustom python-use-tree-sitter nil
+ "If non-nil, `python-mode' tries to use tree-sitter.
+Currently `python-mode' can utilize tree-sitter for font-locking,
+imenu, and movement functions."
+ :type 'boolean)
+#+end_src
+
+Then in other places, we decide on whether to enable tree-sitter by
+
+#+begin_src elisp
+(and python-use-tree-sitter
+ (treesit-can-enable-p))
+#+end_src
+
+* Font-lock
+
+Tree-sitter works like this: You provide a query made of patterns and
+capture names, tree-sitter finds the nodes that match these patterns,
+tags the corresponding capture names onto the nodes and returns them to
+you. The query function returns a list of (capture-name . node). For
+font-lock, we use face names as capture names, and each captured node
+is fontified with the face named by its capture name. The capture name
+could also be a function, in which case (START END NODE) is passed to
+the function for font-lock. START and END are the start and end of the
+captured NODE.
+
+** Query syntax
+
+There are two types of nodes, named, like (identifier),
+(function_definition), and anonymous, like "return", "def", "(",
+"}". Parent-child relationship is expressed as
+
+ (parent (child) (child) (child (grand_child)))
+
+Eg, an argument list (1, "3", 1) could be:
+
+ (argument_list "(" (number) (string) (number) ")")
+
+Children could have field names in their parent:
+
+ (function_definition name: (identifier) type: (identifier))
+
+Match any of the list:
+
+ ["true" "false" "none"]
+
+Capture names can come after any node in the pattern:
+
+ (parent (child) @child) @parent
+
+The query above captures both parent and child.
+
+ ["return" "continue" "break"] @keyword
+
+The query above captures all the keywords with capture name
+"keyword".
+
+These are the common syntax, see all of them in the manual
+("Parsing Program Source" section).
+
+** Query references
+
+But how does one come up with the queries? Take Python for an
+example: open any Python source file and evaluate
+
+ (treesit-parser-create 'python)
+
+so there is a parser available, then enable ‘treesit-inspect-mode’.
+Now you should see information of the node under point in
+mode-line. Move around and you should be able to get a good
+picture. Besides this, you can consult the grammar of the language
+definition. For example, Python’s grammar file is at
+
+ https://github.com/tree-sitter/tree-sitter-python/blob/master/grammar.js
+
+Neovim also has a bunch of queries to reference:
+
+ https://github.com/nvim-treesitter/nvim-treesitter/tree/master/queries
+
+The manual explains how to read grammar files in the bottom of section
+"Tree-sitter Language Definitions".
+
+** Debugging queries
+
+If your query has problems, it usually cannot compile. In that case
+use ‘treesit-query-validate’ to debug the query. It will pop a buffer
+containing the query (in text format) and mark the offending part in
+red.
+
+** Code
+
+To enable tree-sitter font-lock, set ‘treesit-font-lock-settings’
+buffer-locally and call ‘treesit-font-lock-enable’. For example, see
+‘python--treesit-settings’ in python.el. Below I paste a snippet of
+it.
+
+Note that like the current font-lock, if the to-be-fontified region
+already has a face (ie, an earlier match fontified part/all of the
+region), the new face is discarded rather than applied. If you want
+later matches always override earlier matches, use the :override
+keyword.
+
+#+begin_src elisp
+(defvar python--treesit-settings
+ (treesit-font-lock-rules
+ :language 'python
+ :override t
+ `(;; Queries for def and class.
+ (function_definition
+ name: (identifier) @font-lock-function-name-face)
+
+ (class_definition
+ name: (identifier) @font-lock-type-face)
+
+ ;; Comment and string.
+ (comment) @font-lock-comment-face
+
+ ...)))
+#+end_src
+
+Then in ‘python-mode’, enable tree-sitter font-lock:
+
+#+begin_src elisp
+(treesit-parser-create 'python)
+;; This turns off the syntax-based font-lock for comments and
+;; strings. So it doesn’t override tree-sitter’s fontification.
+(setq-local font-lock-keywords-only t)
+(setq-local treesit-font-lock-settings
+ python--treesit-settings)
+(treesit-font-lock-enable)
+#+end_src
+
+Concretely, something like this:
+
+#+begin_src elisp
+(define-derived-mode python-mode prog-mode "Python"
+  ...
+
+  (if (and python-use-tree-sitter
+           (treesit-can-enable-p))
+      ;; Tree-sitter.  Create the parser only inside this branch, so
+      ;; that nothing tree-sitter specific runs when the user turned
+      ;; it off or `treesit-can-enable-p' says it cannot be used.
+      (progn
+        (treesit-parser-create 'python)
+        (setq-local font-lock-keywords-only t)
+        (setq-local treesit-font-lock-settings
+                    python--treesit-settings)
+        (treesit-font-lock-enable))
+    ;; No tree-sitter
+    (setq-local font-lock-defaults ...))
+
+  ...)
+
+You’ll notice that tree-sitter’s font-lock doesn’t respect
+‘font-lock-maximum-decoration’, major modes are free to set
+‘treesit-font-lock-settings’ based on the value of
+‘font-lock-maximum-decoration’, or provide more fine-grained control
+through other mode-specific means.
+
+* Indent
+
+Indent works like this: We have a bunch of rules that look like this:
+
+ (MATCHER ANCHOR OFFSET)
+
+At the beginning, point is at the BOL of a line, and we want to know
+which column to indent this line to. Let NODE be the node at point; we pass
+this node to the MATCHER of each rule, one of them will match the node
+("this node is a closing bracket!"). Then we pass the node to the
+ANCHOR, which returns a point, eg, the BOL of the previous line. We
+find the column number of that point (eg, 4), add OFFSET to it (eg,
+0), and that is the column we want to indent the current line to (4 +
+0 = 4).
+
+For MATCHER we have
+
+ (parent-is TYPE)
+ (node-is TYPE)
+ (query QUERY) => matches if querying PARENT with QUERY
+ captures NODE.
+
+ (match NODE-TYPE PARENT-TYPE NODE-FIELD
+ NODE-INDEX-MIN NODE-INDEX-MAX)
+
+ => checks everything. If an argument is nil, don’t match that. Eg,
+ (match nil TYPE) is the same as (parent-is TYPE)
+
+For ANCHOR we have
+
+ first-sibling => start of the first sibling
+ parent => start of parent
+ parent-bol => BOL of the line parent is on.
+ prev-sibling
+ no-indent => don’t indent
+ prev-line => same indent as previous line
+
+There is also a manual section for indent: "Parser-based Indentation".
+
+When writing indent rules, you can use ‘treesit-check-indent’ to
+check if your indentation is correct. To debug what went wrong, set
+‘treesit--indent-verbose’ to non-nil. Then when you indent, Emacs
+tells you which rule is applied in the echo area.
+
+#+begin_src elisp
+(defvar typescript-mode-indent-rules
+ (let ((offset typescript-indent-offset))
+ `((typescript
+ ;; This rule matches if node at point is "}", ANCHOR is the
+ ;; parent node’s BOL, and offset is 0.
+ ((node-is "}") parent-bol 0)
+ ((node-is ")") parent-bol 0)
+ ((node-is "]") parent-bol 0)
+ ((node-is ">") parent-bol 0)
+ ((node-is ".") parent-bol ,offset)
+ ((parent-is "ternary_expression") parent-bol ,offset)
+ ((parent-is "named_imports") parent-bol ,offset)
+ ((parent-is "statement_block") parent-bol ,offset)
+ ((parent-is "type_arguments") parent-bol ,offset)
+ ((parent-is "variable_declarator") parent-bol ,offset)
+ ((parent-is "arguments") parent-bol ,offset)
+ ((parent-is "array") parent-bol ,offset)
+ ((parent-is "formal_parameters") parent-bol ,offset)
+ ((parent-is "template_substitution") parent-bol ,offset)
+ ((parent-is "object_pattern") parent-bol ,offset)
+ ((parent-is "object") parent-bol ,offset)
+ ((parent-is "object_type") parent-bol ,offset)
+ ((parent-is "enum_body") parent-bol ,offset)
+ ((parent-is "arrow_function") parent-bol ,offset)
+ ((parent-is "parenthesized_expression") parent-bol ,offset)
+ ...))))
+#+end_src
+
+Then you set ‘treesit-simple-indent-rules’ to your rules, and set
+‘indent-line-function’:
+
+#+begin_src elisp
+(setq-local treesit-simple-indent-rules typescript-mode-indent-rules)
+(setq-local indent-line-function #'treesit-indent)
+#+end_src
+
+* Imenu
+
+Not much to say except for utilizing ‘treesit-induce-sparse-tree’.
+See ‘python--imenu-treesit-create-index-1’ in python.el for an
+example.
+
+Once you have the index builder, set ‘imenu-create-index-function’.
+
+* Navigation
+
+Mainly ‘beginning-of-defun-function’ and ‘end-of-defun-function’.
+You can find the end of a defun with something like
+
+(treesit-search-forward-goto "function_definition" 'end)
+
+where "function_definition" matches the node type of a function
+definition node, and 'end means we want to go to the end of that
+node.
+
+Something like this should suffice:
+
+#+begin_src elisp
+(defun xxx-beginning-of-defun (&optional arg)
+  "Move to the beginning of a function definition.
+With positive ARG, search backward that many times; with negative
+ARG, search forward instead.  ARG defaults to 1."
+  ;; ARG is optional, so guard against nil before comparing it.
+  (setq arg (or arg 1))
+  ;; Go backward.
+  (while (and (> arg 0)
+              (treesit-search-forward-goto
+               "function_definition" 'start nil t))
+    (setq arg (1- arg)))
+  ;; Go forward.
+  (while (and (< arg 0)
+              (treesit-search-forward-goto
+               "function_definition" 'start))
+    (setq arg (1+ arg))))
+
+(setq-local beginning-of-defun-function #'xxx-beginning-of-defun)
+#+end_src
+
+And the same for end-of-defun.
+
+* Which-func
+
+You can find the current function by going up the tree and looking for
+the function_definition node. See ‘python-info-treesit-current-defun’
+in python.el for an example. Since Python allows nested function
+definitions, that function keeps going until it reaches the root node,
+and records all the function names along the way.
+
+#+begin_src elisp
+(defun python-info-treesit-current-defun (&optional include-type)
+ "Identical to `python-info-current-defun' but use tree-sitter.
+For INCLUDE-TYPE see `python-info-current-defun'."
+ (let ((node (treesit-node-at (point)))
+ (name-list ())
+ (type nil))
+ (cl-loop while node
+ if (pcase (treesit-node-type node)
+ ("function_definition"
+ (setq type 'def))
+ ("class_definition"
+ (setq type 'class))
+ (_ nil))
+ do (push (treesit-node-text
+ (treesit-node-child-by-field-name node "name")
+ t)
+ name-list)
+ do (setq node (treesit-node-parent node))
+ finally return (concat (if include-type
+ (format "%s " type)
+ "")
+ (string-join name-list ".")))))
+#+end_src
+
+* More features?
+
+Obviously this list is just a starting point; if there are features in
+the major mode that would benefit from a parse tree, adding tree-sitter
+support for that would be great. But in the minimal case, just adding
+font-lock is awesome.
+
+* Common tasks
+
+How to...
+
+** Get the buffer text corresponding to a node?
+
+(treesit-node-text node)
+
+BTW ‘treesit-node-string’ does different things.
+
+** Scan the whole tree for stuff?
+
+(treesit-search-subtree)
+(treesit-search-forward)
+(treesit-induce-sparse-tree)
+
+** Move to next node that...?
+
+(treesit-search-forward-goto)
+
+** Get the root node?
+
+(treesit-buffer-root-node)
+
+** Get the node at point?
+
+(treesit-node-at (point))
+
+* Manual
+
+I suggest you read the manual section for tree-sitter in Info. The
+section is Parsing Program Source. Typing
+
+ C-h i d m elisp RET g Parsing Program Source RET
+
+will bring you to that section. You can also read the HTML version
+under /html-manual in this directory. I find the HTML version easier
+to read. You don’t need to read through every sentence, just read the
+text paragraphs and glance over function names.
diff --git a/configure.ac b/configure.ac
index 2d843440503..7d751fd6a81 100644
--- a/configure.ac
+++ b/configure.ac
@@ -463,6 +463,7 @@ OPTION_DEFAULT_ON([xml2],[don't compile with XML parsing support])
OPTION_DEFAULT_OFF([imagemagick],[compile with ImageMagick image support])
OPTION_DEFAULT_ON([native-image-api], [don't use native image APIs (GDI+ on Windows)])
OPTION_DEFAULT_IFAVAILABLE([json], [compile with native JSON support])
+OPTION_DEFAULT_IFAVAILABLE([tree-sitter], [compile with tree-sitter])
OPTION_DEFAULT_ON([xft],[don't use XFT for anti aliased fonts])
OPTION_DEFAULT_ON([harfbuzz],[don't use HarfBuzz for text shaping])
@@ -3210,6 +3211,26 @@ AC_SUBST([JSON_LIBS])
AC_SUBST([JSON_CFLAGS])
AC_SUBST([JSON_OBJ])
+HAVE_TREE_SITTER=no
+TREE_SITTER_OBJ=
+
+if test "${with_tree_sitter}" != "no"; then
+ dnl TODO: we should use tree-sitter >= 0.20.2, but right now all
+ dnl tree-sitter libraries distributed are versioned at 0.0, so for
+ dnl the ease of development we'll just leave the version
+ dnl requirement at 0.0 for now.
+ EMACS_CHECK_MODULES([TREE_SITTER], [tree-sitter >= 0.0],
+ [HAVE_TREE_SITTER=yes], [HAVE_TREE_SITTER=no])
+ if test "${HAVE_TREE_SITTER}" = yes; then
+ AC_DEFINE(HAVE_TREE_SITTER, 1, [Define if using tree-sitter.])
+ TREE_SITTER_OBJ="treesit.o"
+ fi
+fi
+
+AC_SUBST(TREE_SITTER_LIBS)
+AC_SUBST(TREE_SITTER_CFLAGS)
+AC_SUBST(TREE_SITTER_OBJ)
+
NOTIFY_OBJ=
NOTIFY_SUMMARY=no
@@ -4080,20 +4101,31 @@ if test "${HAVE_ZLIB}" = "yes"; then
fi
AC_SUBST([LIBZ])
+### Dynamic library support
+case $opsys in
+ cygwin|mingw32) DYNAMIC_LIB_SUFFIX=".dll" ;;
+ darwin) DYNAMIC_LIB_SUFFIX=".dylib" ;;
+ *) DYNAMIC_LIB_SUFFIX=".so" ;;
+esac
+case "${opsys}" in
+ darwin) DYNAMIC_LIB_SECONDARY_SUFFIX='.so' ;;
+ *) DYNAMIC_LIB_SECONDARY_SUFFIX='' ;;
+esac
+AC_DEFINE_UNQUOTED(DYNAMIC_LIB_SUFFIX, "$DYNAMIC_LIB_SUFFIX",
+ [System extension for dynamic libraries])
+AC_DEFINE_UNQUOTED(DYNAMIC_LIB_SECONDARY_SUFFIX, "$DYNAMIC_LIB_SECONDARY_SUFFIX",
+ [Alternative system extension for dynamic libraries.])
+
+AC_SUBST(DYNAMIC_LIB_SUFFIX)
+AC_SUBST(DYNAMIC_LIB_SECONDARY_SUFFIX)
+
### Dynamic modules support
LIBMODULES=
HAVE_MODULES=no
MODULES_OBJ=
NEED_DYNLIB=no
-case $opsys in
- cygwin|mingw32) MODULES_SUFFIX=".dll" ;;
- darwin) MODULES_SUFFIX=".dylib" ;;
- *) MODULES_SUFFIX=".so" ;;
-esac
-case "${opsys}" in
- darwin) MODULES_SECONDARY_SUFFIX='.so' ;;
- *) MODULES_SECONDARY_SUFFIX='' ;;
-esac
+MODULES_SUFFIX="${DYNAMIC_LIB_SUFFIX}"
+MODULES_SECONDARY_SUFFIX="${DYNAMIC_LIB_SECONDARY_SUFFIX}"
# pgtkterm.c uses dlsym
if test $window_system = pgtk; then
@@ -4510,6 +4542,12 @@ case $with_json,$HAVE_JSON in
*) MISSING="$MISSING json"
WITH_IFAVAILABLE="$WITH_IFAVAILABLE --with-json=ifavailable";;
esac
+case $with_tree_sitter,$HAVE_TREE_SITTER in
+ no,* | ifavailable,* | *,yes) ;;
+ *) MISSING="$MISSING tree-sitter"
+ WITH_IFAVAILABLE="$WITH_IFAVAILABLE --with-tree-sitter=ifavailable";;
+esac
+
if test "X${MISSING}" != X; then
# If we have a missing library, and we don't have pkg-config installed,
# the missing pkg-config may be the reason. Give the user a hint.
@@ -6550,7 +6588,7 @@ Configured for '${canonical}'.
optsep=
emacs_config_features=
for opt in ACL BE_APP CAIRO DBUS FREETYPE GCONF GIF GLIB GMP GNUTLS GPM GSETTINGS \
- HARFBUZZ IMAGEMAGICK JPEG JSON LCMS2 LIBOTF LIBSELINUX LIBSYSTEMD LIBXML2 \
+ HARFBUZZ IMAGEMAGICK JPEG JSON TREE-SITTER LCMS2 LIBOTF LIBSELINUX LIBSYSTEMD LIBXML2 \
M17N_FLT MODULES NATIVE_COMP NOTIFY NS OLDXMENU PDUMPER PGTK PNG RSVG SECCOMP \
SOUND SQLITE3 THREADS TIFF TOOLKIT_SCROLL_BARS \
UNEXEC WEBP X11 XAW3D XDBE XFT XIM XINPUT2 XPM XWIDGETS X_TOOLKIT \
@@ -6621,6 +6659,7 @@ AS_ECHO([" Does Emacs use -lXaw3d? ${HAVE_XAW3D
Does Emacs use -lxft? ${HAVE_XFT}
Does Emacs use -lsystemd? ${HAVE_LIBSYSTEMD}
Does Emacs use -ljansson? ${HAVE_JSON}
+ Does Emacs use -ltree-sitter? ${HAVE_TREE_SITTER}
Does Emacs use the GMP library? ${HAVE_GMP}
Does Emacs directly use zlib? ${HAVE_ZLIB}
Does Emacs have dynamic modules support? ${HAVE_MODULES}
diff --git a/doc/lispref/elisp.texi b/doc/lispref/elisp.texi
index a3d1d804086..09e7aad714e 100644
--- a/doc/lispref/elisp.texi
+++ b/doc/lispref/elisp.texi
@@ -222,6 +222,7 @@ To view this manual in other formats, click
* Non-ASCII Characters:: Non-ASCII text in buffers and strings.
* Searching and Matching:: Searching buffers for strings or regexps.
* Syntax Tables:: The syntax table controls word and list parsing.
+* Parsing Program Source:: Generate syntax tree for program sources.
* Abbrevs:: How Abbrev mode works, and its data structures.
* Threads:: Concurrency in Emacs Lisp.
@@ -1359,6 +1360,16 @@ Syntax Tables
* Syntax Table Internals:: How syntax table information is stored.
* Categories:: Another way of classifying character syntax.
+Parsing Program Source
+
+* Language Definitions:: Loading tree-sitter language definitions.
+* Using Parser:: Introduction to parsers.
+* Retrieving Node:: Retrieving node from syntax tree.
+* Accessing Node:: Accessing node information.
+* Pattern Matching:: Pattern matching with query patterns.
+* Multiple Languages:: Parse text written in multiple languages.
+* Tree-sitter C API:: Compare the C API and the ELisp API.
+
Syntax Descriptors
* Syntax Class Table:: Table of syntax classes.
@@ -1703,6 +1714,7 @@ Object Internals
@include searching.texi
@include syntax.texi
+@include parsing.texi
@include abbrevs.texi
@include threads.texi
@include processes.texi
diff --git a/doc/lispref/modes.texi b/doc/lispref/modes.texi
index 75eb21522f1..883f9d8491f 100644
--- a/doc/lispref/modes.texi
+++ b/doc/lispref/modes.texi
@@ -2851,11 +2851,13 @@ mode; most major modes define syntactic criteria for which faces to use
in which contexts. This section explains how to customize Font Lock for
a particular major mode.
- Font Lock mode finds text to highlight in two ways: through
-syntactic parsing based on the syntax table, and through searching
-(usually for regular expressions). Syntactic fontification happens
-first; it finds comments and string constants and highlights them.
-Search-based fontification happens second.
+ Font Lock mode finds text to highlight in three ways: through
+syntactic parsing based on the syntax table, through searching
+(usually for regular expressions), and through parsing based on a
+full-blown parser. Syntactic fontification happens first; it finds
+comments and string constants and highlights them. Search-based
+fontification happens second. Parser-based fontification can be
+optionally enabled and it will precede the other two fontifications.
@menu
* Font Lock Basics:: Overview of customizing Font Lock.
@@ -2870,6 +2872,7 @@ Search-based fontification happens second.
* Syntactic Font Lock:: Fontification based on syntax tables.
* Multiline Font Lock:: How to coerce Font Lock into properly
highlighting multiline constructs.
+* Parser-based Font Lock:: Use a parser for fontification.
@end menu
@node Font Lock Basics
@@ -3873,6 +3876,94 @@ Since this function is called after every buffer change, it should be
reasonably fast.
@end defvar
+@node Parser-based Font Lock
+@subsection Parser-based Font Lock
+
+@c This node is written when the only parser Emacs has is tree-sitter,
+@c if in the future more parsers are supported, feel free to reorganize
+@c and rewrite this node to describe multiple parsers in parallel.
+
+Besides simple syntactic font lock and regexp-based font lock, Emacs
+also provides complete syntactic font lock with the help of a parser,
+currently provided by the tree-sitter library (@pxref{Parsing Program
+Source}).
+
+@defun treesit-font-lock-enable
+This function enables parser-based font lock in the current buffer.
+@end defun
+
+Parser-based font lock and other font lock mechanisms are not mutually
+exclusive. By default, if enabled, parser-based font lock runs first,
+then the simple syntactic font lock (if enabled), then regexp-based
+font lock.
+
+Although parser-based font lock doesn't share the same customization
+variables with regexp-based font lock, parser-based font lock uses
+similar customization schemes. The tree-sitter counterpart of
+@var{font-lock-keywords} is @var{treesit-font-lock-settings}.
+
+@defun treesit-font-lock-rules :keyword value query...
+This function is used to set @var{treesit-font-lock-settings}. It
+takes care of compiling queries and other post-processing and outputs
+a value that @var{treesit-font-lock-settings} accepts. An example:
+
+@example
+@group
+(treesit-font-lock-rules
+ :language 'javascript
+ :override t
+ '((true) @@font-lock-constant-face
+ (false) @@font-lock-constant-face)
+ :language 'html
+ "(script_element) @@font-lock-builtin-face")
+@end group
+@end example
+
+This function takes a list of text or s-exp queries. Before each
+query, there are @var{:keyword} and @var{value} pairs that configure
+that query. The @code{:language} keyword sets the query’s language and
+every query must specify the language. Other keywords are optional:
+
+@multitable @columnfractions .15 .15 .6
+@headitem Keyword @tab Value @tab Description
+@item @code{:override} @tab nil
+@tab If the region already has a face, discard the new face
+@item @tab t @tab Always apply the new face
+@item @tab @code{append} @tab Append the new face to existing ones
+@item @tab @code{prepend} @tab Prepend the new face to existing ones
+@item @tab @code{keep} @tab Fill-in regions without an existing face
+@end multitable
+
+Capture names in @var{query} should be face names like
+@code{font-lock-keyword-face}. The captured node will be fontified
+with that face. Capture names can also be function names, in which
+case the function is called with (@var{start} @var{end} @var{node}),
+where @var{start} and @var{end} are the start and end position of the
+node in buffer, and @var{node} is the node itself. If a capture name
+is both a face and a function, the face takes priority. If a capture
+name is not a face name nor a function name, it is ignored.
+@end defun
+
+@defvar treesit-font-lock-settings
+A list of @var{setting}s for tree-sitter font lock. The exact format
+of this variable is considered internal. One should always use
+@code{treesit-font-lock-rules} to set this variable.
+
+Each @var{setting} is of form
+
+@example
+(@var{language} @var{query})
+@end example
+
+Each @var{setting} controls one parser (often of different language).
+And @var{language} is the language symbol (@pxref{Language
+Definitions}); @var{query} is the query (@pxref{Pattern Matching}).
+@end defvar
+
+Multi-language major modes should provide range functions in
+@code{treesit-range-functions}, and Emacs will set the ranges
+accordingly before fontifying a region (@pxref{Multiple Languages}).
+
@node Auto-Indentation
@section Automatic Indentation of code
@@ -3929,10 +4020,12 @@ and a few other such modes) has been made more generic over the years,
so if your language seems somewhat similar to one of those languages,
you might try to use that engine. @c FIXME: documentation?
Another one is SMIE which takes an approach in the spirit
-of Lisp sexps and adapts it to non-Lisp languages.
+of Lisp sexps and adapts it to non-Lisp languages. Yet another one is
+to rely on a full-blown parser, for example, the tree-sitter library.
@menu
* SMIE:: A simple minded indentation engine.
+* Parser-based Indentation:: Parser-based indentation engine.
@end menu
@node SMIE
@@ -4592,6 +4685,172 @@ to the file's local variables of the form:
@code{eval: (smie-config-local '(@var{rules}))}.
@end defun
+@node Parser-based Indentation
+@subsection Parser-based Indentation
+
+@c This node is written when the only parser Emacs has is tree-sitter,
+@c if in the future more parsers are supported, feel free to reorganize
+@c and rewrite this node to describe multiple parsers in parallel.
+
+When built with the tree-sitter library (@pxref{Parsing Program
+Source}), Emacs could parse program source and produce a syntax tree.
+And this syntax tree can be used for indentation. For maximum
+flexibility, we could write a custom indent function that queries the
+syntax tree and indents accordingly for each language, but that would
+be a lot of work. It is more convenient to use the simple indentation
+engine described below: we only need to write some indentation rules
+and the engine takes care of the rest.
+
+To enable the indentation engine, set the value of
+@code{indent-line-function} to @code{treesit-indent}.
+
+@defvar treesit-indent-function
+This variable stores the actual function called by
+@code{treesit-indent}. By default, its value is
+@code{treesit-simple-indent}. In the future we might add other
+more complex indentation engines.
+@end defvar
+
+@heading Writing indentation rules
+
+@defvar treesit-simple-indent-rules
+This local variable stores indentation rules for every language. It is
+a list of
+
+@example
+(@var{language} . @var{rules})
+@end example
+
+where @var{language} is a language symbol, and @var{rules} is a list
+of
+
+@example
+(@var{matcher} @var{anchor} @var{offset})
+@end example
+
+First Emacs passes the node at point to @var{matcher}; if it returns
+non-nil, this rule applies. Then Emacs passes the node to
+@var{anchor}, which returns a point. Emacs takes the column number of
+that point, adds @var{offset} to it, and the result is the indent for
+the current line.
+
+The @var{matcher} and @var{anchor} are functions, and Emacs provides
+convenient presets for them. You can skip over to
+@code{treesit-simple-indent-presets} below, those presets should be
+more than enough.
+
+A @var{matcher} or an @var{anchor} is a function that takes three
+arguments (@var{node} @var{parent} @var{bol}). Argument @var{bol} is
+the point at where we are indenting: the position of the first
+non-whitespace character from the beginning of line; @var{node} is the
+largest (highest-in-tree) node that starts at that point; @var{parent}
+is the parent of @var{node}. A @var{matcher} returns nil/non-nil, and
+@var{anchor} returns a point.
+@end defvar
+
+@defvar treesit-simple-indent-presets
+This is a list of presets for @var{matcher}s and @var{anchor}s in
+@code{treesit-simple-indent-rules}. Each of them represents a function
+that takes @var{node}, @var{parent} and @var{bol} as arguments.
+
+@example
+no-node
+@end example
+
+This matcher matches the case where @var{node} is nil, i.e., there is
+no node that starts at @var{bol}. This is the case when @var{bol} is
+at an empty line or inside a multi-line string, etc.
+
+@example
+(parent-is @var{type})
+@end example
+
+This matcher matches if @var{parent}'s type is @var{type}.
+
+@example
+(node-is @var{type})
+@end example
+
+This matcher matches if @var{node}'s type is @var{type}.
+
+@example
+(query @var{query})
+@end example
+
+This matcher matches if querying @var{parent} with @var{query}
+captures @var{node}. The capture name does not matter.
+
+@example
+(match @var{node-type} @var{parent-type}
+ @var{node-field} @var{node-index-min} @var{node-index-max})
+@end example
+
+This matcher checks if @var{node}'s type is @var{node-type},
+@var{parent}'s type is @var{parent-type}, @var{node}'s field name in
+@var{parent} is @var{node-field}, and @var{node}'s index among its
+siblings is between @var{node-index-min} and @var{node-index-max}. If
+the value of a constraint is nil, this matcher doesn't check for that
+constraint. For example, to match the first child where parent is
+@code{argument_list}, use
+
+@example
+(match nil "argument_list" nil 0 0)
+@end example
+
+@example
+first-sibling
+@end example
+
+This anchor returns the start of the first child of @var{parent}.
+
+@example
+parent
+@end example
+
+This anchor returns the start of @var{parent}.
+
+@example
+parent-bol
+@end example
+
+This anchor returns the beginning of non-space characters on the line
+where @var{parent} is.
+
+@example
+prev-sibling
+@end example
+
+This anchor returns the start of the previous sibling of @var{node}.
+
+@example
+no-indent
+@end example
+
+This anchor returns the start of @var{node}, i.e., no indent.
+
+@example
+prev-line
+@end example
+
+This anchor returns the first non-whitespace character on the previous
+line.
+@end defvar
+
+@heading Indentation utilities
+
+Here are some utility functions that can help writing indentation
+rules.
+
+@defun treesit-check-indent mode
+This function checks current buffer's indentation against major mode
+@var{mode}. It indents the current buffer in @var{mode} and compares
+the indentation with the current indentation. Then it pops up a diff
+buffer showing the difference. Correct indentation (target) is in
+green, current indentation is in red.
+@end defun
+
+It is also helpful to use @code{treesit-inspect-mode} when writing
+indentation rules.
@node Desktop Save Mode
@section Desktop Save Mode
diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi
new file mode 100644
index 00000000000..3784531fe59
--- /dev/null
+++ b/doc/lispref/parsing.texi
@@ -0,0 +1,1515 @@
+@c -*- mode: texinfo; coding: utf-8 -*-
+@c This is part of the GNU Emacs Lisp Reference Manual.
+@c Copyright (C) 2021 Free Software Foundation, Inc.
+@c See the file elisp.texi for copying conditions.
+@node Parsing Program Source
+@chapter Parsing Program Source
+
+Emacs provides various ways to parse program source text and produce a
+@dfn{syntax tree}. In a syntax tree, text is no longer a
+one-dimensional stream but a structured tree of nodes, where each node
+represents a piece of text. Thus a syntax tree can enable
+interesting features like precise fontification, indentation,
+navigation, structured editing, etc.
+
+Emacs has a simple facility for parsing balanced expressions
+(@pxref{Parsing Expressions}). There is also the SMIE library for generic
+navigation and indentation (@pxref{SMIE}).
+
+Emacs also provides integration with the tree-sitter library
+(@uref{https://tree-sitter.github.io/tree-sitter}) if compiled with
+it. The tree-sitter library implements an incremental parser and has
+support for a wide range of programming languages.
+
+@defun treesit-available-p
+This function returns non-nil if tree-sitter features are available
+for this Emacs instance.
+@end defun
+
+For tree-sitter integration with existing Emacs features,
+@pxref{Parser-based Font Lock}, @ref{Parser-based Indentation}, and
+@ref{List Motion}.
+
+To access the syntax tree of the text in a buffer, we need to first
+load a language definition and create a parser with it. Next, we can
+query the parser for specific nodes in the syntax tree. Then, we can
+access various information about the node, and we can pattern-match a
+node with a powerful syntax. Finally, we explain how to work with
+source files that mix multiple languages. The following sections
+explain how to do each of the tasks in detail.
+
+@menu
+* Language Definitions:: Loading tree-sitter language definitions.
+* Using Parser:: Introduction to parsers.
+* Retrieving Node:: Retrieving node from syntax tree.
+* Accessing Node:: Accessing node information.
+* Pattern Matching:: Pattern matching with query patterns.
+* Multiple Languages:: Parse text written in multiple languages.
+* Tree-sitter C API:: Compare the C API and the ELisp API.
+@end menu
+
+@node Language Definitions
+@section Tree-sitter Language Definitions
+
+@heading Loading a language definition
+
+Tree-sitter relies on language definitions to parse text in that
+language. In Emacs, a language definition is represented by a symbol.
+For example, C language definition is represented as @code{c}, and
+@code{c} can be passed to tree-sitter functions as the @var{language}
+argument.
+
+@vindex treesit-extra-load-path
+@vindex treesit-load-language-error
+@vindex treesit-load-suffixes
+Tree-sitter language definitions are distributed as dynamic libraries.
+In order to use a language definition in Emacs, you need to make sure
+that the dynamic library is installed on the system. Emacs looks for
+language definitions under load paths in
+@code{treesit-extra-load-path}, @code{user-emacs-directory}/tree-sitter,
+and system default locations for dynamic libraries, in that order.
+Emacs tries each extension in @code{treesit-load-suffixes}. If Emacs
+cannot find the library or has problems loading it, Emacs signals
+@code{treesit-load-language-error}. The signal data is a list of
+specific error messages.
+
+@defun treesit-language-available-p language
+This function checks whether the dynamic library for @var{language} is
+present on the system, and returns non-nil if it is.
+@end defun
+
+@vindex treesit-load-name-override-list
+By convention, the dynamic library for @var{language} is
+@code{libtree-sitter-@var{language}.@var{ext}}, where @var{ext} is the
+system-specific extension for dynamic libraries. Also by convention,
+the function provided by that library is named
+@code{tree_sitter_@var{language}}. If a language definition doesn't
+follow this convention, you should add an entry
+
+@example
+(@var{language} @var{library-base-name} @var{function-name})
+@end example
+
+to @code{treesit-load-name-override-list}, where
+@var{library-base-name} is the base filename for the dynamic library
+(conventionally @code{libtree-sitter-@var{language}}), and
+@var{function-name} is the function provided by the library
+(conventionally @code{tree_sitter_@var{language}}). For example,
+
+@example
+(cool-lang "libtree-sitter-coool" "tree_sitter_cooool")
+@end example
+
+for a language too cool to abide by conventions.
+
+@defun treesit-language-version &optional min-compatible
+The tree-sitter library has a @dfn{language version}; a language
+definition's version needs to match this version to be compatible.
+
+This function returns tree-sitter library’s language version. If
+@var{min-compatible} is non-nil, it returns the minimal compatible
+version.
+@end defun
+
+@heading Concrete syntax tree
+
+A syntax tree is what a parser generates. In a syntax tree, each node
+represents a piece of text, and is connected to each other by a
+parent-child relationship. For example, if the source text is
+
+@example
+1 + 2
+@end example
+
+@noindent
+its syntax tree could be
+
+@example
+@group
+ +--------------+
+ | root "1 + 2" |
+ +--------------+
+ |
+ +--------------------------------+
+ | expression "1 + 2" |
+ +--------------------------------+
+ | | |
++------------+ +--------------+ +------------+
+| number "1" | | operator "+" | | number "2" |
++------------+ +--------------+ +------------+
+@end group
+@end example
+
+We can also represent it in s-expression:
+
+@example
+(root (expression (number) (operator) (number)))
+@end example
+
+@subheading Node types
+
+@cindex tree-sitter node type
+@anchor{tree-sitter node type}
+@cindex tree-sitter named node
+@anchor{tree-sitter named node}
+@cindex tree-sitter anonymous node
+Names like @code{root}, @code{expression}, @code{number},
+@code{operator} are nodes' @dfn{type}. However, not all nodes in a
+syntax tree have a type. Nodes that don't are @dfn{anonymous nodes},
+and nodes with a type are @dfn{named nodes}. Anonymous nodes are
+tokens with fixed spellings, including punctuation characters like
+bracket @samp{]}, and keywords like @code{return}.
+
+@subheading Field names
+
+@cindex tree-sitter node field name
+@anchor{tree-sitter node field name} To make the syntax tree easier to
+analyze, many language definitions assign @dfn{field names} to child
+nodes. For example, a @code{function_definition} node could have a
+@code{declarator} and a @code{body}:
+
+@example
+@group
+(function_definition
+ declarator: (declaration)
+ body: (compound_statement))
+@end group
+@end example
+
+@deffn Command treesit-inspect-mode
+This minor mode displays the node that @emph{starts} at point in
+mode-line. The mode-line will display
+
+@example
+@var{parent} @var{field-name}: (@var{child} (@var{grand-child} (...)))
+@end example
+
+@var{child}, @var{grand-child}, and @var{grand-grand-child}, etc, are
+nodes that have their beginning at point. And @var{parent} is the
+parent of @var{child}.
+
+If there is no node that starts at point, i.e., point is in the middle
+of a node, then the mode-line only displays the smallest node that
+spans point, and its immediate parent.
+
+This minor mode doesn't create parsers on its own. It simply uses the
+first parser in @code{(treesit-parser-list)} (@pxref{Using Parser}).
+@end deffn
+
+@heading Reading the grammar definition
+
+Authors of language definitions define the @dfn{grammar} of a
+language, and this grammar determines how a parser constructs a
+concrete syntax tree out of the text. In order to use the syntax
+tree effectively, we need to read the @dfn{grammar file}.
+
+The grammar file is usually @code{grammar.js} in a language
+definition’s project repository. The link to a language definition’s
+home page can be found in tree-sitter’s homepage
+(@uref{https://tree-sitter.github.io/tree-sitter}).
+
+The grammar is written in JavaScript syntax. For example, the rule
+matching a @code{function_definition} node looks like
+
+@example
+@group
+function_definition: $ => seq(
+ $.declaration_specifiers,
+ field('declarator', $.declaration),
+ field('body', $.compound_statement)
+)
+@end group
+@end example
+
+The rule is represented by a function that takes a single argument
+@var{$}, representing the whole grammar. The function itself is
+constructed by other functions: the @code{seq} function puts together a
+sequence of children; the @code{field} function annotates a child with
+a field name. If we write the above definition in BNF syntax, it
+would look like
+
+@example
+@group
+function_definition :=
+ <declaration_specifiers> <declaration> <compound_statement>
+@end group
+@end example
+
+@noindent
+and the node returned by the parser would look like
+
+@example
+@group
+(function_definition
+ (declaration_specifier)
+ declarator: (declaration)
+ body: (compound_statement))
+@end group
+@end example
+
+Below is a list of functions that one will see in a grammar
+definition. Each function takes other rules as arguments and returns
+a new rule.
+
+@itemize @bullet
+@item
+@code{seq(rule1, rule2, ...)} matches each rule one after another.
+
+@item
+@code{choice(rule1, rule2, ...)} matches one of the rules in its
+arguments.
+
+@item
+@code{repeat(rule)} matches @var{rule} for @emph{zero or more} times.
+This is like the @samp{*} operator in regular expressions.
+
+@item
+@code{repeat1(rule)} matches @var{rule} for @emph{one or more} times.
+This is like the @samp{+} operator in regular expressions.
+
+@item
+@code{optional(rule)} matches @var{rule} for @emph{zero or one} time.
+This is like the @samp{?} operator in regular expressions.
+
+@item
+@code{field(name, rule)} assigns field name @var{name} to the child
+node matched by @var{rule}.
+
+@item
+@code{alias(rule, alias)} makes nodes matched by @var{rule} appear as
+@var{alias} in the syntax tree generated by the parser. For example,
+
+@example
+alias(preprocessor_call_exp, call_expression)
+@end example
+
+makes any node matched by @code{preprocessor_call_exp} to appear as
+@code{call_expression}.
+@end itemize
+
+Below are grammar functions less interesting for a reader of a
+language definition.
+
+@itemize
+@item
+@code{token(rule)} marks @var{rule} to produce a single leaf node.
+That is, instead of generating a parent node with individual child
+nodes under it, everything is combined into a single leaf node.
+
+@item
+Normally, grammar rules ignore preceding whitespace;
+@code{token.immediate(rule)} changes @var{rule} to match only when
+there is no preceding whitespace.
+
+@item
+@code{prec(n, rule)} gives @var{rule} a level @var{n} precedence.
+
+@item
+@code{prec.left([n,] rule)} marks @var{rule} as left-associative,
+optionally with level @var{n}.
+
+@item
+@code{prec.right([n,] rule)} marks @var{rule} as right-associative,
+optionally with level @var{n}.
+
+@item
+@code{prec.dynamic(n, rule)} is like @code{prec}, but the precedence
+is applied at runtime instead.
+@end itemize
+
+The tree-sitter project talks about writing a grammar in more detail:
+@uref{https://tree-sitter.github.io/tree-sitter/creating-parsers}.
+Read especially ``The Grammar DSL'' section.
+
+@node Using Parser
+@section Using Tree-sitter Parser
+@cindex Tree-sitter parser
+
+This section describes how to create and configure a tree-sitter
+parser. In Emacs, each tree-sitter parser is associated with a
+buffer. As we edit the buffer, the associated parser and the syntax
+tree are automatically kept up-to-date.
+
+@defvar treesit-max-buffer-size
+This variable contains the maximum size of buffers in which
+tree-sitter can be activated. Major modes should check this value
+when deciding whether to enable tree-sitter features.
+@end defvar
+
+@defun treesit-can-enable-p
+This function checks whether the current buffer is suitable for
+activating tree-sitter features. It basically checks
+@code{treesit-available-p} and @code{treesit-max-buffer-size}.
+@end defun
+
+@cindex Creating tree-sitter parsers
+@defun treesit-parser-create language &optional buffer no-reuse
+To create a parser, we provide a @var{buffer} and the @var{language}
+to use (@pxref{Language Definitions}). If @var{buffer} is nil, the
+current buffer is used.
+
+By default, this function reuses a parser if one already exists for
+@var{language} in @var{buffer}, if @var{no-reuse} is non-nil, this
+function always creates a new parser.
+@end defun
+
+Given a parser, we can query information about it:
+
+@defun treesit-parser-buffer parser
+Returns the buffer associated with @var{parser}.
+@end defun
+
+@defun treesit-parser-language parser
+Returns the language that @var{parser} uses.
+@end defun
+
+@defun treesit-parser-p object
+Checks if @var{object} is a tree-sitter parser. Return non-nil if it
+is, return nil otherwise.
+@end defun
+
+There is no need to explicitly parse a buffer, because parsing is done
+automatically and lazily. A parser only parses when we query for a
+node in its syntax tree. Therefore, when a parser is first created,
+it doesn't parse the buffer; it waits until we query for a node for
+the first time. Similarly, when some change is made in the buffer, a
+parser doesn't re-parse immediately.
+
+@vindex treesit-buffer-too-large
+When a parser does parse, it checks the size of the buffer.
+Tree-sitter can only handle buffers no larger than about 4GB. If the
+size exceeds that, Emacs signals @code{treesit-buffer-too-large}
+with signal data being the buffer size.
+
+Once a parser is created, Emacs automatically adds it to the
+internal parser list. Every time a change is made to the buffer,
+Emacs updates parsers in this list so they can update their syntax
+tree incrementally.
+
+@defun treesit-parser-list &optional buffer
+This function returns the parser list of @var{buffer}. And
+@var{buffer} defaults to the current buffer.
+@end defun
+
+@defun treesit-parser-delete parser
+This function deletes @var{parser}.
+@end defun
+
+@cindex tree-sitter narrowing
+@anchor{tree-sitter narrowing} Normally, a parser ``sees'' the whole
+buffer, but when the buffer is narrowed (@pxref{Narrowing}), the
+parser will only see the visible region. As far as the parser can
+tell, the hidden region is deleted. And when the buffer is later
+widened, the parser thinks text is inserted in the beginning and in
+the end. Although parsers respect narrowing, narrowing shouldn't be
+the means to handle a multi-language buffer; instead, set the ranges in
+which a parser should operate. @xref{Multiple Languages}.
+
+Because a parser parses lazily, when we narrow the buffer, the parser
+is not affected immediately; as long as we don't query for a node
+while the buffer is narrowed, the parser is oblivious of the
+narrowing.
+
+@cindex tree-sitter parse string
+@defun treesit-parse-string string language
+Besides creating a parser for a buffer, we can also just parse a
+string. Unlike a buffer, parsing a string is a one-time deal, and
+there is no way to update the result.
+
+This function parses @var{string} with @var{language}, and returns the
+root node of the generated syntax tree.
+@end defun
+
+@node Retrieving Node
+@section Retrieving Node
+
+@cindex tree-sitter find node
+@cindex tree-sitter get node
+Before we continue, let's go over some conventions of tree-sitter
+functions.
+
+We talk about a node being ``smaller'' or ``larger'', and ``lower'' or
+``higher''. A smaller and lower node is lower in the syntax tree and
+therefore spans a smaller piece of text; a larger and higher node is
+higher up in the syntax tree, containing many smaller nodes as its
+children, and therefore spans a larger piece of text.
+
+When a function cannot find a node, it returns nil. And for the
+convenience of function chaining, all the functions that take a node
+as an argument and return a node also accept nil in place of the node;
+in that case, the function just returns nil.
+
+@vindex treesit-node-outdated
+Nodes are not automatically updated when the associated buffer is
+modified. And there is no way to update a node once it is retrieved.
+Using an outdated node signals a @code{treesit-node-outdated} error.
+
+@heading Retrieving node from syntax tree
+
+@defun treesit-node-at point &optional parser-or-lang named
+This function returns the @emph{smallest} node that starts at or after
+@var{point}. In other words, the start of the node is greater than or
+equal to @var{point}.
+
+When @var{parser-or-lang} is nil, this function uses the first parser
+in @code{(treesit-parser-list)} in the current buffer. If
+@var{parser-or-lang} is a parser object, it uses that parser; if
+@var{parser-or-lang} is a language, it finds the first parser using
+that language in @code{(treesit-parser-list)} and uses that.
+
+If @var{named} is non-nil, this function looks for a named node
+only (@pxref{tree-sitter named node, named node}).
+
+Example:
+@example
+@group
+;; Find the node at point in a C parser's syntax tree.
+(treesit-node-at (point) 'c)
+ @c @result{} #<treesit-node from 1 to 4 in *scratch*>
+@end group
+@end example
+@end defun
+
+@defun treesit-node-on beg end &optional parser-or-lang named
+This function returns the @emph{smallest} node that covers the span
+from @var{beg} to @var{end}. In other words, the start of the node is
+less than or equal to @var{beg}, and the end of the node is greater
+than or equal to @var{end}.
+
+@emph{Beware} that calling this function on an empty line that is not
+inside any top-level construct (function definition, etc) most
+probably will give you the root node, because the root node is the
+smallest node that covers that empty line. Most of the time, you want
+to use @code{treesit-node-at}.
+
+When @var{parser-or-lang} is nil, this function uses the first parser
+in @code{(treesit-parser-list)} in the current buffer. If
+@var{parser-or-lang} is a parser object, it uses that parser; if
+@var{parser-or-lang} is a language, it finds the first parser using
+that language in @code{(treesit-parser-list)} and uses that.
+
+If @var{named} is non-nil, this function looks for a named node only
+(@pxref{tree-sitter named node, named node}).
+@end defun
+
+@defun treesit-parser-root-node parser
+This function returns the root node of the syntax tree generated by
+@var{parser}.
+@end defun
+
+@defun treesit-buffer-root-node &optional language
+This function finds the first parser that uses @var{language} in
+@code{(treesit-parser-list)} in the current buffer, and returns the
+root node of that parser's syntax tree. If it cannot find an
+appropriate parser, nil is returned.
+@end defun
+
+Once we have a node, we can retrieve other nodes from it, or query for
+information about this node.
+
+@heading Retrieving node from other nodes
+
+@subheading By kinship
+
+@defun treesit-node-parent node
+This function returns the immediate parent of @var{node}.
+@end defun
+
+@defun treesit-node-child node n &optional named
+This function returns the @var{n}'th child of @var{node}. If
+@var{named} is non-nil, then it only counts named nodes
+(@pxref{tree-sitter named node, named node}). For example, in a node
+that represents a string: @code{"text"}, there are three child
+nodes: the opening quote @code{"}, the string content @code{text}, and
+the closing quote @code{"}. Among these nodes, the first child is
+the opening quote @code{"}, and the first named child is the string
+content @code{text}.
+@end defun
+
+@defun treesit-node-children node &optional named
+This function returns all of @var{node}'s children in a list. If
+@var{named} is non-nil, then it only retrieves named nodes.
+@end defun
+
+@defun treesit-node-next-sibling node &optional named
+This function finds the next sibling of @var{node}. If @var{named} is
+non-nil, it finds the next named sibling.
+@end defun
+
+@defun treesit-node-prev-sibling node &optional named
+This function finds the previous sibling of @var{node}. If
+@var{named} is non-nil, it finds the previous named sibling.
+@end defun
+
+@subheading By field name
+
+To make the syntax tree easier to analyze, many language definitions
+assign @dfn{field names} to child nodes (@pxref{tree-sitter node field
+name, field name}). For example, a @code{function_definition} node
+could have a @code{declarator} and a @code{body}.
+
+@defun treesit-node-child-by-field-name node field-name
+This function finds the child of @var{node} that has @var{field-name}
+as its field name.
+
+@example
+@group
+;; Get the child that has "body" as its field name.
+(treesit-node-child-by-field-name node "body")
+ @c @result{} #<treesit-node from 3 to 11 in *scratch*>
+@end group
+@end example
+@end defun
+
+@subheading By position
+
+@defun treesit-node-first-child-for-pos node pos &optional named
+This function finds the first child of @var{node} that extends beyond
+@var{pos}. ``Extends beyond'' means that the end of the child node is
+greater than or equal to @var{pos}. This function only looks at the
+immediate children of @var{node}, and doesn't look at its
+grandchildren. If @var{named} is non-nil, it only looks for named
+children (@pxref{tree-sitter named node, named node}).
+@end defun
+
+@defun treesit-node-descendant-for-range node beg end &optional named
+This function finds the @emph{smallest} descendant (child,
+grandchild, etc.) of @var{node} that spans the range from @var{beg} to
+@var{end}. It is similar to @code{treesit-node-on}. If @var{named} is
+non-nil, it only looks for named descendants.
+@end defun
+
+@heading Searching for node
+
+@defun treesit-search-subtree node predicate &optional all backward limit
+This function traverses the subtree of @var{node} (including
+@var{node}), and matches @var{predicate} against each node along the
+way. @var{predicate} is a regexp that matches (case-insensitively)
+against each node's type, or a function that takes a node and returns
+nil/non-nil. If a node matches, that node is returned; if no node
+ever matches, nil is returned.
+
+By default, this function only traverses named nodes; if @var{all} is
+non-nil, it traverses all nodes. If @var{backward} is non-nil, it
+traverses backwards. If @var{limit} is non-nil, it only traverses
+that number of levels down in the tree.
+@end defun
+
+@defun treesit-search-forward start predicate &optional all backward up
+This function is somewhat similar to @code{treesit-search-subtree}.
+It also traverses the parse tree and matches each node with
+@var{predicate} (except for @var{start}), where @var{predicate} can be
+a (case-insensitive) regexp or a function. For a tree like the below
+where @var{start} is marked 1, this function traverses as numbered:
+
+@example
+@group
+ o
+ |
+ 3--------4-----------8
+ | | |
+o--o-+--1 5--+--6 9---+-----12
+| | | | | |
+o o 2 7 +-+-+ +--+--+
+ | | | | |
+ 10 11 13 14 15
+@end group
+@end example
+
+Same as in @code{treesit-search-subtree}, this function only searches
+for named nodes by default. But if @var{all} is non-nil, it searches
+for all nodes. If @var{backward} is non-nil, it searches backwards.
+
+If @var{up} is non-nil, this function will only traverse to siblings
+and parents. In that case, only 1 3 4 8 would be traversed.
+@end defun
+
+@defun treesit-search-forward-goto predicate side &optional all backward up
+This function jumps to the start or end of the next node in buffer
+that matches @var{predicate}. Parameters @var{predicate}, @var{all},
+@var{backward}, and @var{up} are the same as in
+@code{treesit-search-forward}. And @var{side} controls which side of
+the matched node to stop at; it can be @code{start} or @code{end}.
+@end defun
+
+@defun treesit-induce-sparse-tree root predicate &optional process-fn limit
+This function creates a sparse tree from @var{root}'s subtree.
+
+Basically, it takes the subtree under @var{root}, and combs it so only
+the nodes that match @var{predicate} are left, like picking out grapes
+on the vine. Like previous functions, @var{predicate} can be a regexp
+string that matches against each node's type case-insensitively, or a
+function that takes a node and returns nil/non-nil.
+
+For example, for a subtree on the left that consists of both numbers
+and letters, if @var{predicate} is ``letter only'', the returned tree
+is the one on the right.
+
+@example
+@group
+ a a a
+ | | |
++---+---+ +---+---+ +---+---+
+| | | | | | | | |
+b 1 2 b | | b c d
+ | | => | | => |
+ c +--+ c + e
+ | | | | |
+ +--+ d 4 +--+ d
+ | | |
+ e 5 e
+@end group
+@end example
+
+If @var{process-fn} is non-nil, instead of returning the matched
+nodes, this function passes each node to @var{process-fn} and uses the
+returned value instead. If non-nil, @var{limit} is the number of
+levels to go down from @var{root}.
+
+Each node in the returned tree looks like @code{(@var{tree-sitter
+node} . (@var{child} ...))}. The @var{tree-sitter node} of the root
+of this tree will be nil if @var{root} doesn't match @var{predicate}.
+If no node matches @var{predicate}, this function returns nil.
+@end defun
+
+@heading More convenient functions
+
+@defun treesit-filter-child node pred &optional named
+This function finds the immediate children of @var{node} that satisfy
+@var{pred}.
+
+Function @var{pred} takes the child node as the argument and should
+return non-nil to indicate keeping the child. If @var{named} is
+non-nil, this function only searches for named nodes.
+@end defun
+
+@defun treesit-parent-until node pred
+This function repeatedly finds the parent of @var{node}, and returns
+the parent if it satisfies @var{pred} (which takes the parent as the
+argument). If no parent satisfies @var{pred}, this function returns
+nil.
+@end defun
+
+@defun treesit-parent-while node pred
+This function repeatedly finds the parent of @var{node}, and keeps
+doing so as long as the parent satisfies @var{pred} (which takes the
+parent as the single argument). I.e., this function returns the
+farthest parent that still satisfies @var{pred}.
+@end defun
+
+@node Accessing Node
+@section Accessing Node Information
+
+Before going further, make sure you have read the basic conventions
+about tree-sitter nodes in the previous node.
+
+@heading Basic information
+
+Every node is associated with a parser, and that parser is associated
+with a buffer. The following functions let you retrieve them.
+
+@defun treesit-node-parser node
+This function returns @var{node}'s associated parser.
+@end defun
+
+@defun treesit-node-buffer node
+This function returns @var{node}'s parser's associated buffer.
+@end defun
+
+@defun treesit-node-language node
+This function returns @var{node}'s parser's associated language.
+@end defun
+
+Each node represents a piece of text in the buffer. The functions
+below find relevant information about that text.
+
+@defun treesit-node-start node
+Return the start position of @var{node}.
+@end defun
+
+@defun treesit-node-end node
+Return the end position of @var{node}.
+@end defun
+
+@defun treesit-node-text node &optional object
+Returns the buffer text that @var{node} represents. (If @var{node} is
+retrieved from parsing a string, it will be text from that string.)
+@end defun
+
+Here are some basic checks on tree-sitter nodes.
+
+@defun treesit-node-p object
+Checks if @var{object} is a tree-sitter syntax node.
+@end defun
+
+@defun treesit-node-eq node1 node2
+Checks if @var{node1} and @var{node2} are the same node in a syntax
+tree.
+@end defun
+
+@heading Property information
+
+In general, nodes in a concrete syntax tree fall into two categories:
+@dfn{named nodes} and @dfn{anonymous nodes}. Whether a node is named
+or anonymous is determined by the language definition
+(@pxref{tree-sitter named node, named node}).
+
+@cindex tree-sitter missing node
+Apart from being named/anonymous, a node can have other properties. A
+node can be ``missing'': missing nodes are inserted by the parser in
+order to recover from certain kinds of syntax errors, i.e., something
+should probably be there according to the grammar, but is not there.
+
+@cindex tree-sitter extra node
+A node can be ``extra'': extra nodes represent things like comments,
+which can appear anywhere in the text.
+
+@cindex tree-sitter node that has changes
+A node ``has changes'' if the buffer has changed since the node was
+retrieved, i.e., the node is outdated.
+
+@cindex tree-sitter node that has error
+A node ``has error'' if the text it spans contains a syntax error. It
+can be that the node itself has an error, or that one of its
+descendants (children, grandchildren, etc.) has an error.
+
+@defun treesit-node-check node property
+This function checks if @var{node} has @var{property}. @var{property}
+can be @code{'named}, @code{'missing}, @code{'extra},
+@code{'has-changes}, or @code{'has-error}.
+@end defun
+
+
+@defun treesit-node-type node
+Named nodes have ``types'' (@pxref{tree-sitter node type, node type}).
+For example, a named node can be a @code{string_literal} node, where
+@code{string_literal} is its type.
+
+This function returns @var{node}'s type as a string.
+@end defun
+
+@heading Information as a child or parent
+
+@defun treesit-node-index node &optional named
+This function returns the index of @var{node} as a child node of its
+parent. If @var{named} is non-nil, it only counts named nodes
+(@pxref{tree-sitter named node, named node}).
+@end defun
+
+@defun treesit-node-field-name node
+A child of a parent node could have a field name (@pxref{tree-sitter
+node field name, field name}). This function returns the field name
+of @var{node} as a child of its parent.
+@end defun
+
+@defun treesit-node-field-name-for-child node n
+This function returns the field name of the @var{n}'th child of
+@var{node}.
+@end defun
+
+@defun treesit-node-child-count node &optional named
+This function finds the number of children of @var{node}. If
+@var{named} is non-nil, it only counts named children
+(@pxref{tree-sitter named node, named node}).
+@end defun
+
+@node Pattern Matching
+@section Pattern Matching Tree-sitter Nodes
+
+Tree-sitter lets us pattern match with a small declarative language.
+Pattern matching consists of two steps: first tree-sitter matches a
+@dfn{pattern} against nodes in the syntax tree, then it @dfn{captures}
+specific nodes in that pattern and returns the captured nodes.
+
+We describe first how to write the most basic query pattern and how to
+capture nodes in a pattern, then the pattern-match function, finally
+more advanced pattern syntax.
+
+@heading Basic query syntax
+
+@cindex Tree-sitter query syntax
+@cindex Tree-sitter query pattern
+A @dfn{query} consists of multiple @dfn{patterns}. Each pattern is an
+s-expression that matches a certain node in the syntax tree. A
+pattern has the following shape:
+
+@example
+(@var{type} @var{child}...)
+@end example
+
+@noindent
+For example, a pattern that matches a @code{binary_expression} node that
+contains @code{number_literal} child nodes would look like
+
+@example
+(binary_expression (number_literal))
+@end example
+
+To @dfn{capture} a node in the query pattern above, append
+@code{@@capture-name} after the node pattern you want to capture. For
+example,
+
+@example
+(binary_expression (number_literal) @@number-in-exp)
+@end example
+
+@noindent
+captures @code{number_literal} nodes that are inside a
+@code{binary_expression} node with capture name @code{number-in-exp}.
+
+We can capture the @code{binary_expression} node too, with capture
+name @code{biexp}:
+
+@example
+(binary_expression
+ (number_literal) @@number-in-exp) @@biexp
+@end example
+
+@heading Query function
+
+Now we can introduce the query functions.
+
+@defun treesit-query-capture node query &optional beg end node-only
+This function matches patterns in @var{query} in @var{node}.
+Parameter @var{query} can be either a string, an s-expression, or a
+compiled query object. For now, we focus on the string syntax;
+s-expression syntax and compiled query are described at the end of the
+section.
+
+Parameter @var{node} can also be a parser or a language symbol. A
+parser means using its root node, a language symbol means find or
+create a parser for that language in the current buffer, and use the
+root node.
+
+The function returns all captured nodes in a list of
+@code{(@var{capture_name} . @var{node})}. If @var{node-only} is
+non-nil, a list of nodes is returned instead. If @var{beg} and
+@var{end} are both non-nil, this function only pattern matches nodes
+in that range.
+
+@vindex treesit-query-error
+This function raises a @code{treesit-query-error} if @var{query} is
+malformed. The signal data contains a description of the specific
+error. You can use @code{treesit-query-validate} to debug the query.
+@end defun
+
+For example, suppose @var{node}'s content is @code{1 + 2}, and
+@var{query} is
+
+@example
+@group
+(setq query
+ "(binary_expression
+ (number_literal) @@number-in-exp) @@biexp")
+@end group
+@end example
+
+Querying that query would return
+
+@example
+@group
+(treesit-query-capture node query)
+ @result{} ((biexp . @var{<node for "1 + 2">})
+ (number-in-exp . @var{<node for "1">})
+ (number-in-exp . @var{<node for "2">}))
+@end group
+@end example
+
+As we mentioned earlier, a @var{query} could contain multiple
+patterns. For example, it could have two top-level patterns:
+
+@example
+@group
+(setq query
+ "(binary_expression) @@biexp
+ (number_literal) @@number @@biexp")
+@end group
+@end example
+
+@defun treesit-query-string string query language
+This function parses @var{string} with @var{language}, pattern matches
+its root node with @var{query}, and returns the result.
+@end defun
+
+@heading More query syntax
+
+Besides node type and capture, tree-sitter's query syntax can express
+anonymous node, field name, wildcard, quantification, grouping,
+alternation, anchor, and predicate.
+
+@subheading Anonymous node
+
+An anonymous node is written verbatim, surrounded by quotes. A
+pattern matching (and capturing) keyword @code{return} would be
+
+@example
+"return" @@keyword
+@end example
+
+@subheading Wild card
+
+In a query pattern, @samp{(_)} matches any named node, and @samp{_}
+matches any named and anonymous node. For example, to capture any
+named child of a @code{binary_expression} node, the pattern would be
+
+@example
+(binary_expression (_) @@in_biexp)
+@end example
+
+@subheading Field name
+
+We can capture child nodes that have specific field names:
+
+@example
+@group
+(function_definition
+ declarator: (_) @@func-declarator
+ body: (_) @@func-body)
+@end group
+@end example
+
+We can also capture a node that doesn't have a certain field, say, a
+@code{function_definition} without a @code{body} field.
+
+@example
+(function_definition !body) @@func-no-body
+@end example
+
+@subheading Quantify node
+
+Tree-sitter recognizes quantification operators @samp{*}, @samp{+} and
+@samp{?}. Their meanings are the same as in regular expressions:
+@samp{*} matches the preceding pattern zero or more times, @samp{+}
+matches one or more times, and @samp{?} matches zero or one time.
+
+For example, this pattern matches @code{type_declaration} nodes
+that have @emph{zero or more} @code{long} keywords.
+
+@example
+(type_declaration "long"*) @@long-type
+@end example
+
+And this pattern matches a type declaration that has zero or one
+@code{long} keyword:
+
+@example
+(type_declaration "long"?) @@long-type
+@end example
+
+@subheading Grouping
+
+Similar to groups in regular expression, we can bundle patterns into a
+group and apply quantification operators to it. For example, to
+express a comma separated list of identifiers, one could write
+
+@example
+(identifier) ("," (identifier))*
+@end example
+
+@subheading Alternation
+
+Again, similar to regular expressions, we can express ``match any one
+of this group of patterns'' in the query pattern. The syntax is a
+list of patterns enclosed in square brackets. For example, to capture
+some keywords in C, the query pattern would be
+
+@example
+@group
+[
+ "return"
+ "break"
+ "if"
+ "else"
+] @@keyword
+@end group
+@end example
+
+@subheading Anchor
+
+The anchor operator @samp{.} can be used to enforce juxtaposition,
+i.e., to enforce two things to be directly next to each other. The
+two ``things'' can be two nodes, or a child and the end of its parent.
+For example, to capture the first child, the last child, or two
+adjacent children:
+
+@example
+@group
+;; Anchor the child with the end of its parent.
+(compound_expression (_) @@last-child .)
+
+;; Anchor the child with the beginning of its parent.
+(compound_expression . (_) @@first-child)
+
+;; Anchor two adjacent children.
+(compound_expression
+ (_) @@prev-child
+ .
+ (_) @@next-child)
+@end group
+@end example
+
+Note that the enforcement of juxtaposition ignores any anonymous
+nodes.
+
+@subheading Predicate
+
+We can add predicate constraints to a pattern. For example, if we use
+the following query pattern
+
+@example
+@group
+(
+ (array . (_) @@first (_) @@last .)
+ (#equal @@first @@last)
+)
+@end group
+@end example
+
+Then tree-sitter only matches arrays where the first element is equal
+to the last element. To attach a predicate to a pattern, we need to
+group them together. A predicate always starts with a @samp{#}.
+Currently there are two predicates, @code{#equal} and @code{#match}.
+
+@deffn Predicate equal arg1 arg2
+Matches if @var{arg1} equals @var{arg2}. Arguments can be either a
+string or a capture name. Capture names represent the text that the
+captured node spans in the buffer.
+@end deffn
+
+@deffn Predicate match regexp capture-name
+Matches if the text that @var{capture-name}'s node spans in the buffer
+matches regular expression @var{regexp}. Matching is case-sensitive.
+@end deffn
+
+Note that a predicate can only refer to capture names that appear in the
+same pattern. Indeed, it makes little sense to refer to capture names
+in other patterns anyway.
+
+@heading S-expression patterns
+
+Besides strings, Emacs provides an s-expression based syntax for query
+patterns. It largely resembles the string-based syntax. For example,
+the following pattern
+
+@example
+@group
+(treesit-query-capture
+ node "(addition_expression
+ left: (_) @@left
+ \"+\" @@plus-sign
+ right: (_) @@right) @@addition
+
+ [\"return\" \"break\"] @@keyword")
+@end group
+@end example
+
+@noindent
+is equivalent to
+
+@example
+@group
+(treesit-query-capture
+ node '((addition_expression
+ left: (_) @@left
+ "+" @@plus-sign
+ right: (_) @@right) @@addition
+
+ ["return" "break"] @@keyword))
+@end group
+@end example
+
+Most pattern syntax can be written directly as strange but
+nevertheless valid s-expressions. Only a few of them need
+modification:
+
+@itemize
+@item
+Anchor @samp{.} is written as @code{:anchor}.
+@item
+@samp{?} is written as @samp{:?}.
+@item
+@samp{*} is written as @samp{:*}.
+@item
+@samp{+} is written as @samp{:+}.
+@item
+@code{#equal} is written as @code{:equal}. In general, predicates
+change their @samp{#} to @samp{:}.
+@end itemize
+
+For example,
+
+@example
+@group
+"(
+ (compound_expression . (_) @@first (_)* @@rest)
+ (#match \"love\" @@first)
+ )"
+@end group
+@end example
+
+is written in s-expression as
+
+@example
+@group
+'((
+ (compound_expression :anchor (_) @@first (_) :* @@rest)
+ (:match "love" @@first)
+ ))
+@end group
+@end example
+
+@heading Compiling queries
+
+If a query will be used repeatedly, especially in tight loops, it is
+important to compile that query, because a compiled query is much
+faster than an uncompiled one. A compiled query can be used anywhere
+a query is accepted.
+
+@defun treesit-query-compile language query
+This function compiles @var{query} for @var{language} into a compiled
+query object and returns it.
+
+This function raises a @code{treesit-query-error} if @var{query} is
+malformed. The signal data contains a description of the specific
+error. You can use @code{treesit-query-validate} to debug the query.
+@end defun
+
+@defun treesit-query-expand query
+This function expands the s-expression @var{query} into a string
+query.
+@end defun
+
+@defun treesit-pattern-expand pattern
+This function expands the s-expression @var{pattern} into a string
+pattern.
+@end defun
+
+Finally, tree-sitter project's documentation about
+pattern-matching can be found at
+@uref{https://tree-sitter.github.io/tree-sitter/using-parsers#pattern-matching-with-queries}.
+
+@node Multiple Languages
+@section Parsing Text in Multiple Languages
+
+Sometimes, the source of a programming language could contain sources
+of other languages, HTML + CSS + JavaScript is one example. In that
+case, we need to assign individual parsers to text segments written in
+different languages. Traditionally this is achieved by using
+narrowing. While tree-sitter works with narrowing (@pxref{tree-sitter
+narrowing, narrowing}), the recommended way is to set ranges in which
+a parser will operate.
+
+@defun treesit-parser-set-included-ranges parser ranges
+This function sets the range of @var{parser} to @var{ranges}. Then
+@var{parser} will only read the text covered by each range. The
+argument @var{ranges} is a list of cons cells of the form
+@code{(@var{beg} . @var{end})}.
+
+Each range in @var{ranges} must come in order and not overlap. That
+is, in pseudo code:
+
+@example
+@group
+(cl-loop for idx from 1 to (1- (length ranges))
+ for prev = (nth (1- idx) ranges)
+ for next = (nth idx ranges)
+ should (<= (car prev) (cdr prev)
+ (car next) (cdr next)))
+@end group
+@end example
+
+@vindex treesit-range-invalid
+If @var{ranges} violates this constraint, or something else went
+wrong, this function signals a @code{treesit-range-invalid}. The
+signal data contains a specific error message and the ranges we are
+trying to set.
+
+This function can also be used for disabling ranges. If @var{ranges}
+is nil, the parser is set to parse the whole buffer.
+
+Example:
+
+@example
+@group
+(treesit-parser-set-included-ranges
+ parser '((1 . 9) (16 . 24) (24 . 25)))
+@end group
+@end example
+@end defun
+
+@defun treesit-parser-included-ranges parser
+This function returns the ranges set for @var{parser}. The return
+value is the same as the @var{ranges} argument of
+@code{treesit-parser-set-included-ranges}: a list of cons cells
+@code{(@var{beg} . @var{end})}. And if @var{parser} doesn't have any
+ranges, the return value is nil.
+
+@example
+@group
+(treesit-parser-included-ranges parser)
+ @result{} ((1 . 9) (16 . 24) (24 . 25))
+@end group
+@end example
+@end defun
+
+@defun treesit-set-ranges parser-or-lang ranges
+Like @code{treesit-parser-set-included-ranges}, this function sets
+the ranges of @var{parser-or-lang} to @var{ranges}. Conveniently,
+@var{parser-or-lang} could be either a parser or a language. If it is
+a language, this function looks for the first parser in
+@code{(treesit-parser-list)} for that language in the current buffer,
+and sets the ranges for it.
+@end defun
+
+@defun treesit-get-ranges parser-or-lang
+This function returns the ranges of @var{parser-or-lang}, like
+@code{treesit-parser-included-ranges}. And like
+@code{treesit-set-ranges}, @var{parser-or-lang} can be a parser or
+a language symbol.
+@end defun
+
+@defun treesit-query-range source query &optional beg end
+This function matches @var{source} with @var{query} and returns the
+ranges of captured nodes. The return value has the same shape as that
+of the other range functions: a list of @code{(@var{beg} . @var{end})}.
+
+For convenience, @var{source} can be a language symbol, a parser, or a
+node. If a language symbol, this function matches in the root node of
+the first parser using that language; if a parser, this function
+matches in the root node of that parser; if a node, this function
+matches in that node.
+
+Parameter @var{query} is the query used to capture nodes
+(@pxref{Pattern Matching}). The capture names don't matter. Parameters
+@var{beg} and @var{end}, if both non-nil, limit the range in which
+this function queries.
+
+Like other query functions, this function raises a
+@code{treesit-query-error} if @var{query} is malformed.
+@end defun
+
+@defun treesit-language-at point
+This function tries to figure out which language is responsible for
+the text at @var{point}. It goes over each parser in
+@code{(treesit-parser-list)} and sees if that parser's range covers
+@var{point}.
+@end defun
+
+@defvar treesit-range-functions
+A list of range functions. Font-locking and indenting code uses
+functions in this list to set correct ranges for a language parser
+before using it.
+
+The signature of each function should be
+
+@example
+(@var{start} @var{end} &rest @var{_})
+@end example
+
+where @var{start} and @var{end} mark the region that is about to be
+used. A range function only needs to (but is not limited to) update
+ranges in that region.
+
+Each function in the list is called in-order.
+@end defvar
+
+@defun treesit-update-ranges &optional start end
+This function is used by font-lock and indent to update ranges before
+using any parser. Each range function in
+@code{treesit-range-functions} is called in-order. Arguments
+@var{start} and @var{end} are passed to each range function.
+@end defun
+
+@heading An example
+
+Normally, in a set of languages that can be mixed together, there is a
+major language and several embedded languages. We first parse the
+whole document with the major language's parser, set ranges for the
+embedded languages, then parse the embedded languages.
+
+Suppose we want to parse a very simple document that mixes HTML, CSS
+and JavaScript:
+
+@example
+@group
+<html>
+ <script>1 + 2</script>
+ <style>body @{ color: "blue"; @}</style>
+</html>
+@end group
+@end example
+
+We first parse with HTML, then set ranges for CSS and JavaScript:
+
+@example
+@group
+;; Create parsers.
+(setq html (treesit-get-parser-create 'html))
+(setq css (treesit-get-parser-create 'css))
+(setq js (treesit-get-parser-create 'javascript))
+
+;; Set CSS ranges.
+(setq css-range
+ (treesit-query-range
+ 'html
+ "(style_element (raw_text) @@capture)"))
+(treesit-parser-set-included-ranges css css-range)
+
+;; Set JavaScript ranges.
+(setq js-range
+ (treesit-query-range
+ 'html
+ "(script_element (raw_text) @@capture)"))
+(treesit-parser-set-included-ranges js js-range)
+@end group
+@end example
+
+We use a query pattern @code{(style_element (raw_text) @@capture)} to
+find CSS nodes in the HTML parse tree. For how to write query
+patterns, @pxref{Pattern Matching}.
+
+@node Tree-sitter C API
+@section Tree-sitter C API Correspondence
+
+Emacs' tree-sitter integration doesn't expose every feature
+tree-sitter's C API provides. Missing features include:
+
+@itemize
+@item
+Creating a tree cursor and navigating the syntax tree with it.
+@item
+Setting timeout and cancellation flag for a parser.
+@item
+Setting the logger for a parser.
+@item
+Printing a DOT graph of the syntax tree to a file.
+@item
+Copying and modifying a syntax tree. (Emacs doesn't expose a tree
+object.)
+@item
+Using (row, column) coordinates as position.
+@item
+Updating a node with changes. (In Emacs, retrieve a new node instead
+of updating the existing one.)
+@item
+Querying statistics of a language definition.
+@end itemize
+
+In addition, Emacs makes some changes to the C API to make the API more
+convenient and idiomatic:
+
+@itemize
+@item
+Instead of using byte positions, the ELisp API uses character
+positions.
+@item
+Null nodes are converted to nil.
+@end itemize
+
+Below is the correspondence between all C API functions and their
+ELisp counterparts. Sometimes one ELisp function corresponds to
+multiple C functions, and many C functions don't have an ELisp
+counterpart.
+
+@example
+ts_parser_new treesit-parser-create
+ts_parser_delete
+ts_parser_set_language
+ts_parser_language treesit-parser-language
+ts_parser_set_included_ranges treesit-parser-set-included-ranges
+ts_parser_included_ranges treesit-parser-included-ranges
+ts_parser_parse
+ts_parser_parse_string treesit-parse-string
+ts_parser_parse_string_encoding
+ts_parser_reset
+ts_parser_set_timeout_micros
+ts_parser_timeout_micros
+ts_parser_set_cancellation_flag
+ts_parser_cancellation_flag
+ts_parser_set_logger
+ts_parser_logger
+ts_parser_print_dot_graphs
+ts_tree_copy
+ts_tree_delete
+ts_tree_root_node
+ts_tree_language
+ts_tree_edit
+ts_tree_get_changed_ranges
+ts_tree_print_dot_graph
+ts_node_type treesit-node-type
+ts_node_symbol
+ts_node_start_byte treesit-node-start
+ts_node_start_point
+ts_node_end_byte treesit-node-end
+ts_node_end_point
+ts_node_string treesit-node-string
+ts_node_is_null
+ts_node_is_named treesit-node-check
+ts_node_is_missing treesit-node-check
+ts_node_is_extra treesit-node-check
+ts_node_has_changes treesit-node-check
+ts_node_has_error treesit-node-check
+ts_node_parent treesit-node-parent
+ts_node_child treesit-node-child
+ts_node_field_name_for_child treesit-node-field-name-for-child
+ts_node_child_count treesit-node-child-count
+ts_node_named_child treesit-node-child
+ts_node_named_child_count treesit-node-child-count
+ts_node_child_by_field_name treesit-node-child-by-field-name
+ts_node_child_by_field_id
+ts_node_next_sibling treesit-node-next-sibling
+ts_node_prev_sibling treesit-node-prev-sibling
+ts_node_next_named_sibling treesit-node-next-sibling
+ts_node_prev_named_sibling treesit-node-prev-sibling
+ts_node_first_child_for_byte treesit-node-first-child-for-pos
+ts_node_first_named_child_for_byte treesit-node-first-child-for-pos
+ts_node_descendant_for_byte_range treesit-node-descendant-for-range
+ts_node_descendant_for_point_range
+ts_node_named_descendant_for_byte_range treesit-node-descendant-for-range
+ts_node_named_descendant_for_point_range
+ts_node_edit
+ts_node_eq treesit-node-eq
+ts_tree_cursor_new
+ts_tree_cursor_delete
+ts_tree_cursor_reset
+ts_tree_cursor_current_node
+ts_tree_cursor_current_field_name
+ts_tree_cursor_current_field_id
+ts_tree_cursor_goto_parent
+ts_tree_cursor_goto_next_sibling
+ts_tree_cursor_goto_first_child
+ts_tree_cursor_goto_first_child_for_byte
+ts_tree_cursor_goto_first_child_for_point
+ts_tree_cursor_copy
+ts_query_new
+ts_query_delete
+ts_query_pattern_count
+ts_query_capture_count
+ts_query_string_count
+ts_query_start_byte_for_pattern
+ts_query_predicates_for_pattern
+ts_query_step_is_definite
+ts_query_capture_name_for_id
+ts_query_string_value_for_id
+ts_query_disable_capture
+ts_query_disable_pattern
+ts_query_cursor_new
+ts_query_cursor_delete
+ts_query_cursor_exec treesit-query-capture
+ts_query_cursor_did_exceed_match_limit
+ts_query_cursor_match_limit
+ts_query_cursor_set_match_limit
+ts_query_cursor_set_byte_range
+ts_query_cursor_set_point_range
+ts_query_cursor_next_match
+ts_query_cursor_remove_match
+ts_query_cursor_next_capture
+ts_language_symbol_count
+ts_language_symbol_name
+ts_language_symbol_for_name
+ts_language_field_count
+ts_language_field_name_for_id
+ts_language_field_id_for_name
+ts_language_symbol_type
+ts_language_version
+@end example
diff --git a/lisp/emacs-lisp/cl-preloaded.el b/lisp/emacs-lisp/cl-preloaded.el
index 94f9654b239..dbe20f92028 100644
--- a/lisp/emacs-lisp/cl-preloaded.el
+++ b/lisp/emacs-lisp/cl-preloaded.el
@@ -78,6 +78,9 @@
(font-spec atom) (font-entity atom) (font-object atom)
(vector array sequence atom)
(user-ptr atom)
+ (tree-sitter-parser atom)
+ (tree-sitter-node atom)
+ (tree-sitter-compiled-query atom)
;; Plus, really hand made:
(null symbol list sequence atom))
"Alist of supertypes.
diff --git a/lisp/progmodes/python.el b/lisp/progmodes/python.el
index 80c5b31b6ea..801432cd188 100644
--- a/lisp/progmodes/python.el
+++ b/lisp/progmodes/python.el
@@ -261,10 +261,12 @@
(require 'ansi-color)
(require 'cl-lib)
(require 'comint)
+(eval-when-compile (require 'subr-x)) ;For `string-empty-p' and `string-join'.
+(require 'treesit)
+(require 'pcase)
(require 'compat nil 'noerror)
(require 'project nil 'noerror)
(require 'seq)
-(eval-when-compile (require 'subr-x)) ;For `string-empty-p'.
;; Avoid compiler warnings
(defvar compilation-error-regexp-alist)
@@ -284,6 +286,12 @@
:version "24.3"
:link '(emacs-commentary-link "python"))
+(defcustom python-use-tree-sitter nil
+  "If non-nil, `python-mode' tries to use tree-sitter.
+When tree-sitter can be enabled in the buffer (see
+`treesit-can-enable-p'), `python-mode' uses it for font-locking,
+Imenu, and `which-func' support; otherwise the regexp-based
+implementations are used.  The value is consulted when
+`python-mode' is turned on."
+  :version "29.1"
+  :type 'boolean)
+
(defcustom python-interpreter "python"
"Python interpreter for noninteractive use.
To customize the Python shell, modify `python-shell-interpreter'
@@ -291,6 +299,7 @@ instead."
:version "29.1"
:type 'string)
+
;;; Bindings
@@ -941,6 +950,147 @@ is used to limit the scan."
"Dotty syntax table for Python files.
It makes underscores and dots word constituent chars.")
+;;; Tree-sitter font-lock
+
+;; NOTE: Tree-sitter and font-lock work differently, so this can't
+;; be merged with `python-font-lock-keywords-level-2'.
+
+;; Keyword strings.  They are spliced into the bracketed alternation
+;; in `python--treesit-settings' and fontified with
+;; `font-lock-keyword-face'.
+(defvar python--treesit-keywords
+ '("as" "assert" "async" "await" "break" "class" "continue" "def"
+ "del" "elif" "else" "except" "exec" "finally" "for" "from"
+ "global" "if" "import" "lambda" "nonlocal" "pass" "print"
+ "raise" "return" "try" "while" "with" "yield"
+ ;; These are technically operators, but we fontify them as
+ ;; keywords.
+ "and" "in" "is" "not" "or"))
+
+;; Builtin names.  `python--treesit-settings' turns this list,
+;; together with `python--treesit-special-attributes', into an
+;; anchored regexp that is matched against identifiers, which are
+;; then fontified with `font-lock-builtin-face'.
+(defvar python--treesit-builtins
+ '("abs" "all" "any" "ascii" "bin" "bool" "breakpoint" "bytearray"
+ "bytes" "callable" "chr" "classmethod" "compile" "complex"
+ "delattr" "dict" "dir" "divmod" "enumerate" "eval" "exec"
+ "filter" "float" "format" "frozenset" "getattr" "globals"
+ "hasattr" "hash" "help" "hex" "id" "input" "int" "isinstance"
+ "issubclass" "iter" "len" "list" "locals" "map" "max"
+ "memoryview" "min" "next" "object" "oct" "open" "ord" "pow"
+ "print" "property" "range" "repr" "reversed" "round" "set"
+ "setattr" "slice" "sorted" "staticmethod" "str" "sum" "super"
+ "tuple" "type" "vars" "zip" "__import__"))
+
+;; NOTE(review): `python--treesit-settings' does not reference this
+;; list; it fontifies constants through the (true), (false) and
+;; (none) nodes instead.  Confirm whether this list is still needed.
+(defvar python--treesit-constants
+ '("Ellipsis" "False" "None" "NotImplemented" "True" "__debug__"
+ "copyright" "credits" "exit" "license" "quit"))
+
+(defvar python--treesit-operators
+ ;; This is not used. And and, or, not, is, in are fontified as
+ ;; keywords.
+ '("-" "-=" "!=" "*" "**" "**=" "*=" "/" "//" "//=" "/=" "&" "%" "%="
+ "^" "+" "+=" "<" "<<" "<=" "<>" "=" "==" ">" ">=" ">>" "|" "~"
+ "and" "in" "is" "not" "or"))
+
+;; Special attribute names; matched against identifiers together with
+;; `python--treesit-builtins' (see `python--treesit-settings').
+(defvar python--treesit-special-attributes
+ '("__annotations__" "__closure__" "__code__"
+ "__defaults__" "__dict__" "__doc__" "__globals__"
+ "__kwdefaults__" "__name__" "__module__" "__package__"
+ "__qualname__" "__all__"))
+
+;; Exception class names.  `python--treesit-settings' matches them
+;; against identifiers and fontifies the matches with
+;; `font-lock-type-face'.
+(defvar python--treesit-exceptions
+ '(;; Python 2 and 3:
+ "ArithmeticError" "AssertionError" "AttributeError" "BaseException"
+ "BufferError" "BytesWarning" "DeprecationWarning" "EOFError"
+ "EnvironmentError" "Exception" "FloatingPointError" "FutureWarning"
+ "GeneratorExit" "IOError" "ImportError" "ImportWarning"
+ "IndentationError" "IndexError" "KeyError" "KeyboardInterrupt"
+ "LookupError" "MemoryError" "NameError" "NotImplementedError"
+ "OSError" "OverflowError" "PendingDeprecationWarning"
+ "ReferenceError" "RuntimeError" "RuntimeWarning" "StopIteration"
+ "SyntaxError" "SyntaxWarning" "SystemError" "SystemExit" "TabError"
+ "TypeError" "UnboundLocalError" "UnicodeDecodeError"
+ "UnicodeEncodeError" "UnicodeError" "UnicodeTranslateError"
+ "UnicodeWarning" "UserWarning" "ValueError" "Warning"
+ "ZeroDivisionError"
+ ;; Python 2:
+ "StandardError"
+ ;; Python 3:
+ "BlockingIOError" "BrokenPipeError" "ChildProcessError"
+ "ConnectionAbortedError" "ConnectionError" "ConnectionRefusedError"
+ "ConnectionResetError" "FileExistsError" "FileNotFoundError"
+ "InterruptedError" "IsADirectoryError" "NotADirectoryError"
+ "PermissionError" "ProcessLookupError" "RecursionError"
+ "ResourceWarning" "StopAsyncIteration" "TimeoutError"
+ ;; OS specific
+ "VMSError" "WindowsError"
+ ))
+
+(defun python--treesit-fontify-string (beg end _)
+  "Fontify the string between BEG and END with `font-lock-string-face'.
+If the text at BEG is an f-string prefix character (\"f\" or
+\"F\"), leave that character unfontified.  The third argument
+is ignored; it is only there because capture functions are
+called with three arguments."
+  (let ((beg (if (memq (char-after beg) '(?f ?F))
+                 (1+ beg)
+               beg)))
+    (put-text-property beg end 'face 'font-lock-string-face)))
+
+;; Capture names below are either faces, which are applied to the
+;; captured node, or functions, which are called with the node's
+;; range (see `treesit-font-lock-rules').
+(defvar python--treesit-settings
+ (treesit-font-lock-rules
+ :language 'python
+ ;; With `:override t' each captured face is applied unconditionally,
+ ;; replacing whatever face the text already has.
+ :override t
+ `(;; Queries for def and class.
+ (function_definition
+ name: (identifier) @font-lock-function-name-face)
+
+ (class_definition
+ name: (identifier) @font-lock-type-face)
+
+ ;; Comment and string.
+ (comment) @font-lock-comment-face
+
+ ;; This capture name is a function, not a face.
+ (string) @python--treesit-fontify-string
+ ;; Strings that start with three double quotes get the doc face.
+ ((string) @font-lock-doc-face
+ (:match "^\"\"\"" @font-lock-doc-face))
+ (interpolation (identifier) @font-lock-variable-name-face)
+
+ ;; Keywords, builtins, and constants.
+ [,@python--treesit-keywords] @font-lock-keyword-face
+
+ ((identifier) @font-lock-keyword-face
+ (:match "^self$" @font-lock-keyword-face))
+
+ ;; The regexp is built when this form is evaluated, from
+ ;; `python--treesit-builtins' and
+ ;; `python--treesit-special-attributes'.
+ ((identifier) @font-lock-builtin-face
+ (:match ,(rx-to-string
+ `(seq bol
+ (or ,@python--treesit-builtins
+ ,@python--treesit-special-attributes)
+ eol))
+ @font-lock-builtin-face))
+
+ [(true) (false) (none)] @font-lock-constant-face
+
+ ;; Escape sequences
+ (escape_sequence) @font-lock-constant-face
+
+ ;; Variable names.
+ (assignment left: (identifier)
+ @font-lock-variable-name-face)
+ (assignment left: (attribute
+ attribute: (identifier)
+ @font-lock-variable-name-face))
+ (pattern_list (identifier)
+ @font-lock-variable-name-face)
+ (tuple_pattern (identifier)
+ @font-lock-variable-name-face)
+ (list_pattern (identifier)
+ @font-lock-variable-name-face)
+ (list_splat_pattern (identifier)
+ @font-lock-variable-name-face)
+
+ ;; Types and decorators.
+ (decorator) @font-lock-type-face
+ ;; Identifiers that name one of `python--treesit-exceptions'.
+ ((identifier) @font-lock-type-face
+ (:match ,(rx-to-string
+ `(seq bol (or ,@python--treesit-exceptions)
+ eol))
+ @font-lock-type-face))
+ (type (identifier) @font-lock-type-face)))
+ "Tree-sitter font-lock settings.")
+
;;; Indentation
@@ -5171,6 +5321,91 @@ To this:
(python-imenu-format-parent-item-jump-label-function fn))
(python-imenu-create-index))))))
+;;; Tree-sitter imenu
+
+(defun python--imenu-treesit-create-index-1 (node)
+  "Convert the sparse tree NODE into an imenu alist.
+
+NODE is a (sub)tree as returned by `treesit-induce-sparse-tree':
+its car is a tree-sitter node (or nil, which can happen for the
+root of the tree) and its cdr is the list of child subtrees.
+
+The return value is a list of ENTRYs, where
+
+ENTRY := (NAME . MARKER)
+       | (NAME . ((JUMP-LABEL . MARKER)
+                  ENTRY
+                  ...))
+
+NAME is the name of the function or class, and JUMP-LABEL is
+something like \"*function definition*\"."
+  (let ((ts-node (car node))
+        (entries (mapcan #'python--imenu-treesit-create-index-1
+                         (cdr node))))
+    (if (null ts-node)
+        ;; A nil node has no definition of its own; hand the entries
+        ;; collected from below straight to the caller.
+        entries
+      (let ((kind (pcase (treesit-node-type ts-node)
+                    ("function_definition" 'def)
+                    ("class_definition" 'class)))
+            (name (treesit-node-text
+                   (treesit-node-child-by-field-name ts-node "name")
+                   t))
+            (pos (set-marker (make-marker)
+                             (treesit-node-start ts-node))))
+        (if entries
+            ;; A definition containing other definitions: emit a
+            ;; parent item whose first child jumps to the definition
+            ;; itself.
+            `((,(funcall python-imenu-format-parent-item-label-function
+                         kind name)
+               ,(cons (funcall
+                       python-imenu-format-parent-item-jump-label-function
+                       kind name)
+                      pos)
+               ,@entries))
+          ;; A leaf definition.
+          (list (cons (funcall python-imenu-format-item-label-function
+                               kind name)
+                      pos)))))))
+
+(defun python-imenu-treesit-create-index (&optional node)
+  "Build a nested Imenu alist for the current Python buffer.
+
+This is the tree-sitter counterpart of `python-imenu-create-index'.
+NODE, if non-nil, is the root node of the subtree to index;
+otherwise the root node of the buffer's Python parse tree is
+used.
+
+Labels are produced by `python-imenu-format-item-label-function',
+`python-imenu-format-parent-item-label-function' and
+`python-imenu-format-parent-item-jump-label-function'; change
+those to customize how labels are formatted."
+  (let ((root (or node (treesit-buffer-root-node 'python)))
+        (definition-regexp (rx (seq bol
+                                    (or "function" "class")
+                                    "_definition"
+                                    eol))))
+    (python--imenu-treesit-create-index-1
+     (treesit-induce-sparse-tree root definition-regexp))))
+
+(defun python-imenu-treesit-create-flat-index ()
+  "Return a flat Imenu outline of the current Python buffer.
+
+This is the tree-sitter counterpart of
+`python-imenu-create-flat-index': the nested index built by
+`python-imenu-treesit-create-index' is passed to that function.
+
+Change `python-imenu-format-item-label-function',
+`python-imenu-format-parent-item-label-function',
+`python-imenu-format-parent-item-jump-label-function' to
+customize how labels are formatted."
+  (let ((nested-index (python-imenu-treesit-create-index)))
+    (python-imenu-create-flat-index nested-index)))
;;; Misc helpers
@@ -5236,6 +5471,29 @@ since it returns nil if point is not inside a defun."
(concat (and type (format "%s " type))
(mapconcat #'identity names ".")))))))
+(defun python-info-treesit-current-defun (&optional include-type)
+  "Like `python-info-current-defun', but use tree-sitter.
+Return the dotted name of the definition around point, such as
+\"Class.method\", or nil if point is not inside a function or
+class definition.  If INCLUDE-TYPE is non-nil, prefix the name
+with the type (\"def\" or \"class\") of the innermost
+definition; see `python-info-current-defun'."
+  (let ((node (treesit-node-at (point)))
+        (name-list ())
+        (type nil))
+    ;; Walk from the node at point up to the root and collect the name
+    ;; of every enclosing definition.  Because of `push', the
+    ;; outermost name ends up first.
+    (while node
+      (let ((node-type (pcase (treesit-node-type node)
+                         ("function_definition" 'def)
+                         ("class_definition" 'class))))
+        (when node-type
+          ;; The first definition met is the innermost one; that is
+          ;; the one whose type is reported, so don't let enclosing
+          ;; definitions overwrite it.
+          (unless type
+            (setq type node-type))
+          (push (treesit-node-text
+                 (treesit-node-child-by-field-name node "name")
+                 t)
+                name-list)))
+      (setq node (treesit-node-parent node)))
+    ;; Return nil, not an empty string, when there is no enclosing
+    ;; definition.
+    (when name-list
+      (concat (if include-type
+                  (format "%s " type)
+                "")
+              (string-join name-list ".")))))
+
(defun python-info-current-symbol (&optional replace-self)
"Return current symbol using dotty syntax.
With optional argument REPLACE-SELF convert \"self\" to current
@@ -6135,13 +6393,20 @@ Add import for undefined name `%s' (empty to skip): "
(setq-local forward-sexp-function python-forward-sexp-function)
- (setq-local font-lock-defaults
+ (if (and python-use-tree-sitter
+ (treesit-can-enable-p))
+ (progn
+ (setq-local font-lock-defaults '(nil t))
+ (setq-local treesit-font-lock-settings
+ python--treesit-settings)
+ (treesit-font-lock-enable))
+ (setq-local font-lock-defaults
`(,python-font-lock-keywords
nil nil nil nil
(font-lock-syntactic-face-function
. python-font-lock-syntactic-face-function)
(font-lock-extend-after-change-region-function
- . python-font-lock-extend-region)))
+ . python-font-lock-extend-region))))
(setq-local syntax-propertize-function
python-syntax-propertize-function)
@@ -6170,13 +6435,21 @@ Add import for undefined name `%s' (empty to skip): "
(add-hook 'post-self-insert-hook
#'python-indent-post-self-insert-function 'append 'local)
- (setq-local imenu-create-index-function
- #'python-imenu-create-index)
+ (if (and python-use-tree-sitter
+ (treesit-can-enable-p))
+ (setq-local imenu-create-index-function
+ #'python-imenu-treesit-create-index)
+ (setq-local imenu-create-index-function
+ #'python-imenu-create-index))
(setq-local add-log-current-defun-function
#'python-info-current-defun)
- (add-hook 'which-func-functions #'python-info-current-defun nil t)
+ (if (and python-use-tree-sitter
+ (treesit-can-enable-p))
+ (add-hook 'which-func-functions
+ #'python-info-treesit-current-defun nil t)
+ (add-hook 'which-func-functions #'python-info-current-defun nil t))
(setq-local skeleton-further-elements
'((abbrev-mode nil)
diff --git a/lisp/treesit.el b/lisp/treesit.el
new file mode 100644
index 00000000000..bb13021a274
--- /dev/null
+++ b/lisp/treesit.el
@@ -0,0 +1,935 @@
+;;; treesit.el --- tree-sitter utilities -*- lexical-binding: t -*-
+
+;; Copyright (C) 2021 Free Software Foundation, Inc.
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+;;
+;; Note to self: we don't create parsers automatically in any provided
+;; functions if we don't know what language to use.
+
+;;; Code:
+
+(eval-when-compile (require 'cl-lib))
+(eval-when-compile (require 'subr-x)) ; For `string-join'.
+(require 'cl-seq)
+(require 'font-lock)
+
+;;; Activating tree-sitter
+
+(defgroup treesit
+  nil
+  "Tree-sitter is an incremental parser."
+  :group 'tools
+  :version "29.1")
+
+(defcustom treesit-max-buffer-size (* 4 1024 1024)
+  "Maximum buffer size for enabling tree-sitter parsing.
+`treesit-can-enable-p' returns nil in a buffer whose size, as
+returned by `buffer-size', is not smaller than this value."
+  :type 'integer
+  :group 'treesit
+  :version "29.1")
+
+(defun treesit-available-p ()
+  "Return non-nil if tree-sitter features are available.
+The test is whether `treesit-parser-create' is defined as a
+function."
+  (if (fboundp 'treesit-parser-create) t nil))
+
+(defun treesit-can-enable-p ()
+  "Return non-nil if tree-sitter can be activated in the current buffer.
+This requires tree-sitter to be available (see
+`treesit-available-p') and the buffer to be smaller than
+`treesit-max-buffer-size'."
+  (when (treesit-available-p)
+    (> treesit-max-buffer-size (buffer-size))))
+
+;;; Parser API supplement
+
+(defun treesit-parse-string (string language)
+  "Parse STRING as LANGUAGE source and return the root node.
+STRING is inserted into a temporary buffer, a parser for LANGUAGE
+is created there, and the root node of its syntax tree is
+returned."
+  (with-temp-buffer
+    (insert string)
+    (let ((parser (treesit-parser-create language)))
+      (treesit-parser-root-node parser))))
+
+(defun treesit-language-at (point)
+  "Return the language used at POINT.
+Go through the parsers in (`treesit-parser-list') in order and
+return the language of the first one for which `treesit-node-on'
+finds a node at POINT.  Return nil if there is no such parser."
+  (catch 'found
+    (dolist (parser (treesit-parser-list))
+      (when (treesit-node-on point point parser)
+        (throw 'found (treesit-parser-language parser))))
+    nil))
+
+(defun treesit-set-ranges (parser-or-lang ranges)
+  "Set the ranges of PARSER-OR-LANG to RANGES.
+PARSER-OR-LANG is either a parser, or a language symbol, in which
+case the parser is obtained with `treesit-parser-create'.  Signal
+an error if it is neither, or if no parser can be found."
+  (let ((parser
+         (if (symbolp parser-or-lang)
+             (or (treesit-parser-create parser-or-lang)
+                 (error "Cannot find a parser for %s" parser-or-lang))
+           (if (treesit-parser-p parser-or-lang)
+               parser-or-lang
+             (error "Expecting a parser or language, but got %s"
+                    parser-or-lang)))))
+    (treesit-parser-set-included-ranges parser ranges)))
+
+(defun treesit-get-ranges (parser-or-lang)
+  "Get the ranges of PARSER-OR-LANG.
+PARSER-OR-LANG is either a parser, or a language symbol, in which
+case the parser is obtained with `treesit-parser-create'.  Signal
+an error if it is neither, or if no parser can be found."
+  (let ((parser
+         (if (symbolp parser-or-lang)
+             (or (treesit-parser-create parser-or-lang)
+                 (error "Cannot find a parser for %s" parser-or-lang))
+           (if (treesit-parser-p parser-or-lang)
+               parser-or-lang
+             (error "Expecting a parser or language, but got %s"
+                    parser-or-lang)))))
+    (treesit-parser-included-ranges parser)))
+
+;;; Node API supplement
+
+(defun treesit-node-buffer (node)
+  "Return the buffer that NODE belongs to.
+This is the buffer of NODE's parser."
+  (let ((parser (treesit-node-parser node)))
+    (treesit-parser-buffer parser)))
+
+(defun treesit-node-language (node)
+  "Return the language symbol used by NODE's parser."
+  (let ((parser (treesit-node-parser node)))
+    (treesit-parser-language parser)))
+
+(defun treesit-node-at (point &optional parser-or-lang named)
+  "Return the smallest node that starts at or after POINT.
+
+\"Starts at or after POINT\" means the start of the node is
+greater than or equal to POINT.  Return nil if none is found.  If
+NAMED is non-nil, only look for named nodes.
+
+If PARSER-OR-LANG is nil, use the first parser in
+(`treesit-parser-list'); if PARSER-OR-LANG is a parser, use
+that parser; if PARSER-OR-LANG is a language, find a parser using
+that language in the current buffer, and use that."
+  (let ((node (if (treesit-parser-p parser-or-lang)
+                  (treesit-parser-root-node parser-or-lang)
+                (treesit-buffer-root-node parser-or-lang))))
+    ;; TODO: We might want a `treesit-node-descendant-for-pos' in C.
+    ;; Walk down from the root: while the current node ends before
+    ;; POINT, move on to its next sibling; otherwise descend into its
+    ;; first child.  Stop when neither step is possible.
+    (while (cond ((and node (< (treesit-node-end node) point))
+                  ;; Pass NAMED along, so that we never step onto an
+                  ;; anonymous sibling when only named nodes are
+                  ;; wanted.
+                  (setq node (treesit-node-next-sibling node named))
+                  t)
+                 ((treesit-node-child node 0 named)
+                  (setq node (treesit-node-child node 0 named))
+                  t)))
+    node))
+
+(defun treesit-node-on (beg end &optional parser-or-lang named)
+  "Return the smallest node covering BEG to END.
+
+BEWARE!  Calling this function on an empty line that is not
+inside any top-level construct (function definition, etc.) will
+most probably give you the root node, because the root node is
+the smallest node that covers that empty line.  You probably want
+to use `treesit-node-at' instead.
+
+Return nil if none is found.  If NAMED is non-nil, only look for
+named nodes.
+
+If PARSER-OR-LANG is nil, use the first parser in
+(`treesit-parser-list'); if PARSER-OR-LANG is a parser, use
+that parser; if PARSER-OR-LANG is a language, find a parser using
+that language in the current buffer, and use that."
+  (treesit-node-descendant-for-range
+   ;; The root node to search from.
+   (if (treesit-parser-p parser-or-lang)
+       (treesit-parser-root-node parser-or-lang)
+     (treesit-buffer-root-node parser-or-lang))
+   beg (or end beg) named))
+
+(defun treesit-buffer-root-node (&optional language)
+  "Return the root node of the current buffer.
+If LANGUAGE is non-nil, use the parser that
+`treesit-parser-create' returns for LANGUAGE; otherwise use the
+first parser in (`treesit-parser-list').  Signal an error if no
+parser is found."
+  (let ((parser
+         (cond (language
+                (or (treesit-parser-create language)
+                    (error "Cannot find a parser for %s" language)))
+               ((car (treesit-parser-list)))
+               (t (error "Buffer has no parser")))))
+    (treesit-parser-root-node parser)))
+
+(defun treesit-filter-child (node pred &optional named)
+  "Return the children of NODE that satisfy PRED, in order.
+PRED is a function that takes one argument, the child node.  If
+NAMED is non-nil, only consider named children."
+  (let ((kept nil)
+        (child (treesit-node-child node 0 named)))
+    (while child
+      (if (funcall pred child)
+          (setq kept (cons child kept)))
+      (setq child (treesit-node-next-sibling child named)))
+    ;; KEPT was built back to front.
+    (nreverse kept)))
+
+(defun treesit-node-text (node &optional no-property)
+  "Return the buffer (or string) content corresponding to NODE.
+The text is taken from NODE's buffer, between NODE's start and
+end.  If NO-PROPERTY is non-nil, remove text properties.  Return
+nil if NODE is nil."
+  (when node
+    (let ((extract (if no-property
+                       #'buffer-substring-no-properties
+                     #'buffer-substring)))
+      (with-current-buffer (treesit-node-buffer node)
+        (funcall extract
+                 (treesit-node-start node)
+                 (treesit-node-end node))))))
+
+(defun treesit-parent-until (node pred)
+  "Return the closest parent of NODE that satisfies PRED.
+NODE itself is not examined.  Return nil if no parent satisfies
+PRED.  PRED should be a function that takes one argument, the
+parent node."
+  (let ((parent (treesit-node-parent node)))
+    (while (not (or (null parent) (funcall pred parent)))
+      (setq parent (treesit-node-parent parent)))
+    parent))
+
+(defun treesit-parent-while (node pred)
+ "Return the furthest node, starting from NODE, that satisfies PRED.
+Examine NODE itself first, then its parent, then the parent of
+that parent, and so on, for as long as PRED returns non-nil.
+Return the last node that satisfied PRED, or nil if NODE itself
+does not satisfy PRED or is nil. PRED should be a function that
+takes one argument, the node being examined."
+ (let ((last nil))
+ (while (and node (funcall pred node))
+ (setq last node
+ node (treesit-node-parent node)))
+ last))
+
+;; Alternative name for `treesit-parent-until'.
+(defalias 'treesit-traverse-parent #'treesit-parent-until)
+
+(defun treesit-node-children (node &optional named)
+  "Return a list of NODE's children, in order.
+If NAMED is non-nil, collect only named children."
+  (let ((children nil))
+    (dotimes (idx (treesit-node-child-count node named))
+      (push (treesit-node-child node idx named) children))
+    (nreverse children)))
+
+(defun treesit-node-index (node &optional named)
+  "Return the index of NODE in its parent.
+The index is the number of siblings that precede NODE.  If NAMED
+is non-nil, count only named siblings."
+  (let ((index 0)
+        (sibling (treesit-node-prev-sibling node named)))
+    (while sibling
+      (setq index (1+ index)
+            sibling (treesit-node-prev-sibling sibling named)))
+    index))
+
+(defun treesit-node-field-name (node)
+  "Return the field name of NODE as a child of its parent.
+Return nil if NODE has no parent."
+  (let ((parent (treesit-node-parent node)))
+    (when parent
+      (treesit-node-field-name-for-child
+       parent (treesit-node-index node)))))
+
+;;; Query API supplement
+
+(defun treesit-query-string (string query language)
+  "Query STRING with QUERY in LANGUAGE.
+STRING is parsed in a temporary buffer with a parser for
+LANGUAGE, and the root node is queried.  See
+`treesit-query-capture' for QUERY and for the return value."
+  (with-temp-buffer
+    (insert string)
+    (treesit-query-capture
+     (treesit-parser-root-node (treesit-parser-create language))
+     query)))
+
+(defun treesit-query-range (source query &optional beg end)
+  "Query SOURCE with QUERY and return the ranges of captured nodes.
+
+SOURCE, QUERY, BEG and END are passed on to
+`treesit-query-capture', which see.  This function returns a
+list of (START . END), where START and END specify the range of
+each captured node.  Capture names don't matter."
+  (mapcar (lambda (capture)
+            (let ((node (cdr capture)))
+              (cons (treesit-node-start node)
+                    (treesit-node-end node))))
+          (treesit-query-capture source query beg end)))
+
+;;; Range API supplement
+
+(defvar-local treesit-range-functions nil
+ "A list of range functions.
+Font-locking and indenting code uses the functions in this list
+to set correct ranges for a language parser before using it.
+
+The signature of each function should be
+
+ (start end &rest _)
+
+where START and END mark the region that is about to be used. A
+range function only needs to (but is not limited to) update
+ranges in that region.
+
+The functions in the list are called in order.")
+
+(defun treesit-update-ranges (&optional start end)
+  "Update the ranges for each language in the current buffer.
+Call each function in `treesit-range-functions', in order, with
+START and END as arguments.  START defaults to `point-min' and
+END to `point-max'."
+  (let ((functions treesit-range-functions))
+    (while functions
+      (funcall (car functions)
+               (or start (point-min))
+               (or end (point-max)))
+      (setq functions (cdr functions)))))
+
+;;; Font-lock
+
+;; Signaled by `treesit-font-lock-rules' for malformed arguments and
+;; by `treesit-font-lock-fontify-region' for an unrecognized
+;; :override value.
+(define-error 'treesit-font-lock-error
+ "Generic tree-sitter font-lock error"
+ 'treesit-error)
+
+(defvar-local treesit-font-lock-settings nil
+ "A list of SETTINGs for treesit-based fontification.
+
+The exact format of this variable is considered internal. One
+should always use `treesit-font-lock-rules' to set this variable.
+
+Each SETTING is of form
+
+ (LANGUAGE QUERY OVERRIDE)
+
+Each SETTING controls one parser (often of different language).
+LANGUAGE is the language symbol. See Info node `(elisp)Language
+Definitions'.
+
+QUERY is either a string query, a sexp query, or a compiled
+query. See Info node `(elisp)Pattern Matching' for how to write
+a query in either string or s-expression form. When used
+repeatedly, a compiled query is much faster than a string or sexp
+one, so it is recommended to compile your queries if they will be
+used over and over.
+
+OVERRIDE is the override flag for this query. Its value can be
+t, nil, append, prepend, keep. See more in
+`treesit-font-lock-rules'.")
+
+(defun treesit-font-lock-rules (&rest args)
+  "Return a value suitable for `treesit-font-lock-settings'.
+
+Take a series of QUERIES in either string, s-expression or
+compiled form.  Same as in `treesit-font-lock-settings', for each
+query, captured nodes are highlighted with the capture name as
+its face.
+
+Before each QUERY there could be :KEYWORD VALUE pairs that
+configure the query (and only that query).  For example,
+
+    (treesit-font-lock-rules
+     :language \\='javascript
+     :override t
+     \\='((true) @font-lock-constant-face
+       (false) @font-lock-constant-face)
+     :language \\='html
+     \"(script_element) @font-lock-builtin-face\")
+
+For each QUERY, a :language keyword is required.  Other keywords
+include:
+
+  KEYWORD    VALUE    DESCRIPTION
+  :override  nil      If the region already has a face,
+                      discard the new face
+             t        Always apply the new face
+             append   Append the new face to existing ones
+             prepend  Prepend the new face to existing ones
+             keep     Fill-in regions without an existing face
+
+Capture names in QUERY should be face names like
+`font-lock-keyword-face'.  The captured node will be fontified
+with that face.  Capture names can also be function names, in
+which case the function is called with (START END NODE), where
+START and END are the start and end position of the node in
+buffer, and NODE is the tree-sitter node object.  If a capture
+name is both a face and a function, the face takes priority.  If
+a capture name is not a face name nor a function name, it is
+ignored.
+
+Signal `wrong-type-argument' if the value of :language is not a
+non-nil symbol or the value of :override is not one of the values
+listed above.  Signal `treesit-font-lock-error' if a query is not
+preceded by a :language keyword, or if an argument is neither a
+recognized keyword nor a query.
+
+\(fn :KEYWORD VALUE QUERY...)"
+  (let (;; Tracks the current language that following queries will
+        ;; apply to.
+        (current-language nil)
+        ;; Tracks :override flag.
+        (current-override nil)
+        ;; The list this function returns.
+        (result nil))
+    (while args
+      (let ((token (pop args)))
+        (pcase token
+          (:language
+           (let ((lang (pop args)))
+             (when (or (not (symbolp lang)) (null lang))
+               (signal 'wrong-type-argument `(symbolp ,lang)))
+             (setq current-language lang)))
+          (:override
+           (let ((flag (pop args)))
+             (when (not (memq flag '(t nil append prepend keep)))
+               (signal 'wrong-type-argument
+                       `((or t nil append prepend keep)
+                         ,flag)))
+             (setq current-override flag)))
+          ((pred treesit-query-p)
+           (when (null current-language)
+             (signal 'treesit-font-lock-error
+                     `("Language unspecified, use :language keyword to specify a language for this query" ,token)))
+           ;; Every setting has the form (LANGUAGE QUERY OVERRIDE).
+           ;; A query that is already compiled is stored as is;
+           ;; anything else is compiled now.  Either way the override
+           ;; flag must be recorded.
+           (push `(,current-language
+                   ,(if (treesit-compiled-query-p token)
+                        token
+                      (treesit-query-compile current-language token))
+                   ,current-override)
+                 result)
+           ;; Clears any configurations set for this query.
+           (setq current-language nil
+                 current-override nil))
+          (_ (signal 'treesit-font-lock-error
+                     `("Unexpected value" ,token))))))
+    (nreverse result)))
+
+(defun treesit-font-lock-fontify-region
+ (start end &optional loudly)
+ "Fontify the region between START and END.
+First update the parser ranges and remove existing fontification
+in the region. Then, for each setting in
+`treesit-font-lock-settings', run its query over the region and
+apply each captured face according to the setting's override
+flag, or call the capture function with the node's start, end
+and the node itself. Finally run
+`font-lock-default-fontify-region' over the same region.
+
+If LOUDLY is non-nil, message some debugging information."
+ (treesit-update-ranges start end)
+ (font-lock-unfontify-region start end)
+ (dolist (setting treesit-font-lock-settings)
+ ;; Each SETTING is (LANGUAGE QUERY OVERRIDE).
+ (let* ((language (nth 0 setting))
+ (match-pattern (nth 1 setting))
+ (override (nth 2 setting))
+ (parser (treesit-parser-create language)))
+ (when-let ((node (treesit-node-on start end parser)))
+ (let ((captures (treesit-query-capture
+ node match-pattern
+ ;; Specifying the range is important. More
+ ;; often than not, NODE will be the root
+ ;; node, and if we don't specify the range,
+ ;; we are basically querying the whole file.
+ start end))
+ (inhibit-point-motion-hooks t))
+ (with-silent-modifications
+ (dolist (capture captures)
+ ;; Each CAPTURE is (NAME . NODE). From here on START and
+ ;; END are the bounds of the captured node, shadowing the
+ ;; function arguments.
+ (let* ((face (car capture))
+ (node (cdr capture))
+ (start (treesit-node-start node))
+ (end (treesit-node-end node)))
+ (cond
+ ((facep face)
+ (pcase override
+ ;; Only apply the face if no part of the node has a
+ ;; face yet.
+ ('nil (unless (text-property-not-all
+ start end 'face nil)
+ (put-text-property start end 'face face)))
+ ('t (put-text-property start end 'face face))
+ ('append (font-lock-append-text-property
+ start end 'face face))
+ ('prepend (font-lock-prepend-text-property
+ start end 'face face))
+ ('keep (font-lock-fillin-text-property
+ start end 'face face))
+ (_ (signal 'treesit-font-lock-error
+ (list
+ "Unrecognized value of :override option"
+ override)))))
+ ((functionp face)
+ (funcall face start end node)))
+ ;; Don't raise an error if FACE is neither a face nor
+ ;; a function. This is to allow intermediate capture
+ ;; names used for #match and #eq.
+ (when loudly
+ (message "Fontifying text from %d to %d, Face: %s Language: %s"
+ start end face language)))))))))
+ ;; Call regexp font-lock after tree-sitter, as it is usually used
+ ;; for custom fontification.
+ ;; Unfontification is disabled for this call, so the faces applied
+ ;; above are not removed again.
+ (let ((font-lock-unfontify-region-function #'ignore))
+ (funcall #'font-lock-default-fontify-region start end loudly)))
+
+(defun treesit-font-lock-enable ()
+  "Enable tree-sitter font-locking for the current buffer.
+Make `treesit-font-lock-fontify-region' the buffer's
+`font-lock-fontify-region-function'."
+  (setq-local font-lock-fontify-region-function
+              #'treesit-font-lock-fontify-region)
+  ;; If we don't set `font-lock-defaults' to some non-nil value,
+  ;; font-lock doesn't enable properly (the font-lock-mode-internal
+  ;; doesn't run).  See `font-lock-add-keywords'.  So if font-lock
+  ;; is on but has neither keywords nor defaults, restart it with a
+  ;; minimal non-nil `font-lock-defaults'.
+  (unless (or (not font-lock-mode)
+              font-lock-keywords
+              font-lock-defaults)
+    (font-lock-mode -1)
+    (setq-local font-lock-defaults '(nil t))
+    (font-lock-mode 1)))
+
+;;; Indent
+
+(defvar treesit--indent-verbose nil
+ "If non-nil, log progress when indenting.")
+
+;; This is not bound locally like we normally do with major-mode
+;; stuff, because for tree-sitter, a buffer could contain more than
+;; one language.
+(defvar treesit-simple-indent-rules nil
+ "A list of indent rule settings.
+Each indent rule setting should be (LANGUAGE . RULES),
+where LANGUAGE is a language symbol, and RULES is a list of
+
+ (MATCHER ANCHOR OFFSET).
+
+MATCHER determines whether this rule applies, ANCHOR and OFFSET
+together determine which column to indent to.
+
+A MATCHER is a function that takes three arguments (NODE PARENT
+BOL). BOL is the point where we are indenting: the beginning of
+line content, the position of the first non-whitespace character.
+NODE is the largest (highest-in-tree) node starting at that
+point. PARENT is the parent of NODE.
+
+If MATCHER returns non-nil, meaning the rule matches, Emacs then
+uses ANCHOR to find an anchor; it should be a function that takes
+the same arguments (NODE PARENT BOL) and returns a point.
+
+Finally Emacs computes the column of that point returned by ANCHOR
+and adds OFFSET to it, and indents to that column.
+
+For MATCHER and ANCHOR, Emacs provides some convenient presets.
+See `treesit-simple-indent-presets'.")
+
+(defvar treesit-simple-indent-presets
+ '((match . (lambda
+ (&optional node-type parent-type node-field
+ node-index-min node-index-max)
+ `(lambda (node parent bol &rest _)
+ (and (or (null ,node-type)
+ (equal (treesit-node-type node)
+ ,node-type))
+ (or (null ,parent-type)
+ (equal (treesit-node-type parent)
+ ,parent-type))
+ (or (null ,node-field)
+ (equal (treesit-node-field-name node)
+ ,node-field))
+ (or (null ,node-index-min)
+ (>= (treesit-node-index node t)
+ ,node-index-min))
+ (or (null ,node-index-max)
+ (<= (treesit-node-index node t)
+ ,node-index-max))))))
+ (no-node . (lambda (node parent bol &rest _) (null node)))
+ (parent-is . (lambda (type)
+ `(lambda (node parent bol &rest _)
+ (equal ,type (treesit-node-type parent)))))
+
+ (node-is . (lambda (type)
+ `(lambda (node parent bol &rest _)
+ (equal ,type (treesit-node-type node)))))
+
+ (query . (lambda (pattern)
+ `(lambda (node parent bol &rest _)
+ (cl-loop for capture
+ in (treesit-query-capture
+ parent ,pattern)
+ if (treesit-node-eq node (cdr capture))
+ return t
+ finally return nil))))
+ (first-sibling . (lambda (node parent bol &rest _)
+ (treesit-node-start
+ (treesit-node-child parent 0 t))))
+
+ (parent . (lambda (node parent bol &rest _)
+ (treesit-node-start parent)))
+ (parent-bol . (lambda (node parent bol &rest _)
+ (save-excursion
+ (goto-char (treesit-node-start parent))
+ (back-to-indentation)
+ (point))))
+ (prev-sibling . (lambda (node parent bol &rest _)
+ (treesit-node-start
+ (treesit-node-prev-sibling node))))
+ (no-indent . (lambda (node parent bol &rest _) bol))
+ (prev-line . (lambda (node parent bol &rest _)
+ (save-excursion
+ (goto-char bol)
+ (forward-line -1)
+ (skip-chars-forward " \t")))))
+ "A list of presets.
+These presets that can be used as MATHER and ANCHOR in
+`treesit-simple-indent-rules'.
+
+MATCHER:
+
+\(match NODE-TYPE PARENT-TYPE NODE-FIELD NODE-INDEX-MIN NODE-INDEX-MAX)
+
+ NODE-TYPE checks for node's type, PARENT-TYPE checks for
+ parent's type, NODE-FIELD checks for the filed name of node
+ in the parent, NODE-INDEX-MIN and NODE-INDEX-MAX checks for
+ the node's index in the parent. Therefore, to match the
+ first child where parent is \"argument_list\", use
+
+ (match nil \"argument_list\" nil nil 0 0).
+
+no-node
+
+ Matches the case where node is nil, i.e., there is no node
+ that starts at point. This is the case when indenting an
+ empty line.
+
+\(parent-is TYPE)
+
+ Check that the parent has type TYPE.
+
+\(node-is TYPE)
+
+ Checks that the node has type TYPE.
+
+\(query QUERY)
+
+ Queries the parent node with QUERY, and checks if the node
+ is captured (by any capture name).
+
+ANCHOR:
+
+first-sibling
+
+ Find the first child of the parent.
+
+parent
+
+ Find the parent.
+
+parent-bol
+
+ Find the beginning of non-space characters on the line where
+ the parent is on.
+
+prev-sibling
+
+ Find node's previous sibling.
+
+no-indent
+
+ Do nothing.
+
+prev-line
+
+ The first non-whitespace character on the previous line.")
+
+(defun treesit--simple-apply (fn args)
+ "Apply ARGS to FN.
+
+FN is resolved as follows:
+
+- If FN is a cons that is not itself a function, resolve (car FN)
+ with (cdr FN) as its arguments to obtain a function, then apply
+ that function to ARGS.
+- If FN is a symbol that is a key in
+ `treesit-simple-indent-presets', apply the corresponding value
+ to ARGS.
+- If FN is any other function, apply it to ARGS.
+
+Otherwise signal an error."
+ ;; We don't want to match uncompiled lambdas, so make sure this cons
+ ;; is not a function. We could move the condition functionp
+ ;; forward, but better be explicit.
+ (cond ((and (consp fn) (not (functionp fn)))
+ (apply (treesit--simple-apply (car fn) (cdr fn))
+ ;; We don't evaluate ARGS with `simple-apply', i.e.,
+ ;; no composing, better keep it simple.
+ args))
+ ((and (symbolp fn)
+ (alist-get fn treesit-simple-indent-presets))
+ (apply (alist-get fn treesit-simple-indent-presets)
+ args))
+ ((functionp fn) (apply fn args))
+ (t (error "Couldn't find the function corresponding to %s" fn))))
+
+;; This variable might seem unnecessary: why split
+;; `treesit-indent' and `treesit-simple-indent' into two
+;; functions? We add this variable in between because later we might
+;; add more powerful indentation engines, and that new engine can
+;; probably share `treesit-indent'. It is also useful, suggested
+;; by Stefan M, to have a function that figures out how much to indent
+;; but doesn't actually perform the indentation, because we might
+;; want to know where will a node indent to if we put it at some other
+;; location, and use that information to calculate the actual
+;; indentation. And `treesit-simple-indent' is that function. I
+;; forgot the example Stefan gave, but it makes a lot of sense.
+(defvar treesit-indent-function #'treesit-simple-indent
+ "Function used by `treesit-indent' to do some of the work.
+
+This function is called with
+
+ (NODE PARENT BOL &rest _)
+
+and returns
+
+ (ANCHOR . OFFSET).
+
+BOL is the position of the first non-whitespace character on the
+current line; NODE is the \"largest\" node that starts at BOL;
+PARENT is its parent; ANCHOR is a buffer position (not a node),
+and OFFSET is a number. Emacs finds the column of ANCHOR and adds
+OFFSET to it as the final indentation of the current line. If
+ANCHOR is nil, `treesit-indent' does not indent the line.")
+
+(defun treesit-indent ()
+ "Indent the current line according to `treesit-indent-function'.
+
+Call `treesit-indent-function' with NODE, PARENT and BOL, where
+BOL is the position of the first non-whitespace character on the
+current line. If it returns a non-nil ANCHOR, indent the line to
+the column of ANCHOR plus OFFSET; otherwise leave the line alone."
+ (treesit-update-ranges)
+ (let* ((orig-pos (point))
+ ;; First non-whitespace position on the current line.
+ (bol (save-excursion
+ (forward-line 0)
+ (skip-chars-forward " \t")
+ (point)))
+ ;; Take the node from the first parser that has a node at BOL.
+ (smallest-node
+ (cl-loop for parser in (treesit-parser-list)
+ for node = (treesit-node-at bol parser)
+ if node return node))
+ (node (treesit-parent-while
+ smallest-node
+ (lambda (node)
+ (eq bol (treesit-node-start node))))))
+ (pcase-let*
+ ((parser (if smallest-node
+ (treesit-node-parser smallest-node)
+ nil))
+ ;; NODE would be nil if BOL is on a whitespace. In that case
+ ;; we set PARENT to the "node at point", which would
+ ;; encompass the whitespace.
+ (parent (cond ((and node parser)
+ (treesit-node-parent node))
+ (parser
+ (treesit-node-at bol parser))
+ (t nil)))
+ (`(,anchor . ,offset)
+ (funcall treesit-indent-function node parent bol)))
+ (if (null anchor)
+ (when treesit--indent-verbose
+ (message "Failed to find the anchor"))
+ (let ((col (+ (save-excursion
+ (goto-char anchor)
+ (current-column))
+ offset)))
+ ;; If point was past the indentation, keep it where it was;
+ ;; otherwise let `indent-line-to' position it.
+ (if (< bol orig-pos)
+ (save-excursion
+ (indent-line-to col))
+ (indent-line-to col)))))))
+
+(defun treesit-simple-indent (node parent bol)
+ "Calculate indentation according to `treesit-simple-indent-rules'.
+
+BOL is the position of the first non-whitespace character on the
+current line. NODE is the largest node that starts at BOL,
+PARENT is NODE's parent.
+
+The rules are looked up in `treesit-simple-indent-rules' by the
+language of PARENT, and the first rule whose matcher succeeds is
+used. No rule is tried if PARENT is nil.
+
+Return (ANCHOR . OFFSET) where ANCHOR is a buffer position (the
+value computed by the rule's anchor function), OFFSET is the
+indentation offset, meaning indent to align with ANCHOR and add
+OFFSET."
+ (if (null parent)
+ (when treesit--indent-verbose
+ (message "PARENT is nil, not indenting"))
+ (let* ((language (treesit-node-language parent))
+ (rules (alist-get language
+ treesit-simple-indent-rules)))
+ ;; Each rule is (MATCHER ANCHOR OFFSET).
+ (cl-loop for rule in rules
+ for pred = (nth 0 rule)
+ for anchor = (nth 1 rule)
+ for offset = (nth 2 rule)
+ if (treesit--simple-apply
+ pred (list node parent bol))
+ do (when treesit--indent-verbose
+ (message "Matched rule: %S" rule))
+ and
+ return (cons (treesit--simple-apply
+ anchor (list node parent bol))
+ offset)))))
+
+(defun treesit-check-indent (mode)
+ "Check current buffer's indentation against a major mode MODE.
+
+MODE is a command that is called with no arguments to enable the
+major mode in a temporary copy of the current buffer.
+
+Pop up a diff buffer showing the difference. Correct
+indentation (target) is in green, current indentation is in red."
+ (interactive "CTarget major mode: ")
+ (let ((source-buf (current-buffer)))
+ ;; Re-indent a copy of the buffer under MODE, then diff the
+ ;; original against that copy.
+ (with-temp-buffer
+ (insert-buffer-substring source-buf)
+ (funcall mode)
+ (indent-region (point-min) (point-max))
+ (diff-buffers source-buf (current-buffer)))))
+
+;;; Search
+
+;; NOTE(review): the docstring says a function PREDICATE "returns
+;; nil/non-nil for match/no match"; that order looks inverted --
+;; confirm against `treesit-search-forward' and fix the wording.
+(defun treesit-search-forward-goto
+ (predicate side &optional all backward up)
+ "Search forward for a node and move to it.
+
+Stops at the first node after point that matches PREDICATE.
+PREDICATE can be either a regexp that matches against each node's
+type case-insensitively, or a function that takes a node and
+returns nil/non-nil for match/no match.
+
+If a node matches, move to that node and return the node,
+otherwise return nil. SIDE controls whether we move to the start
+or end of the matched node; it can be either \\='start or
+\\='end.
+
+ALL, BACKWARD, and UP are the same as in `treesit-search-forward'."
+ (let ((node (treesit-node-at (point)))
+ (start (point)))
+ ;; When searching forward, it is possible for (point) < start,
+ ;; because `treesit-search-forward' goes to parents.
+ ;; Keep searching until point has moved past START in the search
+ ;; direction, or until there is no further match.
+ (while (and node (if backward
+ (>= (point) start)
+ (<= (point) start)))
+ (setq node (treesit-search-forward
+ node predicate all backward up))
+ (if-let ((pos (pcase side
+ ('start (treesit-node-start node))
+ ('end (treesit-node-end node)))))
+ (goto-char pos)))
+ ;; If we made reverse progress, go back to where we started.
+ (when (if backward
+ (>= (point) start)
+ (<= (point) start))
+ (goto-char start))
+ node))
+
+;;; Debugging
+
+(defvar-local treesit--inspect-name nil
+ "Node description that `treesit-inspect-mode' shows in the mode-line.
+It is set by `treesit-inspect-node-at-point'.")
+
+(defun treesit-inspect-node-at-point (&optional arg)
+ "Show information of the node at point.
+Always store the description in `treesit--inspect-name' (which
+will appear in the mode-line if `treesit-inspect-mode' is
+enabled). If ARG is non-nil, as it is when called interactively,
+also show the description in the echo area, or \"No node at
+point\" if no node starts at point. Uses the first parser
+in (`treesit-parser-list')."
+ (interactive "p")
+ ;; NODE-LIST contains all the nodes that start at point.
+ (let* ((node-list
+ (cl-loop for node = (treesit-node-at (point))
+ then (treesit-node-parent node)
+ while node
+ if (eq (treesit-node-start node)
+ (point))
+ collect node))
+ (largest-node (car (last node-list)))
+ (parent (treesit-node-parent largest-node))
+ ;; node-list-acending contains all the nodes bottom-up, then
+ ;; the parent.
+ (node-list-acending
+ (if (null largest-node)
+ ;; If there are no nodes that start at point, just show
+ ;; the node at point and its parent.
+ (list (treesit-node-at (point))
+ (treesit-node-parent
+ (treesit-node-at (point))))
+ (append node-list (list parent))))
+ (name ""))
+ ;; We draw nodes like (parent field-name: (node)) recursively,
+ ;; so it could be (node1 field-name: (node2 field-name: (node3))).
+ ;; Each iteration wraps the text built so far in the next node
+ ;; up; named nodes are drawn in parentheses, others in quotes.
+ (dolist (node node-list-acending)
+ (setq
+ name
+ (concat
+ (if (treesit-node-field-name node)
+ (format " %s: " (treesit-node-field-name node))
+ " ")
+ (if (treesit-node-check node 'named) "(" "\"")
+ (or (treesit-node-type node)
+ "N/A")
+ name
+ (if (treesit-node-check node 'named) ")" "\""))))
+ (setq treesit--inspect-name name)
+ (force-mode-line-update)
+ (when arg
+ (if node-list
+ (message "%s" treesit--inspect-name)
+ (message "No node at point")))))
+
+(define-minor-mode treesit-inspect-mode
+ "Shows the node that _starts_ at point in the mode-line.
+
+The mode-line displays
+
+ PARENT FIELD-NAME: (CHILD FIELD-NAME: (GRAND-CHILD (...)))
+
+CHILD, GRAND-CHILD, and GRAND-GRAND-CHILD, etc, are nodes that
+have their beginning at point. And PARENT is the parent of
+CHILD.
+
+If no node starts at point, i.e., point is in the middle of a
+node, then we just display the smallest node that spans point and
+its immediate parent.
+
+This minor mode doesn't create parsers on its own. It simply
+uses the first parser in (`treesit-parser-list')."
+ :lighter nil
+ (if treesit-inspect-mode
+ (progn
+ ;; Refresh the description after every command; the hook is
+ ;; added buffer-locally.
+ (add-hook 'post-command-hook
+ #'treesit-inspect-node-at-point 0 t)
+ (add-to-list 'mode-line-misc-info
+ '(:eval treesit--inspect-name)))
+ (remove-hook 'post-command-hook
+ #'treesit-inspect-node-at-point t)
+ ;; NOTE(review): unlike the hook, `mode-line-misc-info' is not
+ ;; made buffer-local here, so this presumably removes the entry
+ ;; for every buffer that still has the mode enabled -- confirm.
+ (setq mode-line-misc-info
+ (remove '(:eval treesit--inspect-name)
+ mode-line-misc-info))))
+
+(defun treesit-query-validate (language query)
+ "Check if QUERY is valid for LANGUAGE.
+QUERY must be a string or a cons. If QUERY is valid, say so in
+the echo area. If QUERY is invalid, display the expanded query in
+a popup buffer, highlight the offending pattern and move point
+near it."
+ (cl-assert (or (consp query) (stringp query)))
+ (let ((buf (get-buffer-create "*tree-sitter check query*")))
+ (with-temp-buffer
+ (treesit-parser-create language)
+ (condition-case err
+ (progn (treesit-query-capture language query)
+ (message "QUERY is valid"))
+ (treesit-query-error
+ (with-current-buffer buf
+ ;; The error data is used as (MESSAGE START ...), where
+ ;; START is used as a position in the expanded query.
+ (let* ((data (cdr err))
+ (message (nth 0 data))
+ (start (nth 1 data)))
+ (erase-buffer)
+ (insert (treesit-query-expand query))
+ (goto-char start)
+ ;; Highlight from START up to and including the next space.
+ (search-forward " " nil t)
+ (put-text-property start (point) 'face 'error)
+ (message "%s" (buffer-substring start (point)))
+ ;; Prepend a header line, then move forward into the query.
+ (goto-char (point-min))
+ (insert (format "%s: %d\n" message start))
+ (forward-char start)))
+ (pop-to-buffer buf))))))
+
+;;; Etc
+
+(declare-function find-library-name "find-func.el")
+(defun treesit--check-manual-covarage ()
+ "Print tree-sitter functions missing from the manual in message buffer.
+
+Collect the names of all functions defined with `defun' in the
+treesit library, drop those whose name contains \"treesit--\",
+and report the ones that have no `@defun' entry in
+doc/lispref/parsing.texi or doc/lispref/modes.texi under
+`source-directory'."
+ (interactive)
+ (require 'find-func)
+ (let ((functions-in-source
+ (with-temp-buffer
+ ;; The library is named "treesit" (see the `provide' form at
+ ;; the end of this file); there is no "tree-sitter" library
+ ;; for `find-library-name' to find.
+ (insert-file-contents (find-library-name "treesit"))
+ (cl-remove-if
+ (lambda (name) (string-match "treesit--" name))
+ (cl-sort
+ (save-excursion
+ (goto-char (point-min))
+ (cl-loop while (re-search-forward
+ "^(defun \\([^ ]+\\)" nil t)
+ collect (match-string-no-properties 1)))
+ #'string<))))
+ (functions-in-manual
+ (with-temp-buffer
+ ;; Both manual files are inserted into the same buffer and
+ ;; scanned together.
+ (insert-file-contents (expand-file-name
+ "doc/lispref/parsing.texi"
+ source-directory))
+ (insert-file-contents (expand-file-name
+ "doc/lispref/modes.texi"
+ source-directory))
+ (cl-sort
+ (save-excursion
+ (goto-char (point-min))
+ (cl-loop while (re-search-forward
+ "^@defun \\([^ ]+\\)" nil t)
+ collect (match-string-no-properties 1)))
+ #'string<))))
+ (message "Missing: %s"
+ (string-join
+ (cl-remove-if
+ (lambda (name) (member name functions-in-manual))
+ functions-in-source)
+ "\n"))))
+
+(provide 'treesit)
+
+;;; treesit.el ends here
diff --git a/src/Makefile.in b/src/Makefile.in
index 1f941874ea8..eb537e21277 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -343,6 +343,10 @@ JSON_LIBS = @JSON_LIBS@
JSON_CFLAGS = @JSON_CFLAGS@
JSON_OBJ = @JSON_OBJ@
+TREE_SITTER_LIBS = @TREE_SITTER_LIBS@
+TREE_SITTER_CFLAGS = @TREE_SITTER_CFLAGS@
+TREE_SITTER_OBJ = @TREE_SITTER_OBJ@
+
INTERVALS_H = dispextern.h intervals.h composite.h
GETLOADAVG_LIBS = @GETLOADAVG_LIBS@
@@ -406,7 +410,7 @@ EMACS_CFLAGS=-Demacs $(MYCPPFLAGS) -I. -I$(srcdir) \
$(XINPUT_CFLAGS) $(WEBP_CFLAGS) $(WEBKIT_CFLAGS) $(LCMS2_CFLAGS) \
$(SETTINGS_CFLAGS) $(FREETYPE_CFLAGS) $(FONTCONFIG_CFLAGS) \
$(HARFBUZZ_CFLAGS) $(LIBOTF_CFLAGS) $(M17N_FLT_CFLAGS) $(DEPFLAGS) \
- $(LIBSYSTEMD_CFLAGS) $(JSON_CFLAGS) $(XSYNC_CFLAGS) \
+ $(LIBSYSTEMD_CFLAGS) $(JSON_CFLAGS) $(XSYNC_CFLAGS) $(TREE_SITTER_CFLAGS) \
$(LIBGNUTLS_CFLAGS) $(NOTIFY_CFLAGS) $(CAIRO_CFLAGS) \
$(WERROR_CFLAGS) $(HAIKU_CFLAGS) $(XCOMPOSITE_CFLAGS) $(XSHAPE_CFLAGS)
ALL_CFLAGS = $(EMACS_CFLAGS) $(WARN_CFLAGS) $(CFLAGS)
@@ -445,7 +449,7 @@ base_obj = dispnew.o frame.o scroll.o xdisp.o menu.o $(XMENU_OBJ) window.o \
$(if $(HYBRID_MALLOC),sheap.o) \
$(MSDOS_OBJ) $(MSDOS_X_OBJ) $(NS_OBJ) $(CYGWIN_OBJ) $(FONT_OBJ) \
$(W32_OBJ) $(WINDOW_SYSTEM_OBJ) $(XGSELOBJ) $(JSON_OBJ) \
- $(HAIKU_OBJ) $(PGTK_OBJ)
+ $(TREE_SITTER_OBJ) $(HAIKU_OBJ) $(PGTK_OBJ)
doc_obj = $(base_obj) $(NS_OBJC_OBJ)
obj = $(doc_obj) $(HAIKU_CXX_OBJ)
@@ -565,7 +569,7 @@ LIBES = $(LIBS) $(W32_LIBS) $(LIBS_GNUSTEP) $(PGTK_LIBS) $(LIBX_BASE) $(LIBIMAGE
$(LIBGNUTLS_LIBS) $(LIB_PTHREAD) $(GETADDRINFO_A_LIBS) $(LCMS2_LIBS) \
$(NOTIFY_LIBS) $(LIB_MATH) $(LIBZ) $(LIBMODULES) $(LIBSYSTEMD_LIBS) \
$(JSON_LIBS) $(LIBGMP) $(LIBGCCJIT_LIBS) $(XINPUT_LIBS) $(HAIKU_LIBS) \
- $(SQLITE3_LIBS) $(XCOMPOSITE_LIBS) $(XSHAPE_LIBS)
+ $(TREE_SITTER_LIBS) $(SQLITE3_LIBS) $(XCOMPOSITE_LIBS) $(XSHAPE_LIBS)
## FORCE it so that admin/unidata can decide whether this file is
## up-to-date. Although since charprop depends on bootstrap-emacs,
diff --git a/src/alloc.c b/src/alloc.c
index 419c5e558b4..a8b57add60e 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -50,6 +50,10 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
#include TERM_HEADER
#endif /* HAVE_WINDOW_SYSTEM */
+#ifdef HAVE_TREE_SITTER
+#include "treesit.h"
+#endif
+
#include <flexmember.h>
#include <verify.h>
#include <execinfo.h> /* For backtrace. */
@@ -3165,6 +3169,22 @@ cleanup_vector (struct Lisp_Vector *vector)
if (uptr->finalizer)
uptr->finalizer (uptr->p);
}
+#ifdef HAVE_TREE_SITTER
+ else if (PSEUDOVECTOR_TYPEP (&vector->header, PVEC_TS_PARSER))
+ {
+ struct Lisp_TS_Parser *lisp_parser
+ = PSEUDOVEC_STRUCT (vector, Lisp_TS_Parser);
+ ts_tree_delete(lisp_parser->tree);
+ ts_parser_delete(lisp_parser->parser);
+ }
+ else if (PSEUDOVECTOR_TYPEP (&vector->header, PVEC_TS_COMPILED_QUERY))
+ {
+ struct Lisp_TS_Query *lisp_query
+ = PSEUDOVEC_STRUCT (vector, Lisp_TS_Query);
+ ts_query_delete (lisp_query->query);
+ ts_query_cursor_delete (lisp_query->cursor);
+ }
+#endif
#ifdef HAVE_MODULES
else if (PSEUDOVECTOR_TYPEP (&vector->header, PVEC_MODULE_FUNCTION))
{
diff --git a/src/buffer.c b/src/buffer.c
index d4a0c37bed5..be7c2f2161e 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -231,6 +231,13 @@ bset_extra_line_spacing (struct buffer *b, Lisp_Object val)
{
b->extra_line_spacing_ = val;
}
+#ifdef HAVE_TREE_SITTER
+/* Set the list of tree-sitter parsers of buffer B to VAL. */
+static void
+bset_ts_parser_list (struct buffer *b, Lisp_Object val)
+{
+ b->ts_parser_list_ = val;
+}
+#endif
static void
bset_file_format (struct buffer *b, Lisp_Object val)
{
@@ -1007,6 +1014,9 @@ reset_buffer (register struct buffer *b)
(b, BVAR (&buffer_defaults, enable_multibyte_characters));
bset_cursor_type (b, BVAR (&buffer_defaults, cursor_type));
bset_extra_line_spacing (b, BVAR (&buffer_defaults, extra_line_spacing));
+#ifdef HAVE_TREE_SITTER
+ bset_ts_parser_list (b, Qnil);
+#endif
b->display_error_modiff = 0;
}
@@ -5283,6 +5293,9 @@ init_buffer_once (void)
XSETFASTINT (BVAR (&buffer_local_flags, tab_line_format), idx); ++idx;
XSETFASTINT (BVAR (&buffer_local_flags, cursor_type), idx); ++idx;
XSETFASTINT (BVAR (&buffer_local_flags, extra_line_spacing), idx); ++idx;
+#ifdef HAVE_TREE_SITTER
+ XSETFASTINT (BVAR (&buffer_local_flags, ts_parser_list), idx); ++idx;
+#endif
XSETFASTINT (BVAR (&buffer_local_flags, cursor_in_non_selected_windows), idx); ++idx;
/* buffer_local_flags contains no pointers, so it's safe to treat it
@@ -5353,6 +5366,9 @@ init_buffer_once (void)
bset_bidi_paragraph_separate_re (&buffer_defaults, Qnil);
bset_cursor_type (&buffer_defaults, Qt);
bset_extra_line_spacing (&buffer_defaults, Qnil);
+#ifdef HAVE_TREE_SITTER
+ bset_ts_parser_list (&buffer_defaults, Qnil);
+#endif
bset_cursor_in_non_selected_windows (&buffer_defaults, Qt);
bset_enable_multibyte_characters (&buffer_defaults, Qt);
diff --git a/src/buffer.h b/src/buffer.h
index cbdbae798ba..04792374cd1 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -573,6 +573,10 @@ struct buffer
in the display of this buffer. */
Lisp_Object extra_line_spacing_;
+#ifdef HAVE_TREE_SITTER
+ /* A list of tree-sitter parsers for this buffer. */
+ Lisp_Object ts_parser_list_;
+#endif
/* Cursor type to display in non-selected windows.
t means to use hollow box cursor.
See `cursor-type' for other values. */
diff --git a/src/casefiddle.c b/src/casefiddle.c
index 2ea5f09b4c5..3022c5cc7d6 100644
--- a/src/casefiddle.c
+++ b/src/casefiddle.c
@@ -30,6 +30,10 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
#include "composite.h"
#include "keymap.h"
+#ifdef HAVE_TREE_SITTER
+#include "treesit.h"
+#endif
+
enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP};
/* State for casing individual characters. */
@@ -530,6 +534,11 @@ casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
modify_text (start, end);
prepare_casing_context (&ctx, flag, true);
+#ifdef HAVE_TREE_SITTER
+ ptrdiff_t start_byte = CHAR_TO_BYTE (start);
+ ptrdiff_t old_end_byte = CHAR_TO_BYTE (end);
+#endif
+
ptrdiff_t orig_end = end;
record_delete (start, make_buffer_string (start, end, true), false);
if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
@@ -548,6 +557,9 @@ casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
{
signal_after_change (start, end - start - added, end - start);
update_compositions (start, end, CHECK_ALL);
+#ifdef HAVE_TREE_SITTER
+ ts_record_change (start_byte, old_end_byte, CHAR_TO_BYTE (end));
+#endif
}
return orig_end + added;
diff --git a/src/data.c b/src/data.c
index 221a6f58835..c6b85e17bc2 100644
--- a/src/data.c
+++ b/src/data.c
@@ -261,6 +261,12 @@ for example, (type-of 1) returns `integer'. */)
return Qxwidget;
case PVEC_XWIDGET_VIEW:
return Qxwidget_view;
+ case PVEC_TS_PARSER:
+ return Qtreesit_parser;
+ case PVEC_TS_NODE:
+ return Qtreesit_node;
+ case PVEC_TS_COMPILED_QUERY:
+ return Qtreesit_compiled_query;
case PVEC_SQLITE:
return Qsqlite;
/* "Impossible" cases. */
@@ -4271,6 +4277,9 @@ syms_of_data (void)
DEFSYM (Qterminal, "terminal");
DEFSYM (Qxwidget, "xwidget");
DEFSYM (Qxwidget_view, "xwidget-view");
+ DEFSYM (Qtreesit_parser, "treesit-parser");
+ DEFSYM (Qtreesit_node, "treesit-node");
+ DEFSYM (Qtreesit_compiled_query, "treesit-compiled-query");
DEFSYM (Qdefun, "defun");
diff --git a/src/emacs.c b/src/emacs.c
index 43e81b912c6..ba8b9c651a7 100644
--- a/src/emacs.c
+++ b/src/emacs.c
@@ -136,6 +136,10 @@ extern char etext;
#include <sys/resource.h>
#endif
+#ifdef HAVE_TREE_SITTER
+#include "treesit.h"
+#endif
+
#include "pdumper.h"
#include "fingerprint.h"
#include "epaths.h"
@@ -2266,6 +2270,9 @@ Using an Emacs configured with --with-x-toolkit=lucid does not have this problem
syms_of_module ();
#endif
+#ifdef HAVE_TREE_SITTER
+ syms_of_treesit ();
+#endif
#ifdef HAVE_SOUND
syms_of_sound ();
#endif
diff --git a/src/eval.c b/src/eval.c
index 7da1d8fb989..de9c07f1552 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -1897,6 +1897,19 @@ signal_error (const char *s, Lisp_Object arg)
xsignal (Qerror, Fcons (build_string (s), arg));
}
+/* Define NAME as an error symbol with message MESSAGE, deriving from
+ PARENT. NAME's Qerror_conditions property becomes NAME consed
+ onto PARENT's conditions, and its Qerror_message property becomes
+ MESSAGE. NAME and PARENT must be symbols, PARENT's conditions
+ must be a list containing PARENT itself, and NAME must not already
+ be in that list; these are checked with eassert. The new values
+ are built with pure_cons and build_pure_c_string. */
+void
+define_error (Lisp_Object name, const char *message, Lisp_Object parent)
+{
+ eassert (SYMBOLP (name));
+ eassert (SYMBOLP (parent));
+ Lisp_Object parent_conditions = Fget (parent, Qerror_conditions);
+ eassert (CONSP (parent_conditions));
+ eassert (!NILP (Fmemq (parent, parent_conditions)));
+ eassert (NILP (Fmemq (name, parent_conditions)));
+ Fput (name, Qerror_conditions, pure_cons (name, parent_conditions));
+ Fput (name, Qerror_message, build_pure_c_string (message));
+}
+
/* Use this for arithmetic overflow, e.g., when an integer result is
too large even for a bignum. */
void
diff --git a/src/insdel.c b/src/insdel.c
index 38d5fbda002..7cbc88e51d4 100644
--- a/src/insdel.c
+++ b/src/insdel.c
@@ -31,6 +31,10 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
#include "region-cache.h"
#include "pdumper.h"
+#ifdef HAVE_TREE_SITTER
+#include "treesit.h"
+#endif
+
static void insert_from_string_1 (Lisp_Object, ptrdiff_t, ptrdiff_t, ptrdiff_t,
ptrdiff_t, bool, bool);
static void insert_from_buffer_1 (struct buffer *, ptrdiff_t, ptrdiff_t, bool);
@@ -940,6 +944,12 @@ insert_1_both (const char *string,
set_text_properties (make_fixnum (PT), make_fixnum (PT + nchars),
Qnil, Qnil, Qnil);
+#ifdef HAVE_TREE_SITTER
+ eassert (nbytes >= 0);
+ eassert (PT_BYTE >= 0);
+ ts_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes);
+#endif
+
adjust_point (nchars, nbytes);
check_markers ();
@@ -1071,6 +1081,12 @@ insert_from_string_1 (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
graft_intervals_into_buffer (intervals, PT, nchars,
current_buffer, inherit);
+#ifdef HAVE_TREE_SITTER
+ eassert (nbytes >= 0);
+ eassert (PT_BYTE >= 0);
+ ts_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes);
+#endif
+
adjust_point (nchars, outgoing_nbytes);
check_markers ();
@@ -1138,6 +1154,12 @@ insert_from_gap (ptrdiff_t nchars, ptrdiff_t nbytes, bool text_at_gap_tail)
current_buffer, 0);
}
+#ifdef HAVE_TREE_SITTER
+ eassert (nbytes >= 0);
+ eassert (ins_bytepos >= 0);
+ ts_record_change (ins_bytepos, ins_bytepos, ins_bytepos + nbytes);
+#endif
+
if (ins_charpos < PT)
adjust_point (nchars, nbytes);
@@ -1288,6 +1310,12 @@ insert_from_buffer_1 (struct buffer *buf,
/* Insert those intervals. */
graft_intervals_into_buffer (intervals, PT, nchars, current_buffer, inherit);
+#ifdef HAVE_TREE_SITTER
+ eassert (outgoing_nbytes >= 0);
+ eassert (PT_BYTE >= 0);
+ ts_record_change (PT_BYTE, PT_BYTE, PT_BYTE + outgoing_nbytes);
+#endif
+
adjust_point (nchars, outgoing_nbytes);
}
@@ -1536,6 +1564,13 @@ replace_range (ptrdiff_t from, ptrdiff_t to, Lisp_Object new,
graft_intervals_into_buffer (intervals, from, inschars,
current_buffer, inherit);
+#ifdef HAVE_TREE_SITTER
+ eassert (to_byte >= from_byte);
+ eassert (outgoing_insbytes >= 0);
+ eassert (from_byte >= 0);
+ ts_record_change (from_byte, to_byte, from_byte + outgoing_insbytes);
+#endif
+
/* Relocate point as if it were a marker. */
if (from < PT)
adjust_point ((from + inschars - (PT < to ? PT : to)),
@@ -1570,7 +1605,11 @@ replace_range (ptrdiff_t from, ptrdiff_t to, Lisp_Object new,
If MARKERS, relocate markers.
Unlike most functions at this level, never call
- prepare_to_modify_buffer and never call signal_after_change. */
+ prepare_to_modify_buffer and never call signal_after_change.
+ This is because this function is called in a loop, one character
+ at a time; the caller of 'replace_range_2' calls these hooks for
+ the entire region once. Apart from signal_after_change, any caller
+ of this function should also call ts_record_change. */
void
replace_range_2 (ptrdiff_t from, ptrdiff_t from_byte,
@@ -1893,6 +1932,12 @@ del_range_2 (ptrdiff_t from, ptrdiff_t from_byte,
evaporate_overlays (from);
+#ifdef HAVE_TREE_SITTER
+ eassert (from_byte <= to_byte);
+ eassert (from_byte >= 0);
+ ts_record_change (from_byte, to_byte, from_byte);
+#endif
+
return deletion;
}
diff --git a/src/json.c b/src/json.c
index 9a455f507b4..cdcc11358e6 100644
--- a/src/json.c
+++ b/src/json.c
@@ -1092,22 +1092,6 @@ usage: (json-parse-buffer &rest args) */)
return unbind_to (count, lisp);
}
-/* Simplified version of 'define-error' that works with pure
- objects. */
-
-static void
-define_error (Lisp_Object name, const char *message, Lisp_Object parent)
-{
- eassert (SYMBOLP (name));
- eassert (SYMBOLP (parent));
- Lisp_Object parent_conditions = Fget (parent, Qerror_conditions);
- eassert (CONSP (parent_conditions));
- eassert (!NILP (Fmemq (parent, parent_conditions)));
- eassert (NILP (Fmemq (name, parent_conditions)));
- Fput (name, Qerror_conditions, pure_cons (name, parent_conditions));
- Fput (name, Qerror_message, build_pure_c_string (message));
-}
-
void
syms_of_json (void)
{
diff --git a/src/lisp.h b/src/lisp.h
index 9710dbef8d2..1e41e2064c9 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -584,6 +584,8 @@ enum Lisp_Fwd_Type
your object -- this way, the same object could be used to represent
several disparate C structures.
+ In addition, you need to add switch branches in data.c for Ftype_of.
+
You also need to add the new type to the constant
`cl--typeof-types' in lisp/emacs-lisp/cl-preloaded.el. */
@@ -1062,6 +1064,9 @@ enum pvec_type
PVEC_CONDVAR,
PVEC_MODULE_FUNCTION,
PVEC_NATIVE_COMP_UNIT,
+ PVEC_TS_PARSER,
+ PVEC_TS_NODE,
+ PVEC_TS_COMPILED_QUERY,
PVEC_SQLITE,
/* These should be last, for internal_equal and sxhash_obj. */
@@ -5565,6 +5570,11 @@ maybe_gc (void)
maybe_garbage_collect ();
}
+/* Simplified version of 'define-error' that works with pure
+ objects. */
+void
+define_error (Lisp_Object name, const char *message, Lisp_Object parent);
+
INLINE_HEADER_END
#endif /* EMACS_LISP_H */
diff --git a/src/lread.c b/src/lread.c
index dfa4d9afb51..37ee3a00ecc 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -5465,6 +5465,14 @@ to the specified file name if a suffix is allowed or required. */);
Fcons (build_pure_c_string (MODULES_SECONDARY_SUFFIX), Vload_suffixes);
#endif
+ DEFVAR_LISP ("dynamic-library-suffixes", Vdynamic_library_suffixes,
+ doc: /* A list of suffixes for loadable dynamic libraries. */);
+ Vdynamic_library_suffixes =
+ Fcons (build_pure_c_string (DYNAMIC_LIB_SECONDARY_SUFFIX), Qnil);
+ Vdynamic_library_suffixes =
+ Fcons (build_pure_c_string (DYNAMIC_LIB_SUFFIX),
+ Vdynamic_library_suffixes);
+
#endif
DEFVAR_LISP ("module-file-suffix", Vmodule_file_suffix,
doc: /* Suffix of loadable module file, or nil if modules are not supported. */);
diff --git a/src/print.c b/src/print.c
index 1c96ec14b86..4f41448d861 100644
--- a/src/print.c
+++ b/src/print.c
@@ -48,6 +48,10 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
# include <sys/socket.h> /* for F_DUPFD_CLOEXEC */
#endif
+#ifdef HAVE_TREE_SITTER
+#include "treesit.h"
+#endif
+
struct terminal;
/* Avoid actual stack overflow in print. */
@@ -2009,6 +2013,36 @@ print_vectorlike (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag,
}
break;
#endif
+
+#ifdef HAVE_TREE_SITTER
+ case PVEC_TS_PARSER:
+ print_c_string ("#<treesit-parser for ", printcharfun);
+ Lisp_Object language = XTS_PARSER (obj)->language_symbol;
+ print_string (Fsymbol_name (language), printcharfun);
+ print_c_string (" in ", printcharfun);
+ print_object (XTS_PARSER (obj)->buffer, printcharfun, escapeflag);
+ printchar ('>', printcharfun);
+ break;
+ case PVEC_TS_NODE:
+ /* Prints #<treesit-node (identifier) in #<buffer xxx>> or
+ #<treesit-node "keyword" in #<buffer xxx>>. */
+ print_c_string ("#<treesit-node ", printcharfun);
+ bool named = ts_node_is_named (XTS_NODE (obj)->node);
+ const char *delim1 = named ? "(" : "\"";
+ const char *delim2 = named ? ")" : "\"";
+ print_c_string (delim1, printcharfun);
+ print_string (Ftreesit_node_type (obj), printcharfun);
+ print_c_string (delim2, printcharfun);
+ print_c_string (" in ", printcharfun);
+ print_object (XTS_PARSER (XTS_NODE (obj)->parser)->buffer,
+ printcharfun, escapeflag);
+ printchar ('>', printcharfun);
+ break;
+ case PVEC_TS_COMPILED_QUERY:
+ print_c_string ("#<treesit-compiled-query>", printcharfun);
+ break;
+#endif
+
case PVEC_SQLITE:
{
print_c_string ("#<sqlite ", printcharfun);
diff --git a/src/treesit.c b/src/treesit.c
new file mode 100644
index 00000000000..77b48133ba8
--- /dev/null
+++ b/src/treesit.c
@@ -0,0 +1,2327 @@
+/* Tree-sitter integration for GNU Emacs.
+
+Copyright (C) 2021-2022 Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
+
+#include <config.h>
+#include "lisp.h"
+#include "buffer.h"
+#include "treesit.h"
+
+/* Commentary
+
+ The Emacs wrapper of tree-sitter does not expose everything the C
+ API provides, most notably:
+
+ - It doesn't expose a syntax tree, we put the syntax tree in the
+ parser object, and updating the tree is handled in the C level.
+
+ - We don't expose tree cursor either. I think Lisp is slow enough
+ to nullify any performance advantage of using a cursor, though I
+ don't have evidence. Also I want to minimize the number of new
+ types we introduce, currently we only add parser and node type.
+
+ - Because updating the change is handled in the C level as each
+ change is made in the buffer, there is no way for Lisp to update
+ a node. But since we can just retrieve a new node, it shouldn't
+ be a limitation.
+
+ - I didn't expose setting timeout and cancellation flag for a
+ parser, mainly because I don't think they are really necessary
+ in Emacs' use cases.
+
+ - Many tree-sitter functions ask for a TSPoint, basically a (row,
+ column) location. Emacs uses a gap buffer and keeps no
+ information about row and column position. According to the
+ author of tree-sitter, tree-sitter only asks for (row, column)
+ position to carry it around and return back to the user later;
+ and the real position used is the byte position. He also said
+ that he _thinks_ that it will work to use byte position only.
+ That's why whenever a TSPoint is asked, we pass a dummy one to
+ it. Judging by the nature of parsing algorithms, I think it is
+ safe to use only byte position, and I don't think this will
+ change in the future.
+
+ REF: https://github.com/tree-sitter/tree-sitter/issues/445
+
+ treesit.h has some commentary on the two main data structures
+ for the parser and node. ts_ensure_position_synced has some
+ commentary on how we make tree-sitter play well with narrowing
+ (tree-sitter parser only sees the visible region, so we need to
+ translate positions back and forth). Most action happens in
+ ts_ensure_parsed, ts_read_buffer and ts_record_change.
+
+ A complete correspondence list between tree-sitter functions and
+ exposed Lisp functions can be found in the manual (elisp)API
+ Correspondence.
+
+ Placement of CHECK_xxx functions: call CHECK_xxx before using any
+ unchecked Lisp values; these include argument of Lisp functions,
+ return value of Fsymbol_value, car of a cons.
+
+ Initializing tree-sitter: there are two entry points to tree-sitter
+ functions: 'treesit-parser-create' and
+ 'treesit-language-available-p'. Therefore we only need to call
+ initialization function in those two functions.
+
+ Tree-sitter offset (0-based) and buffer position (1-based):
+ tree-sitter offset + visible_beg = buffer byte position
+ buffer byte position - visible_beg = tree-sitter offset
+
+ Tree-sitter-related code in other files:
+ - src/alloc.c for gc for parser and node
+ - src/casefiddle.c & src/insdel.c for notifying tree-sitter
+ parser of buffer changes.
+ - lisp/emacs-lisp/cl-preloaded.el & data.c & lisp.h for parser and
+ node type.
+
+ We don't parse at every keystroke. Instead we only record the
+ changes at each keystroke, and only parse when requested. It is
+ possible that lazy parsing is worse: instead of dispersed little
+ pauses, now you have less frequent but larger pauses. I doubt
+ there will be any perceived difference, as the lazy parsing is
+ going to be pretty frequent anyway. Also this (lazy parsing) is
+ what the mailing list guys wanted.
+
+ Because it is pretty slow (comparing to other tree-sitter
+ operations) for tree-sitter to parse the query and produce a query
+ object, it is very wasteful to reparse the query every time
+ treesit-query-capture is called, and it completely kills the
+ performance of querying in a loop for a moderate amount of times
+ (hundreds of queries takes seconds rather than milliseconds to
+ complete). Therefore we want some caching. We can either use a
+ search.c style transparent caching, or simply expose a new type,
+ compiled-ts-query and let the user manually compile AOT. I
+ believe AOT compiling gives users more control, makes the
+ performance stable and easy to understand (compiled -> fast,
+ uncompiled -> slow), and avoids some edge cases transparent cache
+ could have (see below). So I implemented the AOT compilation.
+
+ Problems a transparent cache could have: Suppose we store cache
+ entries in a fixed-length linked-list, and compare with EQ. 1)
+ One-off query could kick out useful cache. 2) if the user messed
+ up and the query doesn't EQ to the cache anymore, the performance
+ mysteriously drops. 3) what if a user uses so many queries that the
+ default cache size (20) is not enough and we end up thrashing?
+ These are all imagined scenarios but they are not impossible :-)
+ */
+
+/*** Initialization */
+
+bool ts_initialized = false;
+
+/* calloc-style allocator handed to tree-sitter: return zero-filled
+   storage for N objects of SIZE bytes each.  */
+static void *
+ts_calloc_wrapper (size_t n, size_t size)
+{
+  /* N * SIZE silently wraps in size_t arithmetic, which would hand
+     tree-sitter a block smaller than it asked for.  Treat an
+     unrepresentable request as memory exhaustion instead.  */
+  if (size != 0 && n > SIZE_MAX / size)
+    memory_full (SIZE_MAX);
+  return xzalloc (n * size);
+}
+
+/* Install Emacs's allocator into tree-sitter.  Only the first call
+   does anything; later calls return immediately.  */
+static void
+ts_initialize (void)
+{
+  if (ts_initialized)
+    return;
+
+  ts_set_allocator (xmalloc, ts_calloc_wrapper, xrealloc, xfree);
+  ts_initialized = true;
+}
+
+/*** Loading language library */
+
+/* Translate, in place, a library base name such as
+   "tree-sitter-<lang>" into the corresponding C identifier
+   "tree_sitter_<lang>" by replacing every '-' with '_'.
+   SYMBOL_NAME must be a writable, NUL-terminated string.  */
+static void
+ts_symbol_to_c_name (char *symbol_name)
+{
+  /* Walk up to the terminator once instead of calling strlen on
+     every iteration.  */
+  for (char *p = symbol_name; *p != '\0'; p++)
+    {
+      if (*p == '-')
+        *p = '_';
+    }
+}
+
+/* Look LANGUAGE_SYMBOL up in Vtreesit_load_name_override_list.  Each
+   entry is expected to be a list whose first element is a language
+   symbol, whose second element is a library base name (a string)
+   and whose third element is a C function name (a string).
+
+   If an entry for LANGUAGE_SYMBOL is found, store its second and
+   third elements in *NAME and *C_SYMBOL and return true; otherwise
+   return false and leave both untouched.  Signal a wrong-type error
+   if the list or a visited entry is malformed.  */
+static bool
+ts_find_override_name
+(Lisp_Object language_symbol, Lisp_Object *name, Lisp_Object *c_symbol)
+{
+  for (Lisp_Object list = Vtreesit_load_name_override_list;
+       !NILP (list); list = XCDR (list))
+    {
+      /* The variable is user-settable, so validate every cons before
+         taking its car or cdr.  */
+      CHECK_CONS (list);
+      Lisp_Object entry = XCAR (list);
+      CHECK_CONS (entry);
+      Lisp_Object lang = XCAR (entry);
+      CHECK_SYMBOL (lang);
+      if (EQ (lang, language_symbol))
+        {
+          *name = Fnth (make_fixnum (1), entry);
+          CHECK_STRING (*name);
+          *c_symbol = Fnth (make_fixnum (2), entry);
+          CHECK_STRING (*c_symbol);
+          return true;
+        }
+    }
+  return false;
+}
+
+/* Push LIB_BASE_NAME concatenated with each element of
+   Vdynamic_library_suffixes onto the front of *PATH_CANDIDATES.  For
+   example, if Vdynamic_library_suffixes is (".so" ".dylib"), this
+   function pushes "lib_base_name.so" and "lib_base_name.dylib".
+   *PATH_CANDIDATES should be a Lisp list of Lisp strings.  */
+static void
+ts_load_language_push_for_each_suffix
+(Lisp_Object lib_base_name, Lisp_Object *path_candidates)
+{
+  Lisp_Object tail = Vdynamic_library_suffixes;
+  while (!NILP (tail))
+    {
+      Lisp_Object candidate = concat2 (lib_base_name, XCAR (tail));
+      *path_candidates = Fcons (candidate, *path_candidates);
+      tail = XCDR (tail);
+    }
+}
+
+/* Load the dynamic library of LANGUAGE_SYMBOL and return the pointer
+   to the language definition.
+
+   The library is searched for, in order of preference, in the
+   directories of treesit-extra-load-path, in the "tree-sitter"
+   subdirectory of user-emacs-directory, and finally by bare file
+   name (leaving the lookup to dynlib_open), trying every suffix in
+   Vdynamic_library_suffixes.
+
+   If SIGNAL is true, signal Qtreesit_load_language_error when
+   loading fails; when no library could be opened the error data
+   carries the message produced for each candidate.  If SIGNAL is
+   false, return NULL when loading fails.  */
+static TSLanguage *
+ts_load_language (Lisp_Object language_symbol, bool signal)
+{
+  Lisp_Object symbol_name = Fsymbol_name (language_symbol);
+
+  /* Figure out the library name and C name.  The C name is kept in a
+     Lisp string rather than a strdup'ed buffer so that nothing leaks
+     when we signal or return early, and its bytes are only fetched
+     at the point of use.  */
+  Lisp_Object lib_base_name
+    = concat2 (build_pure_c_string ("libtree-sitter-"), symbol_name);
+  Lisp_Object c_name
+    = Fcopy_sequence (concat2 (build_pure_c_string ("tree-sitter-"),
+                               symbol_name));
+  ts_symbol_to_c_name (SSDATA (c_name));
+
+  /* Override the library name and C name, if appropriate.  */
+  Lisp_Object override_name;
+  Lisp_Object override_c_name;
+  bool found_override = ts_find_override_name
+    (language_symbol, &override_name, &override_c_name);
+  if (found_override)
+    {
+      lib_base_name = override_name;
+      c_name = override_c_name;
+    }
+
+  /* Now we generate a list of possible library paths.  Candidates
+     are pushed onto the front, so the ones pushed last are tried
+     first.  */
+  Lisp_Object path_candidates = Qnil;
+  /* First push just the filenames to the candidate list, which will
+     make dynlib_open look under standard system load paths.  */
+  ts_load_language_push_for_each_suffix
+    (lib_base_name, &path_candidates);
+  /* Then push ~/.emacs.d/tree-sitter paths.  */
+  ts_load_language_push_for_each_suffix
+    (Fexpand_file_name
+     (concat2 (build_string ("tree-sitter/"), lib_base_name),
+      Fsymbol_value (Quser_emacs_directory)),
+     &path_candidates);
+  /* Then push paths from treesit-extra-load-path.  */
+  for (Lisp_Object tail = Freverse (Vtreesit_extra_load_path);
+       !NILP (tail); tail = XCDR (tail))
+    {
+      ts_load_language_push_for_each_suffix
+        (Fexpand_file_name (lib_base_name, XCAR (tail)),
+         &path_candidates);
+    }
+
+  /* Try loading the dynamic library by each path candidate.  Stop
+     when succeed, record the error message and try the next one when
+     fail.  HANDLE and ERROR start out NULL so that an empty candidate
+     list is reported as a failure instead of reading indeterminate
+     values.  */
+  dynlib_handle_ptr handle = NULL;
+  char const *error = NULL;
+  Lisp_Object error_list = Qnil;
+  for (Lisp_Object tail = path_candidates;
+       !NILP (tail); tail = XCDR (tail))
+    {
+      char *library_name = SSDATA (XCAR (tail));
+      /* Clear any stale error before the attempt.  */
+      dynlib_error ();
+      handle = dynlib_open (library_name);
+      error = dynlib_error ();
+      if (error == NULL)
+        break;
+      else
+        error_list = Fcons (build_string (error), error_list);
+    }
+  if (handle == NULL || error != NULL)
+    {
+      if (signal)
+        xsignal2 (Qtreesit_load_language_error,
+                  symbol_name, Fnreverse (error_list));
+      else
+        return NULL;
+    }
+
+  /* Load TSLanguage.  */
+  dynlib_error ();
+  TSLanguage *(*langfn) (void);
+  langfn = dynlib_sym (handle, SSDATA (c_name));
+  error = dynlib_error ();
+  if (langfn == NULL || error != NULL)
+    {
+      if (signal)
+        /* When the lookup yields NULL without an error message,
+           report the C name we failed to resolve.  */
+        xsignal1 (Qtreesit_load_language_error,
+                  error != NULL ? build_string (error) : c_name);
+      else
+        return NULL;
+    }
+  TSLanguage *lang = (*langfn) ();
+
+  /* Check if language version matches tree-sitter version.  */
+  TSParser *parser = ts_parser_new ();
+  bool success = ts_parser_set_language (parser, lang);
+  ts_parser_delete (parser);
+  if (!success)
+    {
+      if (signal)
+        xsignal2 (Qtreesit_load_language_error,
+                  build_pure_c_string ("Language version doesn't match tree-sitter version, language version:"),
+                  make_fixnum (ts_language_version (lang)));
+      else
+        return NULL;
+    }
+  return lang;
+}
+
+DEFUN ("treesit-language-available-p",
+       Ftreesit_langauge_available_p,
+       Streesit_language_available_p,
+       1, 1, 0,
+       doc: /* Return non-nil if LANGUAGE exists and is loadable.  */)
+  (Lisp_Object language)
+{
+  CHECK_SYMBOL (language);
+  ts_initialize ();
+  /* Ask the loader not to signal; a NULL result means "unavailable".  */
+  TSLanguage *loaded = ts_load_language (language, false);
+  return loaded != NULL ? Qt : Qnil;
+}
+
+DEFUN ("treesit-language-version",
+       Ftreesit_language_version,
+       Streesit_language_version,
+       0, 1, 0,
+       doc: /* Return the language version of tree-sitter library.
+If MIN-COMPATIBLE non-nil, return the minimal compatible version.  */)
+  (Lisp_Object min_compatible)
+{
+  /* Both values are compile-time constants from the tree-sitter
+     header.  */
+  return make_fixnum (NILP (min_compatible)
+                      ? TREE_SITTER_LANGUAGE_VERSION
+                      : TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION);
+}
+
+/*** Parsing functions */
+
+/* Signal an error unless OBJ is a parser that has not been deleted:
+   a wrong-type error if OBJ is not a parser at all, and
+   Qtreesit_parser_deleted if it has been deleted.  */
+static void
+ts_check_parser (Lisp_Object obj)
+{
+  CHECK_TS_PARSER (obj);
+  bool is_deleted = XTS_PARSER (obj)->deleted;
+  if (is_deleted)
+    xsignal1 (Qtreesit_parser_deleted, obj);
+}
+
+/* Tell TREE that the bytes in [START_BYTE, OLD_END_BYTE) were
+   replaced so that the region now ends at NEW_END_BYTE.  All three
+   arguments are 0-based byte offsets.  TREE must not be NULL.  The
+   row/column points tree-sitter also asks for are filled with
+   zeros.  */
+static inline void
+ts_tree_edit_1 (TSTree *tree, ptrdiff_t start_byte,
+                ptrdiff_t old_end_byte, ptrdiff_t new_end_byte)
+{
+  eassert (start_byte >= 0);
+  eassert (start_byte <= old_end_byte);
+  eassert (start_byte <= new_end_byte);
+
+  TSPoint origin = {0, 0};
+  TSInputEdit edit;
+  edit.start_byte = (uint32_t) start_byte;
+  edit.old_end_byte = (uint32_t) old_end_byte;
+  edit.new_end_byte = (uint32_t) new_end_byte;
+  edit.start_point = origin;
+  edit.old_end_point = origin;
+  edit.new_end_point = origin;
+  ts_tree_edit (tree, &edit);
+}
+
+/* Update each parser's tree after the user made an edit. This
+function does not parse the buffer and only updates the tree. (So it
+should be very fast.)
+
+The text between START_BYTE and OLD_END_BYTE was replaced so that it
+now ends at NEW_END_BYTE. Every parser in the current buffer's
+parser list that already has a tree gets the edit applied (clipped
+to the range the parser sees), is marked as needing a reparse, and
+has its timestamp incremented. Parsers without a tree are left
+untouched. */
+void
+ts_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
+ ptrdiff_t new_end_byte)
+{
+ for (Lisp_Object parser_list
+ = BVAR (current_buffer, ts_parser_list);
+ !NILP (parser_list);
+ parser_list = XCDR (parser_list))
+ {
+ CHECK_CONS (parser_list);
+ Lisp_Object lisp_parser = XCAR (parser_list);
+ ts_check_parser (lisp_parser);
+ TSTree *tree = XTS_PARSER (lisp_parser)->tree;
+ if (tree != NULL)
+ {
+ eassert (start_byte <= old_end_byte);
+ eassert (start_byte <= new_end_byte);
+ /* Think the recorded change as a delete followed by an
+ insert, and think of them as moving unchanged text back
+ and forth. After all, the whole point of updating the
+ tree is to update the position of unchanged text. */
+ ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg;
+ ptrdiff_t visible_end = XTS_PARSER (lisp_parser)->visible_end;
+ eassert (visible_beg >= 0);
+ eassert (visible_beg <= visible_end);
+
+ /* START_OFFSET/OLD_END_OFFSET/NEW_END_OFFSET are (0-based)
+ offsets from VISIBLE_BEG. min(visi_end, max(visi_beg, value))
+ clips value into [visi_beg, visi_end], and subtracting
+ visi_beg gives the offset from visi_beg. */
+ ptrdiff_t start_offset =
+ min (visible_end,
+ max (visible_beg, start_byte)) - visible_beg;
+ ptrdiff_t old_end_offset =
+ min (visible_end,
+ max (visible_beg, old_end_byte)) - visible_beg;
+ ptrdiff_t new_end_offset =
+ min (visible_end,
+ max (visible_beg, new_end_byte)) - visible_beg;
+ eassert (start_offset <= old_end_offset);
+ eassert (start_offset <= new_end_offset);
+
+ ts_tree_edit_1 (tree, start_offset, old_end_offset,
+ new_end_offset);
+ XTS_PARSER (lisp_parser)->need_reparse = true;
+ /* Bumping the timestamp makes ts_check_node reject nodes
+ created before this change. */
+ XTS_PARSER (lisp_parser)->timestamp++;
+
+ /* VISIBLE_BEG/END records tree-sitter's range of view in
+ the buffer. We need to adjust them when tree-sitter's
+ view changes. */
+ ptrdiff_t visi_beg_delta;
+ if (old_end_byte > new_end_byte)
+ {
+ /* Move backward. */
+ visi_beg_delta = min (visible_beg, new_end_byte)
+ - min (visible_beg, old_end_byte);
+ }
+ else
+ {
+ /* Move forward. */
+ visi_beg_delta = old_end_byte < visible_beg
+ ? new_end_byte - old_end_byte : 0;
+ }
+ XTS_PARSER (lisp_parser)->visible_beg
+ = visible_beg + visi_beg_delta;
+ XTS_PARSER (lisp_parser)->visible_end
+ = visible_end + visi_beg_delta
+ + (new_end_offset - old_end_offset);
+ eassert (XTS_PARSER (lisp_parser)->visible_beg >= 0);
+ eassert (XTS_PARSER (lisp_parser)->visible_beg
+ <= XTS_PARSER (lisp_parser)->visible_end);
+ }
+ }
+}
+
+/* Make PARSER's visible_beg and visible_end agree with the
+   accessible portion (BUF_BEGV_BYTE .. BUF_ZV_BYTE) of its buffer.
+   If PARSER already has a tree, the difference is reported to
+   tree-sitter as insertions and deletions at the two ends of the
+   text it sees, so that existing tree positions stay meaningful.  */
+static void
+ts_ensure_position_synced (Lisp_Object parser)
+{
+  TSTree *tree = XTS_PARSER (parser)->tree;
+  struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
+
+  /* No tree yet: there is nothing to edit, but the recorded range
+     must still be brought up to date, otherwise the first parse
+     reads the buffer through stale bounds.  */
+  if (tree == NULL)
+    {
+      XTS_PARSER (parser)->visible_beg = BUF_BEGV_BYTE (buffer);
+      XTS_PARSER (parser)->visible_end = BUF_ZV_BYTE (buffer);
+      return;
+    }
+
+  ptrdiff_t visible_beg = XTS_PARSER (parser)->visible_beg;
+  ptrdiff_t visible_end = XTS_PARSER (parser)->visible_end;
+  eassert (0 <= visible_beg);
+  eassert (visible_beg <= visible_end);
+
+  /* Before we parse or set ranges, catch up with the narrowing
+     situation.  We change visible_beg and visible_end to match
+     BUF_BEGV_BYTE and BUF_ZV_BYTE, and inform tree-sitter of the
+     change.  We want to move the visible range of tree-sitter to
+     match the narrowed range.  For example,
+     from ________|xxxx|__
+     to   |xxxx|__________  */
+
+  /* 1. Make sure visible_beg <= BUF_BEGV_BYTE.  */
+  if (visible_beg > BUF_BEGV_BYTE (buffer))
+    {
+      /* Tree-sitter sees: insert at the beginning.  */
+      ts_tree_edit_1 (tree, 0, 0, visible_beg - BUF_BEGV_BYTE (buffer));
+      visible_beg = BUF_BEGV_BYTE (buffer);
+      eassert (visible_beg <= visible_end);
+    }
+  /* 2. Make sure visible_end = BUF_ZV_BYTE.  */
+  if (visible_end < BUF_ZV_BYTE (buffer))
+    {
+      /* Tree-sitter sees: insert at the end.  */
+      ts_tree_edit_1 (tree, visible_end - visible_beg,
+                      visible_end - visible_beg,
+                      BUF_ZV_BYTE (buffer) - visible_beg);
+      visible_end = BUF_ZV_BYTE (buffer);
+      eassert (visible_beg <= visible_end);
+    }
+  else if (visible_end > BUF_ZV_BYTE (buffer))
+    {
+      /* Tree-sitter sees: delete at the end.  */
+      ts_tree_edit_1 (tree, BUF_ZV_BYTE (buffer) - visible_beg,
+                      visible_end - visible_beg,
+                      BUF_ZV_BYTE (buffer) - visible_beg);
+      visible_end = BUF_ZV_BYTE (buffer);
+      eassert (visible_beg <= visible_end);
+    }
+  /* 3. Make sure visible_beg = BUF_BEGV_BYTE.  */
+  if (visible_beg < BUF_BEGV_BYTE (buffer))
+    {
+      /* Tree-sitter sees: delete at the beginning.  */
+      ts_tree_edit_1 (tree, 0, BUF_BEGV_BYTE (buffer) - visible_beg, 0);
+      visible_beg = BUF_BEGV_BYTE (buffer);
+      eassert (visible_beg <= visible_end);
+    }
+  eassert (0 <= visible_beg);
+  eassert (visible_beg <= visible_end);
+
+  XTS_PARSER (parser)->visible_beg = visible_beg;
+  XTS_PARSER (parser)->visible_end = visible_end;
+}
+
+/* Signal Qtreesit_buffer_too_large if BUFFER's text occupies more
+   bytes than tree-sitter's 32-bit offsets can address.  */
+static void
+ts_check_buffer_size (struct buffer *buffer)
+{
+  /* Tree-sitter offsets are byte offsets stored in uint32_t, so the
+     limit applies to the size in bytes, not the number of
+     characters.  */
+  ptrdiff_t buffer_size =
+    (BUF_Z_BYTE (buffer) - BUF_BEG_BYTE (buffer));
+  if (buffer_size > UINT32_MAX)
+    xsignal2 (Qtreesit_buffer_too_large,
+              build_pure_c_string ("Buffer size larger than 4GB, size:"),
+              make_fixnum (buffer_size));
+}
+
+/* Bring PARSER's tree up to date.  Parsing is deferred until a tree
+   is actually needed; this is the function that performs it.  Does
+   nothing if no reparse is pending.  Signals Qtreesit_parse_error
+   with the buffer if tree-sitter fails to produce a tree.  */
+static void
+ts_ensure_parsed (Lisp_Object parser)
+{
+  struct Lisp_TS_Parser *p = XTS_PARSER (parser);
+  if (!p->need_reparse)
+    return;
+
+  TSTree *old_tree = p->tree;
+  struct buffer *buffer = XBUFFER (p->buffer);
+  ts_check_buffer_size (buffer);
+
+  /* Before we parse, catch up with the narrowing situation.  */
+  ts_ensure_position_synced (parser);
+
+  TSTree *fresh_tree = ts_parser_parse (p->parser, old_tree, p->input);
+  /* A NULL result should be very rare (impossible, really): it only
+     happens when 1) language is not set (impossible in Emacs because
+     the user has to supply a language to create a parser), 2) parse
+     canceled due to timeout (impossible because we don't set a
+     timeout), 3) parse canceled due to cancellation flag (impossible
+     because we don't set the flag).  (See comments for
+     ts_parser_parse in tree_sitter/api.h.)  */
+  if (fresh_tree == NULL)
+    {
+      Lisp_Object lisp_buffer;
+      XSETBUFFER (lisp_buffer, buffer);
+      xsignal1 (Qtreesit_parse_error, lisp_buffer);
+    }
+
+  /* The previous tree, if any, is no longer needed.  */
+  if (old_tree != NULL)
+    ts_tree_delete (old_tree);
+  p->tree = fresh_tree;
+  p->need_reparse = false;
+}
+
+/* This is the read function provided to tree-sitter to read from a
+   buffer.  It reads one character at a time and automatically skips
+   the gap.
+
+   PARSER is the struct Lisp_TS_Parser registered as the input
+   payload.  BYTE_INDEX is a 0-based byte offset from the parser's
+   visible_beg.  POSITION is ignored.  The number of bytes available
+   at the returned address is stored in *BYTES_READ; it is 0 when
+   there is nothing (more) to read.  */
+static const char*
+ts_read_buffer (void *parser, uint32_t byte_index,
+                TSPoint position, uint32_t *bytes_read)
+{
+  struct Lisp_TS_Parser *lisp_parser = parser;
+  struct buffer *buffer = XBUFFER (lisp_parser->buffer);
+  ptrdiff_t visible_beg = lisp_parser->visible_beg;
+  ptrdiff_t visible_end = lisp_parser->visible_end;
+  ptrdiff_t byte_pos = byte_index + visible_beg;
+  /* We will make sure visible_beg = BUF_BEGV_BYTE before re-parse (in
+     ts_ensure_parsed), so byte_pos will never be smaller than
+     BUF_BEG_BYTE.  These are comparisons: an assignment here would
+     make checking builds silently behave differently from ordinary
+     builds.  */
+  eassert (visible_beg == BUF_BEGV_BYTE (buffer));
+  eassert (visible_end == BUF_ZV_BYTE (buffer));
+
+  /* Read one character.  Tree-sitter wants us to set bytes_read to 0
+     if it reads to the end of buffer.  It doesn't say what it wants
+     for the return value in that case, so we just give it a null
+     pointer.  */
+  char *beg;
+  int len;
+  /* This function could run from a user command, so it is better to
+     do nothing instead of raising an error.  The dead-buffer and
+     end-of-text cases are kept as separate branches for
+     readability.  */
+  if (!BUFFER_LIVE_P (buffer))
+    {
+      beg = NULL;
+      len = 0;
+    }
+  /* Reached visible end-of-buffer, tell tree-sitter to read no more.  */
+  else if (byte_pos >= visible_end)
+    {
+      beg = NULL;
+      len = 0;
+    }
+  /* Normal case, read a character.  */
+  else
+    {
+      beg = (char *) BUF_BYTE_ADDRESS (buffer, byte_pos);
+      len = BYTES_BY_CHAR_HEAD ((int) *beg);
+    }
+  *bytes_read = (uint32_t) len;
+  return beg;
+}
+
+/*** Functions for parser and node object*/
+
+/* Wrap the parser in a Lisp_Object to be used in the Lisp machine.
+   BUFFER is the buffer the parser reads from, PARSER the tree-sitter
+   parser, TREE its current tree (may be NULL) and LANGUAGE_SYMBOL
+   the symbol naming its language.  The new object starts out
+   needing a reparse, with a zero timestamp, and not deleted.  */
+Lisp_Object
+make_ts_parser (Lisp_Object buffer, TSParser *parser,
+                TSTree *tree, Lisp_Object language_symbol)
+{
+  struct Lisp_TS_Parser *lisp_parser
+    = ALLOCATE_PSEUDOVECTOR
+    (struct Lisp_TS_Parser, buffer, PVEC_TS_PARSER);
+
+  lisp_parser->language_symbol = language_symbol;
+  lisp_parser->buffer = buffer;
+  lisp_parser->parser = parser;
+  lisp_parser->tree = tree;
+  TSInput input = {lisp_parser, ts_read_buffer, TSInputEncodingUTF8};
+  lisp_parser->input = input;
+  lisp_parser->need_reparse = true;
+  /* visible_beg and visible_end are byte positions everywhere else
+     (they are compared against BUF_BEGV_BYTE and BUF_ZV_BYTE), so
+     they must not be initialized from character positions.  */
+  lisp_parser->visible_beg = BUF_BEGV_BYTE (XBUFFER (buffer));
+  lisp_parser->visible_end = BUF_ZV_BYTE (XBUFFER (buffer));
+  lisp_parser->timestamp = 0;
+  lisp_parser->deleted = false;
+  eassert (lisp_parser->visible_beg <= lisp_parser->visible_end);
+  return make_lisp_ptr (lisp_parser, Lisp_Vectorlike);
+}
+
+/* Build the Lisp object representing NODE, a node that belongs to
+   PARSER.  The object remembers PARSER's current timestamp.  */
+Lisp_Object
+make_ts_node (Lisp_Object parser, TSNode node)
+{
+  struct Lisp_TS_Node *wrapper
+    = ALLOCATE_PSEUDOVECTOR (struct Lisp_TS_Node, parser, PVEC_TS_NODE);
+
+  wrapper->node = node;
+  wrapper->parser = parser;
+  /* ts_check_node compares this against the parser's timestamp.  */
+  wrapper->timestamp = XTS_PARSER (parser)->timestamp;
+
+  return make_lisp_ptr (wrapper, Lisp_Vectorlike);
+}
+
+/* Make a compiled query struct.  Return NULL if error occurs, in
+   which case tree-sitter has filled in *ERROR_OFFSET and
+   *ERROR_TYPE.  QUERY has to be either a cons or a string; a cons is
+   first expanded with Ftreesit_query_expand.  LANGUAGE is the
+   language the query is compiled for.  */
+static struct Lisp_TS_Query *
+make_ts_query (Lisp_Object query, const TSLanguage *language,
+               uint32_t *error_offset, TSQueryError *error_type)
+{
+  if (CONSP (query))
+    query = Ftreesit_query_expand (query);
+  char *source = SSDATA (query);
+
+  TSQuery *ts_query = ts_query_new (language, source, strlen (source),
+                                    error_offset, error_type);
+  if (ts_query == NULL)
+    return NULL;
+
+  /* Create the cursor only once the query is known to be valid, so
+     that a failed compilation does not leak a cursor.  */
+  TSQueryCursor *ts_cursor = ts_query_cursor_new ();
+
+  struct Lisp_TS_Query *lisp_query
+    = ALLOCATE_PLAIN_PSEUDOVECTOR (struct Lisp_TS_Query,
+                                   PVEC_TS_COMPILED_QUERY);
+  lisp_query->query = ts_query;
+  lisp_query->cursor = ts_cursor;
+  return lisp_query;
+}
+
+DEFUN ("treesit-parser-p",
+       Ftreesit_parser_p, Streesit_parser_p, 1, 1, 0,
+       doc: /* Return t if OBJECT is a tree-sitter parser.  */)
+  (Lisp_Object object)
+{
+  return TS_PARSERP (object) ? Qt : Qnil;
+}
+
+DEFUN ("treesit-node-p",
+       Ftreesit_node_p, Streesit_node_p, 1, 1, 0,
+       doc: /* Return t if OBJECT is a tree-sitter node.  */)
+  (Lisp_Object object)
+{
+  return TS_NODEP (object) ? Qt : Qnil;
+}
+
+DEFUN ("treesit-compiled-query-p",
+       Ftreesit_compiled_query_p, Streesit_compiled_query_p, 1, 1, 0,
+       doc: /* Return t if OBJECT is a compiled tree-sitter query.  */)
+  (Lisp_Object object)
+{
+  return TS_COMPILED_QUERY_P (object) ? Qt : Qnil;
+}
+
+DEFUN ("treesit-query-p",
+       Ftreesit_query_p, Streesit_query_p, 1, 1, 0,
+       doc: /* Return t if OBJECT is a generic tree-sitter query.  */)
+  (Lisp_Object object)
+{
+  /* A query is a compiled query object, a cons, or a string.  */
+  bool is_query = (TS_COMPILED_QUERY_P (object)
+                   || CONSP (object)
+                   || STRINGP (object));
+  return is_query ? Qt : Qnil;
+}
+
+DEFUN ("treesit-node-parser",
+       Ftreesit_node_parser, Streesit_node_parser,
+       1, 1, 0,
+       doc: /* Return the parser to which NODE belongs.  */)
+  (Lisp_Object node)
+{
+  CHECK_TS_NODE (node);
+  Lisp_Object owner = XTS_NODE (node)->parser;
+  return owner;
+}
+
+DEFUN ("treesit-parser-create",
+       Ftreesit_parser_create, Streesit_parser_create,
+       1, 3, 0,
+       doc: /* Create and return a parser in BUFFER for LANGUAGE.
+
+The parser is automatically added to BUFFER's `treesit-parser-list'.
+LANGUAGE is a language symbol.  If BUFFER is nil, use the current
+buffer.  If BUFFER already has a parser for LANGUAGE, return that
+parser.  If NO-REUSE is non-nil, always create a new parser.  */)
+  (Lisp_Object language, Lisp_Object buffer, Lisp_Object no_reuse)
+{
+  ts_initialize ();
+
+  CHECK_SYMBOL (language);
+  struct buffer *buf;
+  if (NILP (buffer))
+    buf = current_buffer;
+  else
+    {
+      CHECK_BUFFER (buffer);
+      buf = XBUFFER (buffer);
+    }
+  ts_check_buffer_size (buf);
+
+  /* See if we can reuse a parser.  */
+  for (Lisp_Object tail = BVAR (buf, ts_parser_list);
+       NILP (no_reuse) && !NILP (tail);
+       tail = XCDR (tail))
+    {
+      struct Lisp_TS_Parser *parser = XTS_PARSER (XCAR (tail));
+      if (EQ (parser->language_symbol, language))
+        {
+          return XCAR (tail);
+        }
+    }
+
+  /* Load the language first: loading may signal, and doing it before
+     allocating the TSParser means nothing is leaked in that case.  */
+  TSLanguage *lang = ts_load_language (language, true);
+  TSParser *parser = ts_parser_new ();
+  /* We check language version when loading a language, so this should
+     always succeed.  */
+  ts_parser_set_language (parser, lang);
+
+  /* The parser must be bound to the buffer whose parser list it
+     joins, which is not necessarily the current buffer.  */
+  Lisp_Object lisp_buf;
+  XSETBUFFER (lisp_buf, buf);
+  Lisp_Object lisp_parser
+    = make_ts_parser (lisp_buf, parser, NULL, language);
+
+  BVAR (buf, ts_parser_list)
+    = Fcons (lisp_parser, BVAR (buf, ts_parser_list));
+
+  return lisp_parser;
+}
+
+DEFUN ("treesit-parser-delete",
+       Ftreesit_parser_delete, Streesit_parser_delete,
+       1, 1, 0,
+       doc: /* Delete PARSER from its buffer.  */)
+  (Lisp_Object parser)
+{
+  ts_check_parser (parser);
+
+  Lisp_Object buffer = XTS_PARSER (parser)->buffer;
+  struct buffer *buf = XBUFFER (buffer);
+  /* Remove exactly this parser object: compare by identity rather
+     than by structural equality.  */
+  BVAR (buf, ts_parser_list)
+    = Fdelq (parser, BVAR (buf, ts_parser_list));
+
+  /* Mark it so that ts_check_parser rejects any later use.  */
+  XTS_PARSER (parser)->deleted = true;
+  return Qnil;
+}
+
+DEFUN ("treesit-parser-list",
+       Ftreesit_parser_list, Streesit_parser_list,
+       0, 1, 0,
+       doc: /* Return BUFFER's parser list.
+BUFFER defaults to the current buffer.  */)
+  (Lisp_Object buffer)
+{
+  struct buffer *target = current_buffer;
+  if (!NILP (buffer))
+    {
+      CHECK_BUFFER (buffer);
+      target = XBUFFER (buffer);
+    }
+
+  /* Hand out a copy, so that callers modifying the result cannot
+     corrupt the buffer's own list.  The copy is accumulated in
+     reverse and flipped at the end; it is not shared with anything,
+     so reversing it destructively is safe.  */
+  Lisp_Object copy = Qnil;
+  Lisp_Object rest = BVAR (target, ts_parser_list);
+  while (!NILP (rest))
+    {
+      copy = Fcons (XCAR (rest), copy);
+      rest = XCDR (rest);
+    }
+  return Fnreverse (copy);
+}
+
+DEFUN ("treesit-parser-buffer",
+       Ftreesit_parser_buffer, Streesit_parser_buffer,
+       1, 1, 0,
+       doc: /* Return the buffer of PARSER.  */)
+  (Lisp_Object parser)
+{
+  ts_check_parser (parser);
+  /* Go through the struct buffer pointer and back to a Lisp
+     object.  */
+  struct buffer *raw = XBUFFER (XTS_PARSER (parser)->buffer);
+  Lisp_Object result;
+  XSETBUFFER (result, raw);
+  return result;
+}
+
+DEFUN ("treesit-parser-language",
+       Ftreesit_parser_language, Streesit_parser_language,
+       1, 1, 0,
+       doc: /* Return parser's language symbol.
+This symbol is the one used to create the parser.  */)
+  (Lisp_Object parser)
+{
+  ts_check_parser (parser);
+  Lisp_Object language = XTS_PARSER (parser)->language_symbol;
+  return language;
+}
+
+/*** Parser API */
+
+DEFUN ("treesit-parser-root-node",
+       Ftreesit_parser_root_node, Streesit_parser_root_node,
+       1, 1, 0,
+       doc: /* Return the root node of PARSER.  */)
+  (Lisp_Object parser)
+{
+  ts_check_parser (parser);
+  /* Make sure the tree reflects the recorded changes before handing
+     out a node from it.  */
+  ts_ensure_parsed (parser);
+  TSTree *tree = XTS_PARSER (parser)->tree;
+  return make_ts_node (parser, ts_tree_root_node (tree));
+}
+
+/* Validate the RANGES argument of
+   treesit-parser-set-included-ranges.  RANGES must be a list of
+   (BEG . END) conses of fixnums that are in ascending order, do not
+   overlap, and lie within the accessible portion of the current
+   buffer.  Signal Qtreesit_range_invalid (or a wrong-type error)
+   otherwise.
+   NOTE(review): the bounds come from the current buffer, not from
+   the parser's buffer -- confirm callers guarantee they coincide.  */
+static void
+ts_check_range_argument (Lisp_Object ranges)
+{
+  ptrdiff_t upper_bound = BUF_ZV (current_buffer);
+  /* End of the previous range; the next range may not start before
+     it.  */
+  EMACS_INT prev_end = BUF_BEGV (current_buffer);
+
+  Lisp_Object tail = ranges;
+  while (!NILP (tail))
+    {
+      CHECK_CONS (tail);
+      Lisp_Object pair = XCAR (tail);
+      CHECK_CONS (pair);
+      CHECK_FIXNUM (XCAR (pair));
+      CHECK_FIXNUM (XCDR (pair));
+      EMACS_INT beg = XFIXNUM (XCAR (pair));
+      EMACS_INT end = XFIXNUM (XCDR (pair));
+      if (beg < prev_end || end < beg || upper_bound < end)
+        xsignal2 (Qtreesit_range_invalid,
+                  build_pure_c_string
+                  ("RANGE is either overlapping or out-of-order or out-of-range"),
+                  ranges);
+      prev_end = end;
+      tail = XCDR (tail);
+    }
+}
+
+DEFUN ("treesit-parser-set-included-ranges",
+       Ftreesit_parser_set_included_ranges,
+       Streesit_parser_set_included_ranges,
+       2, 2, 0,
+       doc: /* Limit PARSER to RANGES.
+
+RANGES is a list of (BEG . END), each (BEG . END) confines a range in
+which the parser should operate in.  Each range must not overlap, and
+each range should come in order.  Signal `treesit-set-range-error'
+if the argument is invalid, or something else went wrong.  If RANGES
+is nil, set PARSER to parse the whole buffer.  */)
+  (Lisp_Object parser, Lisp_Object ranges)
+{
+  ts_check_parser (parser);
+  /* nil is a documented value (parse the whole buffer), so only a
+     non-nil argument has to be a cons.  */
+  if (!NILP (ranges))
+    CHECK_CONS (ranges);
+  ts_check_range_argument (ranges);
+
+  /* Before we parse, catch up with narrowing/widening.  */
+  ts_ensure_position_synced (parser);
+
+  bool success;
+  if (NILP (ranges))
+    {
+      /* If RANGES is nil, make parser to parse the whole document.
+         To do that we give tree-sitter a 0 length, the range is a
+         dummy.  */
+      TSRange ts_range = {{0, 0}, {0, 0}, 0, 0};
+      success = ts_parser_set_included_ranges
+        (XTS_PARSER (parser)->parser, &ts_range , 0);
+    }
+  else
+    {
+      /* Set ranges for PARSER.  */
+      ptrdiff_t len = list_length (ranges);
+      TSRange *ts_ranges = xmalloc (sizeof (TSRange) * len);
+      struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
+
+      /* Walk a separate tail so that RANGES is still intact for the
+         error report below.  */
+      ptrdiff_t idx = 0;
+      for (Lisp_Object tail = ranges; !NILP (tail);
+           tail = XCDR (tail), idx++)
+        {
+          Lisp_Object range = XCAR (tail);
+          EMACS_INT beg_byte = buf_charpos_to_bytepos
+            (buffer, XFIXNUM (XCAR (range)));
+          EMACS_INT end_byte = buf_charpos_to_bytepos
+            (buffer, XFIXNUM (XCDR (range)));
+          /* We don't care about start and end points, put in dummy
+             value.  The byte fields are offsets from the beginning of
+             the accessible portion.  */
+          TSRange rg = {{0,0}, {0,0},
+                        (uint32_t) (beg_byte - BUF_BEGV_BYTE (buffer)),
+                        (uint32_t) (end_byte - BUF_BEGV_BYTE (buffer))};
+          ts_ranges[idx] = rg;
+        }
+      success = ts_parser_set_included_ranges
+        (XTS_PARSER (parser)->parser, ts_ranges, (uint32_t) len);
+      /* The elements were validated by ts_check_range_argument, so
+         nothing in the loop above is expected to signal and there is
+         no need for unwind-protect.  */
+      xfree (ts_ranges);
+    }
+
+  if (!success)
+    xsignal2 (Qtreesit_range_invalid,
+              build_pure_c_string
+              ("Something went wrong when setting ranges"),
+              ranges);
+
+  XTS_PARSER (parser)->need_reparse = true;
+  return Qnil;
+}
+
+DEFUN ("treesit-parser-included-ranges",
+       Ftreesit_parser_included_ranges,
+       Streesit_parser_included_ranges,
+       1, 1, 0,
+       doc: /* Return the ranges set for PARSER.
+See `treesit-parser-set-included-ranges'.  If no range is set, return
+nil.  */)
+  (Lisp_Object parser)
+{
+  ts_check_parser (parser);
+  uint32_t len;
+  const TSRange *ranges = ts_parser_included_ranges
+    (XTS_PARSER (parser)->parser, &len);
+  if (len == 0)
+    return Qnil;
+
+  /* Our return value depends on the buffer state (BUF_BEGV_BYTE,
+     etc), so we need to sync up.  */
+  ts_ensure_position_synced (parser);
+
+  struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
+
+  Lisp_Object list = Qnil;
+  /* The index has the same unsigned type as LEN, and byte positions
+     are kept in ptrdiff_t like every other buffer position.  */
+  for (uint32_t idx = 0; idx < len; idx++)
+    {
+      TSRange range = ranges[idx];
+      ptrdiff_t beg_byte = range.start_byte + BUF_BEGV_BYTE (buffer);
+      ptrdiff_t end_byte = range.end_byte + BUF_BEGV_BYTE (buffer);
+      eassert (BUF_BEGV_BYTE (buffer) <= beg_byte);
+      eassert (beg_byte <= end_byte);
+      eassert (end_byte <= BUF_ZV_BYTE (buffer));
+
+      Lisp_Object lisp_range =
+        Fcons (make_fixnum (buf_bytepos_to_charpos (buffer, beg_byte)) ,
+               make_fixnum (buf_bytepos_to_charpos (buffer, end_byte)));
+      list = Fcons (lisp_range, list);
+    }
+  return Fnreverse (list);
+}
+
+/*** Node API */
+
+/* Check that OBJ is a non-negative fixnum and signal an error if
+   otherwise: a wrong-type error if OBJ is not a fixnum, and
+   Qargs_out_of_range if it is negative.  */
+static void
+ts_check_positive_integer (Lisp_Object obj)
+{
+  /* XFIXNUM below is only meaningful for fixnums, so a check that
+     also admits bignums is not enough.  */
+  CHECK_FIXNUM (obj);
+  if (XFIXNUM (obj) < 0)
+    xsignal1 (Qargs_out_of_range, obj);
+}
+
+/* Signal an error unless OBJ is a node that is still current: a
+   wrong-type error if OBJ is not a node, and Qtreesit_node_outdated
+   if its timestamp differs from that of its parser.  */
+static void
+ts_check_node (Lisp_Object obj)
+{
+  CHECK_TS_NODE (obj);
+  struct Lisp_TS_Node *node = XTS_NODE (obj);
+  struct Lisp_TS_Parser *owner = XTS_PARSER (node->parser);
+  bool up_to_date = node->timestamp == owner->timestamp;
+  if (!up_to_date)
+    xsignal1 (Qtreesit_node_outdated, obj);
+}
+
+DEFUN ("treesit-node-type",
+       Ftreesit_node_type, Streesit_node_type, 1, 1, 0,
+       doc: /* Return the NODE's type as a string.
+If NODE is nil, return nil.  */)
+  (Lisp_Object node)
+{
+  if (NILP (node))
+    return Qnil;
+
+  ts_check_node (node);
+  /* Copy the type name tree-sitter reports into a Lisp string.  */
+  return build_string (ts_node_type (XTS_NODE (node)->node));
+}
+
+DEFUN ("treesit-node-start",
+       Ftreesit_node_start, Streesit_node_start, 1, 1, 0,
+       doc: /* Return the NODE's start position.
+If NODE is nil, return nil. */)
+  (Lisp_Object node)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+
+  Lisp_Object owner = XTS_NODE (node)->parser;
+  ptrdiff_t base = XTS_PARSER (owner)->visible_beg;
+  uint32_t offset = ts_node_start_byte (XTS_NODE (node)->node);
+  struct buffer *buf = XBUFFER (XTS_PARSER (owner)->buffer);
+
+  /* The node's byte offset is added to the parser's visible_beg to
+     get a buffer byte position, which is then turned into a
+     character position.  */
+  return make_fixnum (buf_bytepos_to_charpos (buf, offset + base));
+}
+
+DEFUN ("treesit-node-end",
+       Ftreesit_node_end, Streesit_node_end, 1, 1, 0,
+       doc: /* Return the NODE's end position.
+If NODE is nil, return nil. */)
+  (Lisp_Object node)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+
+  Lisp_Object owner = XTS_NODE (node)->parser;
+  ptrdiff_t base = XTS_PARSER (owner)->visible_beg;
+  uint32_t offset = ts_node_end_byte (XTS_NODE (node)->node);
+  struct buffer *buf = XBUFFER (XTS_PARSER (owner)->buffer);
+
+  /* The node's byte offset is added to the parser's visible_beg to
+     get a buffer byte position, which is then turned into a
+     character position.  */
+  return make_fixnum (buf_bytepos_to_charpos (buf, offset + base));
+}
+
+DEFUN ("treesit-node-string",
+       Ftreesit_node_string, Streesit_node_string, 1, 1, 0,
+       doc: /* Return the string representation of NODE.
+If NODE is nil, return nil. */)
+  (Lisp_Object node)
+{
+  if (NILP (node)) return Qnil;
+  ts_check_node (node);
+  TSNode ts_node = XTS_NODE (node)->node;
+  /* ts_node_string returns a freshly allocated C string that the
+     caller is responsible for freeing.  build_string copies it into
+     a Lisp string, so release the C copy afterwards instead of
+     leaking it on every call.  */
+  char *string = ts_node_string (ts_node);
+  Lisp_Object lisp_string = build_string (string);
+  xfree (string);
+  return lisp_string;
+}
+
+DEFUN ("treesit-node-parent",
+       Ftreesit_node_parent, Streesit_node_parent, 1, 1, 0,
+       doc: /* Return the immediate parent of NODE.
+Return nil if there isn't any. If NODE is nil, return nil. */)
+  (Lisp_Object node)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+
+  TSNode up = ts_node_parent (XTS_NODE (node)->node);
+
+  /* Tree-sitter reports "no parent" with a null node.  */
+  return (ts_node_is_null (up)
+	  ? Qnil
+	  : make_ts_node (XTS_NODE (node)->parser, up));
+}
+
+DEFUN ("treesit-node-child",
+       Ftreesit_node_child, Streesit_node_child, 2, 3, 0,
+       doc: /* Return the Nth child of NODE.
+
+Return nil if there isn't any. If NAMED is non-nil, look for named
+child only. NAMED defaults to nil. If NODE is nil, return nil. */)
+  (Lisp_Object node, Lisp_Object n, Lisp_Object named)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+  ts_check_positive_integer (n);
+
+  /* Tree-sitter indexes children with uint32_t, reject anything
+     larger.  */
+  EMACS_INT index = XFIXNUM (n);
+  if (index > UINT32_MAX)
+    xsignal1 (Qargs_out_of_range, n);
+
+  TSNode parent = XTS_NODE (node)->node;
+  TSNode nth = (NILP (named)
+		? ts_node_child (parent, (uint32_t) index)
+		: ts_node_named_child (parent, (uint32_t) index));
+
+  return (ts_node_is_null (nth)
+	  ? Qnil
+	  : make_ts_node (XTS_NODE (node)->parser, nth));
+}
+
+DEFUN ("treesit-node-check",
+       Ftreesit_node_check, Streesit_node_check, 2, 2, 0,
+       doc: /* Return non-nil if NODE has PROPERTY, nil otherwise.
+
+PROPERTY could be 'named, 'missing, 'extra, 'has-changes, 'has-error.
+Named nodes correspond to named rules in the language definition,
+whereas "anonymous" nodes correspond to string literals in the
+language definition.
+
+Missing nodes are inserted by the parser in order to recover from
+certain kinds of syntax errors, i.e., should be there but not there.
+
+Extra nodes represent things like comments, which are not required the
+language definition, but can appear anywhere.
+
+A node "has changes" if the buffer changed since the node is
+created. (Don't forget the "s" at the end of 'has-changes.)
+
+A node "has error" if itself is a syntax error or contains any syntax
+errors. */)
+  (Lisp_Object node, Lisp_Object property)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+  CHECK_SYMBOL (property);
+
+  TSNode target = XTS_NODE (node)->node;
+
+  /* Dispatch on the property symbol; each one maps directly onto a
+     tree-sitter query function.  */
+  if (EQ (property, Qnamed))
+    return ts_node_is_named (target) ? Qt : Qnil;
+  if (EQ (property, Qmissing))
+    return ts_node_is_missing (target) ? Qt : Qnil;
+  if (EQ (property, Qextra))
+    return ts_node_is_extra (target) ? Qt : Qnil;
+  if (EQ (property, Qhas_error))
+    return ts_node_has_error (target) ? Qt : Qnil;
+  if (EQ (property, Qhas_changes))
+    return ts_node_has_changes (target) ? Qt : Qnil;
+
+  signal_error ("Expecting 'named, 'missing, 'extra, 'has-changes or 'has-error, got",
+		property);
+}
+
+DEFUN ("treesit-node-field-name-for-child",
+       Ftreesit_node_field_name_for_child,
+       Streesit_node_field_name_for_child, 2, 2, 0,
+       doc: /* Return the field name of the Nth child of NODE.
+
+Return nil if not any child or no field is found.
+If NODE is nil, return nil. */)
+  (Lisp_Object node, Lisp_Object n)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+  ts_check_positive_integer (n);
+
+  /* Tree-sitter indexes children with uint32_t, reject anything
+     larger.  */
+  EMACS_INT index = XFIXNUM (n);
+  if (index > UINT32_MAX)
+    xsignal1 (Qargs_out_of_range, n);
+
+  const char *field
+    = ts_node_field_name_for_child (XTS_NODE (node)->node,
+				    (uint32_t) index);
+
+  /* A NULL name is reported to Lisp as nil.  */
+  return field == NULL ? Qnil : build_string (field);
+}
+
+DEFUN ("treesit-node-child-count",
+       Ftreesit_node_child_count,
+       Streesit_node_child_count, 1, 2, 0,
+       doc: /* Return the number of children of NODE.
+
+If NAMED is non-nil, count named child only. NAMED defaults to
+nil. If NODE is nil, return nil. */)
+  (Lisp_Object node, Lisp_Object named)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+
+  TSNode target = XTS_NODE (node)->node;
+  uint32_t how_many = (NILP (named)
+		       ? ts_node_child_count (target)
+		       : ts_node_named_child_count (target));
+  return make_fixnum (how_many);
+}
+
+DEFUN ("treesit-node-child-by-field-name",
+       Ftreesit_node_child_by_field_name,
+       Streesit_node_child_by_field_name, 2, 2, 0,
+       doc: /* Return the child of NODE with FIELD-NAME.
+Return nil if there isn't any. If NODE is nil, return nil. */)
+  (Lisp_Object node, Lisp_Object field_name)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+  CHECK_STRING (field_name);
+
+  /* The field name is handed to tree-sitter as a C string whose
+     length is measured with strlen.  */
+  char *field = SSDATA (field_name);
+  TSNode found
+    = ts_node_child_by_field_name (XTS_NODE (node)->node,
+				   field, strlen (field));
+
+  return (ts_node_is_null (found)
+	  ? Qnil
+	  : make_ts_node (XTS_NODE (node)->parser, found));
+}
+
+DEFUN ("treesit-node-next-sibling",
+       Ftreesit_node_next_sibling,
+       Streesit_node_next_sibling, 1, 2, 0,
+       doc: /* Return the next sibling of NODE.
+
+Return nil if there isn't any. If NAMED is non-nil, look for named
+child only. NAMED defaults to nil. If NODE is nil, return nil. */)
+  (Lisp_Object node, Lisp_Object named)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+
+  TSNode here = XTS_NODE (node)->node;
+  TSNode after = (NILP (named)
+		  ? ts_node_next_sibling (here)
+		  : ts_node_next_named_sibling (here));
+
+  /* Tree-sitter reports "no sibling" with a null node.  */
+  return (ts_node_is_null (after)
+	  ? Qnil
+	  : make_ts_node (XTS_NODE (node)->parser, after));
+}
+
+DEFUN ("treesit-node-prev-sibling",
+       Ftreesit_node_prev_sibling,
+       Streesit_node_prev_sibling, 1, 2, 0,
+       doc: /* Return the previous sibling of NODE.
+
+Return nil if there isn't any. If NAMED is non-nil, look for named
+child only. NAMED defaults to nil. If NODE is nil, return nil. */)
+  (Lisp_Object node, Lisp_Object named)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+
+  TSNode here = XTS_NODE (node)->node;
+  TSNode before = (NILP (named)
+		   ? ts_node_prev_sibling (here)
+		   : ts_node_prev_named_sibling (here));
+
+  /* Tree-sitter reports "no sibling" with a null node.  */
+  return (ts_node_is_null (before)
+	  ? Qnil
+	  : make_ts_node (XTS_NODE (node)->parser, before));
+}
+
+DEFUN ("treesit-node-first-child-for-pos",
+       Ftreesit_node_first_child_for_pos,
+       Streesit_node_first_child_for_pos, 2, 3, 0,
+       doc: /* Return the first child of NODE on POS.
+
+Specifically, return the first child that extends beyond POS. POS is
+a position in the buffer. Return nil if there isn't any. If NAMED is
+non-nil, look for named child only. NAMED defaults to nil. Note that
+this function returns an immediate child, not the smallest
+(grand)child. If NODE is nil, return nil. */)
+  (Lisp_Object node, Lisp_Object pos, Lisp_Object named)
+{
+  if (NILP (node)) return Qnil;
+  ts_check_node (node);
+  ts_check_positive_integer (pos);
+
+  struct buffer *buf =
+    XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
+  ptrdiff_t visible_beg =
+    XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
+
+  /* Validate POS as a character position *before* converting it:
+     the conversion is only meaningful for positions inside the
+     buffer, and XFIXNUM must not be applied to a bignum.  Checking
+     against the accessible portion in characters is equivalent to
+     checking the byte position against BUF_BEGV_BYTE/BUF_ZV_BYTE.  */
+  if (!FIXNUMP (pos)
+      || XFIXNUM (pos) < BUF_BEGV (buf)
+      || XFIXNUM (pos) > BUF_ZV (buf))
+    xsignal1 (Qargs_out_of_range, pos);
+
+  ptrdiff_t byte_pos = buf_charpos_to_bytepos (buf, XFIXNUM (pos));
+
+  /* Tree-sitter works with byte offsets relative to the parser's
+     visible_beg.  */
+  TSNode ts_node = XTS_NODE (node)->node;
+  TSNode child;
+  if (NILP (named))
+    child = ts_node_first_child_for_byte
+      (ts_node, byte_pos - visible_beg);
+  else
+    child = ts_node_first_named_child_for_byte
+      (ts_node, byte_pos - visible_beg);
+
+  if (ts_node_is_null (child))
+    return Qnil;
+
+  return make_ts_node (XTS_NODE (node)->parser, child);
+}
+
+DEFUN ("treesit-node-descendant-for-range",
+       Ftreesit_node_descendant_for_range,
+       Streesit_node_descendant_for_range, 3, 4, 0,
+       doc: /* Return the smallest node that covers BEG to END.
+
+The returned node is a descendant of NODE. BEG and END are buffer
+positions. Return nil if there isn't any. If NAMED is non-nil, look
+for named child only. NAMED defaults to nil. If NODE is nil, return
+nil. */)
+  (Lisp_Object node, Lisp_Object beg, Lisp_Object end, Lisp_Object named)
+{
+  if (NILP (node)) return Qnil;
+  ts_check_node (node);
+  CHECK_INTEGER (beg);
+  CHECK_INTEGER (end);
+
+  struct buffer *buf =
+    XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
+  ptrdiff_t visible_beg =
+    XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
+
+  /* Checks for BUFFER_BEG <= BEG <= END <= BUFFER_END.  This is done
+     on character positions and before any conversion: the
+     conversion is only meaningful for positions inside the buffer,
+     and CHECK_INTEGER lets bignums through, on which XFIXNUM must
+     not be used.  */
+  if (!(FIXNUMP (beg) && FIXNUMP (end)
+	&& BUF_BEGV (buf) <= XFIXNUM (beg)
+	&& XFIXNUM (beg) <= XFIXNUM (end)
+	&& XFIXNUM (end) <= BUF_ZV (buf)))
+    xsignal2 (Qargs_out_of_range, beg, end);
+
+  ptrdiff_t byte_beg = buf_charpos_to_bytepos (buf, XFIXNUM (beg));
+  ptrdiff_t byte_end = buf_charpos_to_bytepos (buf, XFIXNUM (end));
+
+  /* Tree-sitter works with byte offsets relative to the parser's
+     visible_beg.  */
+  TSNode ts_node = XTS_NODE (node)->node;
+  TSNode child;
+  if (NILP (named))
+    child = ts_node_descendant_for_byte_range
+      (ts_node, byte_beg - visible_beg, byte_end - visible_beg);
+  else
+    child = ts_node_named_descendant_for_byte_range
+      (ts_node, byte_beg - visible_beg, byte_end - visible_beg);
+
+  if (ts_node_is_null (child))
+    return Qnil;
+
+  return make_ts_node (XTS_NODE (node)->parser, child);
+}
+
+DEFUN ("treesit-node-eq",
+       Ftreesit_node_eq,
+       Streesit_node_eq, 2, 2, 0,
+       doc: /* Return non-nil if NODE1 and NODE2 are the same node.
+If any one of NODE1 and NODE2 is nil, return nil. */)
+  (Lisp_Object node1, Lisp_Object node2)
+{
+  /* A nil argument never compares equal, not even to another nil.  */
+  if (NILP (node1))
+    return Qnil;
+  if (NILP (node2))
+    return Qnil;
+
+  CHECK_TS_NODE (node1);
+  CHECK_TS_NODE (node2);
+
+  return (ts_node_eq (XTS_NODE (node1)->node, XTS_NODE (node2)->node)
+	  ? Qt : Qnil);
+}
+
+/*** Query functions */
+
+DEFUN ("treesit-pattern-expand",
+       Ftreesit_pattern_expand,
+       Streesit_pattern_expand, 1, 1, 0,
+       doc: /* Expand PATTERN to its string form.
+
+PATTERN can be
+
+ :anchor
+ :?
+ :*
+ :+
+ :equal
+ :match
+ (TYPE PATTERN...)
+ [PATTERN...]
+ FIELD-NAME:
+ @CAPTURE-NAME
+ (_)
+ _
+ \"TYPE\"
+
+Consult Info node `(elisp)Pattern Matching' form detailed
+explanation. */)
+  (Lisp_Object pattern)
+{
+  /* Keywords that expand to a fixed piece of query syntax, tried in
+     this order.  */
+  static const struct
+  {
+    const char *keyword;
+    const char *expansion;
+  } fixed_forms[] = {
+    { ":anchor", "." },
+    { ":?", "?" },
+    { ":*", "*" },
+    { ":+", "+" },
+    { ":equal", "#equal" },
+    { ":match", "#match" },
+  };
+  const int n_forms = sizeof fixed_forms / sizeof fixed_forms[0];
+
+  for (int i = 0; i < n_forms; i++)
+    if (EQ (pattern, intern_c_string (fixed_forms[i].keyword)))
+      return build_pure_c_string (fixed_forms[i].expansion);
+
+  /* Vectors are wrapped in brackets, lists in parentheses.  */
+  bool is_vector = VECTORP (pattern);
+  Lisp_Object open_delim = build_pure_c_string (is_vector ? "[" : "(");
+  Lisp_Object close_delim = build_pure_c_string (is_vector ? "]" : ")");
+
+  if (is_vector || CONSP (pattern))
+    {
+      /* Expand every element recursively and join them with a
+	 space.  */
+      Lisp_Object inner
+	= Fmapconcat (intern_c_string ("treesit-pattern-expand"),
+		      pattern,
+		      build_pure_c_string (" "));
+      return concat3 (open_delim, inner, close_delim);
+    }
+
+  /* Anything else is printed the way `%S' prints it.  */
+  return CALLN (Fformat, build_pure_c_string("%S"), pattern);
+}
+
+DEFUN ("treesit-query-expand",
+       Ftreesit_query_expand,
+       Streesit_query_expand, 1, 1, 0,
+       doc: /* Expand sexp QUERY to its string form.
+
+A PATTERN in QUERY can be
+
+ :anchor
+ :?
+ :*
+ :+
+ :equal
+ :match
+ (TYPE PATTERN...)
+ [PATTERN...]
+ FIELD-NAME:
+ @CAPTURE-NAME
+ (_)
+ _
+ \"TYPE\"
+
+Consult Info node `(elisp)Pattern Matching' form detailed
+explanation. */)
+  (Lisp_Object query)
+{
+  /* Expand each top-level pattern of QUERY with
+     `treesit-pattern-expand' and join the pieces with a space.  */
+  Lisp_Object expander = intern_c_string ("treesit-pattern-expand");
+  Lisp_Object separator = build_pure_c_string (" ");
+  return Fmapconcat (expander, query, separator);
+}
+
+static const char*
+ts_query_error_to_string (TSQueryError error)
+{
+ switch (error)
+ {
+ case TSQueryErrorNone:
+ return "None";
+ case TSQueryErrorSyntax:
+ return "Syntax error at";
+ case TSQueryErrorNodeType:
+ return "Node type error at";
+ case TSQueryErrorField:
+ return "Field error at";
+ case TSQueryErrorCapture:
+ return "Capture error at";
+ case TSQueryErrorStructure:
+ return "Structure error at";
+ default:
+ return "Unknown error";
+ }
+}
+
+/* This struct is used for passing captures to be checked against
+ predicates. The captures that are looked at are the elements of
+ the list starting at START, up to but not including the cons END.
+ For example, if START and END are
+
+ START          END
+ v              v
+ (1 . (2 . (3 . (4 . (5 . (6 . nil))))))
+
+ we only look at captures 1 2 3. */
+struct capture_range
+{
+ Lisp_Object start;
+ Lisp_Object end;
+};
+
+/* Return the predicates attached to pattern PATTERN_INDEX of QUERY
+   as a list.  Each predicate is itself a list whose elements are
+   strings (literal arguments) and symbols (capture names).  */
+static Lisp_Object
+ts_predicates_for_pattern
+(TSQuery *query, uint32_t pattern_index)
+{
+  uint32_t step_count;
+  const TSQueryPredicateStep *steps
+    = ts_query_predicates_for_pattern (query, pattern_index,
+				       &step_count);
+
+  Lisp_Object all_predicates = Qnil;
+  /* The predicate being assembled, in reverse order.  */
+  Lisp_Object current = Qnil;
+
+  for (int i = 0; i < step_count; i++)
+    {
+      const TSQueryPredicateStep *step = &steps[i];
+
+      if (step->type == TSQueryPredicateStepTypeCapture)
+	{
+	  uint32_t name_len;
+	  const char *name
+	    = ts_query_capture_name_for_id (query, step->value_id,
+					    &name_len);
+	  current = Fcons (intern_c_string_1 (name, name_len), current);
+	}
+      else if (step->type == TSQueryPredicateStepTypeString)
+	{
+	  uint32_t text_len;
+	  const char *text
+	    = ts_query_string_value_for_id (query, step->value_id,
+					    &text_len);
+	  current = Fcons (make_string (text, text_len), current);
+	}
+      else if (step->type == TSQueryPredicateStepTypeDone)
+	{
+	  /* End of one predicate: file it and start a new one.  */
+	  all_predicates = Fcons (Fnreverse (current), all_predicates);
+	  current = Qnil;
+	}
+    }
+
+  return Fnreverse (all_predicates);
+}
+
+/* Return the buffer text spanned by the node captured under NAME (a
+   symbol) within CAPTURES.  Signal treesit-query-error if no capture
+   in that range carries NAME.  */
+static Lisp_Object
+ts_predicate_capture_name_to_text
+(Lisp_Object name, struct capture_range captures)
+{
+  Lisp_Object node = Qnil;
+  bool found = false;
+  Lisp_Object tail = captures.start;
+
+  /* Each element in the range is a (NAME . NODE) cons; stop at the
+     first one whose name matches.  */
+  while (!found && !EQ (tail, captures.end))
+    {
+      Lisp_Object entry = XCAR (tail);
+      if (EQ (XCAR (entry), name))
+	{
+	  node = XCDR (entry);
+	  found = true;
+	}
+      else
+	tail = XCDR (tail);
+    }
+
+  if (NILP (node))
+    xsignal3 (Qtreesit_query_error,
+	      build_pure_c_string ("Cannot find captured node"),
+	      name, build_pure_c_string ("A predicate can only refer to captured nodes in the same pattern"));
+
+  /* Fbuffer_substring reads from the current buffer, so switch to
+     the node's buffer for the duration of the call.  */
+  struct buffer *saved_buffer = current_buffer;
+  set_buffer_internal
+    (XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer));
+  Lisp_Object node_beg = Ftreesit_node_start (node);
+  Lisp_Object node_end = Ftreesit_node_end (node);
+  Lisp_Object text = Fbuffer_substring (node_beg, node_end);
+  set_buffer_internal (saved_buffer);
+
+  return text;
+}
+
+/* Evaluate the predicate (#equal A B) with ARGS being (A B).  Each
+   of A and B is either a string, used as is, or a capture name,
+   which stands for the buffer text spanned by the captured node.
+   Return true if the two texts are equal, false otherwise.  */
+static bool
+ts_predicate_equal
+(Lisp_Object args, struct capture_range captures)
+{
+  if (XFIXNUM (Flength (args)) != 2)
+    xsignal2 (Qtreesit_query_error, build_pure_c_string ("Predicate `equal' requires two arguments but only given"), Flength (args));
+
+  Lisp_Object lhs = XCAR (args);
+  Lisp_Object rhs = XCAR (XCDR (args));
+
+  /* Resolve capture names to text; strings pass through.  */
+  Lisp_Object lhs_text = lhs;
+  if (!STRINGP (lhs))
+    lhs_text = ts_predicate_capture_name_to_text (lhs, captures);
+
+  Lisp_Object rhs_text = rhs;
+  if (!STRINGP (rhs))
+    rhs_text = ts_predicate_capture_name_to_text (rhs, captures);
+
+  return !NILP (Fstring_equal (lhs_text, rhs_text));
+}
+
+/* Handles predicate (#match "regexp" @node).  ARGS is the list
+   ("regexp" @node).  Return true if "regexp" matches the text
+   spanned by @node; return false otherwise.  Matching is
+   case-sensitive.  Signal treesit-query-error if ARGS does not have
+   exactly two elements, if they have the wrong types, or if @node
+   is not among CAPTURES.  */
+static bool
+ts_predicate_match
+(Lisp_Object args, struct capture_range captures)
+{
+  if (XFIXNUM (Flength (args)) != 2)
+    xsignal2 (Qtreesit_query_error, build_pure_c_string ("Predicate `match' requires two arguments but only given"), Flength (args));
+
+  Lisp_Object regexp = XCAR (args);
+  Lisp_Object capture_name = XCAR (XCDR (args));
+
+  /* It's probably common to get the argument order backwards.  Catch
+     this mistake early and show helpful explanation, because Emacs
+     loves you.  (We put the regexp first because that's what
+     string-match does.)  These checks must come before the capture
+     lookup below, otherwise a swapped argument is reported as a
+     missing capture instead.  */
+  if (!STRINGP (regexp))
+    xsignal1 (Qtreesit_query_error, build_pure_c_string ("The first argument to `match' should be a regexp string, not a capture name"));
+  if (!SYMBOLP (capture_name))
+    xsignal1 (Qtreesit_query_error, build_pure_c_string ("The second argument to `match' should be a capture name, not a string"));
+
+  Lisp_Object text = ts_predicate_capture_name_to_text
+    (capture_name, captures);
+
+  return fast_string_match (regexp, text) >= 0;
+}
+
+/* About predicates: I decided to hard-code predicates in C instead of
+ implementing an extensible system where predicates are translated
+ to Lisp functions, and new predicates can be added by extending a
+ list of functions, because I really couldn't imagine any useful
+ predicates besides equal and match. If we later find out that
+ such a system is indeed useful and necessary, it can be easily
+ added. */
+
+/* Evaluate every predicate in PREDICATES against CAPTURES.  Return
+   true if all of them pass (including when PREDICATES is empty);
+   otherwise return false.  Each predicate is a list whose first
+   element names the predicate ("equal" or "match") and whose
+   remaining elements are its arguments.  Signal treesit-query-error
+   for any other predicate name.  */
+static bool
+ts_eval_predicates
+(struct capture_range captures, Lisp_Object predicates)
+{
+  bool pass = true;
+  /* Evaluate each predicate.  Every predicate is evaluated, even
+     after one has failed, so that malformed predicates are still
+     reported; but a failure must stick, a later success may not
+     overwrite it.  */
+  for (Lisp_Object tail = predicates;
+       !NILP (tail); tail = XCDR (tail))
+    {
+      Lisp_Object predicate = XCAR (tail);
+      Lisp_Object fn = XCAR (predicate);
+      Lisp_Object args = XCDR (predicate);
+      if (!NILP (Fstring_equal (fn, build_pure_c_string("equal"))))
+	{
+	  if (!ts_predicate_equal (args, captures))
+	    pass = false;
+	}
+      else if (!NILP (Fstring_equal
+		      (fn, build_pure_c_string("match"))))
+	{
+	  if (!ts_predicate_match (args, captures))
+	    pass = false;
+	}
+      else
+	xsignal3 (Qtreesit_query_error,
+		  build_pure_c_string ("Invalid predicate"),
+		  fn, build_pure_c_string ("Currently Emacs only supports equal and match predicate"));
+    }
+  return pass;
+}
+
+DEFUN ("treesit-query-compile",
+       Ftreesit_query_compile,
+       Streesit_query_compile, 2, 2, 0,
+       doc: /* Compile QUERY to a compiled query.
+
+Querying a compiled query is much faster than an uncompiled one.
+LANGUAGE is the language this query is for.
+
+Signals treesit-query-error if QUERY is malformed or something else
+goes wrong. You can use `treesit-query-validate' to debug the
+query. */)
+  (Lisp_Object language, Lisp_Object query)
+{
+  if (NILP (Ftreesit_query_p (query)))
+    wrong_type_argument (Qtreesit_query_p, query);
+  CHECK_SYMBOL (language);
+
+  /* An already compiled query is handed back unchanged.  */
+  if (TS_COMPILED_QUERY_P (query))
+    return query;
+
+  uint32_t err_offset;
+  TSQueryError err_kind;
+  TSLanguage *lang = ts_load_language (language, true);
+  struct Lisp_TS_Query *compiled
+    = make_ts_query (query, lang, &err_offset, &err_kind);
+
+  /* On failure, report the kind of error and the offset tree-sitter
+     gave us, plus one.  */
+  if (compiled == NULL)
+    xsignal2 (Qtreesit_query_error,
+	      build_string (ts_query_error_to_string (err_kind)),
+	      make_fixnum (err_offset + 1));
+
+  return make_lisp_ptr (compiled, Lisp_Vectorlike);
+}
+
+DEFUN ("treesit-query-capture",
+       Ftreesit_query_capture,
+       Streesit_query_capture, 2, 5, 0,
+       doc: /* Query NODE with patterns in QUERY.
+
+Return a list of (CAPTURE_NAME . NODE). CAPTURE_NAME is the name
+assigned to the node in PATTERN. NODE is the captured node.
+
+QUERY is either a string query, a sexp query, or a compiled query.
+See Info node `(elisp)Pattern Matching' for how to write a query in
+either string or s-expression form. When using repeatedly, a compiled
+query is much faster than a string or sexp one, so it is recommended
+to compile your queries if it will be used over and over.
+
+BEG and END, if both non-nil, are buffer positions that specify the
+range in which the query is executed. If NODE-ONLY is non-nil, return
+a list of nodes.
+
+Besides a node, NODE can also be a parser, then the root node of that
+parser is used; NODE can be a language symbol, then the root node of a
+parser for that language is used. If such a parser doesn't exist, it
+is created.
+
+Signals treesit-query-error if QUERY is malformed or something else
+goes wrong. You can use `treesit-query-validate' to debug the
+query. */)
+  (Lisp_Object node, Lisp_Object query,
+   Lisp_Object beg, Lisp_Object end, Lisp_Object node_only)
+{
+  if (!NILP (beg))
+    CHECK_INTEGER (beg);
+  if (!NILP (end))
+    CHECK_INTEGER (end);
+
+  if (!(TS_COMPILED_QUERY_P (query)
+	|| CONSP (query) || STRINGP (query)))
+    wrong_type_argument (Qtreesit_query_p, query);
+
+  Lisp_Object lisp_node;
+  if (TS_NODEP (node))
+    {
+      /* Like the node accessors, refuse a node whose timestamp no
+	 longer matches its parser's.  */
+      ts_check_node (node);
+      lisp_node = node;
+    }
+  else if (TS_PARSERP (node))
+    lisp_node = Ftreesit_parser_root_node (node);
+  else if (SYMBOLP (node))
+    {
+      Lisp_Object parser
+	= Ftreesit_parser_create (node, Fcurrent_buffer (), Qnil);
+      lisp_node = Ftreesit_parser_root_node (parser);
+    }
+  else
+    xsignal2 (Qwrong_type_argument,
+	      list4 (Qor, Qtreesit_node_p,
+		     Qtreesit_parser_p, Qsymbolp),
+	      node);
+
+  /* Extract C values from Lisp objects.  */
+  TSNode ts_node = XTS_NODE (lisp_node)->node;
+  Lisp_Object lisp_parser = XTS_NODE (lisp_node)->parser;
+  ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg;
+  const TSLanguage *lang = ts_parser_language
+    (XTS_PARSER (lisp_parser)->parser);
+
+  /* Translate BEG and END, which are character positions, into byte
+     offsets relative to VISIBLE_BEG, which is what tree-sitter
+     expects.  Validate them first: CHECK_INTEGER lets bignums
+     through, and the conversion is only meaningful for positions
+     inside the buffer.  */
+  bool have_range = !NILP (beg) && !NILP (end);
+  ptrdiff_t beg_byte = 0;
+  ptrdiff_t end_byte = 0;
+  if (have_range)
+    {
+      struct buffer *buf = XBUFFER (XTS_PARSER (lisp_parser)->buffer);
+      if (!(FIXNUMP (beg) && FIXNUMP (end)
+	    && BUF_BEGV (buf) <= XFIXNUM (beg)
+	    && XFIXNUM (beg) <= XFIXNUM (end)
+	    && XFIXNUM (end) <= BUF_ZV (buf)))
+	xsignal2 (Qargs_out_of_range, beg, end);
+      beg_byte = buf_charpos_to_bytepos (buf, XFIXNUM (beg));
+      end_byte = buf_charpos_to_bytepos (buf, XFIXNUM (end));
+    }
+
+  /* Initialize query objects, and execute query.  */
+  struct Lisp_TS_Query *lisp_query;
+  if (TS_COMPILED_QUERY_P (query))
+    lisp_query = XTS_COMPILED_QUERY (query);
+  else
+    {
+      uint32_t error_offset;
+      TSQueryError error_type;
+      lisp_query = make_ts_query (query, lang,
+				  &error_offset, &error_type);
+      if (lisp_query == NULL)
+	{
+	  xsignal3 (Qtreesit_query_error,
+		    build_string
+		    (ts_query_error_to_string (error_type)),
+		    make_fixnum (error_offset + 1),
+		    build_pure_c_string("Debug the query with `treesit-query-validate'"));
+	}
+      /* We don't need to free TS_QUERY and CURSOR, they are stored
+	 in a lisp object, which is tracked by gc.  */
+    }
+  TSQuery *ts_query = lisp_query->query;
+  TSQueryCursor *cursor = lisp_query->cursor;
+
+  if (have_range)
+    ts_query_cursor_set_byte_range
+      (cursor, (uint32_t) (beg_byte - visible_beg),
+       (uint32_t) (end_byte - visible_beg));
+  else
+    /* The cursor of a compiled query is reused between calls, so
+       clear any range left over from a previous call.  */
+    ts_query_cursor_set_byte_range (cursor, 0, UINT32_MAX);
+
+  ts_query_cursor_exec (cursor, ts_query, ts_node);
+  TSQueryMatch match;
+
+  /* Go over each match, collect captures and predicates.  Include the
+     captures in the RESULT list unconditionally as we get them, then
+     test for predicates.  If predicates pass, then all good, if
+     predicates don't pass, revert the result back to the result
+     before this loop (PREV_RESULT).  (Predicates control the entire
+     match.)  This way we don't need to create a list of captures in
+     every for loop and nconc it to RESULT every time.  That is indeed
+     the initial implementation in which Yoav found nconc being the
+     bottleneck (98.4% of the running time spent on nconc).  */
+  Lisp_Object result = Qnil;
+  Lisp_Object prev_result = result;
+  while (ts_query_cursor_next_match (cursor, &match))
+    {
+      /* Record the checkpoint that we may roll back to.  */
+      prev_result = result;
+
+      /* Get predicates.  */
+      Lisp_Object predicates =
+	ts_predicates_for_pattern (ts_query, match.pattern_index);
+
+      /* Predicates look captures up by name, so while there are
+	 predicates to evaluate the captures must be (NAME . NODE)
+	 conses even if the caller asked for bare nodes.  In that
+	 case the names are stripped once the predicates passed.  */
+      bool strip_names = !NILP (node_only) && !NILP (predicates);
+      bool with_names = NILP (node_only) || strip_names;
+
+      /* Get captured nodes.  */
+      const TSQueryCapture *captures = match.captures;
+      for (int idx = 0; idx < match.capture_count; idx++)
+	{
+	  uint32_t capture_name_len;
+	  TSQueryCapture capture = captures[idx];
+	  Lisp_Object captured_node =
+	    make_ts_node (lisp_parser, capture.node);
+
+	  Lisp_Object cap;
+	  if (with_names)
+	    {
+	      const char *capture_name = ts_query_capture_name_for_id
+		(ts_query, capture.index, &capture_name_len);
+	      cap =
+		Fcons (intern_c_string_1 (capture_name, capture_name_len),
+		       captured_node);
+	    }
+	  else
+	    {
+	      cap = captured_node;
+	    }
+	  result = Fcons (cap, result);
+	}
+
+      struct capture_range captures_range = { result, prev_result };
+      if (!ts_eval_predicates (captures_range, predicates))
+	{
+	  /* Predicates didn't pass, roll back.  */
+	  result = prev_result;
+	}
+      else if (strip_names)
+	{
+	  /* Replace each (NAME . NODE) added for this match by NODE.  */
+	  for (Lisp_Object tail = result;
+	       !EQ (tail, prev_result); tail = XCDR (tail))
+	    XSETCAR (tail, XCDR (XCAR (tail)));
+	}
+    }
+  return Fnreverse (result);
+}
+
+/*** Navigation */
+
+/* Return a sibling of NODE: the next one if FORWARD is true, the
+   previous one otherwise.  If NAMED is true, only named siblings
+   are considered.  */
+static TSNode
+ts_traverse_sibling_helper (TSNode node, bool forward, bool named)
+{
+  if (forward)
+    return (named
+	    ? ts_node_next_named_sibling (node)
+	    : ts_node_next_sibling (node));
+
+  return (named
+	  ? ts_node_prev_named_sibling (node)
+	  : ts_node_prev_sibling (node));
+}
+
+/* Return true if NODE matches PRED.  If PRED is a string, it is
+   matched as a regexp against NODE's type, ignoring case.  Otherwise
+   PRED is called as a function with NODE (wrapped in a Lisp node
+   that belongs to PARSER) and a non-nil return value counts as a
+   match.  PRED's type is not validated here.  */
+static bool
+ts_traverse_match_predicate
+(TSNode node, Lisp_Object pred, Lisp_Object parser)
+{
+  if (!STRINGP (pred))
+    {
+      Lisp_Object candidate = make_ts_node (parser, node);
+      Lisp_Object verdict = CALLN (Ffuncall, pred, candidate);
+      return !NILP (verdict);
+    }
+
+  const char *node_type = ts_node_type (node);
+  ptrdiff_t match_pos
+    = fast_c_string_match_ignore_case (pred, node_type,
+				       strlen (node_type));
+  return match_pos >= 0;
+}
+
+/* Traverse the parse tree depth-first starting from ROOT.  PRED can
+   be a function (takes a node and returns nil/non-nil), or a string
+   (treated as regexp matching the node's type, ignores case, must be
+   all single byte characters).  If a node satisfies PRED, terminate,
+   set ROOT to that node, and return true.  If no node satisfies
+   PRED, return false and leave ROOT unchanged.  PARSER is the
+   parser of ROOT.
+
+   LIMIT is the number of levels we descend in the tree.  If NO_LIMIT
+   is true, LIMIT is ignored.  FORWARD controls the direction in which
+   we traverse the tree, true means forward, false backward.  If NAMED
+   is true, only traverse named nodes, if false, all nodes.  If
+   SKIP_ROOT is true, don't match ROOT itself, only its
+   descendants.  */
+static bool
+ts_search_dfs
+(TSNode *root, Lisp_Object pred, Lisp_Object parser,
+ bool named, bool forward, ptrdiff_t limit, bool no_limit,
+ bool skip_root)
+{
+  /* TSTreeCursor doesn't allow us to move backward, so we can't use
+     it.  We could use limit == -1 to indicate no_limit == true, but
+     separating them is safer.  */
+  TSNode node = *root;
+
+  if (!skip_root && ts_traverse_match_predicate (node, pred, parser))
+    {
+      *root = node;
+      return true;
+    }
+
+  if (!no_limit && limit <= 0)
+    return false;
+
+  uint32_t count = (named
+		    ? ts_node_named_child_count (node)
+		    : ts_node_child_count (node));
+  for (uint32_t offset = 0; offset < count; offset++)
+    {
+      uint32_t idx = forward ? offset : count - offset - 1;
+      /* IDX counts named children when NAMED is true, so the child
+	 has to be fetched with the matching accessor; otherwise we
+	 would visit anonymous nodes and miss named ones.  */
+      TSNode child = (named
+		      ? ts_node_named_child (node, idx)
+		      : ts_node_child (node, idx));
+
+      if (!ts_node_is_null (child)
+	  && ts_search_dfs (&child, pred, parser, named,
+			    forward, limit - 1, no_limit, false))
+	{
+	  *root = child;
+	  return true;
+	}
+    }
+  return false;
+}
+
+/* Go through the whole tree linearly depth first, starting from
+   START.  PRED, PARSER, NAMED, FORWARD are the same as in
+   ts_search_dfs.  If UP_ONLY is true, never go to children, only
+   siblings and parents.  If SKIP_START is true, don't match
+   START.  If a match is found, set START to it and return true;
+   otherwise return false.  */
+static bool
+ts_search_forward
+(TSNode *start, Lisp_Object pred, Lisp_Object parser,
+ bool named, bool forward, bool up_only, bool skip_start)
+{
+  TSNode node = *start;
+
+  /* Without UP_ONLY, search the whole subtree of START.  With
+     UP_ONLY, a depth limit of zero restricts the search to START
+     itself, so that the siblings we step to below are still matched
+     against PRED while their children are not visited.  */
+  if (ts_search_dfs
+      (start, pred, parser, named, forward, 0, !up_only, skip_start))
+    return true;
+
+  /* Move to the next sibling; whenever there is none, climb to the
+     parent, which is itself a candidate.  */
+  TSNode next = ts_traverse_sibling_helper (node, forward, named);
+  while (ts_node_is_null (next))
+    {
+      node = ts_node_parent (node);
+      if (ts_node_is_null (node))
+	return false;
+
+      if (ts_traverse_match_predicate (node, pred, parser))
+	{
+	  *start = node;
+	  return true;
+	}
+      next = ts_traverse_sibling_helper (node, forward, named);
+    }
+  if (ts_search_forward
+      (&next, pred, parser, named, forward, up_only, false))
+    {
+      *start = next;
+      return true;
+    }
+  else
+    return false;
+}
+
+DEFUN ("treesit-search-subtree",
+       Ftreesit_search_subtree,
+       Streesit_search_subtree, 2, 5, 0,
+       doc: /* Traverse the parse tree depth-first.
+
+Traverse the subtree of NODE, and match PREDICATE with each node along
+the way. PREDICATE is a regexp string that matches against each
+node's type case-insensitively, or a function that takes a node and
+returns nil/non-nil.
+
+By default, only traverse named nodes, if ALL is non-nil, traverse all
+nodes. If BACKWARD is non-nil, traverse backwards. If LIMIT is
+non-nil, we only traverse that number of levels down in the tree.
+
+Return the first matched node, or nil if none matches. */)
+  (Lisp_Object node, Lisp_Object predicate, Lisp_Object all,
+   Lisp_Object backward, Lisp_Object limit)
+{
+  CHECK_TS_NODE (node);
+  CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
+	      list3 (Qor, Qstringp, Qfunctionp), predicate);
+  CHECK_SYMBOL (all);
+  CHECK_SYMBOL (backward);
+
+  /* A nil LIMIT means "descend without bound".  */
+  bool unbounded = NILP (limit);
+  ptrdiff_t max_depth = 0;
+  if (!unbounded)
+    {
+      CHECK_FIXNUM (limit);
+      max_depth = XFIXNUM (limit);
+    }
+
+  Lisp_Object owner = XTS_NODE (node)->parser;
+  TSNode cursor = XTS_NODE (node)->node;
+  bool found = ts_search_dfs (&cursor, predicate, owner, NILP (all),
+			      NILP (backward), max_depth, unbounded,
+			      false);
+
+  /* On success ts_search_dfs left the match in CURSOR.  */
+  return found ? make_ts_node (owner, cursor) : Qnil;
+}
+
+DEFUN ("treesit-search-forward",
+       Ftreesit_search_forward,
+       Streesit_search_forward, 2, 5, 0,
+       doc: /* Search for node in the parse tree.
+
+Start traversing the tree from node START, and match PREDICATE with
+each node along the way (except START). PREDICATE is a regexp string
+that matches against each node's type case-insensitively, or a
+function that takes a node and returns nil/non-nil.
+
+By default, only search for named nodes, if ALL is non-nil, search for
+all nodes. If BACKWARD is non-nil, search backwards.
+
+Return the first matched node, or nil if none matches.
+
+For a tree like the below where START is marked 1, traverse as
+numbered:
+ 16
+ |
+ 3--------4-----------8
+ | | |
+ o--o-+--1 5--+--6 9---+-----12
+ | | | | | |
+ o o 2 7 +-+-+ +--+--+
+ | | | | |
+ 10 11 13 14 15
+
+If UP is non-nil, only traverse to siblings and parents. In that
+case, only 1 3 4 8 16 would be traversed. */)
+  (Lisp_Object start, Lisp_Object predicate, Lisp_Object all,
+   Lisp_Object backward, Lisp_Object up)
+{
+  CHECK_TS_NODE (start);
+  CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
+	      list3 (Qor, Qstringp, Qfunctionp), predicate);
+  CHECK_SYMBOL (all);
+  CHECK_SYMBOL (backward);
+  CHECK_SYMBOL (up);
+
+  Lisp_Object owner = XTS_NODE (start)->parser;
+  TSNode cursor = XTS_NODE (start)->node;
+
+  /* START itself is never a candidate, hence the final `true'.  */
+  bool found = ts_search_forward (&cursor, predicate, owner,
+				  NILP (all), NILP (backward),
+				  !NILP (up), true);
+
+  /* On success ts_search_forward left the match in CURSOR.  */
+  return found ? make_ts_node (owner, cursor) : Qnil;
+}
+
+/* Recursively traverse the tree under CURSOR, and append the result
+ subtree to PARENT's cdr. See more in Ftreesit_induce_sparse_tree.
+
+ PRED and PARSER are passed to ts_traverse_match_predicate to decide
+ whether a node is kept. If PROCESS_FN is non-nil, it is called
+ with each matched node and its return value is stored in the sparse
+ tree instead of the node. Children are visited only while NO_LIMIT
+ is true or LIMIT is positive; LIMIT is decremented by one for each
+ level descended.
+
+ New entries are pushed onto the front of PARENT's cdr, so every
+ children list is built in reverse order. A matched node reverses
+ its own children list before returning, but the list hanging off
+ the PARENT supplied by the outermost caller is left reversed; the
+ caller has to reverse that one itself. */
+static void
+ts_build_sparse_tree
+(TSTreeCursor *cursor, Lisp_Object parent, Lisp_Object pred,
+ Lisp_Object process_fn, ptrdiff_t limit,
+ bool no_limit, Lisp_Object parser)
+{
+
+ TSNode node = ts_tree_cursor_current_node (cursor);
+ bool match = ts_traverse_match_predicate (node, pred, parser);
+ if (match)
+ {
+ /* If this node matches pred, add a new node to the parent's
+ children list. */
+ Lisp_Object lisp_node = make_ts_node (parser, node);
+ if (!NILP (process_fn))
+ {
+ lisp_node = CALLN (Ffuncall, process_fn, lisp_node);
+ }
+ /* THIS is the cons (NODE . CHILDREN) for the matched node; its
+ cdr starts out empty and is filled in by the recursion. */
+ Lisp_Object this = Fcons (lisp_node, Qnil);
+ Fsetcdr (parent, Fcons (this, Fcdr (parent)));
+ /* Now for children nodes, this is the new parent. */
+ parent = this;
+ }
+ /* Go through each child. If this node did not match, PARENT is
+ unchanged, so matching descendants attach to the nearest matching
+ ancestor. */
+ if ((no_limit || limit > 0)
+ && ts_tree_cursor_goto_first_child (cursor))
+ {
+ do
+ {
+ /* Make sure not to use node after the recursive funcall.
+ Then C compilers should be smart enough not to copy NODE
+ to stack. */
+ ts_build_sparse_tree
+ (cursor, parent, pred, process_fn,
+ limit - 1, no_limit, parser);
+ }
+ while (ts_tree_cursor_goto_next_sibling (cursor));
+ /* Don't forget to come back to this node. */
+ ts_tree_cursor_goto_parent (cursor);
+ }
+ /* Before we go, reverse children in the sparse tree. */
+ if (match)
+ {
+ /* When match == true, "parent" is actually the node we added in
+ this layer (parent = this). */
+ Fsetcdr (parent, Fnreverse (Fcdr (parent)));
+ }
+}
+
+DEFUN ("treesit-induce-sparse-tree",
+       Ftreesit_induce_sparse_tree,
+       Streesit_induce_sparse_tree, 2, 4, 0,
+       doc: /* Create a sparse tree of ROOT's subtree.
+
+Basically, take the subtree under ROOT, and comb it so only the nodes
+that match PREDICATE are left, like picking out grapes on the vine.
+PREDICATE is a regexp string that matches against each node's type
+case-insensitively.
+
+For a subtree on the left that consists of both numbers and letters, if
+PREDICATE is "is letter", the returned tree is the one on the right.
+
+    a                 a                 a
+    |                 |                 |
++---+---+         +---+---+         +---+---+
+|   |   |         |   |   |         |   |   |
+b   1   2         b   |   |         b   c   d
+    |   |     =>      |   |     =>      |
+    c   +--+          c   +             e
+    |   |  |          |   |
+ +--+   d  4       +--+   d
+ |  |              |
+ e  5              e
+
+If PROCESS-FN is non-nil, instead of returning the matched nodes, pass
+each node to PROCESS-FN and use the return value instead.  If non-nil,
+LIMIT is the number of levels to go down from ROOT.
+
+Each node in the returned tree looks like (NODE . (CHILD ...)).  The
+root of this tree might be nil, if ROOT doesn't match PREDICATE.  If
+no node matches PREDICATE, return nil.
+
+PREDICATE can also be a function that takes a node and returns
+nil/non-nil, but it is slower and more memory consuming than
+regexp.  */)
+  (Lisp_Object root, Lisp_Object predicate, Lisp_Object process_fn,
+   Lisp_Object limit)
+{
+  CHECK_TS_NODE (root);
+  /* Build the (or stringp functionp) error datum only when the check
+     fails.  Passing list3 (...) as an argument to CHECK_TYPE may
+     cons the list on every call, including successful ones.  */
+  if (!(STRINGP (predicate) || FUNCTIONP (predicate)))
+    wrong_type_argument (list3 (Qor, Qstringp, Qfunctionp), predicate);
+
+  if (!NILP (process_fn))
+    CHECK_TYPE (FUNCTIONP (process_fn), Qfunctionp, process_fn);
+
+  /* A nil LIMIT means unlimited depth; otherwise it must be a
+     fixnum.  THE_LIMIT is ignored when NO_LIMIT is true.  */
+  ptrdiff_t the_limit = 0;
+  bool no_limit = NILP (limit);
+  if (!no_limit)
+    {
+      CHECK_FIXNUM (limit);
+      the_limit = XFIXNUM (limit);
+    }
+
+  TSTreeCursor cursor = ts_tree_cursor_new (XTS_NODE (root)->node);
+  Lisp_Object parser = XTS_NODE (root)->parser;
+  /* Dummy head cell: matched nodes are collected in its cdr.  */
+  Lisp_Object parent = Fcons (Qnil, Qnil);
+  ts_build_sparse_tree (&cursor, parent, predicate, process_fn,
+                        the_limit, no_limit, parser);
+  /* A tree cursor owns memory allocated by tree-sitter and must be
+     released explicitly; previously it was leaked on every call.
+     NOTE(review): if PREDICATE or PROCESS-FN exits non-locally, this
+     line is still skipped; an unwind-protect would close that gap.  */
+  ts_tree_cursor_delete (&cursor);
+
+  /* ts_build_sparse_tree leaves the top-level list reversed.  */
+  Fsetcdr (parent, Fnreverse (Fcdr (parent)));
+  if (NILP (Fcdr (parent)))
+    return Qnil;
+
+  return parent;
+}
+
+/*** Initialization */
+
+/* Initialize the tree-sitter routines: define the symbols and error
+ conditions used by this file, declare its Lisp-visible variables,
+ and register its primitives with defsubr. */
+void
+syms_of_treesit (void)
+{
+ /* Type predicates and the property names accepted by
+ `treesit-node-check'. */
+ DEFSYM (Qtreesit_parser_p, "treesit-parser-p");
+ DEFSYM (Qtreesit_node_p, "treesit-node-p");
+ DEFSYM (Qtreesit_compiled_query_p, "treesit-compiled-query-p");
+ DEFSYM (Qtreesit_query_p, "treesit-query-p");
+ DEFSYM (Qnamed, "named");
+ DEFSYM (Qmissing, "missing");
+ DEFSYM (Qextra, "extra");
+ DEFSYM (Qhas_changes, "has-changes");
+ DEFSYM (Qhas_error, "has-error");
+
+ /* Error condition symbols; their messages and parent conditions
+ are set up by the define_error calls below. */
+ DEFSYM (Qtreesit_error, "treesit-error");
+ DEFSYM (Qtreesit_query_error, "treesit-query-error");
+ DEFSYM (Qtreesit_parse_error, "treesit-parse-error");
+ DEFSYM (Qtreesit_range_invalid, "treesit-range-invalid");
+ DEFSYM (Qtreesit_buffer_too_large,
+ "treesit-buffer-too-large");
+ DEFSYM (Qtreesit_load_language_error,
+ "treesit-load-language-error");
+ DEFSYM (Qtreesit_node_outdated,
+ "treesit-node-outdated");
+ DEFSYM (Quser_emacs_directory,
+ "user-emacs-directory");
+ DEFSYM (Qtreesit_parser_deleted, "treesit-parser-deleted");
+
+ /* Used to build the (or stringp functionp) type-error datum. */
+ DEFSYM (Qor, "or");
+
+ /* treesit-error derives from error; every other tree-sitter error
+ derives from treesit-error. */
+ define_error (Qtreesit_error, "Generic tree-sitter error", Qerror);
+ define_error (Qtreesit_query_error, "Query pattern is malformed",
+ Qtreesit_error);
+ /* Should be impossible, no need to document this error. */
+ define_error (Qtreesit_parse_error, "Parse failed",
+ Qtreesit_error);
+ define_error (Qtreesit_range_invalid,
+ "RANGES are invalid, they have to be ordered and not overlapping",
+ Qtreesit_error);
+ define_error (Qtreesit_buffer_too_large, "Buffer too large (> 4GB)",
+ Qtreesit_error);
+ define_error (Qtreesit_load_language_error,
+ "Cannot load language definition",
+ Qtreesit_error);
+ define_error (Qtreesit_node_outdated,
+ "This node is outdated, please retrieve a new one",
+ Qtreesit_error);
+ define_error (Qtreesit_parser_deleted,
+ "This parser is deleted and cannot be used",
+ Qtreesit_error);
+
+ DEFVAR_LISP ("treesit-load-name-override-list",
+ Vtreesit_load_name_override_list,
+ doc:
+ /* An override list for unconventional tree-sitter libraries.
+
+By default, Emacs assumes the dynamic library for LANG is
+libtree-sitter-LANG.EXT, where EXT is the OS specific extension for
+dynamic libraries. Emacs also assumes that the name of the C function
+the library provides is tree_sitter_LANG. If that is not the case,
+add an entry
+
+ (LANG LIBRARY-BASE-NAME FUNCTION-NAME)
+
+to this list, where LIBRARY-BASE-NAME is the filename of the dynamic
+library without extension, FUNCTION-NAME is the function provided by
+the library. */);
+ Vtreesit_load_name_override_list = Qnil;
+
+ DEFVAR_LISP ("treesit-extra-load-path",
+ Vtreesit_extra_load_path,
+ doc:
+ /* Extra load paths of tree-sitter language definitions.
+When trying to load a tree-sitter language definition,
+Emacs looks at directories in this variable,
+`user-emacs-directory'/tree-sitter, and system default locations for
+dynamic libraries, in that order. */);
+ Vtreesit_extra_load_path = Qnil;
+
+ /* Register the primitives, grouped by what they operate on. */
+ defsubr (&Streesit_language_available_p);
+
+ defsubr (&Streesit_parser_p);
+ defsubr (&Streesit_node_p);
+ defsubr (&Streesit_compiled_query_p);
+ defsubr (&Streesit_query_p);
+
+ defsubr (&Streesit_node_parser);
+
+ defsubr (&Streesit_parser_create);
+ defsubr (&Streesit_parser_delete);
+ defsubr (&Streesit_parser_list);
+ defsubr (&Streesit_parser_buffer);
+ defsubr (&Streesit_parser_language);
+
+ defsubr (&Streesit_parser_root_node);
+ /* Registration of treesit-parse-string is disabled here. */
+ /* defsubr (&Streesit_parse_string); */
+
+ defsubr (&Streesit_parser_set_included_ranges);
+ defsubr (&Streesit_parser_included_ranges);
+
+ defsubr (&Streesit_node_type);
+ defsubr (&Streesit_node_start);
+ defsubr (&Streesit_node_end);
+ defsubr (&Streesit_node_string);
+ defsubr (&Streesit_node_parent);
+ defsubr (&Streesit_node_child);
+ defsubr (&Streesit_node_check);
+ defsubr (&Streesit_node_field_name_for_child);
+ defsubr (&Streesit_node_child_count);
+ defsubr (&Streesit_node_child_by_field_name);
+ defsubr (&Streesit_node_next_sibling);
+ defsubr (&Streesit_node_prev_sibling);
+ defsubr (&Streesit_node_first_child_for_pos);
+ defsubr (&Streesit_node_descendant_for_range);
+ defsubr (&Streesit_node_eq);
+
+ defsubr (&Streesit_pattern_expand);
+ defsubr (&Streesit_query_expand);
+ defsubr (&Streesit_query_compile);
+ defsubr (&Streesit_query_capture);
+
+ defsubr (&Streesit_search_subtree);
+ defsubr (&Streesit_search_forward);
+ defsubr (&Streesit_induce_sparse_tree);
+}
diff --git a/src/treesit.h b/src/treesit.h
new file mode 100644
index 00000000000..0c043f7d250
--- /dev/null
+++ b/src/treesit.h
@@ -0,0 +1,171 @@
+/* Header file for the tree-sitter integration.
+
+Copyright (C) 2021 Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
+
+#ifndef EMACS_TREESIT_H
+#define EMACS_TREESIT_H
+
+#include <tree_sitter/api.h>
+#include "lisp.h"
+
+INLINE_HEADER_BEGIN
+
+/* A wrapper for a tree-sitter parser, which also holds the parse
+ tree and the bookkeeping needed to keep that tree in sync with a
+ buffer. */
+struct Lisp_TS_Parser
+{
+ union vectorlike_header header;
+ /* A symbol that represents the language this parser uses. See the
+ manual for more explanation. */
+ Lisp_Object language_symbol;
+ /* The buffer associated with this parser. */
+ Lisp_Object buffer;
+ /* The pointer to the tree-sitter parser. Never NULL. */
+ TSParser *parser;
+ /* Pointer to the syntax tree. Initially NULL, so check for NULL
+ before use. */
+ TSTree *tree;
+ /* Teaches tree-sitter how to read an Emacs buffer. */
+ TSInput input;
+ /* Re-parsing an unchanged buffer is not free for tree-sitter, so we
+ only make it re-parse when need_reparse == true. That usually
+ means some change was made in the buffer, but other code may set
+ this field to true to force tree-sitter to re-parse. */
+ bool need_reparse;
+ /* These two positions record the buffer byte position (1-based) of
+ the "visible region" that tree-sitter sees. Unlike markers,
+ these two positions do not change as the user inserts and deletes
+ text around them. Before re-parse, we move these positions to
+ match BUF_BEGV_BYTE and BUF_ZV_BYTE. Note that we don't need to
+ synchronize these positions when retrieving them in a function
+ that involves a node: if the node is not outdated, these
+ positions are synchronized. */
+ ptrdiff_t visible_beg;
+ ptrdiff_t visible_end;
+ /* This counter is incremented every time a change is made to the
+ buffer in ts_record_change. A node retrieved from this parser
+ inherits this timestamp. This way we can make sure the node is
+ not outdated when we access its information. */
+ ptrdiff_t timestamp;
+ /* If this field is true, parser functions raise the
+ treesit-parser-deleted signal. */
+ bool deleted;
+};
+
+/* A wrapper around a tree-sitter node. */
+struct Lisp_TS_Node
+{
+ union vectorlike_header header;
+ /* The parser this node came from. Holding it here prevents gc
+ from collecting the tree before the node is done with it: TSNode
+ contains a pointer to the tree it belongs to, and the parser
+ object, when collected by gc, will free that tree. */
+ Lisp_Object parser;
+ /* The underlying tree-sitter node, stored by value. */
+ TSNode node;
+ /* A node inherits its parser's timestamp at creation time. The
+ parser's timestamp increments as the buffer changes. This way we
+ can make sure the node is not outdated when we access its
+ information. */
+ ptrdiff_t timestamp;
+};
+
+/* A compiled tree-sitter query, together with a cursor kept
+ alongside it. */
+struct Lisp_TS_Query
+{
+ union vectorlike_header header;
+ /* Pointer to the query object. */
+ TSQuery *query;
+ /* Pointer to a cursor. If we are storing the query object, we
+ might as well store a cursor, too. */
+ TSQueryCursor *cursor;
+};
+
+/* Return true if X is a tree-sitter parser object.  */
+INLINE bool
+TS_PARSERP (Lisp_Object x)
+{
+  bool is_parser = PSEUDOVECTORP (x, PVEC_TS_PARSER);
+  return is_parser;
+}
+
+/* Extract the parser structure from A, which must satisfy
+   TS_PARSERP (checked only when assertions are enabled).  */
+INLINE struct Lisp_TS_Parser *
+XTS_PARSER (Lisp_Object a)
+{
+  eassert (TS_PARSERP (a));
+  struct Lisp_TS_Parser *p
+    = XUNTAG (a, Lisp_Vectorlike, struct Lisp_TS_Parser);
+  return p;
+}
+
+/* Return true if X is a tree-sitter node object.  */
+INLINE bool
+TS_NODEP (Lisp_Object x)
+{
+  bool is_node = PSEUDOVECTORP (x, PVEC_TS_NODE);
+  return is_node;
+}
+
+/* Extract the node structure from A, which must satisfy TS_NODEP
+   (checked only when assertions are enabled).  */
+INLINE struct Lisp_TS_Node *
+XTS_NODE (Lisp_Object a)
+{
+  eassert (TS_NODEP (a));
+  struct Lisp_TS_Node *n
+    = XUNTAG (a, Lisp_Vectorlike, struct Lisp_TS_Node);
+  return n;
+}
+
+/* Return true if X is a compiled tree-sitter query object.  */
+INLINE bool
+TS_COMPILED_QUERY_P (Lisp_Object x)
+{
+  bool is_query = PSEUDOVECTORP (x, PVEC_TS_COMPILED_QUERY);
+  return is_query;
+}
+
+/* Extract the query structure from A, which must satisfy
+   TS_COMPILED_QUERY_P (checked only when assertions are enabled).  */
+INLINE struct Lisp_TS_Query *
+XTS_COMPILED_QUERY (Lisp_Object a)
+{
+  eassert (TS_COMPILED_QUERY_P (a));
+  struct Lisp_TS_Query *q
+    = XUNTAG (a, Lisp_Vectorlike, struct Lisp_TS_Query);
+  return q;
+}
+
+/* Check that PARSER is a tree-sitter parser, using
+   treesit-parser-p as the expected-type predicate.  */
+INLINE void
+CHECK_TS_PARSER (Lisp_Object parser)
+{
+  bool is_parser = TS_PARSERP (parser);
+  CHECK_TYPE (is_parser, Qtreesit_parser_p, parser);
+}
+
+/* Check that NODE is a tree-sitter node, using treesit-node-p as
+   the expected-type predicate.  */
+INLINE void
+CHECK_TS_NODE (Lisp_Object node)
+{
+  bool is_node = TS_NODEP (node);
+  CHECK_TYPE (is_node, Qtreesit_node_p, node);
+}
+
+/* Check that QUERY is a compiled tree-sitter query, using
+   treesit-compiled-query-p as the expected-type predicate.  */
+INLINE void
+CHECK_TS_COMPILED_QUERY (Lisp_Object query)
+{
+  bool is_query = TS_COMPILED_QUERY_P (query);
+  CHECK_TYPE (is_query, Qtreesit_compiled_query_p, query);
+}
+
+/* Record a buffer change for tree-sitter. The three arguments are
+ byte positions; presumably the start of the change, the end of the
+ replaced text, and the end of the new text -- confirm against the
+ definition in treesit.c. */
+void
+ts_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
+ ptrdiff_t new_end_byte);
+
+/* Create a Lisp parser object from its components. */
+Lisp_Object
+make_ts_parser (Lisp_Object buffer, TSParser *parser,
+ TSTree *tree, Lisp_Object language_symbol);
+
+/* Create a Lisp node object wrapping NODE, which belongs to PARSER. */
+Lisp_Object
+make_ts_node (Lisp_Object parser, TSNode node);
+
+/* Define the symbols, variables and primitives of treesit.c. */
+extern void syms_of_treesit (void);
+
+INLINE_HEADER_END
+
+#endif /* EMACS_TREESIT_H */
diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el
new file mode 100644
index 00000000000..6fa891a136a
--- /dev/null
+++ b/test/src/treesit-tests.el
@@ -0,0 +1,450 @@
+;;; treesit-tests.el --- tests for src/treesit.c -*- lexical-binding: t; -*-
+
+;; Copyright (C) 2021 Free Software Foundation, Inc.
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
+
+;;; Code:
+
+(require 'ert)
+(require 'treesit)
+
+(ert-deftest treesit-basic-parsing ()
+  "Test basic parsing routines.
+Create a JSON parser in an empty buffer, then check that the
+parse tree follows two successive insertions."
+  ;; Skip rather than fail when the JSON grammar is not installed.
+  (skip-unless (treesit-language-available-p 'json))
+  (with-temp-buffer
+    (let ((parser (treesit-parser-create 'json)))
+      ;; The new parser is the first element of the parser list.
+      (should
+       (eq parser (car (treesit-parser-list))))
+      ;; An empty buffer parses to a lone ERROR node.
+      (should
+       (equal (treesit-node-string
+               (treesit-parser-root-node parser))
+              "(ERROR)"))
+
+      (insert "[1,2,3]")
+      (should
+       (equal (treesit-node-string
+               (treesit-parser-root-node parser))
+              "(document (array (number) (number) (number)))"))
+
+      ;; Insert an object in the middle of the array.
+      (goto-char (point-min))
+      (forward-char 3)
+      (insert "{\"name\": \"Bob\"},")
+      (should
+       (equal
+        (treesit-node-string
+         (treesit-parser-root-node parser))
+        "(document (array (number) (object (pair key: (string (string_content)) value: (string (string_content)))) (number) (number)))")))))
+
+(ert-deftest treesit-node-api ()
+  "Tests for node API."
+  ;; Skip rather than fail when the JSON grammar is not installed.
+  (skip-unless (treesit-language-available-p 'json))
+  (with-temp-buffer
+    (insert "[1,2,{\"name\": \"Bob\"},3]")
+    (let* ((parser (treesit-parser-create 'json))
+           (root-node (treesit-parser-root-node parser))
+           doc-node object-node pair-node)
+      ;; `treesit-node-type'.
+      (should (equal "document" (treesit-node-type root-node)))
+      ;; `treesit-node-check'.
+      (should (eq t (treesit-node-check root-node 'named)))
+      (should (eq nil (treesit-node-check root-node 'missing)))
+      (should (eq nil (treesit-node-check root-node 'extra)))
+      (should (eq nil (treesit-node-check root-node 'has-error)))
+      ;; `treesit-node-child'.
+      (setq doc-node (treesit-node-child root-node 0))
+      (should (equal "array" (treesit-node-type doc-node)))
+      (should (equal (treesit-node-string doc-node)
+                     "(array (number) (number) (object (pair key: (string (string_content)) value: (string (string_content)))) (number))"))
+      ;; `treesit-node-child-count'.
+      (should (eql 9 (treesit-node-child-count doc-node)))
+      (should (eql 4 (treesit-node-child-count doc-node t)))
+      ;; `treesit-node-field-name-for-child'.
+      (setq object-node (treesit-node-child doc-node 2 t))
+      (setq pair-node (treesit-node-child object-node 0 t))
+      (should (equal "object" (treesit-node-type object-node)))
+      (should (equal "pair" (treesit-node-type pair-node)))
+      (should (equal "key"
+                     (treesit-node-field-name-for-child
+                      pair-node 0)))
+      ;; `treesit-node-child-by-field-name'.
+      (should (equal "(string (string_content))"
+                     (treesit-node-string
+                      (treesit-node-child-by-field-name
+                       pair-node "key"))))
+      ;; `treesit-node-next-sibling'.
+      (should (equal "(number)"
+                     (treesit-node-string
+                      (treesit-node-next-sibling object-node t))))
+      (should (equal "(\",\")"
+                     (treesit-node-string
+                      (treesit-node-next-sibling object-node))))
+      ;; `treesit-node-prev-sibling'.
+      (should (equal "(number)"
+                     (treesit-node-string
+                      (treesit-node-prev-sibling object-node t))))
+      (should (equal "(\",\")"
+                     (treesit-node-string
+                      (treesit-node-prev-sibling object-node))))
+      ;; `treesit-node-first-child-for-pos'.
+      (should (equal "(number)"
+                     (treesit-node-string
+                      (treesit-node-first-child-for-pos
+                       doc-node 3 t))))
+      (should (equal "(\",\")"
+                     (treesit-node-string
+                      (treesit-node-first-child-for-pos
+                       doc-node 3))))
+      ;; `treesit-node-descendant-for-range'.
+      (should (equal "(\"{\")"
+                     (treesit-node-string
+                      (treesit-node-descendant-for-range
+                       root-node 6 7))))
+      (should (equal "(object (pair key: (string (string_content)) value: (string (string_content))))"
+                     (treesit-node-string
+                      (treesit-node-descendant-for-range
+                       root-node 6 7 t))))
+      ;; `treesit-node-eq'.
+      (should (treesit-node-eq root-node root-node))
+      (should (not (treesit-node-eq root-node doc-node))))))
+
+(ert-deftest treesit-query-api ()
+  "Tests for query API.
+Run the same query in string form and in sexp form, each both
+uncompiled and compiled, and check the captures are identical."
+  ;; Skip rather than fail when the JSON grammar is not installed.
+  (skip-unless (treesit-language-available-p 'json))
+  (with-temp-buffer
+    (insert "[1,2,{\"name\": \"Bob\"},3]")
+    (let* ((parser (treesit-parser-create 'json))
+           (root-node (treesit-parser-root-node parser)))
+
+      ;; Test `treesit-query-capture' on string, sexp and compiled
+      ;; queries.
+      (dolist (query1
+               ;; String query.
+               '("(string) @string
+(pair key: (_) @keyword)
+((_) @bob (#match \"^B.b$\" @bob))
+(number) @number
+((number) @n3 (#equal \"3\" @n3)) "
+                 ;; Sexp query.
+                 ((string) @string
+                  (pair key: (_) @keyword)
+                  ((_) @bob (:match "^B.b$" @bob))
+                  (number) @number
+                  ((number) @n3 (:equal "3" @n3)))))
+        ;; Test `treesit-query-compile'.
+        (dolist (query (list query1
+                             (treesit-query-compile 'json query1)))
+          (should
+           (equal
+            '((number . "1") (number . "2")
+              (keyword . "\"name\"")
+              (string . "\"name\"")
+              (string . "\"Bob\"")
+              (bob . "Bob")
+              (number . "3")
+              (n3 . "3"))
+            (mapcar (lambda (entry)
+                      (cons (car entry)
+                            (treesit-node-text
+                             (cdr entry))))
+                    (treesit-query-capture root-node query))))))
+      ;; Test `treesit-query-expand'.
+      (should
+       (equal
+        "(type field: (_) @capture .) ? * + \"return\""
+        (treesit-query-expand
+         '((type field: (_) @capture :anchor)
+           :? :* :+ "return")))))))
+
+(ert-deftest treesit-narrow ()
+  "Tests if narrowing works.
+After each change of the accessible region, the parser should see
+only the narrowed text."
+  ;; Skip rather than fail when the JSON grammar is not installed.
+  (skip-unless (treesit-language-available-p 'json))
+  (with-temp-buffer
+    (insert "xxx[1,{\"name\": \"Bob\"},2,3]xxx")
+    (narrow-to-region (+ (point-min) 3) (- (point-max) 3))
+    (let ((parser (treesit-parser-create 'json)))
+      ;; Force an initial parse of the narrowed region.
+      (treesit-parser-root-node parser)
+      ;; This test is from the basic test.
+      (should
+       (equal
+        (treesit-node-string
+         (treesit-parser-root-node parser))
+        "(document (array (number) (object (pair key: (string (string_content)) value: (string (string_content)))) (number) (number)))"))
+
+      (widen)
+      (goto-char (point-min))
+      (insert "ooo")
+      (should (equal "oooxxx[1,{\"name\": \"Bob\"},2,3]xxx"
+                     (buffer-string)))
+      (delete-region 10 26)
+      (should (equal "oooxxx[1,2,3]xxx"
+                     (buffer-string)))
+      (narrow-to-region (+ (point-min) 6) (- (point-max) 3))
+      ;; This test is also from the basic test.
+      (should
+       (equal (treesit-node-string
+               (treesit-parser-root-node parser))
+              "(document (array (number) (number) (number)))"))
+      (widen)
+      (goto-char (point-max))
+      (insert "[1,2]")
+      (should (equal "oooxxx[1,2,3]xxx[1,2]"
+                     (buffer-string)))
+      (narrow-to-region (- (point-max) 5) (point-max))
+      (should
+       (equal (treesit-node-string
+               (treesit-parser-root-node parser))
+              "(document (array (number) (number)))"))
+      (widen)
+      (goto-char (point-min))
+      (insert "[1]")
+      (should (equal "[1]oooxxx[1,2,3]xxx[1,2]"
+                     (buffer-string)))
+      (narrow-to-region (point-min) (+ (point-min) 3))
+      (should
+       (equal (treesit-node-string
+               (treesit-parser-root-node parser))
+              "(document (array (number)))")))))
+
+(ert-deftest treesit-cross-boundary ()
+  "Tests for cross-boundary edits.
+Cross-boundary means crossing visible_beg and visible_end.  We
+don't test if parser parses correctly, instead we just check
+edits like this don't produce assertion errors.  (I inserted a
+bunch of assertions that checks e.g. visible_beg <=
+visible_end.)"
+  ;; Skip rather than fail when the JSON grammar is not installed.
+  (skip-unless (treesit-language-available-p 'json))
+  (with-temp-buffer
+    (insert "xxx[1,{\"name\": \"Bob\"},2,3]xxx")
+    (narrow-to-region (+ (point-min) 3) (- (point-max) 3))
+    (let ((parser (treesit-parser-create 'json)))
+      ;; Now visible_beg/end = visible boundary.
+      (treesit-parser-root-node parser)
+      ;; Now parser knows the content of the visible region.
+      (widen)
+      ;; Now visible_beg/end don't change, but visible region expanded.
+      (delete-region 1 7)
+      ;; (1) This change is across visible_beg.  I expect
+      ;; ts_record_change to receive (start=1, old_end=7, new_end=1).
+      (treesit-parser-root-node parser)
+      ;; Above form forces a parse which calls
+      ;; `ts_ensure_position_synced'.  Now visible_beg/end matches the
+      ;; visible region (whole buffer).  We want to test that this
+      ;; doesn't cause assertion error.
+
+      (should (equal "{\"name\": \"Bob\"},2,3]xxx" (buffer-string)))
+      (narrow-to-region 1 16)
+      (should (equal "{\"name\": \"Bob\"}" (buffer-string)))
+      (treesit-parser-root-node parser)
+      ;; Call `ts_ensure_position_synced' again to update visible_beg/end.
+      (widen)
+      (goto-char 14)
+      (insert "by")
+      ;; (2) This change is inside [visible_beg, visible_end].
+      (should (equal "{\"name\": \"Bobby\"},2,3]xxx" (buffer-string)))
+      (delete-region 14 23)
+      ;; This delete is across visible_end.
+      (should (equal "{\"name\": \"Bobxxx" (buffer-string)))
+      (treesit-parser-root-node parser)
+      ;; visible_beg/end synced.
+
+      (narrow-to-region 3 7)
+      (should (equal "name" (buffer-string)))
+      (treesit-parser-root-node parser)
+      ;; visible_beg/end synced.
+      (widen)
+      (goto-char (point-min))
+      (insert "zzz")
+      (should (equal "zzz{\"name\": \"Bobxxx" (buffer-string)))
+      ;; (3) Test inserting before visible_beg.
+      (treesit-parser-root-node parser)
+      ;; visible_beg/end synced.
+
+      (narrow-to-region 4 11)
+      (should (equal "{\"name\"" (buffer-string)))
+      (treesit-parser-root-node parser)
+      ;; visible_beg/end synced.
+      (widen)
+      (goto-char (point-max))
+      (insert "yyy")
+      ;; (4) This change is after visible_end.
+      (treesit-parser-root-node parser)
+      ;; Sync up visible_beg/end.
+      (should (equal "zzz{\"name\": \"Bobxxxyyy" (buffer-string)))
+
+      (narrow-to-region 1 17)
+      (should (equal "zzz{\"name\": \"Bob" (buffer-string)))
+      (treesit-parser-root-node parser)
+      ;; Sync up visible_beg/end.
+      (widen)
+      (delete-region 13 (point-max))
+      (treesit-parser-root-node parser)
+      ;; Sync up visible_beg/end.
+      (should (equal "zzz{\"name\": " (buffer-string)))
+      ;; Ideally we want to also test the case where we delete and
+      ;; insert simultaneously, but the only such use is in
+      ;; `casify_region', all others either only inserts or only
+      ;; deletes.  I'll leave it to someone to try to write a test
+      ;; that calls that.
+      )))
+
+(ert-deftest treesit-range ()
+  "Tests if range works.
+Overlapping ranges must be rejected; valid ranges must be stored
+and restrict what the parser sees."
+  ;; Skip rather than fail when the JSON grammar is not installed.
+  (skip-unless (treesit-language-available-p 'json))
+  (with-temp-buffer
+    (insert "[[1],oooxxx[1,2,3],xxx[1,2]]")
+    (let ((parser (treesit-parser-create 'json)))
+      ;; Force an initial parse before touching the ranges.
+      (treesit-parser-root-node parser)
+      ;; (1 . 6) and (5 . 20) overlap, so this must signal.
+      (should-error
+       (treesit-parser-set-included-ranges
+        parser '((1 . 6) (5 . 20)))
+       :type '(treesit-range-invalid))
+
+      (treesit-parser-set-included-ranges
+       parser '((1 . 6) (12 . 20) (23 . 29)))
+      (should (equal '((1 . 6) (12 . 20) (23 . 29))
+                     (treesit-parser-included-ranges parser)))
+      (should (equal "(document (array (array (number)) (array (number) (number) (number)) (array (number) (number))))"
+                     (treesit-node-string
+                      (treesit-parser-root-node parser))))
+      ;; TODO: More tests.
+      )))
+
+(ert-deftest treesit-multi-lang ()
+  "Tests if parsing multiple language works.
+Use ranges captured from the HTML tree to confine the JavaScript
+and CSS parsers to the embedded script and style text."
+  ;; Skip rather than fail when any of the grammars is not installed.
+  (skip-unless (and (treesit-language-available-p 'html)
+                    (treesit-language-available-p 'css)
+                    (treesit-language-available-p 'javascript)))
+  (with-temp-buffer
+    (insert "<html><script>1</script><style>body {}</style></html>")
+    ;; The HTML parser object itself is never referenced below; the
+    ;; queries name the language by symbol.  Create it first, as the
+    ;; original test did.
+    (treesit-parser-create 'html)
+    (let ((css (treesit-parser-create 'css))
+          (js (treesit-parser-create 'javascript))
+          js-range css-range)
+      ;; JavaScript.
+      (setq js-range
+            (treesit-query-range
+             'html
+             '((script_element (raw_text) @capture))))
+      (should (equal '((15 . 16)) js-range))
+      (treesit-parser-set-included-ranges js js-range)
+      (should (equal "(program (expression_statement (number)))"
+                     (treesit-node-string
+                      (treesit-parser-root-node js))))
+      ;; CSS.
+      (setq css-range
+            (treesit-query-range
+             'html
+             '((style_element (raw_text) @capture))))
+      (should (equal '((32 . 39)) css-range))
+      (treesit-parser-set-included-ranges css css-range)
+      (should
+       (equal "(stylesheet (rule_set (selectors (tag_name)) (block)))"
+              (treesit-node-string
+               (treesit-parser-root-node css))))
+      ;; TODO: More tests.
+      )))
+
+(ert-deftest treesit-parser-supplemental ()
+  "Supplemental parser functions."
+  ;; Skip rather than fail when the JSON grammar is not installed.
+  (skip-unless (treesit-language-available-p 'json))
+  ;; `treesit-parse-string'.
+  (should (equal (treesit-node-string
+                  (treesit-parse-string
+                   "[1,2,{\"name\": \"Bob\"},3]"
+                   'json))
+                 "(document (array (number) (number) (object (pair key: (string (string_content)) value: (string (string_content)))) (number)))"))
+  (with-temp-buffer
+    (insert "[1,2,{\"name\": \"Bob\"},3]")
+    (let ((parser (treesit-parser-create 'json)))
+      ;; Force an initial parse, as the original test did when it
+      ;; fetched the root node.
+      (treesit-parser-root-node parser)
+      ;; `treesit-language-at'.
+      (should (equal (treesit-language-at (point))
+                     'json))
+      ;; `treesit-set-ranges', `treesit-get-ranges'.
+      (treesit-set-ranges 'json
+                          '((1 . 2)))
+      (should (equal (treesit-get-ranges 'json)
+                     '((1 . 2)))))))
+
+(ert-deftest treesit-node-supplemental ()
+  "Supplemental node functions."
+  ;; Skip rather than fail when the JSON grammar is not installed.
+  (skip-unless (treesit-language-available-p 'json))
+  ;; Run in a temporary buffer: previously the text was inserted, and
+  ;; the parser created, in whatever buffer happened to be current.
+  (with-temp-buffer
+    (insert "[1,2,{\"name\": \"Bob\"},3]")
+    (let* ((parser (treesit-parser-create 'json))
+           (root-node (treesit-parser-root-node parser))
+           (doc-node (treesit-node-child root-node 0)))
+      ;; `treesit-node-buffer'.
+      (should (equal (treesit-node-buffer root-node)
+                     (current-buffer)))
+      ;; `treesit-node-language'.
+      (should (eq (treesit-node-language root-node)
+                  'json))
+      ;; `treesit-node-at'.
+      (should (equal (treesit-node-string
+                      (treesit-node-at 1 'json))
+                     "(\"[\")"))
+      ;; `treesit-node-on'
+      (should (equal (treesit-node-string
+                      (treesit-node-on 1 2 'json))
+                     "(\"[\")"))
+      ;; `treesit-buffer-root-node'.
+      (should (treesit-node-eq
+               (treesit-buffer-root-node 'json)
+               root-node))
+      ;; `treesit-filter-child'.
+      (should (equal (mapcar
+                      (lambda (node)
+                        (treesit-node-type node))
+                      (treesit-filter-child
+                       doc-node (lambda (node)
+                                  (treesit-node-check node 'named))))
+                     '("number" "number" "object" "number")))
+      ;; `treesit-node-text'.
+      (should (equal (treesit-node-text doc-node)
+                     "[1,2,{\"name\": \"Bob\"},3]"))
+      ;; `treesit-node-index'.
+      (should (eq (treesit-node-index doc-node)
+                  0))
+      ;; TODO:
+      ;; `treesit-parent-until'
+      ;; `treesit-parent-while'
+      ;; `treesit-node-children'
+      ;; `treesit-node-field-name'
+      ;; `treesit-search-forward-goto'
+      )))
+
+;; TODO
+;; - Functions in treesit.el
+;; - treesit-load-name-override-list
+;; - treesit-search-subtree
+;; - treesit-search-forward
+;; - treesit-induce-sparse-tree
+
+
+(provide 'treesit-tests)
+;;; treesit-tests.el ends here