diff options
Diffstat (limited to 'lisp/treesit.el')
-rw-r--r-- | lisp/treesit.el | 935 |
1 files changed, 935 insertions, 0 deletions
diff --git a/lisp/treesit.el b/lisp/treesit.el new file mode 100644 index 00000000000..bb13021a274 --- /dev/null +++ b/lisp/treesit.el @@ -0,0 +1,935 @@ +;;; treesit.el --- tree-sitter utilities -*- lexical-binding: t -*- + +;; Copyright (C) 2021 Free Software Foundation, Inc. + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. + +;;; Commentary: +;; +;; Note to self: we don't create parsers automatically in any provided +;; functions if we don't know what language to use. + +;;; Code: + +(eval-when-compile (require 'cl-lib)) +(eval-when-compile (require 'subr-x)) ; For `string-join'. +(require 'cl-seq) +(require 'font-lock) + +;;; Activating tree-sitter + +(defgroup treesit + nil + "Tree-sitter is an incremental parser." + :group 'tools) + +(defcustom treesit-max-buffer-size (* 4 1024 1024) + "Maximum buffer size for enabling tree-sitter parsing." + :type 'integer) + +(defun treesit-available-p () + "Return non-nil if tree-sitter features are available." + (fboundp 'treesit-parser-create)) + +(defun treesit-can-enable-p () + "Return non-nil if current buffer can activate tree-sitter. +Currently this function checks whether tree-sitter is available +and the buffer size." + (and (treesit-available-p) + (< (buffer-size) treesit-max-buffer-size))) + +;;; Parser API supplement + +(defun treesit-parse-string (string language) + "Parse STRING using a parser for LANGUAGE. +Return the root node of the syntax tree." + (with-temp-buffer + (insert string) + (treesit-parser-root-node + (treesit-parser-create language)))) + +(defun treesit-language-at (point) + "Return the language used at POINT." + (cl-loop for parser in (treesit-parser-list) + if (treesit-node-on point point parser) + return (treesit-parser-language parser))) + +(defun treesit-set-ranges (parser-or-lang ranges) + "Set the ranges of PARSER-OR-LANG to RANGES." + (treesit-parser-set-included-ranges + (cond ((symbolp parser-or-lang) + (or (treesit-parser-create parser-or-lang) + (error "Cannot find a parser for %s" parser-or-lang))) + ((treesit-parser-p parser-or-lang) + parser-or-lang) + (t (error "Expecting a parser or language, but got %s" + parser-or-lang))) + ranges)) + +(defun treesit-get-ranges (parser-or-lang) + "Get the ranges of PARSER-OR-LANG." + (treesit-parser-included-ranges + (cond ((symbolp parser-or-lang) + (or (treesit-parser-create parser-or-lang) + (error "Cannot find a parser for %s" parser-or-lang))) + ((treesit-parser-p parser-or-lang) + parser-or-lang) + (t (error "Expecting a parser or language, but got %s" + parser-or-lang))))) + +;;; Node API supplement + +(defun treesit-node-buffer (node) + "Return the buffer in where NODE belongs." + (treesit-parser-buffer + (treesit-node-parser node))) + +(defun treesit-node-language (node) + "Return the language symbol that NODE's parser uses." + (treesit-parser-language + (treesit-node-parser node))) + +(defun treesit-node-at (point &optional parser-or-lang named) + "Return the smallest node that starts at or after POINT. + +\"Starts at or after POINT\" means the start of the node is +greater or larger than POINT. Return nil if none find. If NAMED +non-nil, only look for named node. + +If PARSER-OR-LANG is nil, use the first parser in +(`treesit-parser-list'); if PARSER-OR-LANG is a parser, use +that parser; if PARSER-OR-LANG is a language, find a parser using +that language in the current buffer, and use that." + (let ((node (if (treesit-parser-p parser-or-lang) + (treesit-parser-root-node parser-or-lang) + (treesit-buffer-root-node parser-or-lang)))) + ;; TODO: We might want a `treesit-node-descendant-for-pos' in C. + (while (cond ((and node (< (treesit-node-end node) point)) + (setq node (treesit-node-next-sibling node)) + t) + ((treesit-node-child node 0 named) + (setq node (treesit-node-child node 0 named)) + t))) + node)) + +(defun treesit-node-on (beg end &optional parser-or-lang named) + "Return the smallest node covering BEG to END. + +BEWARE! Calling this function on an empty line that is not +inside any top-level construct (function definition, etc) most +probably will give you the root node, because the root node is +the smallest node that covers that empty line. You probably want +to use `treesit-node-at' instead. + +Return nil if none find. If NAMED non-nil, only look for named +node. + +If PARSER-OR-LANG is nil, use the first parser in +(`treesit-parser-list'); if PARSER-OR-LANG is a parser, use +that parser; if PARSER-OR-LANG is a language, find a parser using +that language in the current buffer, and use that." + (let ((root (if (treesit-parser-p parser-or-lang) + (treesit-parser-root-node parser-or-lang) + (treesit-buffer-root-node parser-or-lang)))) + (treesit-node-descendant-for-range root beg (or end beg) named))) + +(defun treesit-buffer-root-node (&optional language) + "Return the root node of the current buffer. +Use the first parser in (`treesit-parser-list'), if LANGUAGE is +non-nil, use the first parser for LANGUAGE." + (if-let ((parser + (or (if language + (or (treesit-parser-create language) + (error "Cannot find a parser for %s" language)) + (or (car (treesit-parser-list)) + (error "Buffer has no parser")))))) + (treesit-parser-root-node parser))) + +(defun treesit-filter-child (node pred &optional named) + "Return children of NODE that satisfies PRED. +PRED is a function that takes one argument, the child node. If +NAMED non-nil, only search for named node." + (let ((child (treesit-node-child node 0 named)) + result) + (while child + (when (funcall pred child) + (push child result)) + (setq child (treesit-node-next-sibling child named))) + (reverse result))) + +(defun treesit-node-text (node &optional no-property) + "Return the buffer (or string) content corresponding to NODE. +If NO-PROPERTY is non-nil, remove text properties." + (when node + (with-current-buffer (treesit-node-buffer node) + (if no-property + (buffer-substring-no-properties + (treesit-node-start node) + (treesit-node-end node)) + (buffer-substring + (treesit-node-start node) + (treesit-node-end node)))))) + +(defun treesit-parent-until (node pred) + "Return the closest parent of NODE that satisfies PRED. +Return nil if none found. PRED should be a function that takes +one argument, the parent node." + (let ((node (treesit-node-parent node))) + (while (and node (not (funcall pred node))) + (setq node (treesit-node-parent node))) + node)) + +(defun treesit-parent-while (node pred) + "Return the furthest parent of NODE that satisfies PRED. +Return nil if none found. PRED should be a function that takes +one argument, the parent node." + (let ((last nil)) + (while (and node (funcall pred node)) + (setq last node + node (treesit-node-parent node))) + last)) + +(defalias 'treesit-traverse-parent #'treesit-parent-until) + +(defun treesit-node-children (node &optional named) + "Return a list of NODE's children. +If NAMED is non-nil, collect named child only." + (mapcar (lambda (idx) + (treesit-node-child node idx named)) + (number-sequence + 0 (1- (treesit-node-child-count node named))))) + +(defun treesit-node-index (node &optional named) + "Return the index of NODE in its parent. +If NAMED is non-nil, count named child only." + (let ((count 0)) + (while (setq node (treesit-node-prev-sibling node named)) + (cl-incf count)) + count)) + +(defun treesit-node-field-name (node) + "Return the field name of NODE as a child of its parent." + (when-let ((parent (treesit-node-parent node)) + (idx (treesit-node-index node))) + (treesit-node-field-name-for-child parent idx))) + +;;; Query API supplement + +(defun treesit-query-string (string query language) + "Query STRING with QUERY in LANGUAGE. +See `treesit-query-capture' for QUERY." + (with-temp-buffer + (insert string) + (let ((parser (treesit-parser-create language))) + (treesit-query-capture + (treesit-parser-root-node parser) + query)))) + +(defun treesit-query-range (source query &optional beg end) + "Query the current buffer and return ranges of captured nodes. + +QUERY, SOURCE, BEG, END are the same as in +`treesit-query-in'. This function returns a list +of (START . END), where START and END specifics the range of each +captured node. Capture names don't matter." + (cl-loop for capture + in (treesit-query-capture source query beg end) + for node = (cdr capture) + collect (cons (treesit-node-start node) + (treesit-node-end node)))) + +;;; Range API supplement + +(defvar-local treesit-range-functions nil + "A list of range functions. +Font-locking and indenting code uses functions in this alist to +set correct ranges for a language parser before using it. + +The signature of each function should be + + (start end &rest _) + +where START and END marks the region that is about to be used. A +range function only need to (but not limited to) update ranges in +that region. + +Each function in the list is called in-order.") + +(defun treesit-update-ranges (&optional start end) + "Update the ranges for each language in the current buffer. +Calls each range functions in `treesit-range-functions' +in-order. START and END are passed to each range function." + (dolist (range-fn treesit-range-functions) + (funcall range-fn (or start (point-min)) (or end (point-max))))) + +;;; Font-lock + +(define-error 'treesit-font-lock-error + "Generic tree-sitter font-lock error" + 'treesit-error) + +(defvar-local treesit-font-lock-settings nil + "A list of SETTINGs for treesit-based fontification. + +The exact format of this variable is considered internal. One +should always use `treesit-font-lock-rules' to set this variable. + +Each SETTING is of form + + (LANGUAGE QUERY OVERRIDE) + +Each SETTING controls one parser (often of different language). +LANGUAGE is the language symbol. See Info node `(elisp)Language +Definitions'. + +QUERY is either a string query, a sexp query, or a compiled +query. See Info node `(elisp)Pattern Matching' for how to write +a query in either string or s-expression form. When using +repeatedly, a compiled query is much faster than a string or sexp +one, so it is recommend to compile your queries if it will be +used over and over. + +OVERRIDE is the override flag for this query. Its value can be +t, nil, append, prepend, keep. See more in +`treesit-font-lock-rules'.") + +(defun treesit-font-lock-rules (&rest args) + "Return a value suitable for `treesit-font-lock-settings'. + +Take a series of QUERIES in either string, s-expression or +compiled form. Same as in `treesit-font-lock-settings', for each +query, captured nodes are highlighted with the capture name as +its face. + +Before each QUERY there could be :KEYWORD VALUE pairs that +configure the query (and only that query). For example, + + (treesit-font-lock-rules + :language \\='javascript + :override t + \\='((true) @font-lock-constant-face + (false) @font-lock-constant-face) + :language \\='html + \"(script_element) @font-lock-builtin-face\") + +For each QUERY, a :language keyword is required. Other keywords +include: + + KEYWORD VALUE DESCRIPTION + :override nil If the region already has a face, + discard the new face + t Always apply the new face + append Append the new face to existing ones + prepend Prepend the new face to existing ones + keep Fill-in regions without an existing face + +Capture names in QUERY should be face names like +`font-lock-keyword-face'. The captured node will be fontified +with that face. Capture names can also be function names, in +which case the function is called with (START END NODE), where +START and END are the start and end position of the node in +buffer, and NODE is the tree-sitter node object. If a capture +name is both a face and a function, the face takes priority. If +a capture name is not a face name nor a function name, it is +ignored. + +\(fn :KEYWORD VALUE QUERY...)" + (let (;; Tracks the current language that following queries will + ;; apply to. + (current-language nil) + ;; Tracks :override flag. + (current-override nil) + ;; The list this function returns. + (result nil)) + (while args + (let ((token (pop args))) + (pcase token + (:language + (let ((lang (pop args))) + (when (or (not (symbolp lang)) (null lang)) + (signal 'wrong-type-argument `(symbolp ,lang))) + (setq current-language lang))) + (:override + (let ((flag (pop args))) + (when (not (memq flag '(t nil append prepend keep))) + (signal 'wrong-type-argument + `((or t nil append prepend keep) + ,flag))) + (setq current-override flag))) + ((pred treesit-query-p) + (when (null current-language) + (signal 'treesit-font-lock-error + `("Language unspecified, use :language keyword to specify a language for this query" ,token))) + (if (treesit-compiled-query-p token) + (push `(,current-language token) result) + (push `(,current-language + ,(treesit-query-compile current-language token) + ,current-override) + result)) + ;; Clears any configurations set for this query. + (setq current-language nil + current-override nil)) + (_ (signal 'treesit-font-lock-error + `("Unexpected value" token)))))) + (nreverse result))) + +(defun treesit-font-lock-fontify-region + (start end &optional loudly) + "Fontify the region between START and END. +If LOUDLY is non-nil, message some debugging information." + (treesit-update-ranges start end) + (font-lock-unfontify-region start end) + (dolist (setting treesit-font-lock-settings) + (let* ((language (nth 0 setting)) + (match-pattern (nth 1 setting)) + (override (nth 2 setting)) + (parser (treesit-parser-create language))) + (when-let ((node (treesit-node-on start end parser))) + (let ((captures (treesit-query-capture + node match-pattern + ;; Specifying the range is important. More + ;; often than not, NODE will be the root + ;; node, and if we don't specify the range, + ;; we are basically querying the whole file. + start end)) + (inhibit-point-motion-hooks t)) + (with-silent-modifications + (dolist (capture captures) + (let* ((face (car capture)) + (node (cdr capture)) + (start (treesit-node-start node)) + (end (treesit-node-end node))) + (cond + ((facep face) + (pcase override + ('nil (unless (text-property-not-all + start end 'face nil) + (put-text-property start end 'face face))) + ('t (put-text-property start end 'face face)) + ('append (font-lock-append-text-property + start end 'face face)) + ('prepend (font-lock-prepend-text-property + start end 'face face)) + ('keep (font-lock-fillin-text-property + start end 'face face)) + (_ (signal 'treesit-font-lock-error + (list + "Unrecognized value of :override option" + override))))) + ((functionp face) + (funcall face start end node))) + ;; Don't raise an error if FACE is neither a face nor + ;; a function. This is to allow intermediate capture + ;; names used for #match and #eq. + (when loudly + (message "Fontifying text from %d to %d, Face: %s Language: %s" + start end face language))))))))) + ;; Call regexp font-lock after tree-sitter, as it is usually used + ;; for custom fontification. + (let ((font-lock-unfontify-region-function #'ignore)) + (funcall #'font-lock-default-fontify-region start end loudly))) + +(defun treesit-font-lock-enable () + "Enable tree-sitter font-locking for the current buffer." + (setq-local font-lock-fontify-region-function + #'treesit-font-lock-fontify-region) + ;; If we don't set `font-lock-defaults' to some non-nil value, + ;; font-lock doesn't enable properly (the font-lock-mode-internal + ;; doesn't run). See `font-lock-add-keywords'. + (when (and font-lock-mode + (null font-lock-keywords) + (null font-lock-defaults)) + (font-lock-mode -1) + (setq-local font-lock-defaults '(nil t)) + (font-lock-mode 1))) + +;;; Indent + +(defvar treesit--indent-verbose nil + "If non-nil, log progress when indenting.") + +;; This is not bound locally like we normally do with major-mode +;; stuff, because for tree-sitter, a buffer could contain more than +;; one language. +(defvar treesit-simple-indent-rules nil + "A list of indent rule settings. +Each indent rule setting should be (LANGUAGE . RULES), +where LANGUAGE is a language symbol, and RULES is a list of + + (MATCHER ANCHOR OFFSET). + +MATCHER determines whether this rule applies, ANCHOR and OFFSET +together determines which column to indent to. + +A MATCHER is a function that takes three arguments (NODE PARENT +BOL). BOL is the point where we are indenting: the beginning of +line content, the position of the first non-whitespace character. +NODE is the largest (highest-in-tree) node starting at that +point. PARENT is the parent of NODE. + +If MATCHER returns non-nil, meaning the rule matches, Emacs then +uses ANCHOR to find an anchor, it should be a function that takes +the same argument (NODE PARENT BOL) and returns a point. + +Finally Emacs computes the column of that point returned by ANCHOR +and adds OFFSET to it, and indents to that column. + +For MATCHER and ANCHOR, Emacs provides some convenient presets. +See `treesit-simple-indent-presets'.") + +(defvar treesit-simple-indent-presets + '((match . (lambda + (&optional node-type parent-type node-field + node-index-min node-index-max) + `(lambda (node parent bol &rest _) + (and (or (null ,node-type) + (equal (treesit-node-type node) + ,node-type)) + (or (null ,parent-type) + (equal (treesit-node-type parent) + ,parent-type)) + (or (null ,node-field) + (equal (treesit-node-field-name node) + ,node-field)) + (or (null ,node-index-min) + (>= (treesit-node-index node t) + ,node-index-min)) + (or (null ,node-index-max) + (<= (treesit-node-index node t) + ,node-index-max)))))) + (no-node . (lambda (node parent bol &rest _) (null node))) + (parent-is . (lambda (type) + `(lambda (node parent bol &rest _) + (equal ,type (treesit-node-type parent))))) + + (node-is . (lambda (type) + `(lambda (node parent bol &rest _) + (equal ,type (treesit-node-type node))))) + + (query . (lambda (pattern) + `(lambda (node parent bol &rest _) + (cl-loop for capture + in (treesit-query-capture + parent ,pattern) + if (treesit-node-eq node (cdr capture)) + return t + finally return nil)))) + (first-sibling . (lambda (node parent bol &rest _) + (treesit-node-start + (treesit-node-child parent 0 t)))) + + (parent . (lambda (node parent bol &rest _) + (treesit-node-start parent))) + (parent-bol . (lambda (node parent bol &rest _) + (save-excursion + (goto-char (treesit-node-start parent)) + (back-to-indentation) + (point)))) + (prev-sibling . (lambda (node parent bol &rest _) + (treesit-node-start + (treesit-node-prev-sibling node)))) + (no-indent . (lambda (node parent bol &rest _) bol)) + (prev-line . (lambda (node parent bol &rest _) + (save-excursion + (goto-char bol) + (forward-line -1) + (skip-chars-forward " \t"))))) + "A list of presets. +These presets that can be used as MATHER and ANCHOR in +`treesit-simple-indent-rules'. + +MATCHER: + +\(match NODE-TYPE PARENT-TYPE NODE-FIELD NODE-INDEX-MIN NODE-INDEX-MAX) + + NODE-TYPE checks for node's type, PARENT-TYPE checks for + parent's type, NODE-FIELD checks for the filed name of node + in the parent, NODE-INDEX-MIN and NODE-INDEX-MAX checks for + the node's index in the parent. Therefore, to match the + first child where parent is \"argument_list\", use + + (match nil \"argument_list\" nil nil 0 0). + +no-node + + Matches the case where node is nil, i.e., there is no node + that starts at point. This is the case when indenting an + empty line. + +\(parent-is TYPE) + + Check that the parent has type TYPE. + +\(node-is TYPE) + + Checks that the node has type TYPE. + +\(query QUERY) + + Queries the parent node with QUERY, and checks if the node + is captured (by any capture name). + +ANCHOR: + +first-sibling + + Find the first child of the parent. + +parent + + Find the parent. + +parent-bol + + Find the beginning of non-space characters on the line where + the parent is on. + +prev-sibling + + Find node's previous sibling. + +no-indent + + Do nothing. + +prev-line + + The first non-whitespace charater on the previous line.") + +(defun treesit--simple-apply (fn args) + "Apply ARGS to FN. + +If FN is a key in `treesit-simple-indent-presets', use the +corresponding value as the function." + ;; We don't want to match uncompiled lambdas, so make sure this cons + ;; is not a function. We could move the condition functionp + ;; forward, but better be explicit. + (cond ((and (consp fn) (not (functionp fn))) + (apply (treesit--simple-apply (car fn) (cdr fn)) + ;; We don't evaluate ARGS with `simple-apply', i.e., + ;; no composing, better keep it simple. + args)) + ((and (symbolp fn) + (alist-get fn treesit-simple-indent-presets)) + (apply (alist-get fn treesit-simple-indent-presets) + args)) + ((functionp fn) (apply fn args)) + (t (error "Couldn't find the function corresponding to %s" fn)))) + +;; This variable might seem unnecessary: why split +;; `treesit-indent' and `treesit-simple-indent' into two +;; functions? We add this variable in between because later we might +;; add more powerful indentation engines, and that new engine can +;; probably share `treesit-indent'. It is also useful, suggested +;; by Stefan M, to have a function that figures out how much to indent +;; but doesn't actually performs the indentation, because we might +;; want to know where will a node indent to if we put it at some other +;; location, and use that information to calculate the actual +;; indentation. And `treesit-simple-indent' is that function. I +;; forgot the example Stefan gave, but it makes a lot of sense. +(defvar treesit-indent-function #'treesit-simple-indent + "Function used by `treesit-indent' to do some of the work. + +This function is called with + + (NODE PARENT BOL &rest _) + +and returns + + (ANCHOR . OFFSET). + +BOL is the position of the beginning of the line; NODE is the +\"largest\" node that starts at BOL; PARENT is its parent; ANCHOR +is a point (not a node), and OFFSET is a number. Emacs finds the +column of ANCHOR and adds OFFSET to it as the final indentation +of the current line.") + +(defun treesit-indent () + "Indent according to the result of `treesit-indent-function'." + (treesit-update-ranges) + (let* ((orig-pos (point)) + (bol (save-excursion + (forward-line 0) + (skip-chars-forward " \t") + (point))) + (smallest-node + (cl-loop for parser in (treesit-parser-list) + for node = (treesit-node-at bol parser) + if node return node)) + (node (treesit-parent-while + smallest-node + (lambda (node) + (eq bol (treesit-node-start node)))))) + (pcase-let* + ((parser (if smallest-node + (treesit-node-parser smallest-node) + nil)) + ;; NODE would be nil if BOL is on a whitespace. In that case + ;; we set PARENT to the "node at point", which would + ;; encompass the whitespace. + (parent (cond ((and node parser) + (treesit-node-parent node)) + (parser + (treesit-node-at bol parser)) + (t nil))) + (`(,anchor . ,offset) + (funcall treesit-indent-function node parent bol))) + (if (null anchor) + (when treesit--indent-verbose + (message "Failed to find the anchor")) + (let ((col (+ (save-excursion + (goto-char anchor) + (current-column)) + offset))) + (if (< bol orig-pos) + (save-excursion + (indent-line-to col)) + (indent-line-to col))))))) + +(defun treesit-simple-indent (node parent bol) + "Calculate indentation according to `treesit-simple-indent-rules'. + +BOL is the position of the first non-whitespace character on the +current line. NODE is the largest node that starts at BOL, +PARENT is NODE's parent. + +Return (ANCHOR . OFFSET) where ANCHOR is a node, OFFSET is the +indentation offset, meaning indent to align with ANCHOR and add +OFFSET." + (if (null parent) + (when treesit--indent-verbose + (message "PARENT is nil, not indenting")) + (let* ((language (treesit-node-language parent)) + (rules (alist-get language + treesit-simple-indent-rules))) + (cl-loop for rule in rules + for pred = (nth 0 rule) + for anchor = (nth 1 rule) + for offset = (nth 2 rule) + if (treesit--simple-apply + pred (list node parent bol)) + do (when treesit--indent-verbose + (message "Matched rule: %S" rule)) + and + return (cons (treesit--simple-apply + anchor (list node parent bol)) + offset))))) + +(defun treesit-check-indent (mode) + "Check current buffer's indentation against a major mode MODE. + +Pop up a diff buffer showing the difference. Correct +indentation (target) is in green, current indentation is in red." + (interactive "CTarget major mode: ") + (let ((source-buf (current-buffer))) + (with-temp-buffer + (insert-buffer-substring source-buf) + (funcall mode) + (indent-region (point-min) (point-max)) + (diff-buffers source-buf (current-buffer))))) + +;;; Search + +(defun treesit-search-forward-goto + (predicate side &optional all backward up) + "Search forward for a node and move to it. + +Stops at the first node after point that matches PREDICATE. +PREDICATE can be either a regexp that matches against each node's +type case-insensitively, or a function that takes a node and +returns nil/non-nil for match/no match. + +If a node matches, move to that node and return the node, +otherwise return nil. SIDE controls whether we move to the start +or end of the matches node, it can be either \\='start or +\\='end. + +ALL, BACKWARD, and UP are the same as in `treesit-search-forward'." + (let ((node (treesit-node-at (point))) + (start (point))) + ;; When searching forward, it is possible for (point) < start, + ;; because `treesit-search-forward' goes to parents. + (while (and node (if backward + (>= (point) start) + (<= (point) start))) + (setq node (treesit-search-forward + node predicate all backward up)) + (if-let ((pos (pcase side + ('start (treesit-node-start node)) + ('end (treesit-node-end node))))) + (goto-char pos))) + ;; If we made reverse progress, go back to where we started. + (when (if backward + (>= (point) start) + (<= (point) start)) + (goto-char start)) + node)) + +;;; Debugging + +(defvar-local treesit--inspect-name nil + "treesit-inspect-mode uses this to show node name in mode-line.") + +(defun treesit-inspect-node-at-point (&optional arg) + "Show information of the node at point. +If called interactively, show in echo area, otherwise set +`treesit--inspect-name' (which will appear in the mode-line +if `treesit-inspect-mode' is enabled). Uses the first parser +in (`treesit-parser-list')." + (interactive "p") + ;; NODE-LIST contains all the node that starts at point. + (let* ((node-list + (cl-loop for node = (treesit-node-at (point)) + then (treesit-node-parent node) + while node + if (eq (treesit-node-start node) + (point)) + collect node)) + (largest-node (car (last node-list))) + (parent (treesit-node-parent largest-node)) + ;; node-list-acending contains all the node bottom-up, then + ;; the parent. + (node-list-acending + (if (null largest-node) + ;; If there are no nodes that start at point, just show + ;; the node at point and its parent. + (list (treesit-node-at (point)) + (treesit-node-parent + (treesit-node-at (point)))) + (append node-list (list parent)))) + (name "")) + ;; We draw nodes like (parent field-name: (node)) recursively, + ;; so it could be (node1 field-name: (node2 field-name: (node3))). + (dolist (node node-list-acending) + (setq + name + (concat + (if (treesit-node-field-name node) + (format " %s: " (treesit-node-field-name node)) + " ") + (if (treesit-node-check node 'named) "(" "\"") + (or (treesit-node-type node) + "N/A") + name + (if (treesit-node-check node 'named) ")" "\"")))) + (setq treesit--inspect-name name) + (force-mode-line-update) + (when arg + (if node-list + (message "%s" treesit--inspect-name) + (message "No node at point"))))) + +(define-minor-mode treesit-inspect-mode + "Shows the node that _starts_ at point in the mode-line. + +The mode-line displays + + PARENT FIELD-NAME: (CHILD FIELD_NAME: (GRAND-CHILD (...))) + +CHILD, GRAND-CHILD, and GRAND-GRAND-CHILD, etc, are nodes that +have their beginning at point. And PARENT is the parent of +CHILD. + +If no node starts at point, i.e., point is in the middle of a +node, then we just display the smallest node that spans point and +its immediate parent. + +This minor mode doesn't create parsers on its own. It simply +uses the first parser in (`treesit-parser-list')." + :lighter nil + (if treesit-inspect-mode + (progn + (add-hook 'post-command-hook + #'treesit-inspect-node-at-point 0 t) + (add-to-list 'mode-line-misc-info + '(:eval treesit--inspect-name))) + (remove-hook 'post-command-hook + #'treesit-inspect-node-at-point t) + (setq mode-line-misc-info + (remove '(:eval treesit--inspect-name) + mode-line-misc-info)))) + +(defun treesit-query-validate (language query) + "Check if QUERY is valid for LANGUAGE. +If QUERY is invalid, display the query in a popup buffer, jumps +to the offending pattern and highlight the pattern." + (cl-assert (or (consp query) (stringp query))) + (let ((buf (get-buffer-create "*tree-sitter check query*"))) + (with-temp-buffer + (treesit-parser-create language) + (condition-case err + (progn (treesit-query-capture language query) + (message "QUERY is valid")) + (treesit-query-error + (with-current-buffer buf + (let* ((data (cdr err)) + (message (nth 0 data)) + (start (nth 1 data))) + (erase-buffer) + (insert (treesit-query-expand query)) + (goto-char start) + (search-forward " " nil t) + (put-text-property start (point) 'face 'error) + (message "%s" (buffer-substring start (point))) + (goto-char (point-min)) + (insert (format "%s: %d\n" message start)) + (forward-char start))) + (pop-to-buffer buf)))))) + +;;; Etc + +(declare-function find-library-name "find-func.el") +(defun treesit--check-manual-covarage () + "Print tree-sitter functions missing from the manual in message buffer." + (interactive) + (require 'find-func) + (let ((functions-in-source + (with-temp-buffer + (insert-file-contents (find-library-name "tree-sitter")) + (cl-remove-if + (lambda (name) (string-match "treesit--" name)) + (cl-sort + (save-excursion + (goto-char (point-min)) + (cl-loop while (re-search-forward + "^(defun \\([^ ]+\\)" nil t) + collect (match-string-no-properties 1))) + #'string<)))) + (functions-in-manual + (with-temp-buffer + (insert-file-contents (expand-file-name + "doc/lispref/parsing.texi" + source-directory)) + (insert-file-contents (expand-file-name + "doc/lispref/modes.texi" + source-directory)) + (cl-sort + (save-excursion + (goto-char (point-min)) + (cl-loop while (re-search-forward + "^@defun \\([^ ]+\\)" nil t) + collect (match-string-no-properties 1))) + #'string<)))) + (message "Missing: %s" + (string-join + (cl-remove-if + (lambda (name) (member name functions-in-manual)) + functions-in-source) + "\n")))) + +(provide 'treesit) + +;;; treesit.el ends here |