From 7c7950fe006fe19596011637610b934a786c1742 Mon Sep 17 00:00:00 2001 From: Yuan Fu Date: Sun, 25 Dec 2022 10:22:40 -0800 Subject: Add maintainer stub for tree-sitter files * lisp/treesit.el: * src/treesit.c: Add maintainer. --- lisp/treesit.el | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lisp/treesit.el') diff --git a/lisp/treesit.el b/lisp/treesit.el index 24fb316fab9..f3e1afd943e 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -2,6 +2,10 @@ ;; Copyright (C) 2021-2022 Free Software Foundation, Inc. +;; Maintainer: 付禹安 (Yuan Fu) +;; Keywords: treesit, tree-sitter, languages +;; Package: emacs + ;; This file is part of GNU Emacs. ;; GNU Emacs is free software: you can redistribute it and/or modify -- cgit v1.2.3 From d90d7d15f2f78c37b9a5c775e617ab6f5cd5fb01 Mon Sep 17 00:00:00 2001 From: Yuan Fu Date: Mon, 26 Dec 2022 01:39:02 -0800 Subject: ; Fix vindexes in parsing.texi * doc/lispref/parsing.texi (Tree-sitter major modes): Replace vindex with cross-reference to modes.texi. Add manual entry for treesit-defun-type-regexp. * lisp/treesit.el (treesit-defun-type-regexp): Use pred in docstring since we use pred everywhere else. --- doc/lispref/parsing.texi | 22 +++++++++++++++------- lisp/treesit.el | 2 +- 2 files changed, 16 insertions(+), 8 deletions(-) (limited to 'lisp/treesit.el') diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 4b9cd18dd47..5d1b11935cf 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -1715,17 +1715,14 @@ This function activates some tree-sitter features for a major mode. Currently, it sets up the following features: @itemize -@vindex treesit-font-lock-settings @item -If @code{treesit-font-lock-settings} is non-@code{nil}, it sets up -fontification. +If @code{treesit-font-lock-settings} (@pxref{Parser-based Font Lock}) +is non-@code{nil}, it sets up fontification. -@vindex treesit-simple-indent-rules @item -If @code{treesit-simple-indent-rules} is non-@code{nil}, it sets up -indentation. 
+If @code{treesit-simple-indent-rules} (@pxref{Parser-based Indentation}) +is non-@code{nil}, it sets up indentation. -@vindex treesit-defun-type-regexp @item If @code{treesit-defun-type-regexp} is non-@code{nil}, it sets up navigation functions for @code{beginning-of-defun} and @@ -1782,6 +1779,17 @@ node is a defun node but doesn't have a name, or the node is @code{nil}, it should return @code{nil}. @end defvar +@defvar treesit-defun-type-regexp +This variable determines which nodes are considered defuns by Emacs. +It can be a regexp that matches the type of defun nodes. + +Sometimes not all nodes matched by the regexp are valid defuns. +Therefore, this variable can also be a cons cell of the form +@w{(@var{regexp} . @var{pred})}, where @var{pred} should be a function +that takes a node as its argument, and returns @code{t} if the node is +a valid defun, or @code{nil} if it is not valid. +@end defvar + @node Tree-sitter C API @section Tree-sitter C API Correspondence diff --git a/lisp/treesit.el b/lisp/treesit.el index f3e1afd943e..2130cd00616 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -1622,7 +1622,7 @@ For example, \"(function|class)_definition\". Sometimes not all nodes matched by the regexp are valid defuns. In that case, set this variable to a cons cell of the -form (REGEXP . FILTER), where FILTER is a function that takes a +form (REGEXP . PRED), where PRED is a function that takes a node (the matched node) and returns t if node is valid, or nil for invalid node. -- cgit v1.2.3 From 5326b041982287514522f7f7930ff243d8d5cc70 Mon Sep 17 00:00:00 2001 From: Yuan Fu Date: Tue, 27 Dec 2022 15:07:03 -0800 Subject: Improve treesit-node-top-level and treesit-parent-until * lisp/treesit.el (treesit-node-top-level): Now it can accept a predicate function. Add an optional argument INCLUDE-NODE. (treesit-parent-until): Add an optional argument INCLUDE-NODE. 
--- lisp/treesit.el | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'lisp/treesit.el') diff --git a/lisp/treesit.el b/lisp/treesit.el index 2130cd00616..675ecd85b08 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -234,19 +234,27 @@ is nil, try to guess the language at BEG using `treesit-language-at'." (or parser-or-lang (treesit-language-at beg)))))) (treesit-node-descendant-for-range root beg (or end beg) named))) -(defun treesit-node-top-level (node &optional type) +(defun treesit-node-top-level (node &optional pred include-node) "Return the top-level equivalent of NODE. + Specifically, return the highest parent of NODE that has the same type as it. If no such parent exists, return nil. -If TYPE is non-nil, match each parent's type with TYPE as a -regexp, rather than using NODE's type." - (let ((type (or type (treesit-node-type node))) +If PRED is non-nil, match each parent's type with PRED as a +regexp, rather than using NODE's type. PRED can also be a +function that takes the node as an argument, and return +non-nil/nil for match/no match. + +If INCLUDE-NODE is non-nil, return NODE if it satisfies PRED." + (let ((pred (or pred (treesit-node-type node))) (result nil)) - (cl-loop for cursor = (treesit-node-parent node) + (cl-loop for cursor = (if include-node node + (treesit-node-parent node)) then (treesit-node-parent cursor) while cursor - if (string-match-p type (treesit-node-type cursor)) + if (if (stringp pred) + (string-match-p pred (treesit-node-type cursor)) + (funcall pred cursor)) do (setq result cursor)) result)) @@ -290,11 +298,16 @@ properties." (treesit-node-start node) (treesit-node-end node)))))) -(defun treesit-parent-until (node pred) +(defun treesit-parent-until (node pred &optional include-node) "Return the closest parent of NODE that satisfies PRED. + Return nil if none was found. PRED should be a function that -takes one argument, the parent node." 
- (let ((node (treesit-node-parent node))) +takes one argument, the parent node, and return non-nil/nil for +match/no match. + +If INCLUDE-NODE is non-nil, return NODE if it satisfies PRED." + (let ((node (if include-node node + (treesit-node-parent node)))) (while (and node (not (funcall pred node))) (setq node (treesit-node-parent node))) node)) -- cgit v1.2.3 From 7512b9025a152ea953918e1c0748b695b742b4b6 Mon Sep 17 00:00:00 2001 From: Yuan Fu Date: Tue, 27 Dec 2022 15:08:07 -0800 Subject: ; * lisp/treesit.el (treesit-traverse-parent): Remove alias. It was added with treesit-traverse-xxx functions, since now they are gone, this alias doesn't make sense by itself anymore. --- lisp/treesit.el | 2 -- 1 file changed, 2 deletions(-) (limited to 'lisp/treesit.el') diff --git a/lisp/treesit.el b/lisp/treesit.el index 675ecd85b08..fd61cbb8600 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -322,8 +322,6 @@ takes one argument, the parent node." node (treesit-node-parent node))) last)) -(defalias 'treesit-traverse-parent #'treesit-parent-until) - (defun treesit-node-children (node &optional named) "Return a list of NODE's children. If NAMED is non-nil, collect named child only." -- cgit v1.2.3 From ba1ddea9dabf51c9c6e463d667bcce0b48294453 Mon Sep 17 00:00:00 2001 From: Yuan Fu Date: Tue, 27 Dec 2022 17:02:03 -0800 Subject: Fix treesit--things-around (bug#60355) Current implementation of treesit--things-around only searches forward for REGEXP and go up the tree until it finds a valid thing, if nothing matches it gives up. This makes it sometimes miss defuns. The new implementation tries multiple times (of search forward + go up) until it exhausts all possible defun nodes. * lisp/treesit.el (treesit--things-around): New implementation. (treesit--navigate-defun): Refactor to use treesit-node-top-level to simplify code, and add some guards in the predicate function. * test/src/treesit-tests.el: (treesit--ert-defun-navigation-elixir-program): New variable. 
(treesit-defun-navigation-nested-4): New test. --- lisp/treesit.el | 109 +++++++++++++++++++++------------------------- test/src/treesit-tests.el | 40 ++++++++++++++++- 2 files changed, 88 insertions(+), 61 deletions(-) (limited to 'lisp/treesit.el') diff --git a/lisp/treesit.el b/lisp/treesit.el index fd61cbb8600..f3fdcfb652c 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -1773,78 +1773,67 @@ sound things exists. REGEXP and PRED are the same as in `treesit-thing-at-point'." (let* ((node (treesit-node-at pos)) - ;; NODE-BEFORE/AFTER = NODE when POS is completely in NODE, - ;; but if not, that means point could be in between two - ;; defun, in that case we want to use a node that's actually - ;; before/after point. - (node-before (if (>= (treesit-node-start node) pos) - (save-excursion - (treesit-search-forward-goto node "" t t t)) - node)) - (node-after (if (<= (treesit-node-end node) pos) - (save-excursion - (treesit-search-forward-goto - node "" nil nil t)) - node)) - (result (list nil nil nil)) - (pred (or pred (lambda (_) t)))) + (result (list nil nil nil))) ;; 1. Find previous and next sibling defuns. (cl-loop for idx from 0 to 1 - for node in (list node-before node-after) for backward in '(t nil) + ;; Make sure we go in the right direction, and the defun we find + ;; doesn't cover POS. for pos-pred in (list (lambda (n) (<= (treesit-node-end n) pos)) (lambda (n) (>= (treesit-node-start n) pos))) - ;; If point is inside a defun, our process below will never - ;; return a next/prev sibling outside of that defun, effectively - ;; any prev/next sibling is locked inside the smallest defun - ;; covering point, which is the correct behavior. That's because - ;; when there exists a defun that covers point, - ;; `treesit-search-forward' will first reach that defun, after - ;; that we only go upwards in the tree, so other defuns outside - ;; of the covering defun is never reached. 
(Don't use - ;; `treesit-search-forward-goto' as it breaks when NODE-AFTER is - ;; the last token of a parent defun: it will skip the parent - ;; defun because it wants to ensure progress.) - do (cl-loop for cursor = (when node - (save-excursion - (treesit-search-forward - node regexp backward backward))) - then (treesit-node-parent cursor) - while cursor - if (and (string-match-p - regexp (treesit-node-type cursor)) - (funcall pred cursor) - (funcall pos-pred cursor)) - do (setf (nth idx result) cursor))) + ;; We repeatedly find next defun candidate with + ;; `treesit-search-forward', and check if it is a valid defun, + ;; until the node we find covers POS, meaning we've gone through + ;; every possible sibling defuns. But there is a catch: + ;; `treesit-search-forward' searches bottom-up, so for each + ;; candidate we need to go up the tree and find the top-most + ;; valid sibling, this defun will be at the same level as POS. + ;; Don't use `treesit-search-forward-goto', it skips nodes in + ;; order to enforce progress. + when node + do (let ((cursor node) + (iter-pred (lambda (node) + (and (string-match-p + regexp (treesit-node-type node)) + (or (null pred) (funcall pred node)) + (funcall pos-pred node))))) + ;; Find the node just before/after POS to start searching. + (save-excursion + (while (and cursor (not (funcall pos-pred cursor))) + (setq cursor (treesit-search-forward-goto + cursor "" backward backward t)))) + ;; Keep searching until we run out of candidates. + (while (and cursor + (funcall pos-pred cursor) + (null (nth idx result))) + (setf (nth idx result) + (treesit-node-top-level cursor iter-pred t)) + (setq cursor (treesit-search-forward + cursor regexp backward backward))))) ;; 2. Find the parent defun. 
- (setf (nth 2 result) - (cl-loop for cursor = (or (nth 0 result) - (nth 1 result) - node) - then (treesit-node-parent cursor) - while cursor - if (and (string-match-p - regexp (treesit-node-type cursor)) - (funcall pred cursor) - (not (member cursor result))) - return cursor)) + (let ((cursor (or (nth 0 result) (nth 1 result) node)) + (iter-pred (lambda (node) + (and (string-match-p + regexp (treesit-node-type node)) + (or (null pred) (funcall pred node)) + (not (treesit-node-eq node (nth 0 result))) + (not (treesit-node-eq node (nth 1 result))) + (< (treesit-node-start node) + pos + (treesit-node-end node)))))) + (setf (nth 2 result) + (treesit-parent-until cursor iter-pred))) result)) (defun treesit--top-level-thing (node regexp &optional pred) "Return the top-level parent thing of NODE. REGEXP and PRED are the same as in `treesit-thing-at-point'." - (let* ((pred (or pred (lambda (_) t)))) - ;; `treesit-search-forward-goto' will make sure the matched node - ;; is before POS. - (cl-loop for cursor = node - then (treesit-node-parent cursor) - while cursor - if (and (string-match-p - regexp (treesit-node-type cursor)) - (funcall pred cursor)) - do (setq node cursor)) - node)) + (treesit-node-top-level + node (lambda (node) + (and (string-match-p regexp (treesit-node-type node)) + (or (null pred) (funcall pred node)))) + t)) ;; The basic idea for nested defun navigation is that we first try to ;; move across sibling defuns in the same level, if no more siblings diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el index b0fbed4b06c..ec686c69642 100644 --- a/test/src/treesit-tests.el +++ b/test/src/treesit-tests.el @@ -940,7 +940,28 @@ and \"]\"." 
[999]} [110] " - "Javascript source for navigation test.") + "Bash source for navigation test.") + +(defvar treesit--ert-defun-navigation-elixir-program + "[100] +[101]def bar() do +[999]end +[102] +[103]defmodule Example do[0] +[999] @impl true +[104] [1]def bar() do[2] +[999] end[3] +[105] [4] +[106] [5]def baz() do[6] +[999] end[7] +[107] [8] +[999]end[9] +[108] +[109]def bar() do +[999]end +[110] +" + "Elixir source for navigation test.") (defvar treesit--ert-defun-navigation-nested-master ;; START PREV-BEG NEXT-END PREV-END NEXT-BEG @@ -1022,6 +1043,23 @@ the prev-beg, now point should be at marker 103\", etc.") treesit--ert-defun-navigation-bash-program treesit--ert-defun-navigation-nested-master))) +(ert-deftest treesit-defun-navigation-nested-4 () + "Test defun navigation using Elixir. +This tests bug#60355." + (skip-unless (treesit-language-available-p 'bash)) + ;; Nested defun navigation + (let ((treesit-defun-tactic 'nested) + (pred (lambda (node) + (member (treesit-node-text + (treesit-node-child-by-field-name node "target")) + '("def" "defmodule"))))) + (treesit--ert-test-defun-navigation + (lambda () + (treesit-parser-create 'elixir) + (setq-local treesit-defun-type-regexp `("call" . ,pred))) + treesit--ert-defun-navigation-elixir-program + treesit--ert-defun-navigation-nested-master))) + (ert-deftest treesit-defun-navigation-top-level () "Test top-level only defun navigation." (skip-unless (treesit-language-available-p 'python)) -- cgit v1.2.3 From b39dc7ab27a696a8607ab859aeff3c71509231f5 Mon Sep 17 00:00:00 2001 From: Yuan Fu Date: Tue, 27 Dec 2022 20:37:29 -0800 Subject: Add tree-sitter helper functions for Imenu We didn't add an integration for Imenu because we aren't sure what should it look like. Now we have a pretty good idea. All the major modes copy-paste the two Imenu functions and tweaks them in a standard way. 
With the addition of treesit-defun-type-regexp and treesit-defun-name-function, now is a good time to standardize Imenu integration. In the next commit we update all the major modes to use this integration. * doc/lispref/modes.texi (Imenu): Add manual. * doc/lispref/parsing.texi (Tree-sitter major modes): Update manual. * lisp/treesit.el (treesit-simple-imenu-settings): New variable. (treesit--simple-imenu-1) (treesit-simple-imenu): New functions. (treesit-major-mode-setup): Setup Imenu. --- doc/lispref/modes.texi | 29 +++++++++++++++ doc/lispref/parsing.texi | 5 +++ lisp/treesit.el | 96 +++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 129 insertions(+), 1 deletion(-) (limited to 'lisp/treesit.el') diff --git a/doc/lispref/modes.texi b/doc/lispref/modes.texi index 449529a4307..de17969566d 100644 --- a/doc/lispref/modes.texi +++ b/doc/lispref/modes.texi @@ -2841,6 +2841,35 @@ function uses @code{imenu-generic-expression} instead. Setting this variable makes it buffer-local in the current buffer. @end defvar +If built with tree-sitter, Emacs can automatically generate an Imenu +index if the major mode sets relevant variables. + +@defvar treesit-simple-imenu-settings +This variable instructs Emacs how to generate Imenu indexes. It +should be a list of @w{(@var{category} @var{regexp} @var{pred} +@var{name-fn})}. + +@var{category} should be the name of a category, like "Function", +"Class", etc. @var{regexp} should be a regexp matching the type of +nodes that belong to @var{category}. @var{pred} should be either +@code{nil} or a function that takes a node as the argument. It should +return non-@code{nil} if the node is a valid node for @var{category}, +or @code{nil} if not. + +@var{category} could also be @code{nil}. In that case the entries +matched by @var{regexp} and @var{pred} are not grouped under +@var{category}. 
+ +@var{name-fn} should be either @code{nil} or a function that takes a +defun node and returns the name of that defun, e.g., the function name +for a function definition. If @var{name-fn} is @code{nil}, +@code{treesit-defun-name} (@pxref{Tree-sitter major modes}) is used +instead. + +@code{treesit-major-mode-setup} (@pxref{Tree-sitter major modes}) +automatically sets up Imenu if this variable is non-@code{nil}. +@end defvar + @node Font Lock Mode @section Font Lock Mode @cindex Font Lock mode diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 63741b69c22..c5500b0b37e 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -1738,6 +1738,11 @@ navigation functions for @code{beginning-of-defun} and If @code{treesit-defun-name-function} is non-@code{nil}, it sets up add-log functions used by @code{add-log-current-defun}. @end itemize + +@item +If @code{treesit-simple-imenu-settings} (@pxref{Imenu}) is +non-@code{nil}, it sets up Imenu. +@end itemize @end defun For more information of these built-in tree-sitter features, diff --git a/lisp/treesit.el b/lisp/treesit.el index f3fdcfb652c..0aab0a12614 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -2009,6 +2009,91 @@ The delimiter between nested defun names is controlled by (setq node (treesit-node-parent node))) name)) +;;; Imenu + +(defvar treesit-simple-imenu-settings nil + "Settings that configure `treesit-simple-imenu'. + +It should be a list of (CATEGORY REGEXP PRED NAME-FN). + +CATEGORY is the name of a category, like \"Function\", \"Class\", +etc. REGEXP should be a regexp matching the type of nodes that +belong to CATEGORY. PRED should be either nil or a function +that takes a node as the argument. It should return non-nil if +the node is a valid node for CATEGORY, or nil if not. + +CATEGORY could also be nil. In that case the entries matched by +REGEXP and PRED are not grouped under CATEGORY. 
+ +NAME-FN should be either nil or a function that takes a defun +node and returns the name of that defun node. If NAME-FN is nil, +`treesit-defun-name' is used. + +`treesit-major-mode-setup' automatically sets up Imenu if this +variable is non-nil.") + +(defun treesit--simple-imenu-1 (node pred name-fn) + "Given a sparse tree, create an Imenu index. + +NODE is a node in the tree returned by +`treesit-induce-sparse-tree' (not a tree-sitter node, its car is +a tree-sitter node). Walk that tree and return an Imenu index. + +Return a list of ENTRYs where + +ENTRY := (NAME . MARKER) + | (NAME . ((\" \" . MARKER) + ENTRY + ...) + +PRED and NAME-FN are the same as described in +`treesit-simple-imenu-settings'. NAME-FN computes NAME in an +ENTRY. MARKER marks the start of each tree-sitter node." + (let* ((ts-node (car node)) + (children (cdr node)) + (subtrees (mapcan (lambda (node) + (treesit--simple-imenu-1 node pred name-fn)) + children)) + ;; The root of the tree could have a nil ts-node. + (name (when ts-node + (or (if name-fn + (funcall name-fn ts-node) + (treesit-defun-name ts-node)) + "Anonymous"))) + (marker (when ts-node + (set-marker (make-marker) + (treesit-node-start ts-node))))) + (cond + ;; The tree-sitter node in the root node of the tree returned by + ;; `treesit-induce-sparse-tree' is often nil. + ((null ts-node) + subtrees) + ;; This tree-sitter node is not a valid entry, skip it. + ((and pred (not (funcall pred ts-node))) + subtrees) + ;; Non-leaf node, return a (list of) subgroup. + (subtrees + `((,name + ,(cons " " marker) + ,@subtrees))) + ;; Leaf node, return a (list of) plain index entry. + (t (list (cons name marker)))))) + +(defun treesit-simple-imenu () + "Return an Imenu index for the current buffer." 
+ (let ((root (treesit-buffer-root-node))) + (mapcan (lambda (setting) + (pcase-let ((`(,category ,regexp ,pred ,name-fn) + setting)) + (when-let* ((tree (treesit-induce-sparse-tree + root regexp)) + (index (treesit--simple-imenu-1 + tree pred name-fn))) + (if category + (list (cons category index)) + index)))) + treesit-simple-imenu-settings))) + ;;; Activating tree-sitter (defun treesit-ready-p (language &optional quiet) @@ -2066,6 +2151,11 @@ If `treesit-simple-indent-rules' is non-nil, setup indentation. If `treesit-defun-type-regexp' is non-nil, setup `beginning/end-of-defun' functions. +If `treesit-defun-name-function' is non-nil, setup +`add-log-current-defun'. + +If `treesit-simple-imenu-settings' is non-nil, setup Imenu. + Make sure necessary parsers are created for the current buffer before calling this function." ;; Font-lock. @@ -2106,7 +2196,11 @@ before calling this function." ;; Defun name. (when treesit-defun-name-function (setq-local add-log-current-defun-function - #'treesit-add-log-current-defun))) + #'treesit-add-log-current-defun)) + ;; Imenu. + (when treesit-simple-imenu-settings + (setq-local imenu-create-index-function + #'treesit-simple-imenu))) ;;; Debugging -- cgit v1.2.3 From 8676bec51de7433bf54d66bc1dfd819eb4fadeb3 Mon Sep 17 00:00:00 2001 From: Stefan Kangas Date: Wed, 28 Dec 2022 17:37:46 +0100 Subject: ; * lisp/treesit.el (treesit--simple-imenu-1): Doc fix; wording. --- lisp/treesit.el | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lisp/treesit.el') diff --git a/lisp/treesit.el b/lisp/treesit.el index 0aab0a12614..4ee0fba79b7 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -2039,7 +2039,7 @@ NODE is a node in the tree returned by `treesit-induce-sparse-tree' (not a tree-sitter node, its car is a tree-sitter node). Walk that tree and return an Imenu index. -Return a list of ENTRYs where +Return a list of entries where each ENTRY has the form: ENTRY := (NAME . MARKER) | (NAME . ((\" \" . MARKER) -- cgit v1.2.3