diff options
author | Yuan Fu <casouri@gmail.com> | 2022-11-19 17:59:14 -0800 |
---|---|---|
committer | Yuan Fu <casouri@gmail.com> | 2022-11-19 18:36:08 -0800 |
commit | ac9bc819bc438f21bd38a995812d756727ed8e2a (patch) | |
tree | 94bc9c6242de9a72b7768512388714f08bc3d783 | |
parent | 7d7e9ef46a1b310fb3f481cdf8023082f5ec1618 (diff) | |
download | emacs-ac9bc819bc438f21bd38a995812d756727ed8e2a.tar.gz emacs-ac9bc819bc438f21bd38a995812d756727ed8e2a.tar.bz2 emacs-ac9bc819bc438f21bd38a995812d756727ed8e2a.zip |
Fix tree-sitter comment indentation for C-like languages
The goal is to indent like this:
/* comment
comment --> This line aligns with the beginning of the first line
*/ --> This line aligns with the opening comment token
* lisp/treesit.el (treesit-comment-start)
(treesit-comment-end): New variables.
(treesit-simple-indent-presets): New preset comment-end,
comment-start, comment-start-skip
* lisp/progmodes/c-ts-mode.el (c-ts-mode--indent-styles)
(c-ts-mode)
* lisp/progmodes/java-ts-mode.el (java-ts-mode--indent-rules)
(java-ts-mode)
* lisp/progmodes/js.el (js--treesit-indent-rules)
(js-ts-mode)
* lisp/progmodes/ts-mode.el (ts-mode--indent-rules)
(ts-mode): Add identical indent rules to each mode, and set identical
treesit-comment-start/end's.
* doc/lispref/modes.texi (Parser-based Indentation)
* doc/lispref/parsing.texi (Tree-sitter major modes): Update manual.
-rw-r--r-- | doc/lispref/modes.texi | 22 | ||||
-rw-r--r-- | doc/lispref/parsing.texi | 14 | ||||
-rw-r--r-- | lisp/progmodes/c-ts-mode.el | 6 | ||||
-rw-r--r-- | lisp/progmodes/java-ts-mode.el | 4 | ||||
-rw-r--r-- | lisp/progmodes/js.el | 5 | ||||
-rw-r--r-- | lisp/progmodes/ts-mode.el | 4 | ||||
-rw-r--r-- | lisp/treesit.el | 42 |
7 files changed, 93 insertions, 4 deletions
diff --git a/doc/lispref/modes.texi b/doc/lispref/modes.texi index 8b20bc0b758..5e5eb458974 100644 --- a/doc/lispref/modes.texi +++ b/doc/lispref/modes.texi @@ -4964,6 +4964,12 @@ first child where parent is @code{argument_list}, use (match nil "argument_list" nil nil 0 0) @end example +@item comment-end +This matcher is a function of 3 arguments: @var{node}, @var{parent}, +and @var{bol}, and returns non-@code{nil} if point is before a comment +ending token. Comment ending tokens are defined by regular expression +@code{treesit-comment-end}. + @item first-sibling This anchor is a function that is called with 3 arguments: @var{node}, @var{parent}, and @var{bol}, and returns the start of the first child @@ -4996,8 +5002,22 @@ charater on the previous line. This anchor is a function is called with 3 arguments: @var{node}, @var{parent}, and @var{bol}, and returns the beginning of the buffer. This is useful as the beginning of the buffer is always at column 0. -@end ftable +@item comment-start +This anchor is a function that is called with 3 arguments: @var{node}, +@var{parent}, and @var{bol}, and returns the position right after the +opening comment token. Opening comment tokens are defined by regular +expression @code{treesit-comment-start}. This function assumes +@var{parent} is the comment node. + +@item comment-start-skip +This anchor is a function that is called with 3 arguments: @var{node}, +@var{parent}, and @var{bol}, and returns the position after the +opening comment token, after skipping forward any whitespace +characters. Opening comment tokens are defined by regular expression +@code{treesit-comment-start}. This function assumes @var{parent} is +the comment node. 
+@end ftable @end defvar @heading Indentation utilities diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index feb92345ff4..6a23b0feb33 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -1731,6 +1731,20 @@ For more information of these built-in tree-sitter features, For supporting mixing of multiple languages in a major mode, @pxref{Multiple Languages}. +Setting the following local variables allows tree-sitter's indentation +engine to correctly indent multi-line comments: + +@defvar treesit-comment-start +This should be a regular expression matching an opening comment token. +For example, it should match @samp{//}, @samp{////}, @samp{/*}, +@samp{/****}, etc., in C. +@end defvar + +@defvar treesit-comment-end +This should be a regular expression matching a closing comment token. +For example, it should match @samp{*/}, @samp{****/}, etc., in C. +@end defvar + @node Tree-sitter C API @section Tree-sitter C API Correspondence diff --git a/lisp/progmodes/c-ts-mode.el b/lisp/progmodes/c-ts-mode.el index f8e809bb43f..43668a56d96 100644 --- a/lisp/progmodes/c-ts-mode.el +++ b/lisp/progmodes/c-ts-mode.el @@ -84,8 +84,8 @@ MODE is either `c' or `cpp'." ((node-is "else") parent-bol 0) ((node-is "case") parent-bol 0) ((node-is "preproc_arg") no-indent) - ((node-is "comment") no-indent) - ((parent-is "comment") no-indent) + ((and (parent-is "comment") comment-end) comment-start -1) + ((parent-is "comment") comment-start-skip 0) ((node-is "labeled_statement") parent-bol 0) ((parent-is "labeled_statement") parent-bol c-ts-mode-indent-offset) ((match "preproc_ifdef" "compound_statement") point-min 0) @@ -503,6 +503,8 @@ the subtrees." 
(setq-local comment-start "/* ") (setq-local comment-start-skip "\\(?://+\\|/\\*+\\)\\s *") (setq-local comment-end " */") + (setq-local treesit-comment-start (rx "/" (or (+ "/") (+ "*")))) + (setq-local treesit-comment-end (rx (+ (or "*")) "/")) (setq-local treesit-simple-indent-rules (c-ts-mode--set-indent-style 'c)) diff --git a/lisp/progmodes/java-ts-mode.el b/lisp/progmodes/java-ts-mode.el index ee757530279..d3cae1dcca1 100644 --- a/lisp/progmodes/java-ts-mode.el +++ b/lisp/progmodes/java-ts-mode.el @@ -61,6 +61,8 @@ ((node-is "}") (and parent parent-bol) 0) ((node-is ")") parent-bol 0) ((node-is "]") parent-bol 0) + ((and (parent-is "comment") comment-end) comment-start -1) + ((parent-is "comment") comment-start-skip 0) ((parent-is "class_body") parent-bol java-ts-mode-indent-offset) ((parent-is "interface_body") parent-bol java-ts-mode-indent-offset) ((parent-is "constructor_body") parent-bol java-ts-mode-indent-offset) @@ -284,6 +286,8 @@ the subtrees." (setq-local comment-start "// ") (setq-local comment-start-skip "\\(?://+\\|/\\*+\\)\\s *") (setq-local comment-end "") + (setq-local treesit-comment-start (rx "/" (or (+ "/") (+ "*")))) + (setq-local treesit-comment-end (rx (+ (or "*")) "/")) ;; Indent. (setq-local treesit-simple-indent-rules java-ts-mode--indent-rules) diff --git a/lisp/progmodes/js.el b/lisp/progmodes/js.el index c37cef977b2..159c32ca2ae 100644 --- a/lisp/progmodes/js.el +++ b/lisp/progmodes/js.el @@ -3412,6 +3412,9 @@ This function is intended for use in `after-change-functions'." 
((node-is ")") parent-bol 0) ((node-is "]") parent-bol 0) ((node-is ">") parent-bol 0) + ((parent-is "comment") comment-start 0) + ((and (parent-is "comment") comment-end) comment-start -1) + ((parent-is "comment") comment-start-skip 0) ((parent-is "ternary_expression") parent-bol js-indent-level) ((parent-is "member_expression") parent-bol js-indent-level) ((node-is ,switch-case) parent-bol 0) @@ -3807,6 +3810,8 @@ Currently there are `js-mode' and `js-ts-mode'." (setq-local comment-start-skip "\\(?://+\\|/\\*+\\)\\s *") (setq-local comment-end "") (setq-local comment-multi-line t) + (setq-local treesit-comment-start (rx "/" (or (+ "/") (+ "*")))) + (setq-local treesit-comment-end (rx (+ (or "*")) "/")) ;; Electric-indent. (setq-local electric-indent-chars (append "{}():;," electric-indent-chars)) ;FIXME: js2-mode adds "[]*". diff --git a/lisp/progmodes/ts-mode.el b/lisp/progmodes/ts-mode.el index c826302c7ac..a6f8e0a65ee 100644 --- a/lisp/progmodes/ts-mode.el +++ b/lisp/progmodes/ts-mode.el @@ -61,6 +61,8 @@ ((node-is ")") parent-bol 0) ((node-is "]") parent-bol 0) ((node-is ">") parent-bol 0) + ((and (parent-is "comment") comment-end) comment-start -1) + ((parent-is "comment") comment-start-skip 0) ((parent-is "ternary_expression") parent-bol ts-mode-indent-offset) ((parent-is "member_expression") parent-bol ts-mode-indent-offset) ((parent-is "named_imports") parent-bol ts-mode-indent-offset) @@ -273,6 +275,8 @@ (setq-local comment-start "// ") (setq-local comment-start-skip "\\(?://+\\|/\\*+\\)\\s *") (setq-local comment-end "") + (setq-local treesit-comment-start (rx "/" (or (+ "/") (+ "*")))) + (setq-local treesit-comment-end (rx (+ (or "*")) "/")) ;; Electric (setq-local electric-indent-chars diff --git a/lisp/treesit.el b/lisp/treesit.el index 2ee97715224..0c98d3167d9 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -851,6 +851,16 @@ parser notifying of the change." 
;;; Indent +;; `comment-start' and `comment-end' assume there is only one type of +;; comment and that a comment spans only one line. So they are not +;; sufficient for our purpose. + +(defvar-local treesit-comment-start nil + "Regular expression matching an opening comment token.") + +(defvar-local treesit-comment-end nil + "Regular expression matching a closing comment token.") + (define-error 'treesit-indent-error "Generic tree-sitter indentation error" 'treesit-error) @@ -936,6 +946,8 @@ See `treesit-simple-indent-presets'.") (lambda (node &rest _) (string-match-p name (or (treesit-node-field-name node) ""))))) + (cons 'comment-end (lambda (&rest _) + (looking-at-p treesit-comment-end))) ;; TODO: Document. (cons 'catch-all (lambda (&rest _) t)) @@ -957,6 +969,19 @@ See `treesit-simple-indent-presets'.") (treesit-node-child parent n named))))) (cons 'parent (lambda (_n parent &rest _) (treesit-node-start parent))) + (cons 'comment-start + (lambda (_n parent &rest _) + (save-excursion + (goto-char (treesit-node-start parent)) + (re-search-forward treesit-comment-start) + (point)))) + (cons 'comment-start-skip + (lambda (_n parent &rest _) + (save-excursion + (goto-char (treesit-node-start parent)) + (re-search-forward treesit-comment-start) + (skip-syntax-forward "-") + (point)))) ;; TODO: Document. (cons 'grand-parent (lambda (_n parent &rest _) @@ -1036,6 +1061,10 @@ no-node Queries PARENT with QUERY, and checks if NODE is captured (by any capture name). +comment-end + + Matches if text after point matches `treesit-comment-end'. + ANCHOR: first-sibling @@ -1065,7 +1094,18 @@ prev-line point-min - Returns the beginning of buffer, which is always at column 0.") + Returns the beginning of buffer, which is always at column 0. + +comment-start + + Returns the ending position after matching `treesit-comment-start'. + Assumes PARENT is a comment node. 
+ +comment-start-skip + + Goes to the position comment-start would return, skips + whitespace forward, and returns the resulting position. + Assumes PARENT is a comment node.") (defun treesit--simple-indent-eval (exp) "Evaluate EXP. |