From 7b2448ae6eaf4ae5f81f1a1b1b9f1b14735e90d6 Mon Sep 17 00:00:00 2001 From: Harald Jörg Date: Wed, 17 Feb 2021 00:54:38 +0100 Subject: cperl-mode: Improve detection of index entries for imenu * lisp/progmodes/cperl-mode.el (cperl-imenu-addback): Customization variable deleted. This variable has been declared obsolete in 1998. (cperl--basic-identifier-regexp) and many other variables: defining regular expressions for basic Perl constructs. (cperl-imenu--create-perl-index): This function has been completely rewritten, keeping only some parts of the output formatting. It now recognizes a lot more package and subroutine declarations which came since Perl 5.14: Packages with a version and/or a block attached, lexical subroutines, declarations with a newline between the keyword "package" and the package name, and several more. This version also correctly separates subroutine names from attributes, does no longer support "unnamed" packages (which don't exist in Perl), and doesn't fall for false positives like stuff that looks like a declaration in a multiline string. (cperl-tags-hier-init): Eliminate call to `cperl-imenu-addback` (which actually was commented out in 1997) * test/lisp/progmodes/cperl-mode-tests.el (cperl-test--validate-regexp) and six other new tests for the new regular expressions and the index creation. * test/lisp/progmodes/cperl-mode-resources/grammar.pl: New file showcasing different syntax variations for package and sub declarations (bug#46574). --- .../lisp/progmodes/cperl-mode-resources/grammar.pl | 158 +++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 test/lisp/progmodes/cperl-mode-resources/grammar.pl (limited to 'test/lisp/progmodes/cperl-mode-resources/grammar.pl') diff --git a/test/lisp/progmodes/cperl-mode-resources/grammar.pl b/test/lisp/progmodes/cperl-mode-resources/grammar.pl new file mode 100644 index 00000000000..c05fd7efc2a --- /dev/null +++ b/test/lisp/progmodes/cperl-mode-resources/grammar.pl @@ -0,0 +1,158 @@ +use 5.024; +use strict; +use warnings; + +sub outside { + say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}'"; +} + +package Package; + +=head1 NAME + +grammar - A Test resource for regular expressions + +=head1 SYNOPSIS + +A Perl file showing a variety of declarations + +=head1 DESCRIPTION + +This file offers several syntactical constructs for packages, +subroutines, and POD to test the imenu capabilities of CPerl mode. + +Perl offers syntactical variations for package and subroutine +declarations. Packages may, or may not, have a version and may, or +may not, have a block of code attached to them. Subroutines can have +old-style prototypes, attributes, and signatures which are still +experimental but widely accepted. + +Various Extensions and future Perl versions will probably add new +keywords for "class" and "method", both with syntactical extras of +their own. + +This test file tries to keep up with them. + +=head2 Details + +The code is supposed to identify and exclude false positives, +e.g. declarations in a string or in POD, as well as POD in a string. +These should not go into the imenu index. + +=cut + +our $VERSION = 3.1415; +say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}', version $VERSION"; + +sub in_package { + # Special test for POD: A line which looks like POD, but actually + # is part of a multiline string. In the case shown here, the + # semicolon is not part of the string, but POD headings go to the + # end of the line. The code needs to distinguish between a POD + # heading "This Is Not A Pod/;" and a multiline string. + my $not_a_pod = q/Another false positive: + +=head1 This Is Not A Pod/; + +} + +sub Shoved::elsewhere { + say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}', sub Shoved::elsewhere"; +} + +sub prototyped ($$) { + ...; +} + +package Versioned::Package 0.07; +say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}', version $VERSION"; + +sub versioned { + # This sub is in package Versioned::Package + say "sub 'versioned' in package '", __PACKAGE__, "'"; +} + +versioned(); + +my $false_positives = <<'EOH'; +The following declarations are not supposed to be recorded for imenu. +They are in a HERE-doc, which is a generic comment in CPerl mode. + +package Don::T::Report::This; +sub this_is_no_sub { + my $self = shuffle; +} + +And this is not a POD heading: + +=head1 Not a POD heading, just a string. + +EOH + +package Block { + our $VERSION = 2.7182; + say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}', version $VERSION"; + + sub attr:lvalue { + say "sub 'attr' in package '", __PACKAGE__, "'"; + } + + attr(); + + package Block::Inner { + # This hopefully doesn't happen too often. + say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}', version $VERSION"; + } + + # Now check that we're back to package "Block" + say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}', version $VERSION"; +} + +sub outer { + # This is in package Versioned::Package + say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}', version $VERSION"; +} + +outer(); + +package Versioned::Block 42 { + say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}', version $VERSION"; + + my sub lexical { + say "sub 'lexical' in package '", __PACKAGE__, "'"; + } + + lexical(); + + use experimental 'signatures'; + sub signatured :prototype($@) ($self,@rest) + { + ...; + } +} + +# After all is said and done, we're back in package Versioned::Package. +say "We're in package '", __PACKAGE__, "' now."; +say "Now try to call a subroutine which went out of scope:"; +eval { lexical() }; +say $@ if $@; + +# Now back to Package. This must not appear separately in the +# hierarchy list. +package Package; + +our sub in_package_again { + say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}', version $VERSION"; +} + + +package :: { + # This is just a weird, but legal, package name. + say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}', version $VERSION"; + + in_package_again(); # weird, but calls the sub from above +} + +Shoved::elsewhere(); + +1; -- cgit v1.2.3 From 3d49ad73e5a93625629c96b6c0b921bb019ea9da Mon Sep 17 00:00:00 2001 From: Harald Jörg Date: Tue, 14 Sep 2021 17:53:52 +0200 Subject: cperl-mode.el: Allow non-ASCII Perl identifiers Replace all "A-Z" regexp literals with unicode-aware rx constructs wherever Perl allows non-ASCII identifiers. * lisp/progmodes/cperl-mode.el (cperl-after-sub-regexp) (cperl-after-label. cperl-sniff-for-indent) (cperl-find-pods-heres, cperl-indent-exp) (cperl-fix-line-spacing, cperl-imenu--create-perl-index) (cperl-init-faces, cperl-find-tags): Replace ASCII regex literals by unicode-aware rx constructs. (cperl-init-faces): Eliminate unused lexical `font-lock-anchored'. (cperl-have-help-regexp, cperl-word-at-point-hard): Allow non-ASCII word characters. * test/lisp/progmodes/cperl-mode-tests.el (cperl-test-fontify-special-variables): New test for $^T and $^{VARNAME}. (cperl-test-ws-rx cperl-test-ws+-rx), (cperl-test-version-regexp, cperl-test-package-regexp): Skip for perl-mode. (cperl-test-identifier-rx, cperl--test-unicode-setup) (cperl-test-unicode-labels, cperl-test-unicode-sub) (cperl-test-unicode-varname) (cperl-test-unicode-varname-list, cperl-test-unicode-arrays) (cperl-test-unicode-hashes, cperl-test-unicode-hashref) (cperl-test-unicode-proto, cperl-test-unicode-fhs) (cperl-test-unicode-hashkeys, cperl-test-word-at-point): New tests for unicode identifiers. (cperl-test-imenu-index): Add a unicode identifier to the test. * test/lisp/progmodes/cperl-mode-resources/grammar.pl: Add a function with non-ASCII name for imenu tests. --- lisp/progmodes/cperl-mode.el | 330 ++++++++++++++------- .../lisp/progmodes/cperl-mode-resources/grammar.pl | 14 + test/lisp/progmodes/cperl-mode-tests.el | 301 ++++++++++++++++++- 3 files changed, 545 insertions(+), 100 deletions(-) (limited to 'test/lisp/progmodes/cperl-mode-resources/grammar.pl') diff --git a/lisp/progmodes/cperl-mode.el b/lisp/progmodes/cperl-mode.el index 76c82f8c73e..1147889969b 100644 --- a/lisp/progmodes/cperl-mode.el +++ b/lisp/progmodes/cperl-mode.el @@ -1407,7 +1407,7 @@ the last)." (concat ; Assume n groups before this... "\\(" ; n+1=name-group cperl-white-and-comment-rex ; n+2=pre-name - "\\(::[a-zA-Z_0-9:']+\\|[a-zA-Z_'][a-zA-Z_0-9:']*\\)" ; n+3=name + (rx-to-string `(group ,cperl--normal-identifier-rx)) "\\)" ; END n+1=name-group (if named "" "?") "\\(" ; n+4=proto-group @@ -2573,7 +2573,8 @@ Return the amount the indentation changed by." '(?w ?_)) (progn (backward-sexp) - (looking-at "[a-zA-Z_][a-zA-Z0-9_]*:[^:]")))) + (looking-at (rx (sequence (eval cperl--label-rx) + (not (in ":")))))))) (defun cperl-get-state (&optional parse-start start-state) "Return list (START STATE DEPTH PRESTART), @@ -2740,7 +2741,9 @@ Will not look before LIM." (progn (forward-sexp -1) (skip-chars-backward " \t") - (looking-at "[ \t]*[a-zA-Z_][a-zA-Z_0-9]*[ \t]*:"))) + (looking-at + (rx (sequence (0+ blank) + (eval cperl--label-rx)))))) (get-text-property (point) 'first-format-line))) ;; Look at previous line that's at column 0 @@ -3836,7 +3839,8 @@ recursive calls in starting lines of here-documents." "\\<" cperl-sub-regexp "\\>" ; sub with proto/attr "\\(" cperl-white-and-comment-rex - "\\(::[a-zA-Z_:'0-9]*\\|[a-zA-Z_'][a-zA-Z_:'0-9]*\\)\\)?" ; name + (rx (group (eval cperl--normal-identifier-rx))) + "\\)" "\\(" cperl-maybe-white-and-comment-rex "\\(([^()]*)\\|:[^:]\\)\\)" ; prototype or attribute start @@ -4111,10 +4115,12 @@ recursive calls in starting lines of here-documents." (t t)))) ;; or <$file> (and (eq c ?\<) - ;; Do not stringify , <$fh> : + ;; Stringify what looks like a glob, but + ;; do not stringify file handles , <$fh> : (save-match-data (looking-at - "\\$?\\([_a-zA-Z:][_a-zA-Z0-9:]*\\)?>")))) + (rx (sequence (opt "$") + (eval cperl--normal-identifier-rx))))))) tb (match-beginning 0)) (goto-char (match-beginning b1)) (cperl-backward-to-noncomment (point-min)) @@ -4184,7 +4190,16 @@ recursive calls in starting lines of here-documents." (error nil))) (if (or bb (looking-at ; $foo -> {s} - "[$@]\\$*\\([a-zA-Z0-9_:]+\\|[^{]\\)\\([ \t\n]*->\\)?[ \t\n]*{") + (rx + (sequence + (in "$@") (0+ "$") + (or + (eval cperl--normal-identifier-rx) + (not (in "{"))) + (opt (sequence (eval cperl--ws*-rx)) + "->") + (eval cperl--ws*-rx) + "{"))) (and ; $foo[12] -> {s} (memq (following-char) '(?\{ ?\[)) (progn @@ -4199,7 +4214,12 @@ recursive calls in starting lines of here-documents." (setq bb t)) ((and (eq (following-char) ?:) (eq b1 ?\{) ; Check for $ { s::bar } - (looking-at "::[a-zA-Z0-9_:]*[ \t\n\f]*}") + ;; (looking-at "::[a-zA-Z0-9_:]*[ \t\n\f]*}") + (looking-at + (rx (sequence "::" + (eval cperl--normal-identifier-rx) + (eval cperl--ws*-rx) + "}"))) (progn (goto-char (1- go)) (skip-chars-backward " \t\n\f") @@ -4364,7 +4384,7 @@ recursive calls in starting lines of here-documents." "\\(" ;; XXXX 1-char variables, exc. |()\s "[$@]" "\\(" - "[_a-zA-Z:][_a-zA-Z0-9:]*" + (rx (eval cperl--normal-identifier-rx)) "\\|" "{[^{}]*}" ; only one-level allowed "\\|" @@ -4820,6 +4840,7 @@ recursive calls in starting lines of here-documents." (progn (backward-sexp) ;; sub {BLK}, print {BLK} $data, but NOT `bless', `return', `tr', `constant' + ;; a-zA-Z is fine here, these are Perl keywords (or (and (looking-at "[a-zA-Z0-9_:]+[ \t\n\f]*[{#]") ; Method call syntax (not (looking-at "\\(bless\\|return\\|q[wqrx]?\\|tr\\|[smy]\\|constant\\)\\>"))) ;; sub bless::foo {} @@ -5028,7 +5049,11 @@ conditional/loop constructs." cperl-maybe-white-and-comment-rex "\\(state\\|my\\|local\\|our\\)\\)?" cperl-maybe-white-and-comment-rex - "\\$[_a-zA-Z0-9]+\\)?\\)\\>")) + (rx + (sequence + "$" + (eval cperl--basic-identifier-rx))) + "\\)?\\)\\>")) (progn (goto-char top) (forward-sexp 1) @@ -5122,7 +5147,14 @@ Returns some position at the last line." ;; Looking at: ;; foreach my $var ( (if (looking-at - "[ \t]*\\\\([ \t]*(\\|[ \t\n]*{\\)\\|[ \t]*{") + (rx (sequence + (0+ blank) + (opt (sequence "}" (0+ blank) )) + symbol-start + (or "else" "elsif" "continue" "if" "unless" "while" "until" + (sequence (or "for" "foreach") + (opt + (opt (sequence (1+ blank) + (or "state" "my" "local" "our"))) + (0+ blank) + "$" (eval cperl--basic-identifier-rx)))) + symbol-end + (group-n 1 + (or + (or (sequence (0+ blank) "(") + (sequence (eval cperl--ws*-rx) "{")) + (sequence (0+ blank) "{")))))) (progn - (setq ml (match-beginning 8)) ; "(" or "{" after control word + (setq ml (match-beginning 1)) ; "(" or "{" after control word (re-search-forward "[({]") (forward-char -1) (setq p (point)) @@ -5544,7 +5592,11 @@ comment, or POD." (setq lst index-sub-alist) (while lst (setq elt (car lst) lst (cdr lst)) - (cond ((string-match "\\(::\\|'\\)[_a-zA-Z0-9]+$" (car elt)) + (cond ((string-match + (rx (sequence (or "::" "'") + (eval cperl--basic-identifier-rx) + string-end)) + (car elt)) (setq pack (substring (car elt) 0 (match-beginning 0))) (if (setq group (assoc pack hier-list)) (if (listp (cdr group)) @@ -5646,8 +5698,7 @@ default function." (defun cperl-init-faces () (condition-case errs (progn - (let (t-font-lock-keywords t-font-lock-keywords-1 font-lock-anchored) - (setq font-lock-anchored t) + (let (t-font-lock-keywords t-font-lock-keywords-1) (setq t-font-lock-keywords (list @@ -5760,20 +5811,41 @@ default function." (if (eq (char-after (cperl-1- (match-end 0))) ?\{ ) 'font-lock-function-name-face 'font-lock-variable-name-face)))) - '("\\<\\(package\\|require\\|use\\|import\\|no\\|bootstrap\\)[ \t]+\\([a-zA-Z_][a-zA-Z_0-9:]*\\)[ \t;]" ; require A if B; - 2 font-lock-function-name-face) + `(,(rx (sequence symbol-start + (or "package" "require" "use" "import" + "no" "bootstrap") + (eval cperl--ws+-rx) + (group-n 1 (eval cperl--normal-identifier-rx)) + (any " \t;"))) ; require A if B; + 1 font-lock-function-name-face) '("^[ \t]*format[ \t]+\\([a-zA-Z_][a-zA-Z_0-9:]*\\)[ \t]*=[ \t]*$" 1 font-lock-function-name-face) - (cond (font-lock-anchored - '("\\([]}\\%@>*&]\\|\\$[a-zA-Z0-9_:]*\\)[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}" - (2 font-lock-string-face t) - ("\\=[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}" - nil nil - (1 font-lock-string-face t)))) - (t '("\\([]}\\%@>*&]\\|\\$[a-zA-Z0-9_:]*\\)[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}" - 2 font-lock-string-face t))) - '("[[ \t{,(]\\(-?[a-zA-Z0-9_:]+\\)[ \t]*=>" 1 - font-lock-string-face t) + ;; bareword hash key: $foo{bar} + `(,(rx (or (in "]}\\%@>*&") ; What Perl is this? + (sequence "$" (eval cperl--normal-identifier-rx))) + (0+ blank) "{" (0+ blank) + (group-n 1 (sequence (opt "-") + (eval cperl--basic-identifier-rx))) + (0+ blank) "}") +;; '("\\([]}\\%@>*&]\\|\\$[a-zA-Z0-9_:]*\\)[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}" + (1 font-lock-string-face t) + ;; anchored bareword hash key: $foo{bar}{baz} + (,(rx point + (0+ blank) "{" (0+ blank) + (group-n 1 (sequence (opt "-") + (eval cperl--basic-identifier-rx))) + (0+ blank) "}") + ;; ("\\=[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}" + nil nil + (1 font-lock-string-face t))) + ;; hash element assignments with bareword key => value + `(,(rx (in "[ \t{,()") + (group-n 1 (sequence (opt "-") + (eval cperl--basic-identifier-rx))) + (0+ blank) "=>") + 1 font-lock-string-face t) +;; '("[[ \t{,(]\\(-?[a-zA-Z0-9_:]+\\)[ \t]*=>" 1 +;; font-lock-string-face t) ;; labels `(,(rx (sequence @@ -5797,83 +5869,130 @@ default function." ;;; '("[$*]{?\\(\\sw+\\)" 1 font-lock-variable-name-face) ;;; '("\\([@%]\\|\\$#\\)\\(\\sw+\\)" ;;; (2 (cons font-lock-variable-name-face '(underline)))) - (cond (font-lock-anchored ;; 1=my_etc, 2=white? 3=(+white? 4=white? 5=var - `(,(concat "\\<\\(state\\|my\\|local\\|our\\)" - cperl-maybe-white-and-comment-rex - "\\((" - cperl-maybe-white-and-comment-rex - "\\)?\\([$@%*]\\([a-zA-Z0-9_:]+\\|[^a-zA-Z0-9_]\\)\\)") - (5 ,(if cperl-font-lock-multiline - 'font-lock-variable-name-face - '(progn (setq cperl-font-lock-multiline-start - (match-beginning 0)) - 'font-lock-variable-name-face))) - (,(concat "\\=" - cperl-maybe-white-and-comment-rex - "," - cperl-maybe-white-and-comment-rex - "\\([$@%*]\\([a-zA-Z0-9_:]+\\|[^a-zA-Z0-9_]\\)\\)") - ;; Bug in font-lock: limit is used not only to limit - ;; searches, but to set the "extend window for - ;; facification" property. Thus we need to minimize. - ,(if cperl-font-lock-multiline - '(if (match-beginning 3) - (save-excursion - (goto-char (match-beginning 3)) - (condition-case nil - (forward-sexp 1) - (error - (condition-case nil - (forward-char 200) - (error nil)))) ; typeahead - (1- (point))) ; report limit - (forward-char -2)) ; disable continued expr - '(if (match-beginning 3) - (point-max) ; No limit for continuation - (forward-char -2))) ; disable continued expr - ,(if cperl-font-lock-multiline - nil - '(progn ; Do at end - ;; "my" may be already fontified (POD), - ;; so cperl-font-lock-multiline-start is nil - (if (or (not cperl-font-lock-multiline-start) - (> 2 (count-lines - cperl-font-lock-multiline-start - (point)))) - nil - (put-text-property - (1+ cperl-font-lock-multiline-start) (point) - 'syntax-type 'multiline)) - (setq cperl-font-lock-multiline-start nil))) - (3 font-lock-variable-name-face)))) - (t '("^[ \t{}]*\\(state\\|my\\|local\\|our\\)[ \t]*\\(([ \t]*\\)?\\([$@%*][a-zA-Z0-9_:]+\\)" - 3 font-lock-variable-name-face))) - '("\\ 2 (count-lines + cperl-font-lock-multiline-start + (point)))) + nil + (put-text-property + (1+ cperl-font-lock-multiline-start) (point) + 'syntax-type 'multiline)) + (setq cperl-font-lock-multiline-start nil))) + (1 font-lock-variable-name-face))) + ;; foreach my $foo ( + `(,(rx symbol-start "for" (opt "each") + (opt (sequence (1+ blank) + (or "state" "my" "local" "our"))) + (0+ blank) + (group-n 1 (sequence "$" + (eval cperl--basic-identifier-rx))) + (0+ blank) "(") +;; '("\\") (setq search @@ -6472,6 +6593,9 @@ Will not move the position at the start to the left." "Run etags with appropriate options for Perl files. If optional argument ALL is `recursive', will process Perl files in subdirectories too." + ;; Apparently etags doesn't support UTF-8 encoded sources, and usage + ;; of etags has been commented out in the menu since ... well, + ;; forever. So, let's just stick to ASCII here. -- haj, 2021-09-14 (interactive) (let ((cmd "etags") (args `("-l" "none" "-r" @@ -6611,6 +6735,9 @@ Does not move point." ;; Search for the function (progn ;;save-match-data (while (re-search-forward + ;; FIXME: Should XS code be unicode aware? Recent C + ;; compilers (Gcc 10+) are, but I guess this isn't used + ;; much. -- haj, 2021-09-14 "^\\([ \t]*MODULE\\>[^\n]*\\\\|\\([a-zA-Z_][a-zA-Z_0-9]*\\)(\\|[ \t]*BOOT:\\)" nil t) (cond @@ -6673,7 +6800,7 @@ Does not move point." (setq lst (mapcar (lambda (elt) - (cond ((string-match "^[_a-zA-Z]" (car elt)) + (cond ((string-match (rx line-start (or alpha "_")) (car elt)) (goto-char (cdr elt)) (beginning-of-line) ; pos should be of the start of the line (list (car elt) @@ -6703,9 +6830,14 @@ Does not move point." "," (number-to-string (1- (elt elt 1))) ; Char pos 0-based "\n") - (if (and (string-match "^[_a-zA-Z]+::" (car elt)) - (string-match (concat "^" cperl-sub-regexp "[ \t]+\\([_a-zA-Z]+\\)[^:_a-zA-Z]") - (elt elt 3))) + (if (and (string-match (rx line-start + (eval cperl--basic-identifier-rx) "++") + (car elt)) + (string-match (rx-to-string `(sequence line-start + (regexp ,cperl-sub-regexp) + (1+ (in " \t")) + ,cperl--normal-identifier-rx)) + (elt elt 3))) ;; Need to insert the name without package as well (setq lst (cons (cons (substring (elt elt 3) (match-beginning 1) @@ -7155,14 +7287,14 @@ Currently it is tuned to C and Perl syntax." ;;(concat "\\(" (mapconcat #'identity - '("[$@%*&][0-9a-zA-Z_:]+\\([ \t]*[[{]\\)?" ; Usual variable + '("[$@%*&][[:alnum:]_:]+\\([ \t]*[[{]\\)?" ; Usual variable "[$@]\\^[a-zA-Z]" ; Special variable "[$@][^ \n\t]" ; Special variable "-[a-zA-Z]" ; File test "\\\\[a-zA-Z0]" ; Special chars "^=[a-z][a-zA-Z0-9_]*" ; POD sections "[-!&*+,./<=>?\\^|~]+" ; Operator - "[a-zA-Z_0-9:]+" ; symbol or number + "[[:alnum:]_:]+" ; symbol or number "x=" "#!") ;;"\\)\\|\\(" @@ -7178,7 +7310,7 @@ Currently it is tuned to C and Perl syntax." ;; Does not save-excursion ;; Get to the something meaningful (or (eobp) (eolp) (forward-char 1)) - (re-search-backward "[-a-zA-Z0-9_:!&*+,./<=>?\\^|~$%@]" + (re-search-backward "[-[:alnum:]_:!&*+,./<=>?\\^|~$%@]" (point-at-bol) 'to-beg) ;; (cond @@ -7187,8 +7319,8 @@ Currently it is tuned to C and Perl syntax." ;; (or (bobp) (backward-char 1)))) ;; Try to backtrace (cond - ((looking-at "[a-zA-Z0-9_:]") ; symbol - (skip-chars-backward "a-zA-Z0-9_:") + ((looking-at "[[:alnum:]_:]") ; symbol + (skip-chars-backward "[:alnum:]_:") (cond ((and (eq (preceding-char) ?^) ; $^I (eq (char-after (- (point) 2)) ?\$)) @@ -7199,7 +7331,7 @@ Currently it is tuned to C and Perl syntax." (eq (current-column) 1)) (forward-char -1))) ; =head1 (if (and (eq (preceding-char) ?\<) - (looking-at "\\$?[a-zA-Z0-9_:]+>")) ; + (looking-at "\\$?[[:alnum:]_:]+>")) ; (forward-char -1))) ((and (looking-at "=") (eq (preceding-char) ?x)) ; x= (forward-char -1)) @@ -7212,15 +7344,15 @@ Currently it is tuned to C and Perl syntax." (not (eq (char-after (- (point) 2)) ?\$))) ; $- (forward-char -1)) ((and (eq (following-char) ?\>) - (string-match "[a-zA-Z0-9_]" (char-to-string (preceding-char))) + (string-match "[[:alnum:]_]" (char-to-string (preceding-char))) (save-excursion (forward-sexp -1) (and (eq (preceding-char) ?\<) - (looking-at "\\$?[a-zA-Z0-9_:]+>")))) ; + (looking-at "\\$?[[:alnum:]_:]+>")))) ; (search-backward "<")))) ((and (eq (following-char) ?\$) (eq (preceding-char) ?\<) - (looking-at "\\$?[a-zA-Z0-9_:]+>")) ; <$fh> + (looking-at "\\$?[[:alnum:]_:]+>")) ; <$fh> (forward-char -1))) (if (looking-at cperl-have-help-regexp) (buffer-substring (match-beginning 0) (match-end 0)))) diff --git a/test/lisp/progmodes/cperl-mode-resources/grammar.pl b/test/lisp/progmodes/cperl-mode-resources/grammar.pl index c05fd7efc2a..96a86993082 100644 --- a/test/lisp/progmodes/cperl-mode-resources/grammar.pl +++ b/test/lisp/progmodes/cperl-mode-resources/grammar.pl @@ -1,6 +1,7 @@ use 5.024; use strict; use warnings; +use utf8; sub outside { say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}'"; @@ -155,4 +156,17 @@ package :: { Shoved::elsewhere(); +# Finally, try unicode identifiers. +package Erdős::Number; + +sub erdős_number { + my $name = shift; + if ($name eq "Erdős Pál") { + return 0; + } + else { + die "No access to the database. Sorry."; + } +} + 1; diff --git a/test/lisp/progmodes/cperl-mode-tests.el b/test/lisp/progmodes/cperl-mode-tests.el index 54012c3918e..29b9e3f6fb9 100644 --- a/test/lisp/progmodes/cperl-mode-tests.el +++ b/test/lisp/progmodes/cperl-mode-tests.el @@ -154,6 +154,22 @@ point in the distant past, and is still broken in perl-mode. " (should (equal (get-text-property (match-beginning 0) 'face) 'font-lock-keyword-face)))) +(ert-deftest cperl-test-fontify-special-variables () + "Test fontification of variables like $^T or ${^ENCODING}. +These can occur as \"local\" aliases." + (skip-unless (eq cperl-test-mode #'cperl-mode)) + (with-temp-buffer + (insert "local ($^I, ${^UNICODE});\n") + (goto-char (point-min)) + (funcall cperl-test-mode) + (font-lock-ensure) + (search-forward "$") + (should (equal (get-text-property (point) 'face) + 'font-lock-variable-name-face)) + (search-forward "$") + (should (equal (get-text-property (point) 'face) + 'font-lock-variable-name-face)))) + (ert-deftest cperl-test-identify-heredoc () "Test whether a construct containing \"<<\" followed by a bareword is properly identified for a here-document if @@ -297,6 +313,7 @@ the whole string." (ert-deftest cperl-test-ws-rx () "Tests capture of very simple regular expressions (yawn)." + (skip-unless (eq cperl-test-mode #'cperl-mode)) (let ((valid '(" " "\t" "\n")) (invalid @@ -306,6 +323,7 @@ the whole string." (ert-deftest cperl-test-ws+-rx () "Tests sequences of whitespace and comment lines." + (skip-unless (eq cperl-test-mode #'cperl-mode)) (let ((valid `(" " "\t#\n" "\n# \n" ,(concat "# comment\n" "# comment\n" "\n" "#comment\n"))) @@ -316,6 +334,7 @@ the whole string." (ert-deftest cperl-test-version-regexp () "Tests the regexp for recommended syntax of versions in Perl." + (skip-unless (eq cperl-test-mode #'cperl-mode)) (let ((valid '("1" "1.1" "1.1_1" "5.032001" "v120.100.103")) @@ -331,6 +350,7 @@ the whole string." (ert-deftest cperl-test-package-regexp () "Tests the regular expression of Perl package names with versions. Also includes valid cases with whitespace in strange places." + (skip-unless (eq cperl-test-mode #'cperl-mode)) (let ((valid '("package Foo" "package Foo::Bar" @@ -346,6 +366,284 @@ Also includes valid cases with whitespace in strange places." (cperl-test--validate-regexp (rx (eval cperl--package-rx)) valid invalid))) +(ert-deftest cperl-test-identifier-rx () + "Test valid and invalid identifiers (no sigils)." + (skip-unless (eq cperl-test-mode #'cperl-mode)) + (let ((valid + '("foo" "FOO" "f_oo" "a123" + "manĝis")) ; Unicode is allowed! + (invalid + '("$foo" ; no sigils allowed (yet) + "Foo::bar" ; no package qualifiers allowed + "lots_of_€"))) ; € is not alphabetic + (cperl-test--validate-regexp (rx (eval cperl--basic-identifier-rx)) + valid invalid))) + +;;; Test unicode identifier in various places + +(defun cperl--test-unicode-setup (code string) + "Insert CODE, prepare it for tests, and find STRING. +Invoke the appropriate major mode, ensure fontification, and set +point after the first occurrence of STRING (no regexp!)." + (insert code) + (funcall cperl-test-mode) + (font-lock-ensure) + (goto-char (point-min)) + (search-forward string)) + +(ert-deftest cperl-test-unicode-labels () + "Verify that non-ASCII labels are processed correctly." + (with-temp-buffer + (cperl--test-unicode-setup "LABEł: for ($manĝi) { say; }" "LAB") + (should (equal (get-text-property (point) 'face) + 'font-lock-constant-face)))) + +(ert-deftest cperl-test-unicode-sub () + (with-temp-buffer + (cperl--test-unicode-setup + (concat "use strict;\n" ; distinguish bob from b-o-f + "sub ℏ {\n" + " 6.62607015e-34\n" + "};") + "sub ") ; point is before "ℏ" + + ;; Testing fontification + ;; FIXME 2021-09-10: This tests succeeds because cperl-mode + ;; accepts almost anything as a sub name for fontification. For + ;; example, it fontifies "sub @ {...;}" which is a syntax error in + ;; Perl. I let this pass for the moment. + (should (equal (get-text-property (point) 'face) + 'font-lock-function-name-face)) + + ;; Testing `beginning-of-defun'. Not available in perl-mode, + ;; where it jumps to the beginning of the buffer. + (when (eq cperl-test-mode #'cperl-mode) + (goto-char (point-min)) + (search-forward "-34") + (beginning-of-defun) + (should (looking-at "sub"))))) + +(ert-deftest cperl-test-unicode-varname () + (with-temp-buffer + (cperl--test-unicode-setup + (concat "use strict;\n" + "my $π = 3.1415926535897932384626433832795028841971;\n" + "\n" + "my $manĝi = $π;\n" + "__END__\n") + "my $") ; perl-mode doesn't fontify the sigil, so include it here + + ;; Testing fontification + ;; FIXME 2021-09-10: This test succeeds in cperl-mode because the + ;; π character is "not ASCII alphabetic", so it treats $π as a + ;; punctuation variable. The following two `should' forms with a + ;; longer variable name were added for stronger verification. + (should (equal (get-text-property (point) 'face) + 'font-lock-variable-name-face)) + ;; Test both ends of a longer variable name + (search-forward "my $") ; again skip the sigil + (should (equal (get-text-property (point) 'face) + 'font-lock-variable-name-face)) + (search-forward "manĝi") + (should (equal (get-text-property (1- (match-end 0)) 'face) + 'font-lock-variable-name-face)))) + +(ert-deftest cperl-test-unicode-varname-list () + "Verify that all elements of a variable list are fontified." + + (let ((hash-face (if (eq cperl-test-mode #'perl-mode) + 'perl-non-scalar-variable + 'cperl-hash-face)) + (array-face (if (eq cperl-test-mode #'perl-mode) + 'perl-non-scalar-variable + 'cperl-array-face))) + (with-temp-buffer + (cperl--test-unicode-setup + "my (%äsh,@ärräy,$scâlâr);" "%") + (should (equal (get-text-property (point) 'face) + hash-face)) + (search-forward "@") + (should (equal (get-text-property (point) 'face) + array-face)) + (search-forward "scâlâr") + (should (equal (get-text-property (match-beginning 0) 'face) + 'font-lock-variable-name-face)) + (should (equal (get-text-property (1- (match-end 0)) 'face) + 'font-lock-variable-name-face))) + + ;; Now with package-qualified variables + (with-temp-buffer + (cperl--test-unicode-setup + "local (%Søme::äsh,@Søme::ärräy,$Søme::scâlâr);" "%") + (should (equal (get-text-property (point) 'face) + hash-face)) + (search-forward "Søme::") ; test basic identifier + (should (equal (get-text-property (point) 'face) + hash-face)) + (search-forward "@") ; test package name + (should (equal (get-text-property (point) 'face) + array-face)) + (search-forward "Søme::") ; test basic identifier + (should (equal (get-text-property (point) 'face) + array-face)) + (search-forward "Søme") ; test package name + (should (equal (get-text-property (match-beginning 0) 'face) + 'font-lock-variable-name-face)) + (should (equal (get-text-property (1- (match-end 0)) 'face) + 'font-lock-variable-name-face)) + (search-forward "scâlâr") ; test basic identifier + (should (equal (get-text-property (match-beginning 0) 'face) + 'font-lock-variable-name-face)) + (should (equal (get-text-property (1- (match-end 0)) 'face) + 'font-lock-variable-name-face))))) + +(ert-deftest cperl-test-unicode-arrays () + "Test fontification of array access." + ;; Perl mode just looks at the sigil, for element access + (skip-unless (eq cperl-test-mode #'cperl-mode)) + ;; simple array element + (with-temp-buffer + (cperl--test-unicode-setup + "$ärräy[1] = 7;" "$") + (should (equal (get-text-property (point) 'face) + 'cperl-array-face))) + ;; array slice + (with-temp-buffer + (cperl--test-unicode-setup + "@ärräy[(1..3)] = (4..6);" "@") + (should (equal (get-text-property (point) 'face) + 'cperl-array-face))) + ;; array max index + (with-temp-buffer + (cperl--test-unicode-setup + "$#ärräy = 1;" "$") + (should (equal (get-text-property (point) 'face) + 'cperl-array-face))) + ;; array dereference + (with-temp-buffer + (cperl--test-unicode-setup + "@$ärräy = (1,2,3)" "@") + (should (equal (get-text-property (1- (point)) 'face) + 'cperl-array-face)) + (should (equal (get-text-property (1+ (point)) 'face) + 'font-lock-variable-name-face)))) + +(ert-deftest cperl-test-unicode-hashes () + "Test fontification of hash access." + ;; Perl mode just looks at the sigil, for element access + (skip-unless (eq cperl-test-mode #'cperl-mode)) + ;; simple hash element + (with-temp-buffer + (cperl--test-unicode-setup + "$häsh{'a'} = 7;" "$") + (should (equal (get-text-property (point) 'face) + 'cperl-hash-face))) + ;; hash array slice + (with-temp-buffer + (cperl--test-unicode-setup + "@häsh{(1..3)} = (4..6);" "@") + (should (equal (get-text-property (point) 'face) + 'cperl-hash-face))) + ;; hash subset + (with-temp-buffer + (cperl--test-unicode-setup + "my %hash = %häsh{'a',2,3};" "= %") + (should (equal (get-text-property (point) 'face) + 'cperl-hash-face))) + ;; hash dereference + (with-temp-buffer + (cperl--test-unicode-setup + "%$äsh = (key => 'value');" "%") + (should (equal (get-text-property (1- (point)) 'face) + 'cperl-hash-face)) + (should (equal (get-text-property (1+ (point)) 'face) + 'font-lock-variable-name-face)))) + +(ert-deftest cperl-test-unicode-hashref () + "Verify that a hashref access disambiguates {s}. +CPerl mode takes the token \"s\" as a substitution unless +detected otherwise. Not for perl-mode: it doesn't stringify +bareword hash keys and doesn't recognize a substitution +\"s}foo}bar}\"" + (skip-unless (eq cperl-test-mode #'cperl-mode)) + (with-temp-buffer + (cperl--test-unicode-setup "$häshref->{s} # }}" "{") + (should (equal (get-text-property (point) 'face) + 'font-lock-string-face)) + (should (equal (get-text-property (1+ (point)) 'face) + nil)))) + +(ert-deftest cperl-test-unicode-proto () + ;; perl-mode doesn't fontify prototypes at all + (skip-unless (eq cperl-test-mode #'cperl-mode)) + (with-temp-buffer + (cperl--test-unicode-setup + (concat "sub prötötyped ($) {\n" + " ...;" + "}\n") + "prötötyped (") + + (should (equal (get-text-property (point) 'face) + 'font-lock-string-face)))) + +(ert-deftest cperl-test-unicode-fhs () + (with-temp-buffer + (cperl--test-unicode-setup + (concat "while () {\n" + " ...;)\n" + "}\n") + "while (<") ; point is before the first char of the handle + ;; Testing fontification + ;; FIXME 2021-09-10: perl-mode.el and cperl-mode.el handle these + ;; completely differently. perl-mode interprets barewords as + ;; constants, cperl-mode does not fontify them. Both treat + ;; non-barewords as globs, which are not fontified by perl-mode, + ;; but fontified as strings in cperl-mode. We keep (and test) + ;; that behavior "as is" because both bareword filehandles and + ;; syntax are no longer recommended. + (let ((bareword-face + (if (equal cperl-test-mode 'perl-mode) 'font-lock-constant-face + nil))) + (should (equal (get-text-property (point) 'face) + bareword-face))))) + +(ert-deftest cperl-test-unicode-hashkeys () + "Test stringification of bareword hash keys. Not in perl-mode. +perl-mode generally does not stringify bareword hash keys." + (skip-unless (eq cperl-test-mode #'cperl-mode)) + ;; Plain hash key + (with-temp-buffer + (cperl--test-unicode-setup + "$häsh { kéy }" "{ ") + (should (equal (get-text-property (point) 'face) + 'font-lock-string-face))) + ;; Nested hash key + (with-temp-buffer + (cperl--test-unicode-setup + "$häsh { kéy } { kèy }" "} { ") + (should (equal (get-text-property (point) 'face) + 'font-lock-string-face))) + ;; Key => value + (with-temp-buffer + (cperl--test-unicode-setup + "( kéy => 'value'," "( ") + (should (equal (get-text-property (point) 'face) + 'font-lock-string-face)))) + +(ert-deftest cperl-test-word-at-point () + "Test whether the function captures non-ASCII words." + (skip-unless (eq cperl-test-mode #'cperl-mode)) + (let ((words '("rôle" "café" "ångström" + "Data::Dump::dump" + "_underscore"))) + (dolist (word words) + (with-temp-buffer + (insert " + ") ; this will be the suffix + (beginning-of-line) + (insert ")") ; A non-word char + (insert word) + (should (string= word (cperl-word-at-point-hard))))))) + ;;; Function test: Building an index for imenu (ert-deftest cperl-test-imenu-index () @@ -369,7 +667,8 @@ created by CPerl mode, so skip it for Perl mode." "Versioned::Package::outer" "lexical" "Versioned::Block::signatured" - "Package::in_package_again"))) + "Package::in_package_again" + "Erdős::Number::erdős_number"))) (dolist (sub expected) (should (assoc-string sub index))))))) -- cgit v1.2.3