summaryrefslogtreecommitdiff
path: root/test/lisp/progmodes
diff options
context:
space:
mode:
authorHarald Jörg <haj@posteo.de>2021-09-14 17:53:52 +0200
committerHarald Jörg <haj@posteo.de>2021-09-14 17:53:52 +0200
commit3d49ad73e5a93625629c96b6c0b921bb019ea9da (patch)
treed71c35a755bb8fe45c134571dbc3f346fb13595a /test/lisp/progmodes
parent89068554d7d0e9970a7269a0963e7a2bd0b1cc99 (diff)
downloademacs-3d49ad73e5a93625629c96b6c0b921bb019ea9da.tar.gz
emacs-3d49ad73e5a93625629c96b6c0b921bb019ea9da.tar.bz2
emacs-3d49ad73e5a93625629c96b6c0b921bb019ea9da.zip
cperl-mode.el: Allow non-ASCII Perl identifiers
Replace all "A-Z" regexp literals with unicode-aware rx constructs wherever Perl allows non-ASCII identifiers. * lisp/progmodes/cperl-mode.el (cperl-after-sub-regexp) (cperl-after-label. cperl-sniff-for-indent) (cperl-find-pods-heres, cperl-indent-exp) (cperl-fix-line-spacing, cperl-imenu--create-perl-index) (cperl-init-faces, cperl-find-tags): Replace ASCII regex literals by unicode-aware rx constructs. (cperl-init-faces): Eliminate unused lexical `font-lock-anchored'. (cperl-have-help-regexp, cperl-word-at-point-hard): Allow non-ASCII word characters. * test/lisp/progmodes/cperl-mode-tests.el (cperl-test-fontify-special-variables): New test for $^T and $^{VARNAME}. (cperl-test-ws-rx cperl-test-ws+-rx), (cperl-test-version-regexp, cperl-test-package-regexp): Skip for perl-mode. (cperl-test-identifier-rx, cperl--test-unicode-setup) (cperl-test-unicode-labels, cperl-test-unicode-sub) (cperl-test-unicode-varname) (cperl-test-unicode-varname-list, cperl-test-unicode-arrays) (cperl-test-unicode-hashes, cperl-test-unicode-hashref) (cperl-test-unicode-proto, cperl-test-unicode-fhs) (cperl-test-unicode-hashkeys, cperl-test-word-at-point): New tests for unicode identifiers. (cperl-test-imenu-index): Add a unicode identifier to the test. * test/lisp/progmodes/cperl-mode-resources/grammar.pl: Add a function with non-ASCII name for imenu tests.
Diffstat (limited to 'test/lisp/progmodes')
-rw-r--r--test/lisp/progmodes/cperl-mode-resources/grammar.pl14
-rw-r--r--test/lisp/progmodes/cperl-mode-tests.el301
2 files changed, 314 insertions, 1 deletions
diff --git a/test/lisp/progmodes/cperl-mode-resources/grammar.pl b/test/lisp/progmodes/cperl-mode-resources/grammar.pl
index c05fd7efc2a..96a86993082 100644
--- a/test/lisp/progmodes/cperl-mode-resources/grammar.pl
+++ b/test/lisp/progmodes/cperl-mode-resources/grammar.pl
@@ -1,6 +1,7 @@
use 5.024;
use strict;
use warnings;
+use utf8;
sub outside {
say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}'";
@@ -155,4 +156,17 @@ package :: {
Shoved::elsewhere();
+# Finally, try unicode identifiers.
+package Erdős::Number;
+
+sub erdős_number {
+ my $name = shift;
+ if ($name eq "Erdős Pál") {
+ return 0;
+ }
+ else {
+ die "No access to the database. Sorry.";
+ }
+}
+
1;
diff --git a/test/lisp/progmodes/cperl-mode-tests.el b/test/lisp/progmodes/cperl-mode-tests.el
index 54012c3918e..29b9e3f6fb9 100644
--- a/test/lisp/progmodes/cperl-mode-tests.el
+++ b/test/lisp/progmodes/cperl-mode-tests.el
@@ -154,6 +154,22 @@ point in the distant past, and is still broken in perl-mode. "
(should (equal (get-text-property (match-beginning 0) 'face)
'font-lock-keyword-face))))
+(ert-deftest cperl-test-fontify-special-variables ()
+ "Test fontification of variables like $^T or ${^ENCODING}.
+These can occur as \"local\" aliases."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ (with-temp-buffer
+ (insert "local ($^I, ${^UNICODE});\n")
+ (goto-char (point-min))
+ (funcall cperl-test-mode)
+ (font-lock-ensure)
+ (search-forward "$")
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-variable-name-face))
+ (search-forward "$")
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-variable-name-face))))
+
(ert-deftest cperl-test-identify-heredoc ()
"Test whether a construct containing \"<<\" followed by a
bareword is properly identified for a here-document if
@@ -297,6 +313,7 @@ the whole string."
(ert-deftest cperl-test-ws-rx ()
"Tests capture of very simple regular expressions (yawn)."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
(let ((valid
'(" " "\t" "\n"))
(invalid
@@ -306,6 +323,7 @@ the whole string."
(ert-deftest cperl-test-ws+-rx ()
"Tests sequences of whitespace and comment lines."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
(let ((valid
`(" " "\t#\n" "\n# \n"
,(concat "# comment\n" "# comment\n" "\n" "#comment\n")))
@@ -316,6 +334,7 @@ the whole string."
(ert-deftest cperl-test-version-regexp ()
"Tests the regexp for recommended syntax of versions in Perl."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
(let ((valid
'("1" "1.1" "1.1_1" "5.032001"
"v120.100.103"))
@@ -331,6 +350,7 @@ the whole string."
(ert-deftest cperl-test-package-regexp ()
"Tests the regular expression of Perl package names with versions.
Also includes valid cases with whitespace in strange places."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
(let ((valid
'("package Foo"
"package Foo::Bar"
@@ -346,6 +366,284 @@ Also includes valid cases with whitespace in strange places."
(cperl-test--validate-regexp (rx (eval cperl--package-rx))
valid invalid)))
+(ert-deftest cperl-test-identifier-rx ()
+ "Test valid and invalid identifiers (no sigils)."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ (let ((valid
+ '("foo" "FOO" "f_oo" "a123"
+ "manĝis")) ; Unicode is allowed!
+ (invalid
+ '("$foo" ; no sigils allowed (yet)
+ "Foo::bar" ; no package qualifiers allowed
+ "lots_of_€"))) ; € is not alphabetic
+ (cperl-test--validate-regexp (rx (eval cperl--basic-identifier-rx))
+ valid invalid)))
+
+;;; Test unicode identifier in various places
+
+(defun cperl--test-unicode-setup (code string)
+ "Insert CODE, prepare it for tests, and find STRING.
+Invoke the appropriate major mode, ensure fontification, and set
+point after the first occurrence of STRING (no regexp!)."
+ (insert code)
+ (funcall cperl-test-mode)
+ (font-lock-ensure)
+ (goto-char (point-min))
+ (search-forward string))
+
+(ert-deftest cperl-test-unicode-labels ()
+ "Verify that non-ASCII labels are processed correctly."
+ (with-temp-buffer
+ (cperl--test-unicode-setup "LABEł: for ($manĝi) { say; }" "LAB")
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-constant-face))))
+
+(ert-deftest cperl-test-unicode-sub ()
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ (concat "use strict;\n" ; distinguish bob from b-o-f
+ "sub ℏ {\n"
+ " 6.62607015e-34\n"
+ "};")
+ "sub ") ; point is before "ℏ"
+
+ ;; Testing fontification
+ ;; FIXME 2021-09-10: This tests succeeds because cperl-mode
+ ;; accepts almost anything as a sub name for fontification. For
+ ;; example, it fontifies "sub @ {...;}" which is a syntax error in
+ ;; Perl. I let this pass for the moment.
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-function-name-face))
+
+ ;; Testing `beginning-of-defun'. Not available in perl-mode,
+ ;; where it jumps to the beginning of the buffer.
+ (when (eq cperl-test-mode #'cperl-mode)
+ (goto-char (point-min))
+ (search-forward "-34")
+ (beginning-of-defun)
+ (should (looking-at "sub")))))
+
+(ert-deftest cperl-test-unicode-varname ()
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ (concat "use strict;\n"
+ "my $π = 3.1415926535897932384626433832795028841971;\n"
+ "\n"
+ "my $manĝi = $π;\n"
+ "__END__\n")
+ "my $") ; perl-mode doesn't fontify the sigil, so include it here
+
+ ;; Testing fontification
+ ;; FIXME 2021-09-10: This test succeeds in cperl-mode because the
+ ;; π character is "not ASCII alphabetic", so it treats $π as a
+ ;; punctuation variable. The following two `should' forms with a
+ ;; longer variable name were added for stronger verification.
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-variable-name-face))
+ ;; Test both ends of a longer variable name
+ (search-forward "my $") ; again skip the sigil
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-variable-name-face))
+ (search-forward "manĝi")
+ (should (equal (get-text-property (1- (match-end 0)) 'face)
+ 'font-lock-variable-name-face))))
+
+(ert-deftest cperl-test-unicode-varname-list ()
+ "Verify that all elements of a variable list are fontified."
+
+ (let ((hash-face (if (eq cperl-test-mode #'perl-mode)
+ 'perl-non-scalar-variable
+ 'cperl-hash-face))
+ (array-face (if (eq cperl-test-mode #'perl-mode)
+ 'perl-non-scalar-variable
+ 'cperl-array-face)))
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "my (%äsh,@ärräy,$scâlâr);" "%")
+ (should (equal (get-text-property (point) 'face)
+ hash-face))
+ (search-forward "@")
+ (should (equal (get-text-property (point) 'face)
+ array-face))
+ (search-forward "scâlâr")
+ (should (equal (get-text-property (match-beginning 0) 'face)
+ 'font-lock-variable-name-face))
+ (should (equal (get-text-property (1- (match-end 0)) 'face)
+ 'font-lock-variable-name-face)))
+
+ ;; Now with package-qualified variables
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "local (%Søme::äsh,@Søme::ärräy,$Søme::scâlâr);" "%")
+ (should (equal (get-text-property (point) 'face)
+ hash-face))
+ (search-forward "Søme::") ; test basic identifier
+ (should (equal (get-text-property (point) 'face)
+ hash-face))
+ (search-forward "@") ; test package name
+ (should (equal (get-text-property (point) 'face)
+ array-face))
+ (search-forward "Søme::") ; test basic identifier
+ (should (equal (get-text-property (point) 'face)
+ array-face))
+ (search-forward "Søme") ; test package name
+ (should (equal (get-text-property (match-beginning 0) 'face)
+ 'font-lock-variable-name-face))
+ (should (equal (get-text-property (1- (match-end 0)) 'face)
+ 'font-lock-variable-name-face))
+ (search-forward "scâlâr") ; test basic identifier
+ (should (equal (get-text-property (match-beginning 0) 'face)
+ 'font-lock-variable-name-face))
+ (should (equal (get-text-property (1- (match-end 0)) 'face)
+ 'font-lock-variable-name-face)))))
+
+(ert-deftest cperl-test-unicode-arrays ()
+ "Test fontification of array access."
+ ;; Perl mode just looks at the sigil, for element access
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ ;; simple array element
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "$ärräy[1] = 7;" "$")
+ (should (equal (get-text-property (point) 'face)
+ 'cperl-array-face)))
+ ;; array slice
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "@ärräy[(1..3)] = (4..6);" "@")
+ (should (equal (get-text-property (point) 'face)
+ 'cperl-array-face)))
+ ;; array max index
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "$#ärräy = 1;" "$")
+ (should (equal (get-text-property (point) 'face)
+ 'cperl-array-face)))
+ ;; array dereference
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "@$ärräy = (1,2,3)" "@")
+ (should (equal (get-text-property (1- (point)) 'face)
+ 'cperl-array-face))
+ (should (equal (get-text-property (1+ (point)) 'face)
+ 'font-lock-variable-name-face))))
+
+(ert-deftest cperl-test-unicode-hashes ()
+ "Test fontification of hash access."
+ ;; Perl mode just looks at the sigil, for element access
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ ;; simple hash element
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "$häsh{'a'} = 7;" "$")
+ (should (equal (get-text-property (point) 'face)
+ 'cperl-hash-face)))
+ ;; hash array slice
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "@häsh{(1..3)} = (4..6);" "@")
+ (should (equal (get-text-property (point) 'face)
+ 'cperl-hash-face)))
+ ;; hash subset
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "my %hash = %häsh{'a',2,3};" "= %")
+ (should (equal (get-text-property (point) 'face)
+ 'cperl-hash-face)))
+ ;; hash dereference
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "%$äsh = (key => 'value');" "%")
+ (should (equal (get-text-property (1- (point)) 'face)
+ 'cperl-hash-face))
+ (should (equal (get-text-property (1+ (point)) 'face)
+ 'font-lock-variable-name-face))))
+
+(ert-deftest cperl-test-unicode-hashref ()
+ "Verify that a hashref access disambiguates {s}.
+CPerl mode takes the token \"s\" as a substitution unless
+detected otherwise. Not for perl-mode: it doesn't stringify
+bareword hash keys and doesn't recognize a substitution
+\"s}foo}bar}\""
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ (with-temp-buffer
+ (cperl--test-unicode-setup "$häshref->{s} # }}" "{")
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-string-face))
+ (should (equal (get-text-property (1+ (point)) 'face)
+ nil))))
+
+(ert-deftest cperl-test-unicode-proto ()
+ ;; perl-mode doesn't fontify prototypes at all
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ (concat "sub prötötyped ($) {\n"
+ " ...;"
+ "}\n")
+ "prötötyped (")
+
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-string-face))))
+
+(ert-deftest cperl-test-unicode-fhs ()
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ (concat "while (<BAREWÖRD>) {\n"
+ " ...;)\n"
+ "}\n")
+ "while (<") ; point is before the first char of the handle
+ ;; Testing fontification
+ ;; FIXME 2021-09-10: perl-mode.el and cperl-mode.el handle these
+ ;; completely differently. perl-mode interprets barewords as
+ ;; constants, cperl-mode does not fontify them. Both treat
+ ;; non-barewords as globs, which are not fontified by perl-mode,
+ ;; but fontified as strings in cperl-mode. We keep (and test)
+ ;; that behavior "as is" because both bareword filehandles and
+ ;; <glob> syntax are no longer recommended.
+ (let ((bareword-face
+ (if (equal cperl-test-mode 'perl-mode) 'font-lock-constant-face
+ nil)))
+ (should (equal (get-text-property (point) 'face)
+ bareword-face)))))
+
+(ert-deftest cperl-test-unicode-hashkeys ()
+ "Test stringification of bareword hash keys. Not in perl-mode.
+perl-mode generally does not stringify bareword hash keys."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ ;; Plain hash key
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "$häsh { kéy }" "{ ")
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-string-face)))
+ ;; Nested hash key
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "$häsh { kéy } { kèy }" "} { ")
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-string-face)))
+ ;; Key => value
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "( kéy => 'value'," "( ")
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-string-face))))
+
+(ert-deftest cperl-test-word-at-point ()
+ "Test whether the function captures non-ASCII words."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ (let ((words '("rôle" "café" "ångström"
+ "Data::Dump::dump"
+ "_underscore")))
+ (dolist (word words)
+ (with-temp-buffer
+ (insert " + ") ; this will be the suffix
+ (beginning-of-line)
+ (insert ")") ; A non-word char
+ (insert word)
+ (should (string= word (cperl-word-at-point-hard)))))))
+
;;; Function test: Building an index for imenu
(ert-deftest cperl-test-imenu-index ()
@@ -369,7 +667,8 @@ created by CPerl mode, so skip it for Perl mode."
"Versioned::Package::outer"
"lexical"
"Versioned::Block::signatured"
- "Package::in_package_again")))
+ "Package::in_package_again"
+ "Erdős::Number::erdős_number")))
(dolist (sub expected)
(should (assoc-string sub index)))))))