summaryrefslogtreecommitdiff
path: root/lisp/progmodes/cperl-mode.el
diff options
context:
space:
mode:
authorHarald Jörg <haj@posteo.de>2021-09-14 17:53:52 +0200
committerHarald Jörg <haj@posteo.de>2021-09-14 17:53:52 +0200
commit3d49ad73e5a93625629c96b6c0b921bb019ea9da (patch)
treed71c35a755bb8fe45c134571dbc3f346fb13595a /lisp/progmodes/cperl-mode.el
parent89068554d7d0e9970a7269a0963e7a2bd0b1cc99 (diff)
downloademacs-3d49ad73e5a93625629c96b6c0b921bb019ea9da.tar.gz
emacs-3d49ad73e5a93625629c96b6c0b921bb019ea9da.tar.bz2
emacs-3d49ad73e5a93625629c96b6c0b921bb019ea9da.zip
cperl-mode.el: Allow non-ASCII Perl identifiers
Replace all "A-Z" regexp literals with unicode-aware rx constructs wherever Perl allows non-ASCII identifiers. * lisp/progmodes/cperl-mode.el (cperl-after-sub-regexp) (cperl-after-label. cperl-sniff-for-indent) (cperl-find-pods-heres, cperl-indent-exp) (cperl-fix-line-spacing, cperl-imenu--create-perl-index) (cperl-init-faces, cperl-find-tags): Replace ASCII regex literals by unicode-aware rx constructs. (cperl-init-faces): Eliminate unused lexical `font-lock-anchored'. (cperl-have-help-regexp, cperl-word-at-point-hard): Allow non-ASCII word characters. * test/lisp/progmodes/cperl-mode-tests.el (cperl-test-fontify-special-variables): New test for $^T and $^{VARNAME}. (cperl-test-ws-rx cperl-test-ws+-rx), (cperl-test-version-regexp, cperl-test-package-regexp): Skip for perl-mode. (cperl-test-identifier-rx, cperl--test-unicode-setup) (cperl-test-unicode-labels, cperl-test-unicode-sub) (cperl-test-unicode-varname) (cperl-test-unicode-varname-list, cperl-test-unicode-arrays) (cperl-test-unicode-hashes, cperl-test-unicode-hashref) (cperl-test-unicode-proto, cperl-test-unicode-fhs) (cperl-test-unicode-hashkeys, cperl-test-word-at-point): New tests for unicode identifiers. (cperl-test-imenu-index): Add a unicode identifier to the test. * test/lisp/progmodes/cperl-mode-resources/grammar.pl: Add a function with non-ASCII name for imenu tests.
Diffstat (limited to 'lisp/progmodes/cperl-mode.el')
-rw-r--r--lisp/progmodes/cperl-mode.el330
1 files changed, 231 insertions, 99 deletions
diff --git a/lisp/progmodes/cperl-mode.el b/lisp/progmodes/cperl-mode.el
index 76c82f8c73e..1147889969b 100644
--- a/lisp/progmodes/cperl-mode.el
+++ b/lisp/progmodes/cperl-mode.el
@@ -1407,7 +1407,7 @@ the last)."
(concat ; Assume n groups before this...
"\\(" ; n+1=name-group
cperl-white-and-comment-rex ; n+2=pre-name
- "\\(::[a-zA-Z_0-9:']+\\|[a-zA-Z_'][a-zA-Z_0-9:']*\\)" ; n+3=name
+ (rx-to-string `(group ,cperl--normal-identifier-rx))
"\\)" ; END n+1=name-group
(if named "" "?")
"\\(" ; n+4=proto-group
@@ -2573,7 +2573,8 @@ Return the amount the indentation changed by."
'(?w ?_))
(progn
(backward-sexp)
- (looking-at "[a-zA-Z_][a-zA-Z0-9_]*:[^:]"))))
+ (looking-at (rx (sequence (eval cperl--label-rx)
+ (not (in ":"))))))))
(defun cperl-get-state (&optional parse-start start-state)
"Return list (START STATE DEPTH PRESTART),
@@ -2740,7 +2741,9 @@ Will not look before LIM."
(progn
(forward-sexp -1)
(skip-chars-backward " \t")
- (looking-at "[ \t]*[a-zA-Z_][a-zA-Z_0-9]*[ \t]*:")))
+ (looking-at
+ (rx (sequence (0+ blank)
+ (eval cperl--label-rx))))))
(get-text-property (point) 'first-format-line)))
;; Look at previous line that's at column 0
@@ -3836,7 +3839,8 @@ recursive calls in starting lines of here-documents."
"\\<" cperl-sub-regexp "\\>" ; sub with proto/attr
"\\("
cperl-white-and-comment-rex
- "\\(::[a-zA-Z_:'0-9]*\\|[a-zA-Z_'][a-zA-Z_:'0-9]*\\)\\)?" ; name
+ (rx (group (eval cperl--normal-identifier-rx)))
+ "\\)"
"\\("
cperl-maybe-white-and-comment-rex
"\\(([^()]*)\\|:[^:]\\)\\)" ; prototype or attribute start
@@ -4111,10 +4115,12 @@ recursive calls in starting lines of here-documents."
(t t))))
;; <file> or <$file>
(and (eq c ?\<)
- ;; Do not stringify <FH>, <$fh> :
+ ;; Stringify what looks like a glob, but
+ ;; do not stringify file handles <FH>, <$fh> :
(save-match-data
(looking-at
- "\\$?\\([_a-zA-Z:][_a-zA-Z0-9:]*\\)?>"))))
+ (rx (sequence (opt "$")
+ (eval cperl--normal-identifier-rx)))))))
tb (match-beginning 0))
(goto-char (match-beginning b1))
(cperl-backward-to-noncomment (point-min))
@@ -4184,7 +4190,16 @@ recursive calls in starting lines of here-documents."
(error nil)))
(if (or bb
(looking-at ; $foo -> {s}
- "[$@]\\$*\\([a-zA-Z0-9_:]+\\|[^{]\\)\\([ \t\n]*->\\)?[ \t\n]*{")
+ (rx
+ (sequence
+ (in "$@") (0+ "$")
+ (or
+ (eval cperl--normal-identifier-rx)
+ (not (in "{")))
+ (opt (sequence (eval cperl--ws*-rx))
+ "->")
+ (eval cperl--ws*-rx)
+ "{")))
(and ; $foo[12] -> {s}
(memq (following-char) '(?\{ ?\[))
(progn
@@ -4199,7 +4214,12 @@ recursive calls in starting lines of here-documents."
(setq bb t))
((and (eq (following-char) ?:)
(eq b1 ?\{) ; Check for $ { s::bar }
- (looking-at "::[a-zA-Z0-9_:]*[ \t\n\f]*}")
+ ;; (looking-at "::[a-zA-Z0-9_:]*[ \t\n\f]*}")
+ (looking-at
+ (rx (sequence "::"
+ (eval cperl--normal-identifier-rx)
+ (eval cperl--ws*-rx)
+ "}")))
(progn
(goto-char (1- go))
(skip-chars-backward " \t\n\f")
@@ -4364,7 +4384,7 @@ recursive calls in starting lines of here-documents."
"\\(" ;; XXXX 1-char variables, exc. |()\s
"[$@]"
"\\("
- "[_a-zA-Z:][_a-zA-Z0-9:]*"
+ (rx (eval cperl--normal-identifier-rx))
"\\|"
"{[^{}]*}" ; only one-level allowed
"\\|"
@@ -4820,6 +4840,7 @@ recursive calls in starting lines of here-documents."
(progn
(backward-sexp)
;; sub {BLK}, print {BLK} $data, but NOT `bless', `return', `tr', `constant'
+ ;; a-zA-Z is fine here, these are Perl keywords
(or (and (looking-at "[a-zA-Z0-9_:]+[ \t\n\f]*[{#]") ; Method call syntax
(not (looking-at "\\(bless\\|return\\|q[wqrx]?\\|tr\\|[smy]\\|constant\\)\\>")))
;; sub bless::foo {}
@@ -5028,7 +5049,11 @@ conditional/loop constructs."
cperl-maybe-white-and-comment-rex
"\\(state\\|my\\|local\\|our\\)\\)?"
cperl-maybe-white-and-comment-rex
- "\\$[_a-zA-Z0-9]+\\)?\\)\\>"))
+ (rx
+ (sequence
+ "$"
+ (eval cperl--basic-identifier-rx)))
+ "\\)?\\)\\>"))
(progn
(goto-char top)
(forward-sexp 1)
@@ -5122,7 +5147,14 @@ Returns some position at the last line."
;; Looking at:
;; foreach my $var (
(if (looking-at
- "[ \t]*\\<for\\(each\\)?[ \t]+\\(state\\|my\\|local\\|our\\)[ \t]*\\$[_a-zA-Z0-9]+\\(\t*\\|[ \t][ \t]+\\)[^ \t\n#]")
+ (rx (sequence (0+ blank) symbol-start
+ "for" (opt "each")
+ (1+ blank)
+ (or "state" "my" "local" "our")
+ (0+ blank)
+ "$" (eval cperl--basic-identifier-rx)
+ (1+ blank)
+ (not (in " \t\n#")))))
(progn
(forward-sexp 3)
(delete-horizontal-space)
@@ -5132,9 +5164,25 @@ Returns some position at the last line."
;; Looking at (with or without "}" at start, ending after "({"):
;; } foreach my $var () OR {
(if (looking-at
- "[ \t]*\\(}[ \t]*\\)?\\<\\(els\\(e\\|if\\)\\|continue\\|if\\|unless\\|while\\|for\\(each\\)?\\(\\([ \t]+\\(state\\|my\\|local\\|our\\)\\)?[ \t]*\\$[_a-zA-Z0-9]+\\)?\\|until\\)\\>\\([ \t]*(\\|[ \t\n]*{\\)\\|[ \t]*{")
+ (rx (sequence
+ (0+ blank)
+ (opt (sequence "}" (0+ blank) ))
+ symbol-start
+ (or "else" "elsif" "continue" "if" "unless" "while" "until"
+ (sequence (or "for" "foreach")
+ (opt
+ (opt (sequence (1+ blank)
+ (or "state" "my" "local" "our")))
+ (0+ blank)
+ "$" (eval cperl--basic-identifier-rx))))
+ symbol-end
+ (group-n 1
+ (or
+ (or (sequence (0+ blank) "(")
+ (sequence (eval cperl--ws*-rx) "{"))
+ (sequence (0+ blank) "{"))))))
(progn
- (setq ml (match-beginning 8)) ; "(" or "{" after control word
+ (setq ml (match-beginning 1)) ; "(" or "{" after control word
(re-search-forward "[({]")
(forward-char -1)
(setq p (point))
@@ -5544,7 +5592,11 @@ comment, or POD."
(setq lst index-sub-alist)
(while lst
(setq elt (car lst) lst (cdr lst))
- (cond ((string-match "\\(::\\|'\\)[_a-zA-Z0-9]+$" (car elt))
+ (cond ((string-match
+ (rx (sequence (or "::" "'")
+ (eval cperl--basic-identifier-rx)
+ string-end))
+ (car elt))
(setq pack (substring (car elt) 0 (match-beginning 0)))
(if (setq group (assoc pack hier-list))
(if (listp (cdr group))
@@ -5646,8 +5698,7 @@ default function."
(defun cperl-init-faces ()
(condition-case errs
(progn
- (let (t-font-lock-keywords t-font-lock-keywords-1 font-lock-anchored)
- (setq font-lock-anchored t)
+ (let (t-font-lock-keywords t-font-lock-keywords-1)
(setq
t-font-lock-keywords
(list
@@ -5760,20 +5811,41 @@ default function."
(if (eq (char-after (cperl-1- (match-end 0))) ?\{ )
'font-lock-function-name-face
'font-lock-variable-name-face))))
- '("\\<\\(package\\|require\\|use\\|import\\|no\\|bootstrap\\)[ \t]+\\([a-zA-Z_][a-zA-Z_0-9:]*\\)[ \t;]" ; require A if B;
- 2 font-lock-function-name-face)
+ `(,(rx (sequence symbol-start
+ (or "package" "require" "use" "import"
+ "no" "bootstrap")
+ (eval cperl--ws+-rx)
+ (group-n 1 (eval cperl--normal-identifier-rx))
+ (any " \t;"))) ; require A if B;
+ 1 font-lock-function-name-face)
'("^[ \t]*format[ \t]+\\([a-zA-Z_][a-zA-Z_0-9:]*\\)[ \t]*=[ \t]*$"
1 font-lock-function-name-face)
- (cond (font-lock-anchored
- '("\\([]}\\%@>*&]\\|\\$[a-zA-Z0-9_:]*\\)[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}"
- (2 font-lock-string-face t)
- ("\\=[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}"
- nil nil
- (1 font-lock-string-face t))))
- (t '("\\([]}\\%@>*&]\\|\\$[a-zA-Z0-9_:]*\\)[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}"
- 2 font-lock-string-face t)))
- '("[[ \t{,(]\\(-?[a-zA-Z0-9_:]+\\)[ \t]*=>" 1
- font-lock-string-face t)
+ ;; bareword hash key: $foo{bar}
+ `(,(rx (or (in "]}\\%@>*&") ; What Perl is this?
+ (sequence "$" (eval cperl--normal-identifier-rx)))
+ (0+ blank) "{" (0+ blank)
+ (group-n 1 (sequence (opt "-")
+ (eval cperl--basic-identifier-rx)))
+ (0+ blank) "}")
+;; '("\\([]}\\%@>*&]\\|\\$[a-zA-Z0-9_:]*\\)[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}"
+ (1 font-lock-string-face t)
+ ;; anchored bareword hash key: $foo{bar}{baz}
+ (,(rx point
+ (0+ blank) "{" (0+ blank)
+ (group-n 1 (sequence (opt "-")
+ (eval cperl--basic-identifier-rx)))
+ (0+ blank) "}")
+ ;; ("\\=[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}"
+ nil nil
+ (1 font-lock-string-face t)))
+ ;; hash element assignments with bareword key => value
+ `(,(rx (in "[ \t{,()")
+ (group-n 1 (sequence (opt "-")
+ (eval cperl--basic-identifier-rx)))
+ (0+ blank) "=>")
+ 1 font-lock-string-face t)
+;; '("[[ \t{,(]\\(-?[a-zA-Z0-9_:]+\\)[ \t]*=>" 1
+;; font-lock-string-face t)
;; labels
`(,(rx
(sequence
@@ -5797,83 +5869,130 @@ default function."
;;; '("[$*]{?\\(\\sw+\\)" 1 font-lock-variable-name-face)
;;; '("\\([@%]\\|\\$#\\)\\(\\sw+\\)"
;;; (2 (cons font-lock-variable-name-face '(underline))))
- (cond (font-lock-anchored
;; 1=my_etc, 2=white? 3=(+white? 4=white? 5=var
- `(,(concat "\\<\\(state\\|my\\|local\\|our\\)"
- cperl-maybe-white-and-comment-rex
- "\\(("
- cperl-maybe-white-and-comment-rex
- "\\)?\\([$@%*]\\([a-zA-Z0-9_:]+\\|[^a-zA-Z0-9_]\\)\\)")
- (5 ,(if cperl-font-lock-multiline
- 'font-lock-variable-name-face
- '(progn (setq cperl-font-lock-multiline-start
- (match-beginning 0))
- 'font-lock-variable-name-face)))
- (,(concat "\\="
- cperl-maybe-white-and-comment-rex
- ","
- cperl-maybe-white-and-comment-rex
- "\\([$@%*]\\([a-zA-Z0-9_:]+\\|[^a-zA-Z0-9_]\\)\\)")
- ;; Bug in font-lock: limit is used not only to limit
- ;; searches, but to set the "extend window for
- ;; facification" property. Thus we need to minimize.
- ,(if cperl-font-lock-multiline
- '(if (match-beginning 3)
- (save-excursion
- (goto-char (match-beginning 3))
- (condition-case nil
- (forward-sexp 1)
- (error
- (condition-case nil
- (forward-char 200)
- (error nil)))) ; typeahead
- (1- (point))) ; report limit
- (forward-char -2)) ; disable continued expr
- '(if (match-beginning 3)
- (point-max) ; No limit for continuation
- (forward-char -2))) ; disable continued expr
- ,(if cperl-font-lock-multiline
- nil
- '(progn ; Do at end
- ;; "my" may be already fontified (POD),
- ;; so cperl-font-lock-multiline-start is nil
- (if (or (not cperl-font-lock-multiline-start)
- (> 2 (count-lines
- cperl-font-lock-multiline-start
- (point))))
- nil
- (put-text-property
- (1+ cperl-font-lock-multiline-start) (point)
- 'syntax-type 'multiline))
- (setq cperl-font-lock-multiline-start nil)))
- (3 font-lock-variable-name-face))))
- (t '("^[ \t{}]*\\(state\\|my\\|local\\|our\\)[ \t]*\\(([ \t]*\\)?\\([$@%*][a-zA-Z0-9_:]+\\)"
- 3 font-lock-variable-name-face)))
- '("\\<for\\(each\\)?\\([ \t]+\\(state\\|my\\|local\\|our\\)\\)?[ \t]*\\(\\$[a-zA-Z_][a-zA-Z_0-9]*\\)[ \t]*("
+ `(,(rx (sequence (or "state" "my" "local" "our"))
+ (eval cperl--ws*-rx)
+ (opt (sequence "(" (eval cperl--ws*-rx)))
+ (group
+ (in "$@%*")
+ (or
+ (eval cperl--normal-identifier-rx)
+ (eval cperl--special-identifier-rx))
+ )
+ )
+ ;; (concat "\\<\\(state\\|my\\|local\\|our\\)"
+ ;; cperl-maybe-white-and-comment-rex
+ ;; "\\(("
+ ;; cperl-maybe-white-and-comment-rex
+ ;; "\\)?\\([$@%*]\\([a-zA-Z0-9_:]+\\|[^a-zA-Z0-9_]\\)\\)")
+ ;; (5 ,(if cperl-font-lock-multiline
+ (1 ,(if cperl-font-lock-multiline
+ 'font-lock-variable-name-face
+ '(progn (setq cperl-font-lock-multiline-start
+ (match-beginning 0))
+ 'font-lock-variable-name-face)))
+ (,(rx (sequence point
+ (eval cperl--ws*-rx)
+ ","
+ (eval cperl--ws*-rx)
+ (group
+ (in "$@%*")
+ (or
+ (eval cperl--normal-identifier-rx)
+ (eval cperl--special-identifier-rx))
+ )
+ )
+ )
+ ;; ,(concat "\\="
+ ;; cperl-maybe-white-and-comment-rex
+ ;; ","
+ ;; cperl-maybe-white-and-comment-rex
+ ;; "\\([$@%*]\\([a-zA-Z0-9_:]+\\|[^a-zA-Z0-9_]\\)\\)")
+ ;; Bug in font-lock: limit is used not only to limit
+ ;; searches, but to set the "extend window for
+ ;; facification" property. Thus we need to minimize.
+ ,(if cperl-font-lock-multiline
+ '(if (match-beginning 1)
+ (save-excursion
+ (goto-char (match-beginning 1))
+ (condition-case nil
+ (forward-sexp 1)
+ (error
+ (condition-case nil
+ (forward-char 200)
+ (error nil)))) ; typeahead
+ (1- (point))) ; report limit
+ (forward-char -2)) ; disable continued expr
+ '(if (match-beginning 1)
+ (point-max) ; No limit for continuation
+ (forward-char -2))) ; disable continued expr
+ ,(if cperl-font-lock-multiline
+ nil
+ '(progn ; Do at end
+ ;; "my" may be already fontified (POD),
+ ;; so cperl-font-lock-multiline-start is nil
+ (if (or (not cperl-font-lock-multiline-start)
+ (> 2 (count-lines
+ cperl-font-lock-multiline-start
+ (point))))
+ nil
+ (put-text-property
+ (1+ cperl-font-lock-multiline-start) (point)
+ 'syntax-type 'multiline))
+ (setq cperl-font-lock-multiline-start nil)))
+ (1 font-lock-variable-name-face)))
+ ;; foreach my $foo (
+ `(,(rx symbol-start "for" (opt "each")
+ (opt (sequence (1+ blank)
+ (or "state" "my" "local" "our")))
+ (0+ blank)
+ (group-n 1 (sequence "$"
+ (eval cperl--basic-identifier-rx)))
+ (0+ blank) "(")
+;; '("\\<for\\(each\\)?\\([ \t]+\\(state\\|my\\|local\\|our\\)\\)?[ \t]*\\(\\$[a-zA-Z_][a-zA-Z_0-9]*\\)[ \t]*("
4 font-lock-variable-name-face)
;; Avoid $!, and s!!, qq!! etc. when not fontifying syntactically
'("\\(?:^\\|[^smywqrx$]\\)\\(!\\)" 1 font-lock-negation-char-face)
'("\\[\\(\\^\\)" 1 font-lock-negation-char-face prepend)))
(setq
t-font-lock-keywords-1
- '(
- ("\\(\\([@%]\\|\\$#\\)[a-zA-Z_:][a-zA-Z0-9_:]*\\)" 1
+ `(
+ ;; arrays and hashes. Access to elements is fixed below
+ (,(rx (group-n 1 (group-n 2 (or (in "@%") "$#"))
+ (eval cperl--normal-identifier-rx)))
+ 1
+;; ("\\(\\([@%]\\|\\$#\\)[a-zA-Z_:][a-zA-Z0-9_:]*\\)" 1
(if (eq (char-after (match-beginning 2)) ?%)
'cperl-hash-face
'cperl-array-face)
nil) ; arrays and hashes
- ("\\(\\([$@%]+\\)[a-zA-Z_:][a-zA-Z0-9_:]*\\)[ \t]*\\([[{]\\)"
+ ;; access to array/hash elements
+ (,(rx (group-n 1 (group-n 2 (in "$@%"))
+ (eval cperl--normal-identifier-rx))
+ (0+ blank)
+ (group-n 3 (in "[{")))
+;; ("\\(\\([$@%]+\\)[a-zA-Z_:][a-zA-Z0-9_:]*\\)[ \t]*\\([[{]\\)"
1
(if (= (- (match-end 2) (match-beginning 2)) 1)
(if (eq (char-after (match-beginning 3)) ?{)
'cperl-hash-face
'cperl-array-face) ; arrays and hashes
font-lock-variable-name-face) ; Just to put something
- t)
- ("\\(@\\|\\$#\\)\\(\\$+\\([a-zA-Z_:][a-zA-Z0-9_:]*\\|[^ \t\n]\\)\\)"
+ t) ; override previous
+ ;; @$ array dereferences, $#$ last array index
+ (,(rx (group-n 1 (or "@" "$#"))
+ (group-n 2 (sequence "$"
+ (or (eval cperl--normal-identifier-rx)
+ (not (in " \t\n"))))))
+ ;; ("\\(@\\|\\$#\\)\\(\\$+\\([a-zA-Z_:][a-zA-Z0-9_:]*\\|[^ \t\n]\\)\\)"
(1 'cperl-array-face)
(2 font-lock-variable-name-face))
- ("\\(%\\)\\(\\$+\\([a-zA-Z_:][a-zA-Z0-9_:]*\\|[^ \t\n]\\)\\)"
+ ;; %$ hash dereferences
+ (,(rx (group-n 1 "%")
+ (group-n 2 (sequence "$"
+ (or (eval cperl--normal-identifier-rx)
+ (not (in " \t\n"))))))
+ ;; ("\\(%\\)\\(\\$+\\([a-zA-Z_:][a-zA-Z0-9_:]*\\|[^ \t\n]\\)\\)"
(1 'cperl-hash-face)
(2 font-lock-variable-name-face))
;;("\\([smy]\\|tr\\)\\([^a-z_A-Z0-9]\\)\\(\\([^\n\\]*||\\)\\)\\2")
@@ -6435,6 +6554,8 @@ Will not move the position at the start to the left."
(indent-region beg end nil)
(goto-char beg)
(setq col (current-column))
+ ;; Assuming that lineup is done on Perl syntax, this regexp
+ ;; doesn't need to be unicode aware -- haj, 2021-09-10
(if (looking-at "[a-zA-Z0-9_]")
(if (looking-at "\\<[a-zA-Z0-9_]+\\>")
(setq search
@@ -6472,6 +6593,9 @@ Will not move the position at the start to the left."
"Run etags with appropriate options for Perl files.
If optional argument ALL is `recursive', will process Perl files
in subdirectories too."
+ ;; Apparently etags doesn't support UTF-8 encoded sources, and usage
+ ;; of etags has been commented out in the menu since ... well,
+ ;; forever. So, let's just stick to ASCII here. -- haj, 2021-09-14
(interactive)
(let ((cmd "etags")
(args `("-l" "none" "-r"
@@ -6611,6 +6735,9 @@ Does not move point."
;; Search for the function
(progn ;;save-match-data
(while (re-search-forward
+ ;; FIXME: Should XS code be unicode aware? Recent C
+ ;; compilers (Gcc 10+) are, but I guess this isn't used
+ ;; much. -- haj, 2021-09-14
"^\\([ \t]*MODULE\\>[^\n]*\\<PACKAGE[ \t]*=[ \t]*\\([a-zA-Z_][a-zA-Z_0-9:]*\\)\\>\\|\\([a-zA-Z_][a-zA-Z_0-9]*\\)(\\|[ \t]*BOOT:\\)"
nil t)
(cond
@@ -6673,7 +6800,7 @@ Does not move point."
(setq lst
(mapcar
(lambda (elt)
- (cond ((string-match "^[_a-zA-Z]" (car elt))
+ (cond ((string-match (rx line-start (or alpha "_")) (car elt))
(goto-char (cdr elt))
(beginning-of-line) ; pos should be of the start of the line
(list (car elt)
@@ -6703,9 +6830,14 @@ Does not move point."
","
(number-to-string (1- (elt elt 1))) ; Char pos 0-based
"\n")
- (if (and (string-match "^[_a-zA-Z]+::" (car elt))
- (string-match (concat "^" cperl-sub-regexp "[ \t]+\\([_a-zA-Z]+\\)[^:_a-zA-Z]")
- (elt elt 3)))
+ (if (and (string-match (rx line-start
+ (eval cperl--basic-identifier-rx) "++")
+ (car elt))
+ (string-match (rx-to-string `(sequence line-start
+ (regexp ,cperl-sub-regexp)
+ (1+ (in " \t"))
+ ,cperl--normal-identifier-rx))
+ (elt elt 3)))
;; Need to insert the name without package as well
(setq lst (cons (cons (substring (elt elt 3)
(match-beginning 1)
@@ -7155,14 +7287,14 @@ Currently it is tuned to C and Perl syntax."
;;(concat "\\("
(mapconcat
#'identity
- '("[$@%*&][0-9a-zA-Z_:]+\\([ \t]*[[{]\\)?" ; Usual variable
+ '("[$@%*&][[:alnum:]_:]+\\([ \t]*[[{]\\)?" ; Usual variable
"[$@]\\^[a-zA-Z]" ; Special variable
"[$@][^ \n\t]" ; Special variable
"-[a-zA-Z]" ; File test
"\\\\[a-zA-Z0]" ; Special chars
"^=[a-z][a-zA-Z0-9_]*" ; POD sections
"[-!&*+,./<=>?\\^|~]+" ; Operator
- "[a-zA-Z_0-9:]+" ; symbol or number
+ "[[:alnum:]_:]+" ; symbol or number
"x="
"#!")
;;"\\)\\|\\("
@@ -7178,7 +7310,7 @@ Currently it is tuned to C and Perl syntax."
;; Does not save-excursion
;; Get to the something meaningful
(or (eobp) (eolp) (forward-char 1))
- (re-search-backward "[-a-zA-Z0-9_:!&*+,./<=>?\\^|~$%@]"
+ (re-search-backward "[-[:alnum:]_:!&*+,./<=>?\\^|~$%@]"
(point-at-bol)
'to-beg)
;; (cond
@@ -7187,8 +7319,8 @@ Currently it is tuned to C and Perl syntax."
;; (or (bobp) (backward-char 1))))
;; Try to backtrace
(cond
- ((looking-at "[a-zA-Z0-9_:]") ; symbol
- (skip-chars-backward "a-zA-Z0-9_:")
+ ((looking-at "[[:alnum:]_:]") ; symbol
+ (skip-chars-backward "[:alnum:]_:")
(cond
((and (eq (preceding-char) ?^) ; $^I
(eq (char-after (- (point) 2)) ?\$))
@@ -7199,7 +7331,7 @@ Currently it is tuned to C and Perl syntax."
(eq (current-column) 1))
(forward-char -1))) ; =head1
(if (and (eq (preceding-char) ?\<)
- (looking-at "\\$?[a-zA-Z0-9_:]+>")) ; <FH>
+ (looking-at "\\$?[[:alnum:]_:]+>")) ; <FH>
(forward-char -1)))
((and (looking-at "=") (eq (preceding-char) ?x)) ; x=
(forward-char -1))
@@ -7212,15 +7344,15 @@ Currently it is tuned to C and Perl syntax."
(not (eq (char-after (- (point) 2)) ?\$))) ; $-
(forward-char -1))
((and (eq (following-char) ?\>)
- (string-match "[a-zA-Z0-9_]" (char-to-string (preceding-char)))
+ (string-match "[[:alnum:]_]" (char-to-string (preceding-char)))
(save-excursion
(forward-sexp -1)
(and (eq (preceding-char) ?\<)
- (looking-at "\\$?[a-zA-Z0-9_:]+>")))) ; <FH>
+ (looking-at "\\$?[[:alnum:]_:]+>")))) ; <FH>
(search-backward "<"))))
((and (eq (following-char) ?\$)
(eq (preceding-char) ?\<)
- (looking-at "\\$?[a-zA-Z0-9_:]+>")) ; <$fh>
+ (looking-at "\\$?[[:alnum:]_:]+>")) ; <$fh>
(forward-char -1)))
(if (looking-at cperl-have-help-regexp)
(buffer-substring (match-beginning 0) (match-end 0))))