summaryrefslogtreecommitdiff
path: root/lisp/emacs-lisp
diff options
context:
space:
mode:
author: Mattias Engdegård <mattiase@acm.org> 2019-02-15 19:27:48 +0100
committer: Mattias Engdegård <mattiase@acm.org> 2019-02-16 12:43:32 +0100
commit: 478bbf7c80e71ff84f0e4e1363bf86e93d9c51c3 (patch)
tree: 7d05c376a0299282d291eff879eedcc6f3d2651d /lisp/emacs-lisp
parent: aff0c585060b7cc92d52a32978c6aa64cf7e2a5e (diff)
download: emacs-478bbf7c80e71ff84f0e4e1363bf86e93d9c51c3.tar.gz
emacs-478bbf7c80e71ff84f0e4e1363bf86e93d9c51c3.tar.bz2
emacs-478bbf7c80e71ff84f0e4e1363bf86e93d9c51c3.zip
Prevent over-eager rx character range condensation
`rx' incorrectly considers character ranges between ASCII and raw bytes to
cover all codes in-between, which includes all non-ASCII Unicode chars.
This causes (any "\000-\377" ?Å) to be simplified to (any "\000-\377"),
which is not at all the same thing: [\000-\377] really means
[\000-\177\200-\377] (Bug#34492).

* lisp/emacs-lisp/rx.el (rx-any-condense-range): Split ranges going
from ASCII to raw bytes.
* test/lisp/emacs-lisp/rx-tests.el (rx-char-any-raw-byte): Add test case.
* etc/NEWS: Mention the overall change (Bug#33205).
Diffstat (limited to 'lisp/emacs-lisp')
-rw-r--r-- lisp/emacs-lisp/rx.el 7
1 file changed, 7 insertions, 0 deletions
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
index b2299030a1b..715cd608c46 100644
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
@@ -429,6 +429,13 @@ Only both edges of each range is checked."
;; set L list of all ranges
(mapc (lambda (e) (cond ((stringp e) (push e str))
((numberp e) (push (cons e e) l))
+ ;; Ranges between ASCII and raw bytes are split,
+ ;; to prevent accidental inclusion of Unicode
+ ;; characters later on.
+ ((and (<= (car e) #x7f)
+ (>= (cdr e) #x3fff80))
+ (push (cons (car e) #x7f) l)
+ (push (cons #x3fff80 (cdr e)) l))
(t (push e l))))
args)
;; condense overlapped ranges in L