summaryrefslogtreecommitdiff
path: root/lisp/net/puny.el
diff options
context:
space:
mode:
authorLars Ingebrigtsen <larsi@gnus.org>2015-12-29 17:46:00 +0100
committerLars Ingebrigtsen <larsi@gnus.org>2015-12-29 17:46:00 +0100
commitd259328fb87db8cc67d52771efcfa653e52c5b71 (patch)
treee80af1fce52d19babcd090e8f4dc56f56c738d65 /lisp/net/puny.el
parentd181366dc39620eb0f249fc3f1d58b6199b9e44d (diff)
downloademacs-d259328fb87db8cc67d52771efcfa653e52c5b71.tar.gz
emacs-d259328fb87db8cc67d52771efcfa653e52c5b71.tar.bz2
emacs-d259328fb87db8cc67d52771efcfa653e52c5b71.zip
Further Unicode restrictive fixups
* puny.el (puny-highly-restrictive-p): Include the extra identifier characters from table 3.
Diffstat (limited to 'lisp/net/puny.el')
-rw-r--r--lisp/net/puny.el31
1 files changed, 27 insertions, 4 deletions
diff --git a/lisp/net/puny.el b/lisp/net/puny.el
index 08da51b587f..ac47e13c97d 100644
--- a/lisp/net/puny.el
+++ b/lisp/net/puny.el
@@ -191,13 +191,36 @@ For instance \"xn--bcher-kva\" => \"bücher\"."
(buffer-string)))
;; http://www.unicode.org/reports/tr39/#Restriction_Level_Detection
+;; http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Inclusion_in_Identifiers
(defun puny-highly-restrictive-p (string)
(let ((scripts
- (seq-uniq
- (seq-map (lambda (char)
- (aref char-script-table char))
- string))))
+ (delq
+ t
+ (seq-uniq
+ (seq-map (lambda (char)
+ (if (memq char
+ ;; These characters are always allowed
+ ;; in any string.
+ '(#x0027 ; APOSTROPHE
+ #x002D ; HYPHEN-MINUS
+ #x002E ; FULL STOP
+ #x003A ; COLON
+ #x00B7 ; MIDDLE DOT
+ #x058A ; ARMENIAN HYPHEN
+ #x05F3 ; HEBREW PUNCTUATION GERESH
+ #x05F4 ; HEBREW PUNCTUATION GERSHAYIM
+ #x0F0B ; TIBETAN MARK INTERSYLLABIC TSHEG
+ #x200C ; ZERO WIDTH NON-JOINER*
+ #x200D ; ZERO WIDTH JOINER*
+ #x2010 ; HYPHEN
+ #x2019 ; RIGHT SINGLE QUOTATION MARK
+ #x2027 ; HYPHENATION POINT
+ #x30A0 ; KATAKANA-HIRAGANA DOUBLE HYPHEN
+ #x30FB)) ; KATAKANA MIDDLE DOT
+ t
+ (aref char-script-table char)))
+ string)))))
(or
;; Every character uses the same script.
(= (length scripts) 1)