diff options
Diffstat (limited to 'lisp/international/characters.el')
-rw-r--r-- | lisp/international/characters.el | 138 |
1 files changed, 84 insertions, 54 deletions
diff --git a/lisp/international/characters.el b/lisp/international/characters.el index 7d625d1382a..ca28222c815 100644 --- a/lisp/international/characters.el +++ b/lisp/international/characters.el @@ -303,7 +303,8 @@ with L, LRE, or LRO Unicode bidi character type.") (setq charsets (cdr charsets)))) (modify-category-entry '(#x600 . #x6ff) ?b) (modify-category-entry '(#x870 . #x8ff) ?b) -(modify-category-entry '(#xfb50 . #xfdff) ?b) +(modify-category-entry '(#xfb50 . #xfdcf) ?b) +(modify-category-entry '(#xfdf0 . #xfdff) ?b) (modify-category-entry '(#xfe70 . #xfefe) ?b) ;; Cyrillic character set (ISO-8859-5) @@ -1440,6 +1441,10 @@ Setup `char-width-table' appropriate for non-CJK language environment." (set-char-table-range char-script-table range 'tibetan)) 'tibetan) +;; Fix some exceptions that blocks.awk/Blocks.txt couldn't get right. +(set-char-table-range char-script-table '(#x2ea . #x2eb) 'bopomofo) +(set-char-table-range char-script-table #xab65 'greek) + ;;; Setting unicode-category-table. @@ -1493,6 +1498,9 @@ Setup `char-width-table' appropriate for non-CJK language environment." (aset char-acronym-table #x202D "LRO") ; LEFT-TO-RIGHT OVERRIDE (aset char-acronym-table #x202E "RLO") ; RIGHT-TO-LEFT OVERRIDE (aset char-acronym-table #x2060 "WJ") ; WORD JOINER +(aset char-acronym-table #x2066 "LRI") ; LEFT-TO-RIGHT ISOLATE +(aset char-acronym-table #x2067 "RLI") ; RIGHT-TO-LEFT ISOLATE +(aset char-acronym-table #x2069 "PDI") ; POP DIRECTIONAL ISOLATE (aset char-acronym-table #x206A "ISS") ; INHIBIT SYMMETRIC SWAPPING (aset char-acronym-table #x206B "ASS") ; ACTIVATE SYMMETRIC SWAPPING (aset char-acronym-table #x206C "IAFS") ; INHIBIT ARABIC FORM SHAPING @@ -1517,18 +1525,42 @@ Setup `char-width-table' appropriate for non-CJK language environment." (aset char-acronym-table (+ #xE0021 i) (format " %c TAG" (+ 33 i)))) (aset char-acronym-table #xE007F "->|TAG") ; CANCEL TAG +;; We can't use the \N{name} things here, because this file is used +;; too early in the build process. +(defvar bidi-control-characters + '(#x200e ; ?\N{left-to-right mark} + #x200f ; ?\N{right-to-left mark} + #x061c ; ?\N{arabic letter mark} + #x202a ; ?\N{left-to-right embedding} + #x202b ; ?\N{right-to-left embedding} + #x202d ; ?\N{left-to-right override} + #x202e ; ?\N{right-to-left override} + #x2066 ; ?\N{left-to-right isolate} + #x2067 ; ?\N{right-to-left isolate} + #x2068 ; ?\N{first strong isolate} + #x202c ; ?\N{pop directional formatting} + #x2069) ; ?\N{pop directional isolate} + "List of bidirectional control characters.") + +(defun bidi-string-strip-control-characters (string) + "Strip bidi control characters from STRING and return the result." + (apply #'string (seq-filter (lambda (char) + (not (memq char bidi-control-characters))) + string))) + (defun update-glyphless-char-display (&optional variable value) "Make the setting of `glyphless-char-display-control' take effect. This function updates the char-table `glyphless-char-display', and is intended to be used in the `:set' attribute of the option `glyphless-char-display'." - (when value + (when variable (set-default variable value)) (dolist (elt value) (let ((target (car elt)) (method (cdr elt))) - (or (memq method '(zero-width thin-space empty-box acronym hex-code)) - (error "Invalid glyphless character display method: %s" method)) + (unless (memq method '( zero-width thin-space empty-box + acronym hex-code bidi-control)) + (error "Invalid glyphless character display method: %s" method)) (cond ((eq target 'c0-control) (glyphless-set-char-table-range glyphless-char-display #x00 #x1F method) @@ -1543,24 +1575,28 @@ option `glyphless-char-display'." ((eq target 'variation-selectors) (glyphless-set-char-table-range glyphless-char-display #xFE00 #xFE0F method)) - ((eq target 'format-control) + ((or (eq target 'format-control) + (eq target 'bidi-control)) (when unicode-category-table (map-char-table (lambda (char category) - (if (eq category 'Cf) - (let ((this-method method) - from to) - (if (consp char) - (setq from (car char) to (cdr char)) - (setq from char to char)) - (while (<= from to) - (when (/= from #xAD) - (if (eq method 'acronym) - (setq this-method - (aref char-acronym-table from))) + (when (eq category 'Cf) + (let ((this-method method) + from to) + (if (consp char) + (setq from (car char) to (cdr char)) + (setq from char to char)) + (while (<= from to) + (when (/= from #xAD) + (when (eq method 'acronym) + (setq this-method + (or (aref char-acronym-table from) + "UNK"))) + (when (or (eq target 'format-control) + (memq from bidi-control-characters)) (set-char-table-range glyphless-char-display - from this-method)) - (setq from (1+ from)))))) + from this-method))) + (setq from (1+ from)))))) unicode-category-table))) ((eq target 'no-font) (set-char-table-extra-slot glyphless-char-display 0 method)) @@ -1576,6 +1612,19 @@ option `glyphless-char-display'." (set-char-table-range chartable (cons from to) method))) ;;; Control of displaying glyphless characters. +(define-widget 'glyphless-char-display-method 'lazy + "Display method for glyphless characters." + :group 'mule + :format "%v" + :value 'thin-space + :type + '(choice + (const :tag "Don't display" zero-width) + (const :tag "Display as thin space" thin-space) + (const :tag "Display as empty box" empty-box) + (const :tag "Display acronym" acronym) + (const :tag "Display hex code in a box" hex-code))) + (defcustom glyphless-char-display-control '((format-control . thin-space) (variation-selectors . thin-space) @@ -1594,12 +1643,17 @@ GROUP must be one of these symbols: such as U+200C (ZWNJ), U+200E (LRM), but excluding characters that have graphic images, such as U+00AD (SHY). - `variation-selectors': U+FE00..U+FE0F, used for choosing between - glyph variations (e.g. Emoji vs Text - presentation). - `no-font': characters for which no suitable font is found. - For character terminals, characters that cannot - be encoded by `terminal-coding-system'. + `bidi-control': A subset of `format-control', but only characters + that are relevant for bidirectional formatting control, + like U+2069 (PDI) and U+202B (RLE). + `variation-selectors': + Characters in the range U+FE00..U+FE0F, used for + selecting alternate glyph presentations, such as + Emoji vs Text presentation, of the preceding + character(s). + `no-font': For GUI frames, characters for which no suitable + font is found; for text-mode frames, characters + that cannot be encoded by `terminal-coding-system'. METHOD must be one of these symbols: `zero-width': don't display. @@ -1617,36 +1671,12 @@ function (`update-glyphless-char-display'), which updates :version "28.1" :type '(alist :key-type (symbol :tag "Character Group") :value-type (symbol :tag "Display Method")) - :options '((c0-control - (choice (const :tag "Don't display" zero-width) - (const :tag "Display as thin space" thin-space) - (const :tag "Display as empty box" empty-box) - (const :tag "Display acronym" acronym) - (const :tag "Display hex code in a box" hex-code))) - (c1-control - (choice (const :tag "Don't display" zero-width) - (const :tag "Display as thin space" thin-space) - (const :tag "Display as empty box" empty-box) - (const :tag "Display acronym" acronym) - (const :tag "Display hex code in a box" hex-code))) - (format-control - (choice (const :tag "Don't display" zero-width) - (const :tag "Display as thin space" thin-space) - (const :tag "Display as empty box" empty-box) - (const :tag "Display acronym" acronym) - (const :tag "Display hex code in a box" hex-code))) - (variation-selectors - (choice (const :tag "Don't display" zero-width) - (const :tag "Display as thin space" thin-space) - (const :tag "Display as empty box" empty-box) - (const :tag "Display acronym" acronym) - (const :tag "Display hex code in a box" hex-code))) - (no-font - (choice (const :tag "Don't display" zero-width) - (const :tag "Display as thin space" thin-space) - (const :tag "Display as empty box" empty-box) - (const :tag "Display acronym" acronym) - (const :tag "Display hex code in a box" hex-code)))) + :options '((c0-control glyphless-char-display-method) + (c1-control glyphless-char-display-method) + (format-control glyphless-char-display-method) + (bidi-control glyphless-char-display-method) + (variation-selectors glyphless-char-display-method) + (no-font (glyphless-char-display-method :value hex-code))) :set 'update-glyphless-char-display :group 'display) |