summaryrefslogtreecommitdiff
path: root/lisp/language/misc-lang.el
diff options
context:
space:
mode:
Diffstat (limited to 'lisp/language/misc-lang.el')
-rw-r--r--lisp/language/misc-lang.el195
1 files changed, 191 insertions, 4 deletions
diff --git a/lisp/language/misc-lang.el b/lisp/language/misc-lang.el
index 2843c7c9038..3d5b68f84be 100644
--- a/lisp/language/misc-lang.el
+++ b/lisp/language/misc-lang.el
@@ -1,5 +1,6 @@
-;;; misc-lang.el --- support for miscellaneous languages (characters)
+;;; misc-lang.el --- support for miscellaneous languages (characters) -*- lexical-binding: t; -*-
+;; Copyright (C) 2012-2022 Free Software Foundation, Inc.
;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
;; National Institute of Advanced Industrial Science and Technology (AIST)
@@ -75,12 +76,198 @@ and Italian.")))
(sample-text . "Persian فارسی")
(documentation . "Bidirectional editing is supported.")))
+(defcustom arabic-shaper-ZWNJ-handling nil
+ "How to handle ZWMJ in Arabic text rendering.
+This variable controls the way to handle a glyph for ZWNJ
+returned by the underling shaping engine.
+
+The default value is nil, which means that the ZWNJ glyph is
+displayed as is.
+
+If the value is `absorb', ZWNJ is absorbed into the previous
+grapheme cluster, and not displayed.
+
+If the value is `as-space', the glyph is displayed by a
+thin (i.e. 1-dot width) space."
+ :group 'mule
+ :version "26.1"
+ :type '(choice
+ (const :tag "default" nil)
+ (const :tag "as space" as-space)
+ (const :tag "absorb" absorb))
+ :set (lambda (sym val)
+ (set-default sym val)
+ (clear-composition-cache)))
+
+;; Record error in arabic-change-gstring.
+(defvar arabic-shape-log nil)
+
+(defun arabic-shape-gstring (gstring direction)
+ (setq gstring (font-shape-gstring gstring direction))
+ (condition-case err
+ (when arabic-shaper-ZWNJ-handling
+ (let ((font (lgstring-font gstring))
+ (i 1)
+ (len (lgstring-glyph-len gstring))
+ (modified nil))
+ (while (< i len)
+ (let ((glyph (lgstring-glyph gstring i)))
+ (when (eq (lglyph-char glyph) #x200c)
+ (cond
+ ((eq arabic-shaper-ZWNJ-handling 'as-space)
+ (if (> (- (lglyph-rbearing glyph) (lglyph-lbearing glyph)) 0)
+ (let ((space-glyph (aref (font-get-glyphs font 0 1 " ") 0)))
+ (when space-glyph
+ (lglyph-set-code glyph (aref space-glyph 3))
+ (lglyph-set-width glyph (aref space-glyph 4)))))
+ (lglyph-set-adjustment glyph 0 0 1)
+ (setq modified t))
+ ((eq arabic-shaper-ZWNJ-handling 'absorb)
+ (let ((prev (lgstring-glyph gstring (1- i))))
+ (lglyph-set-from-to prev (lglyph-from prev) (lglyph-to glyph))
+ (setq gstring (lgstring-remove-glyph gstring i))
+ (setq len (1- len)))
+ (setq modified t)))))
+ (setq i (1+ i)))
+ (if modified
+ (lgstring-set-id gstring nil))))
+ (error (push err arabic-shape-log)))
+ gstring)
+
(set-char-table-range
composition-function-table
'(#x600 . #x74F)
- (list (vector "[\u0600-\u074F\u200C\u200D]+" 0 'font-shape-gstring)
- (vector "[\u200C\u200D][\u0600-\u074F\u200C\u200D]+"
- 1 'font-shape-gstring)))
+ (list (vector "[\u0600-\u074F\u200C\u200D]+"
+ 0 #'arabic-shape-gstring)))
+(set-char-table-range
+ composition-function-table
+ '(#x200C . #x200D)
+ (list (vector "[\u200C\u200D][\u0600-\u074F\u200C\u200D]+"
+ 0 #'arabic-shape-gstring)))
+
+;; The Egyptian Hieroglyph Format Controls were introduced in Unicode
+;; Standard v12.0. Apparently, they are not yet well supported in
+;; existing fonts, as of late 2020. But there's no reason for us not
+;; to be ready for when they will be!
+;; The below is needed to support the arrangement of the Egyptian
+;; Hieroglyphs in "quadrats", as directed by the format controls,
+;; which specify how the hieroglyphs should be joined horizontally and
+;; vertically.
+(defun egyptian-shape-grouping (gstring direction)
+ (if (= (lgstring-char gstring 0) #x13437)
+ (let ((nchars (lgstring-char-len gstring))
+ (i 1)
+ (nesting 1)
+ ch)
+ ;; Find where this group ends.
+ (while (and (< i nchars) (> nesting 0))
+ (setq ch (lgstring-char gstring i))
+ (cond
+ ((= ch #x13437)
+ (setq nesting (1+ nesting)))
+ ((= ch #x13438)
+ (setq nesting (1- nesting))))
+ (setq i (1+ i)))
+ (when (zerop nesting)
+ ;; Make a new gstring from the characters that constitute a
+ ;; complete nested group.
+ (let ((new-header (make-vector (1+ i) nil))
+ (new-gstring (make-vector (+ i 2) nil)))
+ (aset new-header 0 (lgstring-font gstring))
+ (dotimes (j i)
+ (aset new-header (1+ j) (lgstring-char gstring j))
+ (lgstring-set-glyph new-gstring j (lgstring-glyph gstring j)))
+ (lgstring-set-header new-gstring new-header)
+ (font-shape-gstring new-gstring direction))))))
+
+(let ((hieroglyph "[\U00013000-\U0001342F]"))
+ ;; HORIZONTAL/VERTICAL JOINER and INSERT AT.../OVERLAY controls
+ (set-char-table-range
+ composition-function-table
+ '(#x13430 . #x13436)
+ (list (vector (concat hieroglyph "[\U00013430-\U00013436]" hieroglyph)
+ ;; We use font-shape-gstring so that, if the font
+ ;; doesn't support these controls, the glyphs are
+ ;; displayed individually, and not as a single
+ ;; grapheme cluster.
+ 1 #'font-shape-gstring)))
+ ;; Grouping controls
+ (set-char-table-range
+ composition-function-table
+ #x13437
+ (list (vector "\U00013437[\U00013000-\U0001343F]+"
+ 0 #'egyptian-shape-grouping)))
+ ;; "Normal" hieroglyphs, for fonts that don't support the above
+ ;; controls, but do shape sequences of hieroglyphs without the
+ ;; controls.
+ ;; FIXME: As of late 2021, Egyptian Hieroglyph Format Controls are
+ ;; not yet supported in existing fonts and/or shaping engines, but
+ ;; some fonts do provide ligatures with which texts in Egyptian
+ ;; Hieroglyphs are correctly displayed. If and when these format
+ ;; controls are supported, as described in section 11.4 "Egyptian
+ ;; Hieroglyphs" of the Unicode Standard, the five lines below (which
+ ;; allow composition of hieroglyphs without formatting controls
+ ;; around) can be removed, and the entry in etc/HELLO can be
+ ;; restored to:
+ ;; Egyptian Hieroglyphs (𓂋𓐰𓏤𓈖𓆎𓅓𓏏𓐰𓊖) 𓅓𓊵𓐰𓐷𓏏𓊪𓐸, 𓇍𓇋𓂻𓍘𓇋
+ (set-char-table-range
+ composition-function-table
+ '(#x13000 . #x1342E)
+ (list (vector "[\U00013000-\U0001342E]+"
+ 0 #'font-shape-gstring))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Hanifi Rohingya
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(set-language-info-alist
+ "Hanifi Rohingya" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "hanifi-rohingya")
+ (sample-text . "Hanifi Rohingya (𐴌𐴟𐴇𐴥𐴝𐴚𐴒𐴙𐴝 𐴇𐴝𐴕𐴞𐴉𐴞 𐴓𐴠𐴑𐴤𐴝) 𐴀𐴝𐴏𐴓𐴝𐴀𐴡𐴤𐴛𐴝𐴓𐴝𐴙𐴑𐴟𐴔")
+ (documentation . "\
+Rohingya language and its script Hanifi Rohingya are supported
+in this language environment.")))
+
+;; Hanifi Rohingya composition rules
+(set-char-table-range
+ composition-function-table
+ '(#x10D1D . #x10D27)
+ (list (vector
+ "[\x10D00-\x10D27]+"
+ 1 'font-shape-gstring)))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Kharoṣṭhī
+;; Author: Stefan Baums <baums@gandhari.org>
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(set-language-info-alist
+ "Kharoshthi" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "kharoshthi")
+ (sample-text . "Kharoṣṭhī (𐨑𐨪𐨆𐨛𐨁) 𐨣𐨨𐨲𐨪𐨆 𐨐𐨪𐨅𐨨𐨁")
+ (documentation . "\
+Language environment for Gāndhārī, Sanskrit, and other languages
+using the Kharoṣṭhī script.")))
+
+(let ((consonant "[\U00010A00\U00010A10-\U00010A35]")
+ (vowel "[\U00010A01-\U00010A06]")
+ (virama "\U00010A3F")
+ (modifier "[\U00010A0C-\U00010A0F\U00010A38-\U00010A3A]"))
+ (set-char-table-range composition-function-table
+ '(#x10A3F . #x10A3F)
+ (list
+ (vector
+ (concat consonant
+ "\\(?:" virama consonant "\\)*"
+ modifier "*"
+ virama "?"
+ vowel "*"
+ modifier "*")
+ 1 'font-shape-gstring))))
(provide 'misc-lang)