summaryrefslogtreecommitdiff
path: root/lisp/language
diff options
context:
space:
mode:
Diffstat (limited to 'lisp/language')
-rw-r--r--lisp/language/cyril-util.el2
-rw-r--r--lisp/language/greek.el4
-rw-r--r--lisp/language/hanja-util.el4
-rw-r--r--lisp/language/ind-util.el27
-rw-r--r--lisp/language/indian.el375
-rw-r--r--lisp/language/indonesian.el197
-rw-r--r--lisp/language/lao.el10
-rw-r--r--lisp/language/misc-lang.el22
-rw-r--r--lisp/language/philippine.el96
-rw-r--r--lisp/language/thai-util.el16
-rw-r--r--lisp/language/thai.el37
11 files changed, 759 insertions, 31 deletions
diff --git a/lisp/language/cyril-util.el b/lisp/language/cyril-util.el
index e06339cc625..5482b3ea306 100644
--- a/lisp/language/cyril-util.el
+++ b/lisp/language/cyril-util.el
@@ -60,7 +60,7 @@ If the argument is nil, we return the display table to its standard state."
(list
(let* ((completion-ignore-case t))
(completing-read
- "Cyrillic language (default nil): "
+ (format-prompt "Cyrillic language" "nil")
cyrillic-language-alist nil t nil nil nil))))
(or standard-display-table
diff --git a/lisp/language/greek.el b/lisp/language/greek.el
index 58f4fe6fc49..920cf67d871 100644
--- a/lisp/language/greek.el
+++ b/lisp/language/greek.el
@@ -79,7 +79,9 @@
(coding-priority greek-iso-8bit)
(nonascii-translation . iso-8859-7)
(input-method . "greek")
- (documentation . t)))
+ (documentation . "Support for Greek ISO-8859-7 using the greek input method.")
+ (sample-text . "Greek (ελληνικά) Γειά σας")
+ (tutorial . "TUTORIAL.el_GR")))
(provide 'greek)
diff --git a/lisp/language/hanja-util.el b/lisp/language/hanja-util.el
index 7aa3f024a33..0c2419c91cd 100644
--- a/lisp/language/hanja-util.el
+++ b/lisp/language/hanja-util.el
@@ -6573,8 +6573,8 @@ The value is a hanja character that is selected interactively."
(hanja-filter (lambda (x) (car x))
(mapcar (lambda (c)
(if (listp c)
- (cons (decode-char 'ucs (car c)) (cdr c))
- (list (decode-char 'ucs c))))
+ (cons (car c) (cdr c))
+ (list c)))
(aref hanja-table char)))))
(unwind-protect
(when (aref hanja-conversions 2)
diff --git a/lisp/language/ind-util.el b/lisp/language/ind-util.el
index 8b1c3d69ae5..60ada03fa25 100644
--- a/lisp/language/ind-util.el
+++ b/lisp/language/ind-util.el
@@ -273,6 +273,29 @@
(;; Misc Symbols
nil ?ஂ ?ஃ nil ?் nil nil)
(;; Digits
+ nil nil nil nil nil nil nil nil nil nil)
+ (;; Inscript-extra (4) (#, $, ^, *, ])
+ "்ர" "ர்" "த்ர" nil nil)))
+
+(defvar indian-tml-base-digits-table
+ '(
+ (;; VOWELS
+ (?அ nil) (?ஆ ?ா) (?இ ?ி) (?ஈ ?ீ) (?உ ?ு) (?ஊ ?ூ)
+ nil nil nil (?ஏ ?ே) (?எ ?ெ) (?ஐ ?ை)
+ nil (?ஓ ?ோ) (?ஒ ?ொ) (?ஔ ?ௌ) nil nil)
+ (;; CONSONANTS
+ ?க nil nil nil ?ங ;; GUTTURALS
+ ?ச nil ?ஜ nil ?ஞ ;; PALATALS
+ ?ட nil nil nil ?ண ;; CEREBRALS
+ ?த nil nil nil ?ந ?ன ;; DENTALS
+ ?ப nil nil nil ?ம ;; LABIALS
+ ?ய ?ர ?ற ?ல ?ள ?ழ ?வ ;; SEMIVOWELS
+ nil ?ஷ ?ஸ ?ஹ ;; SIBILANTS
+ nil nil nil nil nil nil nil nil ;; NUKTAS
+ "ஜ்ஞ" "க்ஷ")
+ (;; Misc Symbols
+ nil ?ஂ ?ஃ nil ?் nil nil)
+ (;; Digits
?௦ ?௧ ?௨ ?௩ ?௪ ?௫ ?௬ ?௭ ?௮ ?௯)
(;; Inscript-extra (4) (#, $, ^, *, ])
"்ர" "ர்" "த்ர" nil nil)))
@@ -557,6 +580,10 @@
(defvar indian-tml-itrans-v5-hash
(indian-make-hash indian-tml-base-table
indian-itrans-v5-table-for-tamil))
+
+(defvar indian-tml-itrans-digits-v5-hash
+ (indian-make-hash indian-tml-base-digits-table
+ indian-itrans-v5-table-for-tamil))
)
(defmacro indian-translate-region (from to hashtable encode-p)
diff --git a/lisp/language/indian.el b/lisp/language/indian.el
index e0adb0de6c3..9329b43fea3 100644
--- a/lisp/language/indian.el
+++ b/lisp/language/indian.el
@@ -45,8 +45,9 @@
(coding-system utf-8)
(coding-priority utf-8)
(input-method . "devanagari-aiba")
+ (sample-text . "Devanagari (देवनागरी) नमस्ते / नमस्कार")
(documentation . "\
-Such languages using Devanagari script as Hindi and Marathi
+Such languages using Devanagari script as Hindi, Marathi and Nepali
are supported in this language environment."))
'("Indian"))
@@ -55,16 +56,18 @@ are supported in this language environment."))
(coding-system utf-8)
(coding-priority utf-8)
(input-method . "bengali-itrans")
+ (sample-text . "Bengali (বাংলা) নমস্কার")
(documentation . "\
Such languages using Bengali script as Bengali and Assamese
are supported in this language environment."))
'("Indian"))
(set-language-info-alist
- "Punjabi" '((charset unicode)
+ "Gurmukhi" '((charset unicode)
(coding-system utf-8)
(coding-priority utf-8)
(input-method . "punjabi-itrans")
+ (sample-text . "Gurmukhi (ਗੁਰਮੁਖੀ) ਸਤ ਸ੍ਰੀ ਅਕਾਲ")
(documentation . "\
North Indian language Punjabi is supported in this language environment."))
'("Indian"))
@@ -74,17 +77,31 @@ North Indian language Punjabi is supported in this language environment."))
(coding-system utf-8)
(coding-priority utf-8)
(input-method . "gujarati-itrans")
+ (sample-text . "Gujarati (ગુજરાતી) નમસ્તે")
(documentation . "\
North Indian language Gujarati is supported in this language environment."))
'("Indian"))
(set-language-info-alist
+ "Odia" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "odia")
+ (sample-text . "Odia (ଓଡ଼ିଆ) ନମସ୍କାର")
+ (documentation . "\
+Such languages using the Odia script as Odia, Khonti, and Santali
+are supported in this language environment. (This language
+environment was formerly known as \"Oriya\")."))
+ '("Indian"))
+
+(set-language-info-alist
"Oriya" '((charset unicode)
- (coding-system utf-8)
- (coding-priority utf-8)
- (input-method . "oriya-itrans")
- (documentation . "\
-Such languages using Oriya script as Oriya, Khonti, and Santali
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "odia")
+ (sample-text . "Odia (ଓଡ଼ିଆ) ନମସ୍କାର")
+ (documentation . "\
+Such languages using the Odia script as Odia, Khonti, and Santali
are supported in this language environment."))
'("Indian"))
@@ -93,6 +110,7 @@ are supported in this language environment."))
(coding-system utf-8)
(coding-priority utf-8)
(input-method . "tamil-itrans")
+ (sample-text . "Tamil (தமிழ்) வணக்கம்")
(documentation . "\
South Indian Language Tamil is supported in this language environment."))
'("Indian"))
@@ -102,6 +120,7 @@ South Indian Language Tamil is supported in this language environment."))
(coding-system utf-8)
(coding-priority utf-8)
(input-method . "telugu-itrans")
+ (sample-text . "Telugu (తెలుగు) నమస్కారం")
(documentation . "\
South Indian Language Telugu is supported in this language environment."))
'("Indian"))
@@ -113,7 +132,7 @@ South Indian Language Telugu is supported in this language environment."))
(input-method . "kannada-itrans")
(sample-text . "Kannada (ಕನ್ನಡ) ನಮಸ್ಕಾರ")
(documentation . "\
-Kannada language and script is supported in this language
+Kannada language and script are supported in this language
environment."))
'("Indian"))
@@ -122,10 +141,109 @@ environment."))
(coding-system utf-8)
(coding-priority utf-8)
(input-method . "malayalam-itrans")
+ (sample-text . "Malayalam (മലയാളം) നമസ്കാരം")
(documentation . "\
South Indian language Malayalam is supported in this language environment."))
'("Indian"))
+(set-language-info-alist
+ "Brahmi" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "brahmi")
+ (sample-text . "Brahmi (𑀩𑁆𑀭𑀸𑀳𑁆𑀫𑀻) 𑀦𑀫𑀲𑁆𑀢𑁂")
+ (documentation . "\
+The ancient Brahmi script is supported in this language environment."))
+ '("Indian")) ; Should we have an "Old" category?
+
+(set-language-info-alist
+ "Kaithi" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "kaithi")
+ (sample-text . "Kaithi (𑂍𑂶𑂟𑂲) 𑂩𑂰𑂧𑂩𑂰𑂧")
+ (documentation . "\
+Languages such as Awadhi, Bhojpuri, Magahi and Maithili
+which used the Kaithi script are supported in this language environment."))
+ '("Indian"))
+
+(set-language-info-alist
+ "Tirhuta" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "tirhuta")
+ (sample-text . "Tirhuta (𑒞𑒱𑒩𑒯𑒳𑒞𑒰) 𑒣𑓂𑒩𑒢𑒰𑒧 / 𑒮𑒲𑒞𑒰𑒩𑒰𑒧")
+ (documentation . "\
+Maithili language and its script Tirhuta are supported in this
+language environment."))
+ '("Indian"))
+
+(set-language-info-alist
+ "Sharada" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "sharada")
+ (sample-text . "Sharada (𑆯𑆳𑆫𑆢𑆳) 𑆤𑆩𑆱𑇀𑆑𑆳𑆫")
+ (documentation . "\
+Kashmiri language and its script Sharada are supported in this
+language environment."))
+ '("Indian"))
+
+(set-language-info-alist
+ "Siddham" '((charset unicode)
+             (coding-system utf-8)
+             (coding-priority utf-8)
+             (input-method . "siddham")
+             (sample-text . "Siddham (𑖭𑖰𑖟𑖿𑖠𑖽) 𑖡𑖦𑖭𑖿𑖝𑖸")
+             (documentation . "\
+Sanskrit language and one of its scripts, Siddham, are supported
+in this language environment."))
+ '("Indian"))
+
+(set-language-info-alist
+ "Syloti Nagri" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "syloti-nagri")
+ (sample-text . "Syloti Nagri (ꠍꠤꠟꠐꠤ ꠘꠣꠉꠞꠤ) ꠀꠌ꠆ꠍꠣꠟꠣꠝꠥ ꠀꠟꠣꠁꠇꠥꠝ / ꠘꠝꠡ꠆ꠇꠣꠞ")
+ (documentation . "\
+Sylheti language and its script Syloti Nagri are supported
+in this language environment."))
+ '("Indian"))
+
+(set-language-info-alist
+ "Modi" '((charset unicode)
+          (coding-system utf-8)
+          (coding-priority utf-8)
+          (input-method . "modi")
+          (sample-text . "Modi (𑘦𑘻𑘚𑘲) 𑘡𑘦𑘭𑘿𑘎𑘰𑘨")
+          (documentation . "\
+Marathi language and one of its scripts, Modi, are supported
+in this language environment."))
+ '("Indian"))
+
+(set-language-info-alist
+ "Limbu" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "limbu")
+ (sample-text . "Limbu (ᤕᤠᤰᤌᤢᤱ ᤐᤠᤴ) ᤛᤣᤘᤠᤖᤥ")
+ (documentation . "\
+Limbu language and its script are supported in this
+language environment."))
+ '("Indian"))
+
+(set-language-info-alist
+ "Grantha" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "grantha")
+ (sample-text . "Grantha (𑌗𑍍𑌰𑌨𑍍𑌥) 𑌨𑌮𑌸𑍍𑌤𑍇 / 𑌨𑌮𑌸𑍍𑌕𑌾𑌰𑌃")
+ (documentation . "\
+Languages such as Sanskrit and Manipravalam, when they use the
+Grantha script, are supported in this language environment."))
+ '("Indian"))
+
;; Replace mnemonic characters in REGEXP according to TABLE. TABLE is
;; an alist of (MNEMONIC-STRING . REPLACEMENT-STRING).
@@ -147,6 +265,8 @@ South Indian language Malayalam is supported in this language environment."))
("H" . "\u094D") ; HALANT
("s" . "[\u0951\u0952]") ; stress sign
("t" . "[\u0953\u0954]") ; accent
+ ("1" . "\u0967") ; numeral 1
+ ("3" . "\u0969") ; numeral 3
("N" . "\u200C") ; ZWNJ
("J" . "\u200D") ; ZWJ
("X" . "[\u0900-\u097F]")))) ; all coverage
@@ -158,6 +278,8 @@ South Indian language Malayalam is supported in this language environment."))
"Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?s?t?A?\\)\\|"
;; special consonant form, or
"JHR\\|"
+ ;; vedic accents with numerals, or
+ "1ss?\\|3ss\\|s3ss\\|"
;; any other singleton characters
"X")
table))
@@ -168,14 +290,15 @@ South Indian language Malayalam is supported in this language environment."))
'(("a" . "\u0981") ; SIGN CANDRABINDU
("A" . "[\u0982\u0983]") ; SIGN ANUSVARA .. VISARGA
("V" . "[\u0985-\u0994\u09E0\u09E1]") ; independent vowel
- ("C" . "[\u0995-\u09B9\u09DC-\u09DF\u09F1]") ; consonant
+ ("C" . "[\u0995-\u09B9\u09DC-\u09DF\u09F0\u09F1]") ; consonant
("B" . "[\u09AC\u09AF\u09B0\u09F0]") ; BA, YA, RA
("R" . "[\u09B0\u09F0]") ; RA
("n" . "\u09BC") ; NUKTA
("v" . "[\u09BE-\u09CC\u09D7\u09E2\u09E3]") ; vowel sign
("H" . "\u09CD") ; HALANT
("T" . "\u09CE") ; KHANDA TA
- ("N" . "\u200C") ; ZWNJ
+ ("S" . "\u09FE") ; SANDHI MARK
+ ("N" . "\u200C") ; ZWNJ
("J" . "\u200D") ; ZWJ
("X" . "[\u0980-\u09FF]")))) ; all coverage
(indian-compose-regexp
@@ -183,7 +306,7 @@ South Indian language Malayalam is supported in this language environment."))
;; syllables with an independent vowel, or
"\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|"
;; consonant-based syllables, or
- "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*[NJ]?v?a?A?\\)\\|"
+ "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*[NJ]?v?a?A?S?\\)\\|"
;; another syllables with an independent vowel, or
"\\(?:RH\\)?T\\|"
;; special consonant form, or
@@ -250,7 +373,7 @@ South Indian language Malayalam is supported in this language environment."))
'(("a" . "\u0B01") ; SIGN CANDRABINDU
("A" . "[\u0B02\u0B03]") ; SIGN ANUSVARA .. VISARGA
("V" . "[\u0B05-\u0B14\u0B60\u0B61]") ; independent vowel
- ("C" . "[\u0B15-\u0B39\u0B5C\u0B5D\u0B71]") ; consonant
+ ("C" . "[\u0B15-\u0B39\u0B5C\u0B5D\u0B5F\u0B71]") ; consonant
("B" . "[\u0B15-\u0B17\u0B1B-\u0B1D\u0B1F-\u0B21\u0B23\u0B24\u0B27-\u0B30\u0B32-\u0B35\u0B38\u0B39]") ; consonant with below form
("R" . "\u0B30") ; RA
("n" . "\u0B3C") ; NUKTA
@@ -384,6 +507,232 @@ South Indian language Malayalam is supported in this language environment."))
(list (vector (cdr slot) 0 #'font-shape-gstring))))))
char-script-table))
-(provide 'indian)
+;; Brahmi composition rules
+(let ((consonant "[\U00011013-\U00011034]")
+ (non-consonant "[^\U00011013-\U00011034\U00011046\U0001107F]")
+ (vowel "[\U00011038-\U00011045]")
+ (numeral "[\U00011052-\U00011065]")
+ (multiplier "[\U00011064\U00011065]")
+ (virama "\U00011046")
+ (number-joiner "\U0001107F"))
+ (set-char-table-range composition-function-table
+ '(#x11046 . #x11046)
+ (list (vector
+ ;; Consonant conjuncts
+ (concat consonant "\\(?:" virama consonant "\\)+"
+ vowel "?")
+ 1 'font-shape-gstring)
+ (vector
+ ;; Vowelless consonants
+ (concat consonant virama non-consonant)
+ 1 'font-shape-gstring)))
+ (set-char-table-range composition-function-table
+ '(#x1107F . #x1107F)
+ (list (vector
+ ;; Additive-multiplicative numerals
+ (concat multiplier number-joiner numeral)
+ 1 'font-shape-gstring))))
+
+;; Kaithi composition rules (syllable clusters and number-sign sequences)
+(let ((consonant            "[\x1108D-\x110AF]")
+      (nukta                "\x110BA")
+      (independent-vowel    "[\x11083-\x1108C]")
+      (vowel                "[\x110B0-\x110B8\x110C2]") ; dependent signs only; must not overlap `consonant'
+      (nasal                "[\x11080\x11081]")
+      (virama               "\x110B9")
+      (number-sign          "\x110BD")
+      (number-sign-above    "\x110CD")
+      (numerals             "[\x966-\x96F]+") ; one or more Devanagari digits
+      (zwj                  "\x200D"))
+  (set-char-table-range composition-function-table
+                        '(#x110B0 . #x110BA) ; rules keyed on vowel signs, virama and nukta
+                        (list (vector
+                               ;; Consonant based syllables
+                               (concat consonant nukta "?\\(?:" virama zwj "?" consonant
+                                       nukta "?\\)*\\(?:" virama zwj "?\\|" vowel "*" nukta
+                                       "?" nasal "?\\)")
+                               1 'font-shape-gstring)
+                              (vector
+                               ;; Vowel based syllables
+                               (concat independent-vowel nukta "?" virama "?" vowel "?")
+                               1 'font-shape-gstring)))
+  (set-char-table-range composition-function-table
+                        '(#x110BD . #x110BD)
+                        (list (vector
+                               ;; Number sign followed by its digits
+                               (concat number-sign numerals)
+                               0 'font-shape-gstring)))
+  (set-char-table-range composition-function-table
+                        '(#x110CD . #x110CD)
+                        (list (vector
+                               ;; Number sign above followed by its digits
+                               (concat number-sign-above numerals)
+                               0 'font-shape-gstring))))
+
+;; Tirhuta composition rules
+(let ((consonant "[\x1148F-\x114AF]")
+ (nukta "\x114C3")
+ (independent-vowel "[\x11481-\x1148E]")
+ (vowel "[\x114B0-\x114BE]")
+ (nasal "[\x114BF\x114C0]")
+ (virama "\x114C2"))
+ (set-char-table-range composition-function-table
+ '(#x114B0 . #x114C3)
+ (list (vector
+ ;; Consonant based syllables
+ (concat consonant nukta "?\\(?:" virama consonant nukta
+ "?\\)*\\(?:" virama "\\|" vowel "*" nukta "?"
+ nasal "?\\)")
+ 1 'font-shape-gstring)
+ (vector
+ ;; Vowel based syllables
+ (concat independent-vowel nukta "?" virama "?" vowel "?" nasal "?")
+ 1 'font-shape-gstring))))
+
+;; Sharada composition rules
+(let ((consonant "[\x11191-\x111B2]")
+ (nukta "\x111CA")
+ (independent-vowel "[\x11183-\x11190]")
+ (vowel "[\x111B3-\x111BF\x111CE]")
+ (vowel-modifier "\x111CB")
+ (extra-short-vowel-mark "\x111CC")
+ (nasal "[\x11181\x11180\x111CF]")
+ (virama "\x111C0")
+ (fricatives "[\x111C2\x111C3]")
+ (sandhi-mark "\x111C9")
+ (misc "[\x111C4-\x111C8\x111CD]"))
+ (set-char-table-range composition-function-table
+ '(#x111B3 . #x111CE)
+ (list (vector
+ ;; Consonant based syllables
+ (concat consonant nukta "?" vowel-modifier "?\\(?:" virama
+ consonant nukta "?" vowel-modifier "?\\)*\\(?:" virama
+ "\\|" vowel "*" nukta "?" nasal "?" extra-short-vowel-mark
+ "?" vowel-modifier "?" sandhi-mark "?+" misc "?\\)")
+ 1 'font-shape-gstring)
+ (vector
+ ;; Vowel based syllables
+ (concat independent-vowel nukta "?" vowel-modifier "?" virama "?"
+ vowel "?" extra-short-vowel-mark "?" sandhi-mark "?"
+ fricatives "?" misc "?")
+ 1 'font-shape-gstring)
+ (vector
+ ;; Fricatives with Consonants
+ (concat fricatives "?" consonant vowel "?")
+ 0 'font-shape-gstring))))
+
+;; Siddham composition rules
+(let ((consonant "[\x1158E-\x115AE]")
+ (nukta "\x115C0")
+ (independent-vowel "[\x11580-\x1158D\x115D8-\x115DB]")
+ (vowel "[\x115AF-\x115BB\x115DC\x115DD]")
+ (nasal "[\x115BC\x115BD]")
+ (visarga "\x115BE")
+ (virama "\x115BF"))
+ (set-char-table-range composition-function-table
+ '(#x115AF . #x115C0)
+ (list (vector
+ ;; Consonant based syllables
+ (concat consonant nukta "?" "\\(?:" virama consonant nukta
+ "?\\)*\\(?:" virama "\\|" vowel "*" nukta "?" nasal
+ "?" visarga "?\\)")
+ 1 'font-shape-gstring)
+ (vector
+ ;; Vowels based syllables
+ (concat independent-vowel nukta "?" virama "?" vowel "?"
+ nasal "?" visarga "?")
+ 1 'font-shape-gstring))))
+
+;; Syloti Nagri composition rules
+(let ((consonant "[\xA807-\xA80A\xA80C-\xA822]")
+ (vowel "[\xA802\xA823-\xA827]")
+ (nasal "[\xA80B]")
+ (virama "\xA806")
+ (alternate-virama "\xA82C"))
+ (set-char-table-range composition-function-table
+ '(#xA806 . #xA806)
+ (list (vector
+ ;; Consonant conjunct based syllables
+ (concat consonant "\\(?:" virama consonant "\\)+"
+ vowel "?" nasal "?")
+ 1 'font-shape-gstring)))
+ (set-char-table-range composition-function-table
+ '(#xA823 . #xA827)
+ (list (vector
+ ;; Non Consonant conjunct based syllables
+ (concat consonant vowel nasal "?")
+ 1 'font-shape-gstring)))
+ (set-char-table-range composition-function-table
+ '(#xA82C . #xA82C)
+ (list (vector
+ ;; Consonant with the alternate virama
+ (concat consonant "\\(?:" alternate-virama consonant "\\)+"
+ vowel "?" nasal "?")
+ 1 'font-shape-gstring))))
+
+;; Modi composition rules (consonant- and vowel-based syllable clusters)
+(let ((consonant            "[\x1160E-\x1162F]")
+      (independent-vowel    "[\x11600-\x1160D]")
+      (vowel                "[\x11630-\x1163C]")
+      (nasal                "\x1163D")
+      (visarga              "\x1163E")
+      (virama               "\x1163F")
+      (ardhacandra          "\x11640"))
+  (set-char-table-range composition-function-table
+                        '(#x11630 . #x11640) ; rules keyed on the dependent signs
+                        (list (vector
+                               ;; Consonant based syllables
+                               (concat consonant "\\(?:" virama consonant "\\)*\\(?:"
+                                       virama "\\|" vowel "*" ardhacandra "?" nasal
+                                       "?" visarga "?\\)")
+                               1 'font-shape-gstring)
+                              (vector
+                               ;; Vowels based syllables; every trailing sign is optional
+                               (concat independent-vowel virama "?" vowel "?" ardhacandra "?"
+                                       nasal "?" visarga "?")
+                               1 'font-shape-gstring))))
+
+;; Limbu composition rules
+(let ((consonant "[\x1900-\x191E]")
+ (vowel "[\x1920-\x1928]")
+ (subjoined-letter "[\x1929-\x192B]")
+ (small-letter "[\x1930-\x1938]")
+ (other-signs "[\x1939\x193A]")
+ (sa-i "\x193B"))
+ (set-char-table-range composition-function-table
+ '(#x1920 . #x193B)
+ (list (vector
+ ;; Consonant based syllables
+ (concat consonant sa-i "?" subjoined-letter "?" small-letter
+ "?" vowel "?" other-signs "?")
+ 1 'font-shape-gstring))))
+
+;; Grantha composition rules
+(let ((consonant "[\x11315-\x11339]")
+ (nukta "\x1133C")
+ (independent-vowel "[\x11305-\x11314\x11360\x11361]")
+ (vowel "[\x1133E-\x1134C\x11357\x11362\x11363]")
+ (nasal "[\x11300-\x11302]")
+ (bindu "\x1133B")
+ (visarga "\x11303")
+ (virama "\x1134D")
+ (avagraha "\x1133D")
+ (modifier-above "[\x11366-\x11374]"))
+ (set-char-table-range composition-function-table
+ '(#x1133B . #x1134D)
+ (list (vector
+ ;; Consonant based syllables
+ (concat consonant nukta "?" "\\(?:" virama consonant nukta
+ "?\\)*\\(?:" virama "\\|" vowel "*" nukta "?" nasal
+ "?" bindu "?" visarga "?" modifier-above "?"
+ avagraha "?\\)")
+ 1 'font-shape-gstring)
+ (vector
+ ;; Vowels based syllables
+ (concat independent-vowel nukta "?" virama "?" vowel "?"
+ nasal "?" bindu "?" visarga "?" modifier-above
+ "?" avagraha "?")
+ 1 'font-shape-gstring))))
+(provide 'indian)
;;; indian.el ends here
diff --git a/lisp/language/indonesian.el b/lisp/language/indonesian.el
new file mode 100644
index 00000000000..699f8192543
--- /dev/null
+++ b/lisp/language/indonesian.el
@@ -0,0 +1,197 @@
+;;; indonesian.el --- Indonesian languages support -*- coding: utf-8; lexical-binding: t; -*-
+
+;; Copyright (C) 2022 Free Software Foundation, Inc.
+
+;; Author: समीर सिंह Sameer Singh <lumarzeli30@gmail.com>
+;; Keywords: multilingual, input method, i18n, Indonesia
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; This file contains definitions of Indonesian language environments, and
+;; setups for displaying the scripts used there.
+
+;;; Code:
+
+(set-language-info-alist
+ "Balinese" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "balinese")
+ (sample-text . "Balinese (ᬅᬓ᭄ᬱᬭᬩᬮᬶ) ᬒᬁᬲ᭄ᬯᬲ᭄ᬢ᭄ᬬᬲ᭄ᬢᬸ")
+ (documentation . "\
+Balinese language and its script are supported in this language environment.")))
+
+(set-language-info-alist
+ "Javanese" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "javanese")
+ (sample-text . "Javanese (ꦲꦏ꧀ꦱꦫꦗꦮ) ꦲꦭꦺꦴ")
+ (documentation . "\
+Javanese language and its script are supported in this language environment.")))
+
+(set-language-info-alist
+ "Sundanese" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "sundanese")
+ (sample-text . "Sundanese (ᮃᮊ᮪ᮞᮛᮞᮥᮔ᮪ᮓ) ᮞᮙ᮪ᮕᮥᮛᮞᮥᮔ᮪")
+ (documentation . "\
+Sundanese language and its script are supported in this language environment.")))
+
+(set-language-info-alist
+ "Batak" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "batak")
+ (sample-text . "Batak (ᯘᯮᯒᯗ᯲ᯅᯗᯂ᯲) ᯂᯬᯒᯘ᯲ / ᯔᯧᯐᯬᯀᯱᯐᯬᯀᯱ")
+ (documentation . "\
+Languages that use the Batak script, such as Karo, Toba, Pakpak, Mandailing
+and Simalungun, are supported in this language environment.")))
+
+(set-language-info-alist
+ "Rejang" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "rejang")
+ (sample-text . "Rejang (ꥆꤰ꥓ꤼꤽ ꤽꥍꤺꥏ) ꤸꥉꥐꤺꥉꥂꥎ")
+ (documentation . "\
+Rejang language and its script are supported in this language environment.")))
+
+(set-language-info-alist
+ "Makasar" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "makasar")
+ (sample-text . "Makasar (𑻪𑻢𑻪𑻢) 𑻦𑻤𑻵𑻱")
+ (documentation . "\
+Makassarese language and its script Makasar are supported in this language environment.")))
+
+(set-language-info-alist
+ "Buginese" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "lontara")
+ (sample-text . "Buginese (ᨒᨚᨈᨑ) ᨖᨒᨚ")
+ (documentation . "\
+Buginese language and its script Lontara are supported in this language environment.")))
+
+;; Balinese composition rules
+(let ((consonant "[\x1B13-\x1B33\x1B45-\x1B4B]")
+ (independent-vowel "[\x1B05-\x1B12]")
+ (rerekan "\x1B34")
+ (vowel "[\x1B35-\x1B43]")
+ (modifier-above "[\x1B00-\x1B04]")
+ (adeg-adeg "\x1B44")
+ (musical-symbol "[\x1B6B-\x1B73]"))
+ (set-char-table-range composition-function-table
+ '(#x1B34 . #x1B44)
+ (list (vector
+ ;; Consonant based syllables
+ (concat consonant rerekan "?\\(?:" adeg-adeg consonant
+ rerekan "?\\)*\\(?:" adeg-adeg "\\|" vowel "*" rerekan
+ "?" modifier-above "?" musical-symbol "?\\)")
+ 1 'font-shape-gstring)
+ (vector
+ ;; Vowels based syllables
+ (concat independent-vowel rerekan "?" adeg-adeg "?"
+ vowel "?" modifier-above "?" musical-symbol "?")
+ 1 'font-shape-gstring))))
+
+;; Javanese composition rules
+(let ((consonant "[\xA98F-\xA9B2]")
+ (independent-vowel "[\xA984-\xA98E]")
+ (telu "\xA9B3")
+ (vowel "[\xA9B4-\xA9BC]")
+ (dependant-consonant "[\xA9BD-\xA9BF]")
+ (modifier-above "[\xA980-\xA983]")
+ (pangkon "\xA9C0"))
+ (set-char-table-range composition-function-table
+ '(#xA9B3 . #xA9C0)
+ (list (vector
+ ;; Consonant based syllables
+ (concat consonant telu "?\\(?:" pangkon consonant
+ telu "?\\)*\\(?:" pangkon "\\|" vowel "*" telu
+ "?" modifier-above "?" dependant-consonant "?\\)")
+ 1 'font-shape-gstring)
+ (vector
+ ;; Vowels based syllables
+ (concat independent-vowel telu "?" pangkon "?"
+ vowel "?" modifier-above "?" dependant-consonant "?")
+ 1 'font-shape-gstring))))
+
+;; Sundanese composition rules
+(let ((consonant "[\x1B8A-\x1BA0\x1BAE\x1BAF\x1BBB-\x1BBF]")
+ (independent-vowel "[\x1B83-\x1B89]")
+ (vowel "[\x1BA4-\x1BA9]")
+ (dependant-consonant "[\x1BA1-\x1BA3\x1BAC-\x1BAD]")
+ (modifier-above "[\x1B80-\x1B82]")
+ (virama "[\x1BAA\x1BAB]"))
+ (set-char-table-range composition-function-table
+ '(#x1BA1 . #x1BAD)
+ (list (vector
+ ;; Consonant based syllables
+ (concat consonant "\\(?:" virama consonant
+ "\\)*\\(?:" virama "\\|" vowel "*"
+ modifier-above "?" dependant-consonant "?\\)")
+ 1 'font-shape-gstring)
+ (vector
+ ;; Vowels based syllables
+ (concat independent-vowel virama "?"
+ vowel "?" modifier-above "?" dependant-consonant "?")
+ 1 'font-shape-gstring))))
+
+;; Batak composition rules
+(let ((akshara "[\x1BC0-\x1BE5]")
+ (vowel "[\x1BE7-\x1BEF]")
+ (dependant-consonant "[\x1BF0\x1BF1]")
+ (modifier-above "\x1BE6")
+ (virama "[\x1BF2\x1BF3]"))
+ (set-char-table-range composition-function-table
+ '(#x1BE6 . #x1BF3)
+ (list (vector
+ ;; Akshara based syllables
+ (concat akshara virama "?" vowel "*" modifier-above
+ "?" dependant-consonant "?")
+ 1 'font-shape-gstring))))
+
+;; Rejang composition rules
+(let ((akshara "[\xA930-\xA946]")
+ (vowel "[\xA947-\xA94E]")
+ (dependant-consonant "[\xA94F\xA952]")
+ (virama "\xA953"))
+ (set-char-table-range composition-function-table
+ '(#xA947 . #xA953)
+ (list (vector
+ ;; Akshara based syllables
+ (concat akshara virama "?" vowel "*"
+ dependant-consonant "?")
+ 1 'font-shape-gstring))))
+
+;; Makasar composition rules
+(let ((akshara "[\x11EE0-\x11EF2]")
+ (vowel "[\x11EF3-\x11EF6]"))
+ (set-char-table-range composition-function-table
+ '(#x11EF3 . #x11EF6)
+ (list (vector
+ ;; Akshara based syllables
+ (concat akshara vowel "*")
+ 1 'font-shape-gstring))))
+
+(provide 'indonesian)
+;;; indonesian.el ends here
diff --git a/lisp/language/lao.el b/lisp/language/lao.el
index 5c545df4840..1861eff15eb 100644
--- a/lisp/language/lao.el
+++ b/lisp/language/lao.el
@@ -59,11 +59,11 @@
(let* ((chars (car l))
(len (length chars))
;; Replace `c', `t', `v' to consonant, tone, and vowel.
- (regexp (mapconcat #'(lambda (c)
- (cond ((= c ?c) consonant)
- ((= c ?t) tone)
- ((= c ?v) vowel-upper-lower)
- (t (string c))))
+ (regexp (mapconcat (lambda (c)
+ (cond ((= c ?c) consonant)
+ ((= c ?t) tone)
+ ((= c ?v) vowel-upper-lower)
+ (t (string c))))
(cdr l) ""))
;; Element of composition-function-table.
(elt (list (vector regexp 1 #'lao-composition-function)
diff --git a/lisp/language/misc-lang.el b/lisp/language/misc-lang.el
index c8a4821abf7..46429a4380d 100644
--- a/lisp/language/misc-lang.el
+++ b/lisp/language/misc-lang.el
@@ -212,6 +212,28 @@ thin (i.e. 1-dot width) space."
(list (vector "[\U00013000-\U0001342E]+"
0 #'font-shape-gstring))))
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Hanifi Rohingya
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(set-language-info-alist
+ "Hanifi Rohingya" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "hanifi-rohingya")
+ (sample-text . "Hanifi Rohingya (𐴌𐴟𐴇𐴥𐴝𐴚𐴒𐴙𐴝 𐴇𐴝𐴕𐴞𐴉𐴞 𐴓𐴠𐴑𐴤𐴝) 𐴀𐴝𐴏𐴓𐴝𐴀𐴡𐴤𐴛𐴝𐴓𐴝𐴙𐴑𐴟𐴔")
+ (documentation . "\
+Rohingya language and its script Hanifi Rohingya are supported
+in this language environment.")))
+
+;; Hanifi Rohingya composition rules
+(set-char-table-range
+ composition-function-table
+ '(#x10D1D . #x10D27)
+ (list (vector
+ "[\x10D00-\x10D27]+"
+ 1 'font-shape-gstring)))
+
(provide 'misc-lang)
;;; misc-lang.el ends here
diff --git a/lisp/language/philippine.el b/lisp/language/philippine.el
new file mode 100644
index 00000000000..e52ad6912cd
--- /dev/null
+++ b/lisp/language/philippine.el
@@ -0,0 +1,96 @@
+;;; philippine.el --- Philippine languages support -*- coding: utf-8; lexical-binding: t; -*-
+
+;; Copyright (C) 2022 Free Software Foundation, Inc.
+
+;; Author: समीर सिंह Sameer Singh <lumarzeli30@gmail.com>
+;; Keywords: multilingual, input method, i18n, Philippines
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; This file contains definitions of Philippine language environments, and
+;; setups for displaying the scripts used there.
+
+;;; Code:
+
+(set-language-info-alist
+ "Tagalog" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "tagalog")
+ (sample-text . "Tagalog (ᜊᜌ᜔ᜊᜌᜒᜈ᜔) ᜃᜓᜋᜓᜐ᜔ᜆ")
+ (documentation . "\
+Tagalog language using the Baybayin script is supported in
+this language environment.")))
+
+(set-language-info-alist
+ "Hanunoo" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "hanunoo")
+ (sample-text . "Hanunoo (ᜱᜨᜳᜨᜳᜢ) ᜫᜬᜧ᜴ ᜣᜭᜯᜥ᜴ ᜰᜲᜭᜥ᜴")
+ (documentation . "\
+Philippine Language Hanunoo is supported in this language environment.")))
+
+(set-language-info-alist
+ "Buhid" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "buhid")
+ (documentation . "\
+Philippine Language Buhid is supported in this language environment.")))
+
+(set-language-info-alist
+ "Tagbanwa" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "tagbanwa")
+ (sample-text . "Tagbanwa (ᝦᝪᝯ) ᝫᝩᝬᝥ ᝣᝮᝧᝯ")
+ (documentation . "\
+Philippine Languages Tagbanwa are supported in this language environment.")))
+
+;; Tagalog composition rules
+(let ((akshara "[\x1700-\x1711\x171F]")
+ (vowel "[\x1712\x1713]")
+ (virama "\x1714")
+ (pamudpod "\x1715"))
+ (set-char-table-range composition-function-table
+ '(#x1714 . #x1714)
+ (list (vector
+ ;; Akshara virama syllables
+ (concat akshara virama vowel "?")
+ 1 'font-shape-gstring)))
+ (set-char-table-range composition-function-table
+ '(#x1715 . #x1715)
+ (list (vector
+ ;; Akshara pamudpod syllables
+ (concat akshara pamudpod vowel "?")
+ 1 'font-shape-gstring))))
+
+;; Hanunoo composition rules
+(let ((akshara "[\x1720-\x1731]")
+ (vowel "[\x1732\x1733]")
+ (pamudpod "\x1734"))
+ (set-char-table-range composition-function-table
+ '(#x1734 . #x1734)
+ (list (vector
+ ;; Akshara pamudpod syllables
+ (concat akshara pamudpod vowel "?")
+ 1 'font-shape-gstring))))
+
+(provide 'philippine)
+;;; philippine.el ends here
diff --git a/lisp/language/thai-util.el b/lisp/language/thai-util.el
index d11daf0f839..6c004e9495c 100644
--- a/lisp/language/thai-util.el
+++ b/lisp/language/thai-util.el
@@ -244,15 +244,13 @@ positions (integers or markers) specifying the region."
;; Thai-word-mode requires functions in the feature `thai-word'.
(require 'thai-word)
-(defvar thai-word-mode-map
- (let ((map (make-sparse-keymap)))
- (define-key map [remap forward-word] 'thai-forward-word)
- (define-key map [remap backward-word] 'thai-backward-word)
- (define-key map [remap kill-word] 'thai-kill-word)
- (define-key map [remap backward-kill-word] 'thai-backward-kill-word)
- (define-key map [remap transpose-words] 'thai-transpose-words)
- map)
- "Keymap for `thai-word-mode'.")
+(defvar-keymap thai-word-mode-map
+ :doc "Keymap for `thai-word-mode'."
+ "<remap> <forward-word>" #'thai-forward-word
+ "<remap> <backward-word>" #'thai-backward-word
+ "<remap> <kill-word>" #'thai-kill-word
+ "<remap> <backward-kill-word>" #'thai-backward-kill-word
+ "<remap> <transpose-words>" #'thai-transpose-words)
(define-minor-mode thai-word-mode
"Minor mode to make word-oriented commands aware of Thai words.
diff --git a/lisp/language/thai.el b/lisp/language/thai.el
index 6a6289a44c7..60f5f9d2a38 100644
--- a/lisp/language/thai.el
+++ b/lisp/language/thai.el
@@ -82,6 +82,43 @@ This is the same as `thai-tis620' with the addition of no-break-space."
(aset composition-function-table (aref chars i) elt)))
(aset composition-function-table ?ำ '(["[ก-ฯ]." 1 thai-composition-function]))
+;; Tai-Tham
+
+(set-language-info-alist
+ "Northern Thai" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (sample-text .
+ "Northern Thai (ᨣᩣᩴᨾᩮᩬᩥᨦ / ᨽᩣᩈᩣᩃ᩶ᩣ᩠ᨶᨶᩣ) ᩈ᩠ᩅᩢᩔ᩠ᨯᩦᨣᩕᩢ᩠ᨸ")
+ (documentation . t)))
+
+;; From Richard Wordingham <richard.wordingham@ntlworld.com>:
+(defvar tai-tham-composable-pattern
+  (let ((table
+         ;; C is letters, independent vowels, digits, punctuation and symbols.
+         '(("C" . "[\u1A20-\u1A54\u1A80-\u1A89\u1A90-\u1A99\u1AA0-\u1AAD]")
+           ("M" . ; Marks, CGJ, ZWNJ (U+200C), ZWJ (U+200D)
+            "[\u0324\u034F\u0E49\u0E4A\u0E4B\u1A55-\u1A57\u1A59-\u1A5E\u1A61-\u1A7C\u1A7F\u200C\u200D]")
+           ("H" . "\u1A60") ; Sakot
+           ("S" . ; Marks commuting with sakot
+            "[\u0E49-\u0E4B\u0EC9\u0ECB\u1A75-\u1A7C]")
+           ("N" . "\u1A58"))) ; mai kang lai
+        (basic-syllable "C\\(N*\\(M\\|HS*C\\)\\)*")
+        (regexp "X\\(N\\(X\\)?\\)*H?")) ; template; X is expanded to basic-syllable below
+    (let ((case-fold-search nil)) ; mnemonic letters are case-sensitive
+      (setq regexp (replace-regexp-in-string "X" basic-syllable regexp t t))
+      (dolist (elt table) ; substitute each mnemonic letter with its character class
+        (setq regexp (replace-regexp-in-string (car elt) (cdr elt)
+                                               regexp t t))))
+    regexp))
+
+(let ((elt (list (vector tai-tham-composable-pattern 0 'font-shape-gstring)
+ )))
+ (set-char-table-range composition-function-table '(#x1A20 . #x1A54) elt)
+ (set-char-table-range composition-function-table '(#x1A80 . #x1A89) elt)
+ (set-char-table-range composition-function-table '(#x1A90 . #x1A99) elt)
+ (set-char-table-range composition-function-table '(#x1AA0 . #x1AAD) elt))
+
(provide 'thai)
;;; thai.el ends here