diff options
Diffstat (limited to 'lisp/language')
-rw-r--r-- | lisp/language/cyril-util.el | 2 | ||||
-rw-r--r-- | lisp/language/greek.el | 4 | ||||
-rw-r--r-- | lisp/language/hanja-util.el | 4 | ||||
-rw-r--r-- | lisp/language/ind-util.el | 27 | ||||
-rw-r--r-- | lisp/language/indian.el | 375 | ||||
-rw-r--r-- | lisp/language/indonesian.el | 197 | ||||
-rw-r--r-- | lisp/language/lao.el | 10 | ||||
-rw-r--r-- | lisp/language/misc-lang.el | 22 | ||||
-rw-r--r-- | lisp/language/philippine.el | 96 | ||||
-rw-r--r-- | lisp/language/thai-util.el | 16 | ||||
-rw-r--r-- | lisp/language/thai.el | 37 |
11 files changed, 759 insertions, 31 deletions
diff --git a/lisp/language/cyril-util.el b/lisp/language/cyril-util.el index e06339cc625..5482b3ea306 100644 --- a/lisp/language/cyril-util.el +++ b/lisp/language/cyril-util.el @@ -60,7 +60,7 @@ If the argument is nil, we return the display table to its standard state." (list (let* ((completion-ignore-case t)) (completing-read - "Cyrillic language (default nil): " + (format-prompt "Cyrillic language" "nil") cyrillic-language-alist nil t nil nil nil)))) (or standard-display-table diff --git a/lisp/language/greek.el b/lisp/language/greek.el index 58f4fe6fc49..920cf67d871 100644 --- a/lisp/language/greek.el +++ b/lisp/language/greek.el @@ -79,7 +79,9 @@ (coding-priority greek-iso-8bit) (nonascii-translation . iso-8859-7) (input-method . "greek") - (documentation . t))) + (documentation . "Support for Greek ISO-8859-7 using the greek input method.") + (sample-text . "Greek (ελληνικά) Γειά σας") + (tutorial . "TUTORIAL.el_GR"))) (provide 'greek) diff --git a/lisp/language/hanja-util.el b/lisp/language/hanja-util.el index 7aa3f024a33..0c2419c91cd 100644 --- a/lisp/language/hanja-util.el +++ b/lisp/language/hanja-util.el @@ -6573,8 +6573,8 @@ The value is a hanja character that is selected interactively." (hanja-filter (lambda (x) (car x)) (mapcar (lambda (c) (if (listp c) - (cons (decode-char 'ucs (car c)) (cdr c)) - (list (decode-char 'ucs c)))) + (cons (car c) (cdr c)) + (list c))) (aref hanja-table char))))) (unwind-protect (when (aref hanja-conversions 2) diff --git a/lisp/language/ind-util.el b/lisp/language/ind-util.el index 8b1c3d69ae5..60ada03fa25 100644 --- a/lisp/language/ind-util.el +++ b/lisp/language/ind-util.el @@ -273,6 +273,29 @@ (;; Misc Symbols nil ?ஂ ?ஃ nil ?் nil nil) (;; Digits + nil nil nil nil nil nil nil nil nil nil) + (;; Inscript-extra (4) (#, $, ^, *, ]) + "்ர" "ர்" "த்ர" nil nil))) + +(defvar indian-tml-base-digits-table + '( + (;; VOWELS + (?அ nil) (?ஆ ?ா) (?இ ?ி) (?ஈ ?ீ) (?உ ?ு) (?ஊ ?ூ) + nil nil nil (?ஏ ?ே) (?எ ?ெ) (?ஐ ?ை) + nil (?ஓ ?ோ) (?ஒ ?ொ) (?ஔ ?ௌ) nil nil) + (;; CONSONANTS + ?க nil nil nil ?ங ;; GUTTRULS + ?ச nil ?ஜ nil ?ஞ ;; PALATALS + ?ட nil nil nil ?ண ;; CEREBRALS + ?த nil nil nil ?ந ?ன ;; DENTALS + ?ப nil nil nil ?ம ;; LABIALS + ?ய ?ர ?ற ?ல ?ள ?ழ ?வ ;; SEMIVOWELS + nil ?ஷ ?ஸ ?ஹ ;; SIBILANTS + nil nil nil nil nil nil nil nil ;; NUKTAS + "ஜ்ஞ" "க்ஷ") + (;; Misc Symbols + nil ?ஂ ?ஃ nil ?் nil nil) + (;; Digits ?௦ ?௧ ?௨ ?௩ ?௪ ?௫ ?௬ ?௭ ?௮ ?௯) (;; Inscript-extra (4) (#, $, ^, *, ]) "்ர" "ர்" "த்ர" nil nil))) @@ -557,6 +580,10 @@ (defvar indian-tml-itrans-v5-hash (indian-make-hash indian-tml-base-table indian-itrans-v5-table-for-tamil)) + +(defvar indian-tml-itrans-digits-v5-hash + (indian-make-hash indian-tml-base-digits-table + indian-itrans-v5-table-for-tamil)) ) (defmacro indian-translate-region (from to hashtable encode-p) diff --git a/lisp/language/indian.el b/lisp/language/indian.el index e0adb0de6c3..9329b43fea3 100644 --- a/lisp/language/indian.el +++ b/lisp/language/indian.el @@ -45,8 +45,9 @@ (coding-system utf-8) (coding-priority utf-8) (input-method . "devanagari-aiba") + (sample-text . "Devanagari (देवनागरी) नमस्ते / नमस्कार") (documentation . "\ -Such languages using Devanagari script as Hindi and Marathi +Such languages using Devanagari script as Hindi, Marathi and Nepali are supported in this language environment.")) '("Indian")) @@ -55,16 +56,18 @@ are supported in this language environment.")) (coding-system utf-8) (coding-priority utf-8) (input-method . "bengali-itrans") + (sample-text . "Bengali (বাংলা) নমস্কার") (documentation . "\ Such languages using Bengali script as Bengali and Assamese are supported in this language environment.")) '("Indian")) (set-language-info-alist - "Punjabi" '((charset unicode) + "Gurmukhi" '((charset unicode) (coding-system utf-8) (coding-priority utf-8) (input-method . "punjabi-itrans") + (sample-text . "Gurmukhi (ਗੁਰਮੁਖੀ) ਸਤ ਸ੍ਰੀ ਅਕਾਲ") (documentation . "\ North Indian language Punjabi is supported in this language environment.")) '("Indian")) @@ -74,17 +77,31 @@ North Indian language Punjabi is supported in this language environment.")) (coding-system utf-8) (coding-priority utf-8) (input-method . "gujarati-itrans") + (sample-text . "Gujarati (ગુજરાતી) નમસ્તે") (documentation . "\ North Indian language Gujarati is supported in this language environment.")) '("Indian")) (set-language-info-alist + "Odia" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "odia") + (sample-text . "Odia (ଓଡ଼ିଆ) ନମସ୍କାର") + (documentation . "\ +Such languages using the Odia script as Odia, Khonti, and Santali +are supported in this language environment. (This language +environment was formerly known as \"Oriya\").")) + '("Indian")) + +(set-language-info-alist "Oriya" '((charset unicode) - (coding-system utf-8) - (coding-priority utf-8) - (input-method . "oriya-itrans") - (documentation . "\ -Such languages using Oriya script as Oriya, Khonti, and Santali + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "odia") + (sample-text . "Odia (ଓଡ଼ିଆ) ନମସ୍କାର") + (documentation . "\ +Such languages using the Odia script as Odia, Khonti, and Santali are supported in this language environment.")) '("Indian")) @@ -93,6 +110,7 @@ are supported in this language environment.")) (coding-system utf-8) (coding-priority utf-8) (input-method . "tamil-itrans") + (sample-text . "Tamil (தமிழ்) வணக்கம்") (documentation . "\ South Indian Language Tamil is supported in this language environment.")) '("Indian")) @@ -102,6 +120,7 @@ South Indian Language Tamil is supported in this language environment.")) (coding-system utf-8) (coding-priority utf-8) (input-method . "telugu-itrans") + (sample-text . "Telugu (తెలుగు) నమస్కారం") (documentation . "\ South Indian Language Telugu is supported in this language environment.")) '("Indian")) @@ -113,7 +132,7 @@ South Indian Language Telugu is supported in this language environment.")) (input-method . "kannada-itrans") (sample-text . "Kannada (ಕನ್ನಡ) ನಮಸ್ಕಾರ") (documentation . "\ -Kannada language and script is supported in this language +Kannada language and script are supported in this language environment.")) '("Indian")) @@ -122,10 +141,109 @@ environment.")) (coding-system utf-8) (coding-priority utf-8) (input-method . "malayalam-itrans") + (sample-text . "Malayalam (മലയാളം) നമസ്കാരം") (documentation . "\ South Indian language Malayalam is supported in this language environment.")) '("Indian")) +(set-language-info-alist + "Brahmi" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "brahmi") + (sample-text . "Brahmi (𑀩𑁆𑀭𑀸𑀳𑁆𑀫𑀻) 𑀦𑀫𑀲𑁆𑀢𑁂") + (documentation . "\ +The ancient Brahmi script is supported in this language environment.")) + '("Indian")) ; Should we have an "Old" category? + +(set-language-info-alist + "Kaithi" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "kaithi") + (sample-text . "Kaithi (𑂍𑂶𑂟𑂲) 𑂩𑂰𑂧𑂩𑂰𑂧") + (documentation . "\ +Languages such as Awadhi, Bhojpuri, Magahi and Maithili +which used the Kaithi script are supported in this language environment.")) + '("Indian")) + +(set-language-info-alist + "Tirhuta" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "tirhuta") + (sample-text . "Tirhuta (𑒞𑒱𑒩𑒯𑒳𑒞𑒰) 𑒣𑓂𑒩𑒢𑒰𑒧 / 𑒮𑒲𑒞𑒰𑒩𑒰𑒧") + (documentation . "\ +Maithili language and its script Tirhuta are supported in this +language environment.")) + '("Indian")) + +(set-language-info-alist + "Sharada" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "sharada") + (sample-text . "Sharada (𑆯𑆳𑆫𑆢𑆳) 𑆤𑆩𑆱𑇀𑆑𑆳𑆫") + (documentation . "\ +Kashmiri language and its script Sharada are supported in this +language environment.")) + '("Indian")) + +(set-language-info-alist + "Siddham" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "siddham") + (sample-text . "Siddham (𑖭𑖰𑖟𑖿𑖠𑖽) 𑖡𑖦𑖭𑖿𑖝𑖸") + (documentation . "\ +Sanskrit language and one of its script Siddham are supported +in this language environment.")) + '("Indian")) + +(set-language-info-alist + "Syloti Nagri" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "syloti-nagri") + (sample-text . "Syloti Nagri (ꠍꠤꠟꠐꠤ ꠘꠣꠉꠞꠤ) ꠀꠌ꠆ꠍꠣꠟꠣꠝꠥ ꠀꠟꠣꠁꠇꠥꠝ / ꠘꠝꠡ꠆ꠇꠣꠞ") + (documentation . "\ +Sylheti language and its script Syloti Nagri are supported +in this language environment.")) + '("Indian")) + +(set-language-info-alist + "Modi" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "modi") + (sample-text . "Modi (𑘦𑘻𑘚𑘲) 𑘡𑘦𑘭𑘿𑘎𑘰𑘨") + (documentation . "\ +Marathi language and one of its script Modi are supported +in this language environment.")) + '("Indian")) + +(set-language-info-alist + "Limbu" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "limbu") + (sample-text . "Limbu (ᤕᤠᤰᤌᤢᤱ ᤐᤠᤴ) ᤛᤣᤘᤠᤖᤥ") + (documentation . "\ +Limbu language and its script are supported in this +language environment.")) + '("Indian")) + +(set-language-info-alist + "Grantha" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "grantha") + (sample-text . "Grantha (𑌗𑍍𑌰𑌨𑍍𑌥) 𑌨𑌮𑌸𑍍𑌤𑍇 / 𑌨𑌮𑌸𑍍𑌕𑌾𑌰𑌃") + (documentation . "\ +Languages such as Sanskrit and Manipravalam, when they use the +Grantha script, are supported in this language environment.")) + '("Indian")) + ;; Replace mnemonic characters in REGEXP according to TABLE. TABLE is ;; an alist of (MNEMONIC-STRING . REPLACEMENT-STRING). @@ -147,6 +265,8 @@ South Indian language Malayalam is supported in this language environment.")) ("H" . "\u094D") ; HALANT ("s" . "[\u0951\u0952]") ; stress sign ("t" . "[\u0953\u0954]") ; accent + ("1" . "\u0967") ; numeral 1 + ("3" . "\u0969") ; numeral 3 ("N" . "\u200C") ; ZWNJ ("J" . "\u200D") ; ZWJ ("X" . "[\u0900-\u097F]")))) ; all coverage @@ -158,6 +278,8 @@ South Indian language Malayalam is supported in this language environment.")) "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?s?t?A?\\)\\|" ;; special consonant form, or "JHR\\|" + ;; vedic accents with numerals, or + "1ss?\\|3ss\\|s3ss\\|" ;; any other singleton characters "X") table)) @@ -168,14 +290,15 @@ South Indian language Malayalam is supported in this language environment.")) '(("a" . "\u0981") ; SIGN CANDRABINDU ("A" . "[\u0982\u0983]") ; SIGN ANUSVARA .. VISARGA ("V" . "[\u0985-\u0994\u09E0\u09E1]") ; independent vowel - ("C" . "[\u0995-\u09B9\u09DC-\u09DF\u09F1]") ; consonant + ("C" . "[\u0995-\u09B9\u09DC-\u09DF\u09F0\u09F1]") ; consonant ("B" . "[\u09AC\u09AF\u09B0\u09F0]") ; BA, YA, RA ("R" . "[\u09B0\u09F0]") ; RA ("n" . "\u09BC") ; NUKTA ("v" . "[\u09BE-\u09CC\u09D7\u09E2\u09E3]") ; vowel sign ("H" . "\u09CD") ; HALANT ("T" . "\u09CE") ; KHANDA TA - ("N" . "\u200C") ; ZWNJ + ("S" . "\u09FE") ; SANDHI MARK + ("N" . "\u200C") ; ZWNJ ("J" . "\u200D") ; ZWJ ("X" . "[\u0980-\u09FF]")))) ; all coverage (indian-compose-regexp @@ -183,7 +306,7 @@ South Indian language Malayalam is supported in this language environment.")) ;; syllables with an independent vowel, or "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|" ;; consonant-based syllables, or - "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*[NJ]?v?a?A?\\)\\|" + "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*[NJ]?v?a?A?S?\\)\\|" ;; another syllables with an independent vowel, or "\\(?:RH\\)?T\\|" ;; special consonant form, or @@ -250,7 +373,7 @@ South Indian language Malayalam is supported in this language environment.")) '(("a" . "\u0B01") ; SIGN CANDRABINDU ("A" . "[\u0B02\u0B03]") ; SIGN ANUSVARA .. VISARGA ("V" . "[\u0B05-\u0B14\u0B60\u0B61]") ; independent vowel - ("C" . "[\u0B15-\u0B39\u0B5C\u0B5D\u0B71]") ; consonant + ("C" . "[\u0B15-\u0B39\u0B5C\u0B5D\u0B5F\u0B71]") ; consonant ("B" . "[\u0B15-\u0B17\u0B1B-\u0B1D\u0B1F-\u0B21\u0B23\u0B24\u0B27-\u0B30\u0B32-\u0B35\u0B38\u0B39]") ; consonant with below form ("R" . "\u0B30") ; RA ("n" . "\u0B3C") ; NUKTA @@ -384,6 +507,232 @@ South Indian language Malayalam is supported in this language environment.")) (list (vector (cdr slot) 0 #'font-shape-gstring)))))) char-script-table)) -(provide 'indian) +;; Brahmi composition rules +(let ((consonant "[\U00011013-\U00011034]") + (non-consonant "[^\U00011013-\U00011034\U00011046\U0001107F]") + (vowel "[\U00011038-\U00011045]") + (numeral "[\U00011052-\U00011065]") + (multiplier "[\U00011064\U00011065]") + (virama "\U00011046") + (number-joiner "\U0001107F")) + (set-char-table-range composition-function-table + '(#x11046 . #x11046) + (list (vector + ;; Consonant conjuncts + (concat consonant "\\(?:" virama consonant "\\)+" + vowel "?") + 1 'font-shape-gstring) + (vector + ;; Vowelless consonants + (concat consonant virama non-consonant) + 1 'font-shape-gstring))) + (set-char-table-range composition-function-table + '(#x1107F . #x1107F) + (list (vector + ;; Additive-multiplicative numerals + (concat multiplier number-joiner numeral) + 1 'font-shape-gstring)))) + +;; Kaithi composition rules +(let ((consonant "[\x1108D-\x110AF]") + (nukta "\x110BA") + (independent-vowel "[\x11083-\x1108C]") + (vowel "[\x1108D-\x110C2]") + (nasal "[\x11080\x11081]") + (virama "\x110B9") + (number-sign "\x110BD") + (number-sign-above "\x110CD") + (numerals "[\x966-\x96F]+") + (zwj "\x200D")) + (set-char-table-range composition-function-table + '(#x110B0 . #x110BA) + (list (vector + ;; Consonant based syllables + (concat consonant nukta "?\\(?:" virama zwj "?" consonant + nukta "?\\)*\\(?:" virama zwj "?\\|" vowel "*" nukta + "?" nasal "?\\)") + 1 'font-shape-gstring) + (vector + ;; Vowel based syllables + (concat independent-vowel nukta "?" virama "?" vowel "?") + 1 'font-shape-gstring))) + (set-char-table-range composition-function-table + '(#x110BD . #x110BD) + (list (vector + ;; Number sign + (concat number-sign numerals) + 0 'font-shape-gstring))) + (set-char-table-range composition-function-table + '(#x110CD . #x110CD) + (list (vector + ;; Number sign above + (concat number-sign-above numerals) + 0 'font-shape-gstring)))) + +;; Tirhuta composition rules +(let ((consonant "[\x1148F-\x114AF]") + (nukta "\x114C3") + (independent-vowel "[\x11481-\x1148E]") + (vowel "[\x114B0-\x114BE]") + (nasal "[\x114BF\x114C0]") + (virama "\x114C2")) + (set-char-table-range composition-function-table + '(#x114B0 . #x114C3) + (list (vector + ;; Consonant based syllables + (concat consonant nukta "?\\(?:" virama consonant nukta + "?\\)*\\(?:" virama "\\|" vowel "*" nukta "?" + nasal "?\\)") + 1 'font-shape-gstring) + (vector + ;; Vowel based syllables + (concat independent-vowel nukta "?" virama "?" vowel "?" nasal "?") + 1 'font-shape-gstring)))) + +;; Sharada composition rules +(let ((consonant "[\x11191-\x111B2]") + (nukta "\x111CA") + (independent-vowel "[\x11183-\x11190]") + (vowel "[\x111B3-\x111BF\x111CE]") + (vowel-modifier "\x111CB") + (extra-short-vowel-mark "\x111CC") + (nasal "[\x11181\x11180\x111CF]") + (virama "\x111C0") + (fricatives "[\x111C2\x111C3]") + (sandhi-mark "\x111C9") + (misc "[\x111C4-\x111C8\x111CD]")) + (set-char-table-range composition-function-table + '(#x111B3 . #x111CE) + (list (vector + ;; Consonant based syllables + (concat consonant nukta "?" vowel-modifier "?\\(?:" virama + consonant nukta "?" vowel-modifier "?\\)*\\(?:" virama + "\\|" vowel "*" nukta "?" nasal "?" extra-short-vowel-mark + "?" vowel-modifier "?" sandhi-mark "?+" misc "?\\)") + 1 'font-shape-gstring) + (vector + ;; Vowel based syllables + (concat independent-vowel nukta "?" vowel-modifier "?" virama "?" + vowel "?" extra-short-vowel-mark "?" sandhi-mark "?" + fricatives "?" misc "?") + 1 'font-shape-gstring) + (vector + ;; Fricatives with Consonants + (concat fricatives "?" consonant vowel "?") + 0 'font-shape-gstring)))) + +;; Siddham composition rules +(let ((consonant "[\x1158E-\x115AE]") + (nukta "\x115C0") + (independent-vowel "[\x11580-\x1158D\x115D8-\x115DB]") + (vowel "[\x115AF-\x115BB\x115DC\x115DD]") + (nasal "[\x115BC\x115BD]") + (visarga "\x115BE") + (virama "\x115BF")) + (set-char-table-range composition-function-table + '(#x115AF . #x115C0) + (list (vector + ;; Consonant based syllables + (concat consonant nukta "?" "\\(?:" virama consonant nukta + "?\\)*\\(?:" virama "\\|" vowel "*" nukta "?" nasal + "?" visarga "?\\)") + 1 'font-shape-gstring) + (vector + ;; Vowels based syllables + (concat independent-vowel nukta "?" virama "?" vowel "?" + nasal "?" visarga "?") + 1 'font-shape-gstring)))) + +;; Syloti Nagri composition rules +(let ((consonant "[\xA807-\xA80A\xA80C-\xA822]") + (vowel "[\xA802\xA823-\xA827]") + (nasal "[\xA80B]") + (virama "\xA806") + (alternate-virama "\xA82C")) + (set-char-table-range composition-function-table + '(#xA806 . #xA806) + (list (vector + ;; Consonant conjunct based syllables + (concat consonant "\\(?:" virama consonant "\\)+" + vowel "?" nasal "?") + 1 'font-shape-gstring))) + (set-char-table-range composition-function-table + '(#xA823 . #xA827) + (list (vector + ;; Non Consonant conjunct based syllables + (concat consonant vowel nasal "?") + 1 'font-shape-gstring))) + (set-char-table-range composition-function-table + '(#xA82C . #xA82C) + (list (vector + ;; Consonant with the alternate virama + (concat consonant "\\(?:" alternate-virama consonant "\\)+" + vowel "?" nasal "?") + 1 'font-shape-gstring)))) + +;; Modi composition rules +(let ((consonant "[\x1160E-\x1162F]") + (independent-vowel "[\x11600-\x1160D]") + (vowel "[\x11630-\x1163C]") + (nasal "\x1163D") + (visarga "\x1163E") + (virama "\x1163F") + (ardhacandra "\x11640")) + (set-char-table-range composition-function-table + '(#x11630 . #x11640) + (list (vector + ;; Consonant based syllables + (concat consonant "\\(?:" virama consonant "\\)*\\(?:" + virama "\\|" vowel "*" ardhacandra "?" nasal + "?" visarga "?\\)") + 1 'font-shape-gstring) + (vector + ;; Vowels based syllables + (concat independent-vowel virama "?" vowel "?" ardhacandra + nasal "?" visarga "?") + 1 'font-shape-gstring)))) + +;; Limbu composition rules +(let ((consonant "[\x1900-\x191E]") + (vowel "[\x1920-\x1928]") + (subjoined-letter "[\x1929-\x192B]") + (small-letter "[\x1930-\x1938]") + (other-signs "[\x1939\x193A]") + (sa-i "\x193B")) + (set-char-table-range composition-function-table + '(#x1920 . #x193B) + (list (vector + ;; Consonant based syllables + (concat consonant sa-i "?" subjoined-letter "?" small-letter + "?" vowel "?" other-signs "?") + 1 'font-shape-gstring)))) + +;; Grantha composition rules +(let ((consonant "[\x11315-\x11339]") + (nukta "\x1133C") + (independent-vowel "[\x11305-\x11314\x11360\x11361]") + (vowel "[\x1133E-\x1134C\x11357\x11362\x11363]") + (nasal "[\x11300-\x11302]") + (bindu "\x1133B") + (visarga "\x11303") + (virama "\x1134D") + (avagraha "\x1133D") + (modifier-above "[\x11366-\x11374]")) + (set-char-table-range composition-function-table + '(#x1133B . #x1134D) + (list (vector + ;; Consonant based syllables + (concat consonant nukta "?" "\\(?:" virama consonant nukta + "?\\)*\\(?:" virama "\\|" vowel "*" nukta "?" nasal + "?" bindu "?" visarga "?" modifier-above "?" + avagraha "?\\)") + 1 'font-shape-gstring) + (vector + ;; Vowels based syllables + (concat independent-vowel nukta "?" virama "?" vowel "?" + nasal "?" bindu "?" visarga "?" modifier-above + "?" avagraha "?") + 1 'font-shape-gstring)))) +(provide 'indian) ;;; indian.el ends here diff --git a/lisp/language/indonesian.el b/lisp/language/indonesian.el new file mode 100644 index 00000000000..699f8192543 --- /dev/null +++ b/lisp/language/indonesian.el @@ -0,0 +1,197 @@ +;;; indonesian.el --- Indonesian languages support -*- coding: utf-8; lexical-binding: t; -*- + +;; Copyright (C) 2022 Free Software Foundation, Inc. + +;; Author: समीर सिंह Sameer Singh <lumarzeli30@gmail.com> +;; Keywords: multilingual, input method, i18n, Indonesia + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. + +;;; Commentary: + +;; This file contains definitions of Indonesia language environments, and +;; setups for displaying the scripts used there. + +;;; Code: + +(set-language-info-alist + "Balinese" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "balinese") + (sample-text . "Balinese (ᬅᬓ᭄ᬱᬭᬩᬮᬶ) ᬒᬁᬲ᭄ᬯᬲ᭄ᬢ᭄ᬬᬲ᭄ᬢᬸ") + (documentation . "\ +Balinese language and its script are supported in this language environment."))) + +(set-language-info-alist + "Javanese" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "javanese") + (sample-text . "Javanese (ꦲꦏ꧀ꦱꦫꦗꦮ) ꦲꦭꦺꦴ") + (documentation . "\ +Javanese language and its script are supported in this language environment."))) + +(set-language-info-alist + "Sundanese" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "sundanese") + (sample-text . "Sundanese (ᮃᮊ᮪ᮞᮛᮞᮥᮔ᮪ᮓ) ᮞᮙ᮪ᮕᮥᮛᮞᮥᮔ᮪") + (documentation . "\ +Sundanese language and its script are supported in this language environment."))) + +(set-language-info-alist + "Batak" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "batak") + (sample-text . "Batak (ᯘᯮᯒᯗ᯲ᯅᯗᯂ᯲) ᯂᯬᯒᯘ᯲ / ᯔᯧᯐᯬᯀᯱᯐᯬᯀᯱ") + (documentation . "\ +Languages that use the Batak script, such as Karo, Toba, Pakpak, Mandailing +and Simalungun, are supported in this language environment."))) + +(set-language-info-alist + "Rejang" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "rejang") + (sample-text . "Rejang (ꥆꤰ꥓ꤼꤽ ꤽꥍꤺꥏ) ꤸꥉꥐꤺꥉꥂꥎ") + (documentation . "\ +Rejang language and its script are supported in this language environment."))) + +(set-language-info-alist + "Makasar" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "makasar") + (sample-text . "Makasar (𑻪𑻢𑻪𑻢) 𑻦𑻤𑻵𑻱") + (documentation . "\ +Makassarese language and its script Makasar are supported in this language environment."))) + +(set-language-info-alist + "Buginese" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "lontara") + (sample-text . "Buginese (ᨒᨚᨈᨑ) ᨖᨒᨚ") + (documentation . "\ +Buginese language and its script Lontara are supported in this language environment."))) + +;; Balinese composition rules +(let ((consonant "[\x1B13-\x1B33\x1B45-\x1B4B]") + (independent-vowel "[\x1B05-\x1B12]") + (rerekan "\x1B34") + (vowel "[\x1B35-\x1B43]") + (modifier-above "[\x1B00-\x1B04]") + (adeg-adeg "\x1B44") + (musical-symbol "[\x1B6B-\x1B73]")) + (set-char-table-range composition-function-table + '(#x1B34 . #x1B44) + (list (vector + ;; Consonant based syllables + (concat consonant rerekan "?\\(?:" adeg-adeg consonant + rerekan "?\\)*\\(?:" adeg-adeg "\\|" vowel "*" rerekan + "?" modifier-above "?" musical-symbol "?\\)") + 1 'font-shape-gstring) + (vector + ;; Vowels based syllables + (concat independent-vowel rerekan "?" adeg-adeg "?" + vowel "?" modifier-above "?" musical-symbol "?") + 1 'font-shape-gstring)))) + +;; Javanese composition rules +(let ((consonant "[\xA98F-\xA9B2]") + (independent-vowel "[\xA984-\xA98E]") + (telu "\xA9B3") + (vowel "[\xA9B4-\xA9BC]") + (dependant-consonant "[\xA9BD-\xA9BF]") + (modifier-above "[\xA980-\xA983]") + (pangkon "\xA9C0")) + (set-char-table-range composition-function-table + '(#xA9B3 . #xA9C0) + (list (vector + ;; Consonant based syllables + (concat consonant telu "?\\(?:" pangkon consonant + telu "?\\)*\\(?:" pangkon "\\|" vowel "*" telu + "?" modifier-above "?" dependant-consonant "?\\)") + 1 'font-shape-gstring) + (vector + ;; Vowels based syllables + (concat independent-vowel telu "?" pangkon "?" + vowel "?" modifier-above "?" dependant-consonant "?") + 1 'font-shape-gstring)))) + +;; Sundanese composition rules +(let ((consonant "[\x1B8A-\x1BA0\x1BAE\x1BAF\x1BBB-\x1BBF]") + (independent-vowel "[\x1B83-\x1B89]") + (vowel "[\x1BA4-\x1BA9]") + (dependant-consonant "[\x1BA1-\x1BA3\x1BAC-\x1BAD]") + (modifier-above "[\x1B80-\x1B82]") + (virama "[\x1BAA\x1BAB]")) + (set-char-table-range composition-function-table + '(#x1BA1 . #x1BAD) + (list (vector + ;; Consonant based syllables + (concat consonant "\\(?:" virama consonant + "\\)*\\(?:" virama "\\|" vowel "*" + modifier-above "?" dependant-consonant "?\\)") + 1 'font-shape-gstring) + (vector + ;; Vowels based syllables + (concat independent-vowel virama "?" + vowel "?" modifier-above "?" dependant-consonant "?") + 1 'font-shape-gstring)))) + +;; Batak composition rules +(let ((akshara "[\x1BC0-\x1BE5]") + (vowel "[\x1BE7-\x1BEF]") + (dependant-consonant "[\x1BF0\x1BF1]") + (modifier-above "\x1BE6") + (virama "[\x1BF2\x1BF3]")) + (set-char-table-range composition-function-table + '(#x1BE6 . #x1BF3) + (list (vector + ;; Akshara based syllables + (concat akshara virama "?" vowel "*" modifier-above + "?" dependant-consonant "?") + 1 'font-shape-gstring)))) + +;; Rejang composition rules +(let ((akshara "[\xA930-\xA946]") + (vowel "[\xA947-\xA94E]") + (dependant-consonant "[\xA94F\xA952]") + (virama "\xA953")) + (set-char-table-range composition-function-table + '(#xA947 . #xA953) + (list (vector + ;; Akshara based syllables + (concat akshara virama "?" vowel "*" + dependant-consonant "?") + 1 'font-shape-gstring)))) + +;; Makasar composition rules +(let ((akshara "[\x11EE0-\x11EF2]") + (vowel "[\x11EF3-\x11EF6]")) + (set-char-table-range composition-function-table + '(#x11EF3 . #x11EF6) + (list (vector + ;; Akshara based syllables + (concat akshara vowel "*") + 1 'font-shape-gstring)))) + +(provide 'indonesian) +;;; indonesian.el ends here diff --git a/lisp/language/lao.el b/lisp/language/lao.el index 5c545df4840..1861eff15eb 100644 --- a/lisp/language/lao.el +++ b/lisp/language/lao.el @@ -59,11 +59,11 @@ (let* ((chars (car l)) (len (length chars)) ;; Replace `c', `t', `v' to consonant, tone, and vowel. - (regexp (mapconcat #'(lambda (c) - (cond ((= c ?c) consonant) - ((= c ?t) tone) - ((= c ?v) vowel-upper-lower) - (t (string c)))) + (regexp (mapconcat (lambda (c) + (cond ((= c ?c) consonant) + ((= c ?t) tone) + ((= c ?v) vowel-upper-lower) + (t (string c)))) (cdr l) "")) ;; Element of composition-function-table. (elt (list (vector regexp 1 #'lao-composition-function) diff --git a/lisp/language/misc-lang.el b/lisp/language/misc-lang.el index c8a4821abf7..46429a4380d 100644 --- a/lisp/language/misc-lang.el +++ b/lisp/language/misc-lang.el @@ -212,6 +212,28 @@ thin (i.e. 1-dot width) space." (list (vector "[\U00013000-\U0001342E]+" 0 #'font-shape-gstring)))) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Hanifi Rohingya +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(set-language-info-alist + "Hanifi Rohingya" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "hanifi-rohingya") + (sample-text . "Hanifi Rohingya (𐴌𐴟𐴇𐴥𐴝𐴚𐴒𐴙𐴝 𐴇𐴝𐴕𐴞𐴉𐴞 𐴓𐴠𐴑𐴤𐴝) 𐴀𐴝𐴏𐴓𐴝𐴀𐴡𐴤𐴛𐴝𐴓𐴝𐴙𐴑𐴟𐴔") + (documentation . "\ +Rohingya language and its script Hanifi Rohingya are supported +in this language environment."))) + +;; Hanifi Rohingya composition rules +(set-char-table-range + composition-function-table + '(#x10D1D . #x10D27) + (list (vector + "[\x10D00-\x10D27]+" + 1 'font-shape-gstring))) + (provide 'misc-lang) ;;; misc-lang.el ends here diff --git a/lisp/language/philippine.el b/lisp/language/philippine.el new file mode 100644 index 00000000000..e52ad6912cd --- /dev/null +++ b/lisp/language/philippine.el @@ -0,0 +1,96 @@ +;;; philippine.el --- Philippine languages support -*- coding: utf-8; lexical-binding: t; -*- + +;; Copyright (C) 2022 Free Software Foundation, Inc. + +;; Author: समीर सिंह Sameer Singh <lumarzeli30@gmail.com> +;; Keywords: multilingual, input method, i18n, Philippines + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. + +;;; Commentary: + +;; This file contains definitions of Philippine language environments, and +;; setups for displaying the scripts used there. + +;;; Code: + +(set-language-info-alist + "Tagalog" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "tagalog") + (sample-text . "Tagalog (ᜊᜌ᜔ᜊᜌᜒᜈ᜔) ᜃᜓᜋᜓᜐ᜔ᜆ") + (documentation . "\ +Tagalog language using the Baybayin script is supported in +this language environment."))) + +(set-language-info-alist + "Hanunoo" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "hanunoo") + (sample-text . "Hanunoo (ᜱᜨᜳᜨᜳᜢ) ᜫᜬᜧ᜴ ᜣᜭᜯᜥ᜴ ᜰᜲᜭᜥ᜴") + (documentation . "\ +Philippine Language Hanunoo is supported in this language environment."))) + +(set-language-info-alist + "Buhid" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "buhid") + (documentation . "\ +Philippine Language Buhid is supported in this language environment."))) + +(set-language-info-alist + "Tagbanwa" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "tagbanwa") + (sample-text . "Tagbanwa (ᝦᝪᝯ) ᝫᝩᝬᝥ ᝣᝮᝧᝯ") + (documentation . "\ +Philippine Languages Tagbanwa are supported in this language environment."))) + +;; Tagalog composition rules +(let ((akshara "[\x1700-\x1711\x171F]") + (vowel "[\x1712\x1713]") + (virama "\x1714") + (pamudpod "\x1715")) + (set-char-table-range composition-function-table + '(#x1714 . #x1714) + (list (vector + ;; Akshara virama syllables + (concat akshara virama vowel "?") + 1 'font-shape-gstring))) + (set-char-table-range composition-function-table + '(#x1715 . #x1715) + (list (vector + ;; Akshara pamudpod syllables + (concat akshara pamudpod vowel "?") + 1 'font-shape-gstring)))) + +;; Hanunoo composition rules +(let ((akshara "[\x1720-\x1731]") + (vowel "[\x1732\x1733]") + (pamudpod "\x1734")) + (set-char-table-range composition-function-table + '(#x1734 . #x1734) + (list (vector + ;; Akshara pamudpod syllables + (concat akshara pamudpod vowel "?") + 1 'font-shape-gstring)))) + +(provide 'philippine) +;;; philippine.el ends here diff --git a/lisp/language/thai-util.el b/lisp/language/thai-util.el index d11daf0f839..6c004e9495c 100644 --- a/lisp/language/thai-util.el +++ b/lisp/language/thai-util.el @@ -244,15 +244,13 @@ positions (integers or markers) specifying the region." ;; Thai-word-mode requires functions in the feature `thai-word'. (require 'thai-word) -(defvar thai-word-mode-map - (let ((map (make-sparse-keymap))) - (define-key map [remap forward-word] 'thai-forward-word) - (define-key map [remap backward-word] 'thai-backward-word) - (define-key map [remap kill-word] 'thai-kill-word) - (define-key map [remap backward-kill-word] 'thai-backward-kill-word) - (define-key map [remap transpose-words] 'thai-transpose-words) - map) - "Keymap for `thai-word-mode'.") +(defvar-keymap thai-word-mode-map + :doc "Keymap for `thai-word-mode'." + "<remap> <forward-word>" #'thai-forward-word + "<remap> <backward-word>" #'thai-backward-word + "<remap> <kill-word>" #'thai-kill-word + "<remap> <backward-kill-word>" #'thai-backward-kill-word + "<remap> <transpose-words>" #'thai-transpose-words) (define-minor-mode thai-word-mode "Minor mode to make word-oriented commands aware of Thai words. diff --git a/lisp/language/thai.el b/lisp/language/thai.el index 6a6289a44c7..60f5f9d2a38 100644 --- a/lisp/language/thai.el +++ b/lisp/language/thai.el @@ -82,6 +82,43 @@ This is the same as `thai-tis620' with the addition of no-break-space." (aset composition-function-table (aref chars i) elt))) (aset composition-function-table ?ำ '(["[ก-ฯ]." 1 thai-composition-function])) +;; Tai-Tham + +(set-language-info-alist + "Northern Thai" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (sample-text . + "Northern Thai (ᨣᩣᩴᨾᩮᩬᩥᨦ / ᨽᩣᩈᩣᩃ᩶ᩣ᩠ᨶᨶᩣ) ᩈ᩠ᩅᩢᩔ᩠ᨯᩦᨣᩕᩢ᩠ᨸ") + (documentation . t))) + +;; From Richard Wordingham <richard.wordingham@ntlworld.com>: +(defvar tai-tham-composable-pattern + (let ((table + ;; C is letters, independent vowels, digits, punctuation and symbols. + '(("C" . "[\u1A20-\u1A54\u1A80-\u1A89\u1A90-\u1A99\u1AA0-\u1AAD]") + ("M" . ; Marks, CGJ, ZWNJ, ZWJ + "[\u0324\u034F\u0E49\u0E4A\u0E4B\u1A55-\u1A57\u1A59-\u1A5E\u1A61-\u1A7C\u1A7F\u200C\200D]") + ("H" . "\u1A60") ; Sakot + ("S" . ; Marks commuting with sakot + "[\u0E49-\u0E4B\u0EC9\u0ECB\u1A75-\u1A7C]") + ("N" . "\u1A58"))) ; mai kang lai + (basic-syllable "C\\(N*\\(M\\|HS*C\\)\\)*") + (regexp "X\\(N\\(X\\)?\\)*H?")) ; where X is basic syllable + (let ((case-fold-search nil)) + (setq regexp (replace-regexp-in-string "X" basic-syllable regexp t t)) + (dolist (elt table) + (setq regexp (replace-regexp-in-string (car elt) (cdr elt) + regexp t t)))) + regexp)) + +(let ((elt (list (vector tai-tham-composable-pattern 0 'font-shape-gstring) + ))) + (set-char-table-range composition-function-table '(#x1A20 . #x1A54) elt) + (set-char-table-range composition-function-table '(#x1A80 . #x1A89) elt) + (set-char-table-range composition-function-table '(#x1A90 . #x1A99) elt) + (set-char-table-range composition-function-table '(#x1AA0 . #x1AAD) elt)) + (provide 'thai) ;;; thai.el ends here |