diff options
Diffstat (limited to 'lisp/language/indian.el')
-rw-r--r-- | lisp/language/indian.el | 309 |
1 files changed, 297 insertions, 12 deletions
diff --git a/lisp/language/indian.el b/lisp/language/indian.el index e0adb0de6c3..a5563a3ff77 100644 --- a/lisp/language/indian.el +++ b/lisp/language/indian.el @@ -45,8 +45,9 @@ (coding-system utf-8) (coding-priority utf-8) (input-method . "devanagari-aiba") + (sample-text . "Devanagari (देवनागरी) नमस्ते / नमस्कार") (documentation . "\ -Such languages using Devanagari script as Hindi and Marathi +Such languages using Devanagari script as Hindi, Marathi and Nepali are supported in this language environment.")) '("Indian")) @@ -55,16 +56,18 @@ are supported in this language environment.")) (coding-system utf-8) (coding-priority utf-8) (input-method . "bengali-itrans") + (sample-text . "Bengali (বাংলা) নমস্কার") (documentation . "\ Such languages using Bengali script as Bengali and Assamese are supported in this language environment.")) '("Indian")) (set-language-info-alist - "Punjabi" '((charset unicode) + "Gurmukhi" '((charset unicode) (coding-system utf-8) (coding-priority utf-8) (input-method . "punjabi-itrans") + (sample-text . "Gurmukhi (ਗੁਰਮੁਖੀ) ਸਤ ਸ੍ਰੀ ਅਕਾਲ") (documentation . "\ North Indian language Punjabi is supported in this language environment.")) '("Indian")) @@ -74,17 +77,31 @@ North Indian language Punjabi is supported in this language environment.")) (coding-system utf-8) (coding-priority utf-8) (input-method . "gujarati-itrans") + (sample-text . "Gujarati (ગુજરાતી) નમસ્તે") (documentation . "\ North Indian language Gujarati is supported in this language environment.")) '("Indian")) (set-language-info-alist + "Odia" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "odia") + (sample-text . "Odia (ଓଡ଼ିଆ) ନମସ୍କାର") + (documentation . "\ +Such languages using the Odia script as Odia, Khonti, and Santali +are supported in this language environment. (This language +environment was formerly known as \"Oriya\").")) + '("Indian")) + +(set-language-info-alist "Oriya" '((charset unicode) - (coding-system utf-8) - (coding-priority utf-8) - (input-method . "oriya-itrans") - (documentation . "\ -Such languages using Oriya script as Oriya, Khonti, and Santali + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "odia") + (sample-text . "Odia (ଓଡ଼ିଆ) ନମସ୍କାର") + (documentation . "\ +Such languages using the Odia script as Odia, Khonti, and Santali are supported in this language environment.")) '("Indian")) @@ -93,6 +110,7 @@ are supported in this language environment.")) (coding-system utf-8) (coding-priority utf-8) (input-method . "tamil-itrans") + (sample-text . "Tamil (தமிழ்) வணக்கம்") (documentation . "\ South Indian Language Tamil is supported in this language environment.")) '("Indian")) @@ -102,6 +120,7 @@ South Indian Language Tamil is supported in this language environment.")) (coding-system utf-8) (coding-priority utf-8) (input-method . "telugu-itrans") + (sample-text . "Telugu (తెలుగు) నమస్కారం") (documentation . "\ South Indian Language Telugu is supported in this language environment.")) '("Indian")) @@ -122,10 +141,87 @@ environment.")) (coding-system utf-8) (coding-priority utf-8) (input-method . "malayalam-itrans") + (sample-text . "Malayalam (മലയാളം) നമസ്കാരം") (documentation . "\ South Indian language Malayalam is supported in this language environment.")) '("Indian")) +(set-language-info-alist + "Brahmi" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "brahmi") + (sample-text . "Brahmi (𑀩𑁆𑀭𑀸𑀳𑁆𑀫𑀻) 𑀦𑀫𑀲𑁆𑀢𑁂") + (documentation . "\ +The ancient Brahmi script is supported in this language environment.")) + '("Indian")) ; Should we have an "Old" category? + +(set-language-info-alist + "Kaithi" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "kaithi") + (sample-text . "Kaithi (𑂍𑂶𑂟𑂲) 𑂩𑂰𑂧𑂩𑂰𑂧") + (documentation . "\ +Languages such as Awadhi, Bhojpuri, Magahi and Maithili +which used the Kaithi script are supported in this language environment.")) + '("Indian")) + +(set-language-info-alist + "Tirhuta" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "tirhuta") + (sample-text . "Tirhuta (𑒞𑒱𑒩𑒯𑒳𑒞𑒰) 𑒣𑓂𑒩𑒢𑒰𑒧 / 𑒮𑒲𑒞𑒰𑒩𑒰𑒧") + (documentation . "\ +Maithili language and its script Tirhuta is supported in this +language environment.")) + '("Indian")) + +(set-language-info-alist + "Sharada" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "sharada") + (sample-text . "Sharada (𑆯𑆳𑆫𑆢𑆳) 𑆤𑆩𑆱𑇀𑆑𑆳𑆫") + (documentation . "\ +Kashmiri language and its script Sharada is supported in this +language environment.")) + '("Indian")) + +(set-language-info-alist + "Siddham" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "siddham") + (sample-text . "Siddham (𑖭𑖰𑖟𑖿𑖠𑖽) 𑖡𑖦𑖭𑖿𑖝𑖸") + (documentation . "\ +Sanskrit language and one of its script Siddham is supported +in this language environment.")) + '("Indian")) + +(set-language-info-alist + "Syloti Nagri" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "syloti-nagri") + (sample-text . "Syloti Nagri (ꠍꠤꠟꠐꠤ ꠘꠣꠉꠞꠤ) ꠀꠌ꠆ꠍꠣꠟꠣꠝꠥ ꠀꠟꠣꠁꠇꠥꠝ / ꠘꠝꠡ꠆ꠇꠣꠞ") + (documentation . "\ +Sylheti language and its script Syloti Nagri is supported +in this language environment.")) + '("Indian")) + +(set-language-info-alist + "Modi" '((charset unicode) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "modi") + (sample-text . "Modi (𑘦𑘻𑘚𑘲) 𑘡𑘦𑘭𑘿𑘎𑘰𑘨") + (documentation . "\ +Marathi language and one of its script Modi is supported +in this language environment.")) + '("Indian")) + ;; Replace mnemonic characters in REGEXP according to TABLE. TABLE is ;; an alist of (MNEMONIC-STRING . REPLACEMENT-STRING). @@ -147,6 +243,8 @@ South Indian language Malayalam is supported in this language environment.")) ("H" . "\u094D") ; HALANT ("s" . "[\u0951\u0952]") ; stress sign ("t" . "[\u0953\u0954]") ; accent + ("1" . "\u0967") ; numeral 1 + ("3" . "\u0969") ; numeral 3 ("N" . "\u200C") ; ZWNJ ("J" . "\u200D") ; ZWJ ("X" . "[\u0900-\u097F]")))) ; all coverage @@ -158,6 +256,8 @@ South Indian language Malayalam is supported in this language environment.")) "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?s?t?A?\\)\\|" ;; special consonant form, or "JHR\\|" + ;; vedic accents with numerals, or + "1ss?\\|3ss\\|s3ss\\|" ;; any other singleton characters "X") table)) @@ -168,14 +268,15 @@ South Indian language Malayalam is supported in this language environment.")) '(("a" . "\u0981") ; SIGN CANDRABINDU ("A" . "[\u0982\u0983]") ; SIGN ANUSVARA .. VISARGA ("V" . "[\u0985-\u0994\u09E0\u09E1]") ; independent vowel - ("C" . "[\u0995-\u09B9\u09DC-\u09DF\u09F1]") ; consonant + ("C" . "[\u0995-\u09B9\u09DC-\u09DF\u09F0\u09F1]") ; consonant ("B" . "[\u09AC\u09AF\u09B0\u09F0]") ; BA, YA, RA ("R" . "[\u09B0\u09F0]") ; RA ("n" . "\u09BC") ; NUKTA ("v" . "[\u09BE-\u09CC\u09D7\u09E2\u09E3]") ; vowel sign ("H" . "\u09CD") ; HALANT ("T" . "\u09CE") ; KHANDA TA - ("N" . "\u200C") ; ZWNJ + ("S" . "\u09FE") ; SANDHI MARK + ("N" . "\u200C") ; ZWNJ ("J" . "\u200D") ; ZWJ ("X" . "[\u0980-\u09FF]")))) ; all coverage (indian-compose-regexp @@ -183,7 +284,7 @@ South Indian language Malayalam is supported in this language environment.")) ;; syllables with an independent vowel, or "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|" ;; consonant-based syllables, or - "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*[NJ]?v?a?A?\\)\\|" + "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*[NJ]?v?a?A?S?\\)\\|" ;; another syllables with an independent vowel, or "\\(?:RH\\)?T\\|" ;; special consonant form, or @@ -250,7 +351,7 @@ South Indian language Malayalam is supported in this language environment.")) '(("a" . "\u0B01") ; SIGN CANDRABINDU ("A" . "[\u0B02\u0B03]") ; SIGN ANUSVARA .. VISARGA ("V" . "[\u0B05-\u0B14\u0B60\u0B61]") ; independent vowel - ("C" . "[\u0B15-\u0B39\u0B5C\u0B5D\u0B71]") ; consonant + ("C" . "[\u0B15-\u0B39\u0B5C\u0B5D\u0B5F\u0B71]") ; consonant ("B" . "[\u0B15-\u0B17\u0B1B-\u0B1D\u0B1F-\u0B21\u0B23\u0B24\u0B27-\u0B30\u0B32-\u0B35\u0B38\u0B39]") ; consonant with below form ("R" . "\u0B30") ; RA ("n" . "\u0B3C") ; NUKTA @@ -384,6 +485,190 @@ South Indian language Malayalam is supported in this language environment.")) (list (vector (cdr slot) 0 #'font-shape-gstring)))))) char-script-table)) -(provide 'indian) +;; Brahmi composition rules +(let ((consonant "[\U00011013-\U00011034]") + (non-consonant "[^\U00011013-\U00011034\U00011046\U0001107F]") + (vowel "[\U00011038-\U00011045]") + (numeral "[\U00011052-\U00011065]") + (multiplier "[\U00011064\U00011065]") + (virama "\U00011046") + (number-joiner "\U0001107F")) + (set-char-table-range composition-function-table + '(#x11046 . #x11046) + (list (vector + ;; Consonant conjuncts + (concat consonant "\\(?:" virama consonant "\\)+" + vowel "?") + 1 'font-shape-gstring) + (vector + ;; Vowelless consonants + (concat consonant virama non-consonant) + 1 'font-shape-gstring))) + (set-char-table-range composition-function-table + '(#x1107F . #x1107F) + (list (vector + ;; Additive-multiplicative numerals + (concat multiplier number-joiner numeral) + 1 'font-shape-gstring)))) + +;; Kaithi composition rules +(let ((consonant "[\x1108D-\x110AF]") + (nukta "\x110BA") + (independent-vowel "[\x11083-\x1108C]") + (vowel "[\x1108D-\x110C2]") + (nasal "[\x11080\x11081]") + (virama "\x110B9") + (number-sign "\x110BD") + (number-sign-above "\x110CD") + (numerals "[\x966-\x96F]+") + (zwj "\x200D")) + (set-char-table-range composition-function-table + '(#x110B0 . #x110BA) + (list (vector + ;; Consonant based syllables + (concat consonant nukta "?\\(?:" virama zwj "?" consonant + nukta "?\\)*\\(?:" virama zwj "?\\|" vowel "*" nukta + "?" nasal "?\\)") + 1 'font-shape-gstring) + (vector + ;; Vowel based syllables + (concat independent-vowel nukta "?" virama "?" vowel "?") + 1 'font-shape-gstring))) + (set-char-table-range composition-function-table + '(#x110BD . #x110BD) + (list (vector + ;; Number sign + (concat number-sign numerals) + 0 'font-shape-gstring))) + (set-char-table-range composition-function-table + '(#x110CD . #x110CD) + (list (vector + ;; Number sign above + (concat number-sign-above numerals) + 0 'font-shape-gstring)))) + +;; Tirhuta composition rules +(let ((consonant "[\x1148F-\x114AF]") + (nukta "\x114C3") + (independent-vowel "[\x11481-\x1148E]") + (vowel "[\x114B0-\x114BE]") + (nasal "[\x114BF\x114C0]") + (virama "\x114C2")) + (set-char-table-range composition-function-table + '(#x114B0 . #x114C3) + (list (vector + ;; Consonant based syllables + (concat consonant nukta "?\\(?:" virama consonant nukta + "?\\)*\\(?:" virama "\\|" vowel "*" nukta "?" + nasal "?\\)") + 1 'font-shape-gstring) + (vector + ;; Vowel based syllables + (concat independent-vowel nukta "?" virama "?" vowel "?" nasal "?") + 1 'font-shape-gstring)))) + +;; Sharada composition rules +(let ((consonant "[\x11191-\x111B2]") + (nukta "\x111CA") + (independent-vowel "[\x11183-\x11190]") + (vowel "[\x111B3-\x111BF\x111CE]") + (vowel-modifier "\x111CB") + (extra-short-vowel-mark "\x111CC") + (nasal "[\x11181\x11180\x111CF]") + (virama "\x111C0") + (fricatives "[\x111C2\x111C3]") + (sandhi-mark "\x111C9") + (misc "[\x111C4-\x111C8\x111CD]")) + (set-char-table-range composition-function-table + '(#x111B3 . #x111CE) + (list (vector + ;; Consonant based syllables + (concat consonant nukta "?" vowel-modifier "?\\(?:" virama + consonant nukta "?" vowel-modifier "?\\)*\\(?:" virama + "\\|" vowel "*" nukta "?" nasal "?" extra-short-vowel-mark + "?" vowel-modifier "?" sandhi-mark "?+" misc "?\\)") + 1 'font-shape-gstring) + (vector + ;; Vowel based syllables + (concat independent-vowel nukta "?" vowel-modifier "?" virama "?" + vowel "?" extra-short-vowel-mark "?" sandhi-mark "?" + fricatives "?" misc "?") + 1 'font-shape-gstring) + (vector + ;; Fricatives with Consonants + (concat fricatives "?" consonant vowel "?") + 0 'font-shape-gstring)))) + +;; Siddham composition rules +(let ((consonant "[\x1158E-\x115AE]") + (nukta "\x115C0") + (independent-vowel "[\x11580-\x1158D\x115D8-\x115DB]") + (vowel "[\x115AF-\x115BB\x115DC\x115DD]") + (nasal "[\x115BC\x115BD]") + (visarga "\x115BE") + (virama "\x115BF")) + (set-char-table-range composition-function-table + '(#x115AF . #x115C0) + (list (vector + ;; Consonant based syllables + (concat consonant nukta "?" "\\(?:" virama consonant nukta + "?\\)*\\(?:" virama "\\|" vowel "*" nukta "?" nasal + "?" visarga "?\\)") + 1 'font-shape-gstring) + (vector + ;; Vowels based syllables + (concat independent-vowel nukta "?" virama "?" vowel "?" + nasal "?" visarga "?") + 1 'font-shape-gstring)))) + +;; Syloti Nagri composition rules +(let ((consonant "[\xA807-\xA80A\xA80C-\xA822]") + (vowel "[\xA802\xA823-\xA827]") + (nasal "[\xA80B]") + (virama "\xA806") + (alternate-virama "\xA82C")) + (set-char-table-range composition-function-table + '(#xA806 . #xA806) + (list (vector + ;; Consonant conjunct based syllables + (concat consonant "\\(?:" virama consonant "\\)+" + vowel "?" nasal "?") + 1 'font-shape-gstring))) + (set-char-table-range composition-function-table + '(#xA823 . #xA827) + (list (vector + ;; Non Consonant conjunct based syllables + (concat consonant vowel nasal "?") + 1 'font-shape-gstring))) + (set-char-table-range composition-function-table + '(#xA82C . #xA82C) + (list (vector + ;; Consonant with the alternate virama + (concat consonant "\\(?:" alternate-virama consonant "\\)+" + vowel "?" nasal "?") + 1 'font-shape-gstring)))) + +;; Modi composition rules +(let ((consonant "[\x1160E-\x1162F]") + (independent-vowel "[\x11600-\x1160D]") + (vowel "[\x11630-\x1163C]") + (nasal "\x1163D") + (visarga "\x1163E") + (virama "\x1163F") + (ardhacandra "\x11640")) + (set-char-table-range composition-function-table + '(#x11630 . #x11640) + (list (vector + ;; Consonant based syllables + (concat consonant "\\(?:" virama consonant "\\)*\\(?:" + virama "\\|" vowel "*" ardhacandra "?" nasal + "?" visarga "?\\)") + 1 'font-shape-gstring) + (vector + ;; Vowels based syllables + (concat independent-vowel virama "?" vowel "?" ardhacandra + nasal "?" visarga "?") + 1 'font-shape-gstring)))) +(provide 'indian) ;;; indian.el ends here |