summaryrefslogtreecommitdiff
path: root/lisp/language/indian.el
diff options
context:
space:
mode:
Diffstat (limited to 'lisp/language/indian.el')
-rw-r--r--lisp/language/indian.el309
1 files changed, 297 insertions, 12 deletions
diff --git a/lisp/language/indian.el b/lisp/language/indian.el
index e0adb0de6c3..a5563a3ff77 100644
--- a/lisp/language/indian.el
+++ b/lisp/language/indian.el
@@ -45,8 +45,9 @@
(coding-system utf-8)
(coding-priority utf-8)
(input-method . "devanagari-aiba")
+ (sample-text . "Devanagari (देवनागरी) नमस्ते / नमस्कार")
(documentation . "\
-Such languages using Devanagari script as Hindi and Marathi
+Such languages using Devanagari script as Hindi, Marathi and Nepali
are supported in this language environment."))
'("Indian"))
@@ -55,16 +56,18 @@ are supported in this language environment."))
(coding-system utf-8)
(coding-priority utf-8)
(input-method . "bengali-itrans")
+ (sample-text . "Bengali (বাংলা) নমস্কার")
(documentation . "\
Such languages using Bengali script as Bengali and Assamese
are supported in this language environment."))
'("Indian"))
(set-language-info-alist
- "Punjabi" '((charset unicode)
+ "Gurmukhi" '((charset unicode)
(coding-system utf-8)
(coding-priority utf-8)
(input-method . "punjabi-itrans")
+ (sample-text . "Gurmukhi (ਗੁਰਮੁਖੀ) ਸਤ ਸ੍ਰੀ ਅਕਾਲ")
(documentation . "\
North Indian language Punjabi is supported in this language environment."))
'("Indian"))
@@ -74,17 +77,31 @@ North Indian language Punjabi is supported in this language environment."))
(coding-system utf-8)
(coding-priority utf-8)
(input-method . "gujarati-itrans")
+ (sample-text . "Gujarati (ગુજરાતી) નમસ્તે")
(documentation . "\
North Indian language Gujarati is supported in this language environment."))
'("Indian"))
(set-language-info-alist
+ "Odia" '((charset unicode)
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "odia")
+ (sample-text . "Odia (ଓଡ଼ିଆ) ନମସ୍କାର")
+ (documentation . "\
+Such languages using the Odia script as Odia, Khonti, and Santali
+are supported in this language environment. (This language
+environment was formerly known as \"Oriya\")."))
+ '("Indian"))
+
+(set-language-info-alist
"Oriya" '((charset unicode)
- (coding-system utf-8)
- (coding-priority utf-8)
- (input-method . "oriya-itrans")
- (documentation . "\
-Such languages using Oriya script as Oriya, Khonti, and Santali
+ (coding-system utf-8)
+ (coding-priority utf-8)
+ (input-method . "odia")
+ (sample-text . "Odia (ଓଡ଼ିଆ) ନମସ୍କାର")
+ (documentation . "\
+Such languages using the Odia script as Odia, Khonti, and Santali
are supported in this language environment."))
'("Indian"))
@@ -93,6 +110,7 @@ are supported in this language environment."))
(coding-system utf-8)
(coding-priority utf-8)
(input-method . "tamil-itrans")
+ (sample-text . "Tamil (தமிழ்) வணக்கம்")
(documentation . "\
South Indian Language Tamil is supported in this language environment."))
'("Indian"))
@@ -102,6 +120,7 @@ South Indian Language Tamil is supported in this language environment."))
(coding-system utf-8)
(coding-priority utf-8)
(input-method . "telugu-itrans")
+ (sample-text . "Telugu (తెలుగు) నమస్కారం")
(documentation . "\
South Indian Language Telugu is supported in this language environment."))
'("Indian"))
@@ -122,10 +141,87 @@ environment."))
(coding-system utf-8)
(coding-priority utf-8)
(input-method . "malayalam-itrans")
+ (sample-text . "Malayalam (മലയാളം) നമസ്കാരം")
(documentation . "\
South Indian language Malayalam is supported in this language environment."))
'("Indian"))
+;; Declare the "Brahmi" language environment: unicode charset, utf-8
+;; coding system and priority, the "brahmi" input method, a sample
+;; text and a documentation string.  The trailing list names "Indian".
+(set-language-info-alist
+ "Brahmi"
+ (list (list 'charset 'unicode)
+       (list 'coding-system 'utf-8)
+       (list 'coding-priority 'utf-8)
+       (cons 'input-method "brahmi")
+       (cons 'sample-text "Brahmi (𑀩𑁆𑀭𑀸𑀳𑁆𑀫𑀻) 𑀦𑀫𑀲𑁆𑀢𑁂")
+       (cons 'documentation "\
+The ancient Brahmi script is supported in this language environment."))
+ (list "Indian")) ; Should we have an "Old" category?
+
+;; Declare the "Kaithi" language environment: unicode charset, utf-8
+;; coding system and priority, the "kaithi" input method, a sample
+;; text and a documentation string.  The trailing list names "Indian".
+(set-language-info-alist
+ "Kaithi"
+ (list (list 'charset 'unicode)
+       (list 'coding-system 'utf-8)
+       (list 'coding-priority 'utf-8)
+       (cons 'input-method "kaithi")
+       (cons 'sample-text "Kaithi (𑂍𑂶𑂟𑂲) 𑂩𑂰𑂧𑂩𑂰𑂧")
+       (cons 'documentation "\
+Languages such as Awadhi, Bhojpuri, Magahi and Maithili
+which used the Kaithi script are supported in this language environment."))
+ (list "Indian"))
+
+;; Declare the "Tirhuta" language environment: unicode charset, utf-8
+;; coding system and priority, the "tirhuta" input method, a sample
+;; text and a documentation string.  The trailing list names "Indian".
+(set-language-info-alist
+ "Tirhuta" '((charset unicode)
+             (coding-system utf-8)
+             (coding-priority utf-8)
+             (input-method . "tirhuta")
+             (sample-text . "Tirhuta (𑒞𑒱𑒩𑒯𑒳𑒞𑒰) 𑒣𑓂𑒩𑒢𑒰𑒧 / 𑒮𑒲𑒞𑒰𑒩𑒰𑒧")
+             ;; Compound subject (language and script) takes a plural verb.
+             (documentation . "\
+Maithili language and its script Tirhuta are supported in this
+language environment."))
+ '("Indian"))
+
+;; Declare the "Sharada" language environment: unicode charset, utf-8
+;; coding system and priority, the "sharada" input method, a sample
+;; text and a documentation string.  The trailing list names "Indian".
+(set-language-info-alist
+ "Sharada" '((charset unicode)
+             (coding-system utf-8)
+             (coding-priority utf-8)
+             (input-method . "sharada")
+             (sample-text . "Sharada (𑆯𑆳𑆫𑆢𑆳) 𑆤𑆩𑆱𑇀𑆑𑆳𑆫")
+             ;; Compound subject (language and script) takes a plural verb.
+             (documentation . "\
+Kashmiri language and its script Sharada are supported in this
+language environment."))
+ '("Indian"))
+
+;; Declare the "Siddham" language environment: unicode charset, utf-8
+;; coding system and priority, the "siddham" input method, a sample
+;; text and a documentation string.  The trailing list names "Indian".
+(set-language-info-alist
+ "Siddham" '((charset unicode)
+             (coding-system utf-8)
+             (coding-priority utf-8)
+             (input-method . "siddham")
+             (sample-text . "Siddham (𑖭𑖰𑖟𑖿𑖠𑖽) 𑖡𑖦𑖭𑖿𑖝𑖸")
+             ;; "one of its scripts" (plural), and a plural verb for the
+             ;; compound subject.
+             (documentation . "\
+Sanskrit language and one of its scripts Siddham are supported
+in this language environment."))
+ '("Indian"))
+
+;; Declare the "Syloti Nagri" language environment: unicode charset,
+;; utf-8 coding system and priority, the "syloti-nagri" input method,
+;; a sample text and a documentation string.  The trailing list names
+;; "Indian".
+(set-language-info-alist
+ "Syloti Nagri" '((charset unicode)
+                  (coding-system utf-8)
+                  (coding-priority utf-8)
+                  (input-method . "syloti-nagri")
+                  (sample-text . "Syloti Nagri (ꠍꠤꠟꠐꠤ ꠘꠣꠉꠞꠤ) ꠀꠌ꠆ꠍꠣꠟꠣꠝꠥ ꠀꠟꠣꠁꠇꠥꠝ / ꠘꠝꠡ꠆ꠇꠣꠞ")
+                  ;; Compound subject (language and script) takes a
+                  ;; plural verb.
+                  (documentation . "\
+Sylheti language and its script Syloti Nagri are supported
+in this language environment."))
+ '("Indian"))
+
+;; Declare the "Modi" language environment: unicode charset, utf-8
+;; coding system and priority, the "modi" input method, a sample text
+;; and a documentation string.  The trailing list names "Indian".
+(set-language-info-alist
+ "Modi" '((charset unicode)
+          (coding-system utf-8)
+          (coding-priority utf-8)
+          (input-method . "modi")
+          (sample-text . "Modi (𑘦𑘻𑘚𑘲) 𑘡𑘦𑘭𑘿𑘎𑘰𑘨")
+          ;; "one of its scripts" (plural), and a plural verb for the
+          ;; compound subject.
+          (documentation . "\
+Marathi language and one of its scripts Modi are supported
+in this language environment."))
+ '("Indian"))
+
;; Replace mnemonic characters in REGEXP according to TABLE. TABLE is
;; an alist of (MNEMONIC-STRING . REPLACEMENT-STRING).
@@ -147,6 +243,8 @@ South Indian language Malayalam is supported in this language environment."))
("H" . "\u094D") ; HALANT
("s" . "[\u0951\u0952]") ; stress sign
("t" . "[\u0953\u0954]") ; accent
+ ("1" . "\u0967") ; numeral 1
+ ("3" . "\u0969") ; numeral 3
("N" . "\u200C") ; ZWNJ
("J" . "\u200D") ; ZWJ
("X" . "[\u0900-\u097F]")))) ; all coverage
@@ -158,6 +256,8 @@ South Indian language Malayalam is supported in this language environment."))
"Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?s?t?A?\\)\\|"
;; special consonant form, or
"JHR\\|"
+ ;; vedic accents with numerals, or
+ "1ss?\\|3ss\\|s3ss\\|"
;; any other singleton characters
"X")
table))
@@ -168,14 +268,15 @@ South Indian language Malayalam is supported in this language environment."))
'(("a" . "\u0981") ; SIGN CANDRABINDU
("A" . "[\u0982\u0983]") ; SIGN ANUSVARA .. VISARGA
("V" . "[\u0985-\u0994\u09E0\u09E1]") ; independent vowel
- ("C" . "[\u0995-\u09B9\u09DC-\u09DF\u09F1]") ; consonant
+ ("C" . "[\u0995-\u09B9\u09DC-\u09DF\u09F0\u09F1]") ; consonant
("B" . "[\u09AC\u09AF\u09B0\u09F0]") ; BA, YA, RA
("R" . "[\u09B0\u09F0]") ; RA
("n" . "\u09BC") ; NUKTA
("v" . "[\u09BE-\u09CC\u09D7\u09E2\u09E3]") ; vowel sign
("H" . "\u09CD") ; HALANT
("T" . "\u09CE") ; KHANDA TA
- ("N" . "\u200C") ; ZWNJ
+ ("S" . "\u09FE") ; SANDHI MARK
+ ("N" . "\u200C") ; ZWNJ
("J" . "\u200D") ; ZWJ
("X" . "[\u0980-\u09FF]")))) ; all coverage
(indian-compose-regexp
@@ -183,7 +284,7 @@ South Indian language Malayalam is supported in this language environment."))
;; syllables with an independent vowel, or
"\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|"
;; consonant-based syllables, or
- "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*[NJ]?v?a?A?\\)\\|"
+ "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*[NJ]?v?a?A?S?\\)\\|"
;; other syllables with an independent vowel, or
"\\(?:RH\\)?T\\|"
;; special consonant form, or
@@ -250,7 +351,7 @@ South Indian language Malayalam is supported in this language environment."))
'(("a" . "\u0B01") ; SIGN CANDRABINDU
("A" . "[\u0B02\u0B03]") ; SIGN ANUSVARA .. VISARGA
("V" . "[\u0B05-\u0B14\u0B60\u0B61]") ; independent vowel
- ("C" . "[\u0B15-\u0B39\u0B5C\u0B5D\u0B71]") ; consonant
+ ("C" . "[\u0B15-\u0B39\u0B5C\u0B5D\u0B5F\u0B71]") ; consonant
("B" . "[\u0B15-\u0B17\u0B1B-\u0B1D\u0B1F-\u0B21\u0B23\u0B24\u0B27-\u0B30\u0B32-\u0B35\u0B38\u0B39]") ; consonant with below form
("R" . "\u0B30") ; RA
("n" . "\u0B3C") ; NUKTA
@@ -384,6 +485,190 @@ South Indian language Malayalam is supported in this language environment."))
(list (vector (cdr slot) 0 #'font-shape-gstring))))))
char-script-table))
-(provide 'indian)
+;; Brahmi composition rules
+;; The regexps are assembled first, then installed in
+;; `composition-function-table' for #x11046 and for #x1107F.  Every
+;; rule is a vector of a regexp, the number 1 and `font-shape-gstring'.
+(let* ((cons-class "[\U00011013-\U00011034]")
+       (not-cons-class "[^\U00011013-\U00011034\U00011046\U0001107F]")
+       (vowel-class "[\U00011038-\U00011045]")
+       (digit-class "[\U00011052-\U00011065]")
+       (multiplier-class "[\U00011064\U00011065]")
+       (halant "\U00011046")
+       (joiner "\U0001107F")
+       ;; Consonant conjuncts
+       (conjunct-re (concat cons-class "\\(?:" halant cons-class "\\)+"
+                            vowel-class "?"))
+       ;; Vowelless consonants
+       (bare-consonant-re (concat cons-class halant not-cons-class))
+       ;; Additive-multiplicative numerals
+       (number-re (concat multiplier-class joiner digit-class)))
+  (set-char-table-range composition-function-table
+                        (cons #x11046 #x11046)
+                        (list (vector conjunct-re 1 #'font-shape-gstring)
+                              (vector bare-consonant-re 1 #'font-shape-gstring)))
+  (set-char-table-range composition-function-table
+                        (cons #x1107F #x1107F)
+                        (list (vector number-re 1 #'font-shape-gstring))))
+
+;; Kaithi composition rules
+;; Two syllable rules are installed for #x110B0..#x110BA (each vector
+;; carries 1), and one numeral rule each for the number signs #x110BD
+;; and #x110CD (each vector carries 0).
+(let* ((cons-re "[\x1108D-\x110AF]")
+       (nukta-re "\x110BA")
+       (indep-vowel-re "[\x11083-\x1108C]")
+       (vowel-re "[\x1108D-\x110C2]")
+       (nasal-re "[\x11080\x11081]")
+       (halant "\x110B9")
+       (digits-re "[\x966-\x96F]+")
+       (joiner "\x200D")
+       ;; Consonant based syllables
+       (consonant-syllable
+        (concat cons-re nukta-re "?\\(?:" halant joiner "?" cons-re
+                nukta-re "?\\)*\\(?:" halant joiner "?\\|" vowel-re "*"
+                nukta-re "?" nasal-re "?\\)"))
+       ;; Vowel based syllables
+       (vowel-syllable
+        (concat indep-vowel-re nukta-re "?" halant "?" vowel-re "?")))
+  (set-char-table-range composition-function-table
+                        (cons #x110B0 #x110BA)
+                        (list (vector consonant-syllable 1 #'font-shape-gstring)
+                              (vector vowel-syllable 1 #'font-shape-gstring)))
+  ;; Number sign, then number sign above: the sign itself followed by
+  ;; the numerals.
+  (dolist (sign '(#x110BD #x110CD))
+    (set-char-table-range composition-function-table
+                          (cons sign sign)
+                          (list (vector (concat (string sign) digits-re)
+                                        0 #'font-shape-gstring)))))
+
+;; Tirhuta composition rules
+;; Two rules are installed for #x114B0..#x114C3; each is a vector of a
+;; regexp, the number 1 and `font-shape-gstring'.
+(let* ((cons-re "[\x1148F-\x114AF]")
+       (nukta-re "\x114C3")
+       (indep-vowel-re "[\x11481-\x1148E]")
+       (vowel-re "[\x114B0-\x114BE]")
+       (nasal-re "[\x114BF\x114C0]")
+       (halant "\x114C2")
+       ;; Consonant based syllables
+       (consonant-syllable
+        (concat cons-re nukta-re "?\\(?:" halant cons-re nukta-re
+                "?\\)*\\(?:" halant "\\|" vowel-re "*" nukta-re "?"
+                nasal-re "?\\)"))
+       ;; Vowel based syllables
+       (vowel-syllable
+        (concat indep-vowel-re nukta-re "?" halant "?" vowel-re "?"
+                nasal-re "?")))
+  (set-char-table-range composition-function-table
+                        (cons #x114B0 #x114C3)
+                        (list (vector consonant-syllable 1 #'font-shape-gstring)
+                              (vector vowel-syllable 1 #'font-shape-gstring))))
+
+;; Sharada composition rules
+;; Binds one regexp fragment per character class, then installs three
+;; rules in `composition-function-table' for the range
+;; #x111B3..#x111CE.  Each rule is a vector of a regexp, a number and
+;; `font-shape-gstring'.
+(let ((consonant "[\x11191-\x111B2]")
+ (nukta "\x111CA")
+ (independent-vowel "[\x11183-\x11190]")
+ (vowel "[\x111B3-\x111BF\x111CE]")
+ (vowel-modifier "\x111CB")
+ (extra-short-vowel-mark "\x111CC")
+ (nasal "[\x11181\x11180\x111CF]")
+ (virama "\x111C0")
+ (fricatives "[\x111C2\x111C3]")
+ (sandhi-mark "\x111C9")
+ (misc "[\x111C4-\x111C8\x111CD]"))
+ (set-char-table-range composition-function-table
+ '(#x111B3 . #x111CE)
+ (list (vector
+ ;; Consonant based syllables
+ ;; NOTE(review): `sandhi-mark "?+"' below stacks two postfix
+ ;; operators, whereas the vowel rule uses a plain "?" after
+ ;; sandhi-mark.  Presumably a typo for "?" -- confirm how the
+ ;; regexp engine reads "?+" before changing it.
+ (concat consonant nukta "?" vowel-modifier "?\\(?:" virama
+ consonant nukta "?" vowel-modifier "?\\)*\\(?:" virama
+ "\\|" vowel "*" nukta "?" nasal "?" extra-short-vowel-mark
+ "?" vowel-modifier "?" sandhi-mark "?+" misc "?\\)")
+ 1 'font-shape-gstring)
+ (vector
+ ;; Vowel based syllables
+ ;; Unlike the consonant rule, this one has no `nasal' fragment
+ ;; and allows an optional `fricatives' character before `misc'.
+ (concat independent-vowel nukta "?" vowel-modifier "?" virama "?"
+ vowel "?" extra-short-vowel-mark "?" sandhi-mark "?"
+ fricatives "?" misc "?")
+ 1 'font-shape-gstring)
+ (vector
+ ;; Fricatives with Consonants
+ ;; This rule's vector carries 0 where the two rules above carry 1.
+ (concat fricatives "?" consonant vowel "?")
+ 0 'font-shape-gstring))))
+
+;; Siddham composition rules
+;; Two rules are installed for #x115AF..#x115C0; each is a vector of a
+;; regexp, the number 1 and `font-shape-gstring'.
+(let* ((cons-re "[\x1158E-\x115AE]")
+       (nukta-re "\x115C0")
+       (indep-vowel-re "[\x11580-\x1158D\x115D8-\x115DB]")
+       (vowel-re "[\x115AF-\x115BB\x115DC\x115DD]")
+       (nasal-re "[\x115BC\x115BD]")
+       (visarga-re "\x115BE")
+       (halant "\x115BF")
+       ;; Consonant based syllables
+       (consonant-syllable
+        (concat cons-re nukta-re "?" "\\(?:" halant cons-re nukta-re
+                "?\\)*\\(?:" halant "\\|" vowel-re "*" nukta-re "?"
+                nasal-re "?" visarga-re "?\\)"))
+       ;; Vowels based syllables
+       (vowel-syllable
+        (concat indep-vowel-re nukta-re "?" halant "?" vowel-re "?"
+                nasal-re "?" visarga-re "?")))
+  (set-char-table-range composition-function-table
+                        (cons #x115AF #x115C0)
+                        (list (vector consonant-syllable 1 #'font-shape-gstring)
+                              (vector vowel-syllable 1 #'font-shape-gstring))))
+
+;; Syloti Nagri composition rules
+;; Installs one conjunct rule for each of the two viramas (#xA806 and
+;; #xA82C) and one consonant+vowel rule for #xA823..#xA827.  Every
+;; rule is a vector of a regexp, the number 1 and `font-shape-gstring'.
+(let ((cons-re "[\xA807-\xA80A\xA80C-\xA822]")
+      (vowel-re "[\xA802\xA823-\xA827]")
+      (nasal-re "[\xA80B]"))
+  ;; Consonant conjunct based syllables, first with the virama and then
+  ;; with the alternate virama as the joining character.
+  (dolist (joining-char '(#xA806 #xA82C))
+    (set-char-table-range
+     composition-function-table
+     (cons joining-char joining-char)
+     (list (vector (concat cons-re "\\(?:" (string joining-char) cons-re "\\)+"
+                           vowel-re "?" nasal-re "?")
+                   1 #'font-shape-gstring))))
+  ;; Non Consonant conjunct based syllables
+  (set-char-table-range
+   composition-function-table
+   (cons #xA823 #xA827)
+   (list (vector (concat cons-re vowel-re nasal-re "?")
+                 1 #'font-shape-gstring))))
+
+;; Modi composition rules
+;; Two rules are installed for #x11630..#x11640; each is a vector of a
+;; regexp, the number 1 and `font-shape-gstring'.
+(let ((consonant "[\x1160E-\x1162F]")
+      (independent-vowel "[\x11600-\x1160D]")
+      (vowel "[\x11630-\x1163C]")
+      (nasal "\x1163D")
+      (visarga "\x1163E")
+      (virama "\x1163F")
+      (ardhacandra "\x11640"))
+  (set-char-table-range composition-function-table
+                        '(#x11630 . #x11640)
+                        (list (vector
+                               ;; Consonant based syllables
+                               (concat consonant "\\(?:" virama consonant "\\)*\\(?:"
+                                       virama "\\|" vowel "*" ardhacandra "?" nasal
+                                       "?" visarga "?\\)")
+                               1 'font-shape-gstring)
+                              (vector
+                               ;; Vowels based syllables.  ARDHACANDRA is
+                               ;; optional here, as in the consonant rule;
+                               ;; without its "?" an independent vowel
+                               ;; followed only by NASAL or VISARGA would
+                               ;; never match.
+                               (concat independent-vowel virama "?" vowel "?"
+                                       ardhacandra "?" nasal "?" visarga "?")
+                               1 'font-shape-gstring))))
+(provide 'indian)
;;; indian.el ends here