summaryrefslogtreecommitdiff
path: root/lisp/language/ind-util.el
diff options
context:
space:
mode:
Diffstat (limited to 'lisp/language/ind-util.el')
-rw-r--r--lisp/language/ind-util.el121
1 files changed, 90 insertions, 31 deletions
diff --git a/lisp/language/ind-util.el b/lisp/language/ind-util.el
index fc8f4c9d983..27facaa858f 100644
--- a/lisp/language/ind-util.el
+++ b/lisp/language/ind-util.el
@@ -1,8 +1,7 @@
-;;; ind-util.el --- Transliteration and Misc. Tools for Indian Languages -*- coding: utf-8-emacs; -*-
+;;; ind-util.el --- Transliteration and Misc. Tools for Indian Languages -*- coding: utf-8-emacs; lexical-binding: t; -*-
-;; Copyright (C) 2001-2017 Free Software Foundation, Inc.
+;; Copyright (C) 2001-2022 Free Software Foundation, Inc.
-;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
;; Keywords: multilingual, Indian, Devanagari
;; This file is part of GNU Emacs.
@@ -41,7 +40,7 @@
(defun indian-regexp-of-hashtbl-keys (hashtbl)
"Return the regular expression of hash table keys."
(let (keys)
- (maphash (lambda (key val) (push key keys)) hashtbl)
+ (maphash (lambda (key _val) (push key keys)) hashtbl)
(regexp-opt keys)))
(defvar indian-dev-base-table
@@ -233,8 +232,8 @@
'(
(;; VOWELS
(?അ nil) (?ആ ?ാ) (?ഇ ?ി) (?ഈ ?ീ) (?ഉ ?ു) (?ഊ ?ൂ)
- (?ഋ ?ൃ) (?ഌ nil) nil (?ഏ ?േ) (?എ ?െ) (?ഐ ?ൈ)
- nil (?ഓ ?ോ) (?ഒ ?ൊ) (?ഔ ?ൌ) nil nil)
+ (?ഋ ?ൃ) (?ഌ ?ൢ) (?ൡ ?ൣ) (?ഏ ?േ) (?എ ?െ) (?ഐ ?ൈ)
+ nil (?ഒ ?ൊ) (?ഓ ?ോ) (?ഔ ?ൗ) (?് ?്) (?ൠ ?ൄ))
(;; CONSONANTS
?ക ?ഖ ?ഗ ?ഘ ?ങ ;; GUTTRULS
?ച ?ഛ ?ജ ?ഝ ?ഞ ;; PALATALS
@@ -244,13 +243,16 @@
?യ ?ര ?റ ?ല ?ള ?ഴ ?വ ;; SEMIVOWELS
?ശ ?ഷ ?സ ?ഹ ;; SIBILANTS
nil nil nil nil nil nil nil nil ;; NUKTAS
- "ജ്ഞ" "ക്ഷ")
+ "ജ്ഞ" "ക്ഷ"
+ "റ്റ" "ന്റ" "ത്ത" "ത്ഥ" "ഞ്ഞ" "ങ്ങ" "ന്ന"
+ "ഞ്ച" "ന്ക" "ങ്ക" "ച്ച" "ച്ഛ" "ക്ക"
+ "ബ്ബ" "ക്ക" "ഗ്ഗ" "ജ്ജ" "മ്മ" "പ്പ" "വ്വ" "ക്സ" "ശ്ശ")
(;; Misc Symbols
nil ?ം ?ഃ nil ?് nil nil)
(;; Digits
?൦ ?൧ ?൨ ?൩ ?൪ ?൫ ?൬ ?൭ ?൮ ?൯)
- (;; Inscript-extra (4) (#, $, ^, *, ])
- "്ര" "ര്" "ത്ര" "ശ്ര" nil)))
+ (;; Chillus
+ "ണ്" ?ൺ "ന്" ?ൻ "ര്" ?ർ "ല്" ?ൽ "ള്" ?ൾ)))
(defvar indian-tml-base-table
'(
@@ -265,11 +267,34 @@
?த nil nil nil ?ந ?ன ;; DENTALS
?ப nil nil nil ?ம ;; LABIALS
?ய ?ர ?ற ?ல ?ள ?ழ ?வ ;; SEMIVOWELS
- nil ?ஷ ?ஸ ?ஹ ;; SIBILANTS
+ ?ஶ ?ஷ ?ஸ ?ஹ ;; SIBILANTS
+ nil nil nil nil nil nil nil nil ;; NUKTAS
+ "ஜ்ஞ" "க்ஷ" "க்‌ஷ")
+ (;; Misc Symbols
+ nil ?ஂ ?ஃ nil ?் ?ௐ nil)
+ (;; Digits
+ nil nil nil nil nil nil nil nil nil nil)
+ (;; Inscript-extra (4) (#, $, ^, *, ])
+ "்ர" "ர்" "த்ர" nil nil)))
+
+(defvar indian-tml-base-digits-table
+ '(
+ (;; VOWELS
+ (?அ nil) (?ஆ ?ா) (?இ ?ி) (?ஈ ?ீ) (?உ ?ு) (?ஊ ?ூ)
+ nil nil nil (?ஏ ?ே) (?எ ?ெ) (?ஐ ?ை)
+ nil (?ஓ ?ோ) (?ஒ ?ொ) (?ஔ ?ௌ) nil nil)
+ (;; CONSONANTS
+ ?க nil nil nil ?ங ;; GUTTRULS
+ ?ச nil ?ஜ nil ?ஞ ;; PALATALS
+ ?ட nil nil nil ?ண ;; CEREBRALS
+ ?த nil nil nil ?ந ?ன ;; DENTALS
+ ?ப nil nil nil ?ம ;; LABIALS
+ ?ய ?ர ?ற ?ல ?ள ?ழ ?வ ;; SEMIVOWELS
+ ?ஶ ?ஷ ?ஸ ?ஹ ;; SIBILANTS
nil nil nil nil nil nil nil nil ;; NUKTAS
- "ஜ்ஞ" "க்ஷ")
+ "ஜ்ஞ" "க்ஷ" "க்‌ஷ")
(;; Misc Symbols
- nil ?ஂ ?ஃ nil ?் nil nil)
+ nil ?ஂ ?ஃ nil ?் ?ௐ nil)
(;; Digits
?௦ ?௧ ?௨ ?௩ ?௪ ?௫ ?௬ ?௭ ?௮ ?௯)
(;; Inscript-extra (4) (#, $, ^, *, ])
@@ -290,8 +315,8 @@
'(;; for encode/decode
(;; vowels -- 18
"a" ("aa" "A") "i" ("ii" "I") "u" ("uu" "U")
- ("RRi" "R^i") ("LLi" "L^i") (".c" "e.c") "E" "e" "ai"
- "o.c" "O" "o" "au" ("RRI" "R^I") ("LLI" "L^I"))
+ ("RRi" "R^i" "RRu" "R^u") ("LLi" "L^i") (".c" "e.c") "E" "e" "ai"
+ "o.c" "O" "o" "au" ("RRI" "R^I" "RRU" "R^U") ("LLI" "L^I"))
(;; consonants -- 40
"k" "kh" "g" "gh" ("~N" "N^")
"ch" ("Ch" "chh") "j" "jh" ("~n" "JN")
@@ -324,6 +349,29 @@
(;; misc -- 7
".N" (".n" "M") "H" ".a" ".h" ("AUM" "OM") "..")))
+(defvar indian-mlm-mozhi-table
+ '(;; for encode/decode
+ (;; vowels -- 18
+ "a" ("aa" "A") "i" ("ii" "I") "u" ("uu" "U")
+ "R" "Ll" "Lll" ("E" "ae") "e" "ai"
+ nil "o" "O" "au" "~" "RR")
+ (;; consonants -- 40
+ ("k" "c") "kh" "g" "gh" "ng"
+ "ch" ("Ch" "chh") "j" "jh" "nj"
+ "T" "Th" "D" "Dh" "N"
+ "th" "thh" "d" "dh" "n" nil
+ "p" ("ph" "f") "b" "bh" "m"
+ "y" "r" "rr" "l" "L" "zh" ("v" "w")
+ ("S" "z") "sh" "s" "h"
+ nil nil nil nil nil nil nil nil
+ nil "X"
+ ;; some of these are extra to Mozhi
+ ("t" "tt") "nt" "tth" "tthh" "nnj" "nng" "nn"
+ "nch" "nc" "nk" "cch" "cchh" "cc"
+ "B" ("C" "K" "q") "G" "J" "M" "P" "V" "x" "Z")
+ (;; misc -- 7
+ nil nil "H")))
+
(defvar indian-kyoto-harvard-table
'(;; for encode/decode
(;; vowel
@@ -462,7 +510,7 @@
c trans-c))
(defun indian-make-hash (table trans-table)
- "Indian Transliteration Hash for decode/encode"
+ "Indian Transliteration Hash for decode/encode."
(let* ((encode-hash (make-hash-table :test 'equal))
(decode-hash (make-hash-table :test 'equal))
(hashtbls (cons encode-hash decode-hash))
@@ -525,9 +573,17 @@
(indian-make-hash indian-mlm-base-table
indian-itrans-v5-table))
+(defvar indian-mlm-mozhi-hash
+ (indian-make-hash indian-mlm-base-table
+ indian-mlm-mozhi-table))
+
(defvar indian-tml-itrans-v5-hash
(indian-make-hash indian-tml-base-table
indian-itrans-v5-table-for-tamil))
+
+(defvar indian-tml-itrans-digits-v5-hash
+ (indian-make-hash indian-tml-base-digits-table
+ indian-itrans-v5-table-for-tamil))
)
(defmacro indian-translate-region (from to hashtable encode-p)
@@ -536,7 +592,7 @@
(let ((regexp ,(indian-regexp-of-hashtbl-keys
(if encode-p (car (eval hashtable))
(cdr (eval hashtable))))))
- (narrow-to-region from to)
+ (narrow-to-region ,from ,to)
(goto-char (point-min))
(while (re-search-forward regexp nil t)
(let ((matchstr (gethash (match-string 0)
@@ -584,7 +640,7 @@
;; The followings provide conversion between IS 13194 (ISCII) and UCS.
-(let
+(dlet
;;Unicode vs IS13194 ;; only Devanagari is supported now.
((ucs-devanagari-to-is13194-alist
'((?\x0900 . "[U+0900]")
@@ -776,31 +832,31 @@
(defvar is13194-to-ucs-kannada-hashtbl nil)
(defvar is13194-to-ucs-kannada-regexp nil)
-(defvar ucs-to-is13194-regexp
+(defvar indian-ucs-to-is13194-regexp
;; only Devanagari is supported now.
(concat "[" (char-to-string #x0900)
"-" (char-to-string #x097f) "]")
- "Regexp that matches to conversion")
+ "Regexp that matches to conversion.")
-(defun ucs-to-iscii-region (from to)
- "Converts the indian UCS characters in the region to ISCII.
-Returns new end position."
+(defun indian-ucs-to-iscii-region (from to)
+ "Convert the indian UCS characters in the region to ISCII.
+Return new end position."
(interactive "r")
;; only Devanagari is supported now.
(save-excursion
(save-restriction
(narrow-to-region from to)
(goto-char (point-min))
- (let* ((current-repertory is13194-default-repertory))
- (while (re-search-forward ucs-to-is13194-regexp nil t)
+ ;; (let* ((current-repertory is13194-default-repertory))
+ (while (re-search-forward indian-ucs-to-is13194-regexp nil t)
(replace-match
(get-char-code-property (string-to-char (match-string 0))
- 'iscii))))
+ 'iscii)));; )
(point-max))))
-(defun iscii-to-ucs-region (from to)
- "Converts the ISCII characters in the region to UCS.
-Returns new end position."
+(defun indian-iscii-to-ucs-region (from to)
+ "Convert the ISCII characters in the region to UCS.
+Return new end position."
(interactive "r")
;; only Devanagari is supported now.
(save-excursion
@@ -829,6 +885,9 @@ Returns new end position."
(let ((pos from) newpos func (max to))
(narrow-to-region from to)
(while (< pos max)
+ ;; FIXME: The below seems to assume
+ ;; composition-function-table holds functions? That is no
+ ;; longer true, since long ago.
(setq func (aref composition-function-table (char-after pos)))
(if (fboundp func)
(setq newpos (funcall func pos nil)
@@ -846,7 +905,7 @@ Returns new end position."
;;;###autoload
(defun in-is13194-post-read-conversion (len)
(let ((pos (point)) endpos)
- (setq endpos (iscii-to-ucs-region pos (+ pos len)))
+ (setq endpos (indian-iscii-to-ucs-region pos (+ pos len)))
(- endpos pos)))
;;;###autoload
@@ -856,7 +915,7 @@ Returns new end position."
(if (stringp from)
(insert from)
(insert-buffer-substring buf from to))
- (ucs-to-iscii-region (point-min) (point-max))
+ (indian-ucs-to-iscii-region (point-min) (point-max))
nil))
@@ -1214,7 +1273,7 @@ Returns new end position."
(interactive "r")
(save-excursion
(save-restriction
- (let ((pos from)
+ (let (;; (pos from)
(alist (char-table-extra-slot indian-2-column-to-ucs-chartable 0)))
(narrow-to-region from to)
(decompose-region from to)