diff options
Diffstat (limited to 'lisp/language/ind-util.el')
-rw-r--r-- | lisp/language/ind-util.el | 121 |
1 files changed, 90 insertions, 31 deletions
diff --git a/lisp/language/ind-util.el b/lisp/language/ind-util.el index fc8f4c9d983..27facaa858f 100644 --- a/lisp/language/ind-util.el +++ b/lisp/language/ind-util.el @@ -1,8 +1,7 @@ -;;; ind-util.el --- Transliteration and Misc. Tools for Indian Languages -*- coding: utf-8-emacs; -*- +;;; ind-util.el --- Transliteration and Misc. Tools for Indian Languages -*- coding: utf-8-emacs; lexical-binding: t; -*- -;; Copyright (C) 2001-2017 Free Software Foundation, Inc. +;; Copyright (C) 2001-2022 Free Software Foundation, Inc. -;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org> ;; Keywords: multilingual, Indian, Devanagari ;; This file is part of GNU Emacs. @@ -41,7 +40,7 @@ (defun indian-regexp-of-hashtbl-keys (hashtbl) "Return the regular expression of hash table keys." (let (keys) - (maphash (lambda (key val) (push key keys)) hashtbl) + (maphash (lambda (key _val) (push key keys)) hashtbl) (regexp-opt keys))) (defvar indian-dev-base-table @@ -233,8 +232,8 @@ '( (;; VOWELS (?അ nil) (?ആ ?ാ) (?ഇ ?ി) (?ഈ ?ീ) (?ഉ ?ു) (?ഊ ?ൂ) - (?ഋ ?ൃ) (?ഌ nil) nil (?ഏ ?േ) (?എ ?െ) (?ഐ ?ൈ) - nil (?ഓ ?ോ) (?ഒ ?ൊ) (?ഔ ?ൌ) nil nil) + (?ഋ ?ൃ) (?ഌ ?ൢ) (?ൡ ?ൣ) (?ഏ ?േ) (?എ ?െ) (?ഐ ?ൈ) + nil (?ഒ ?ൊ) (?ഓ ?ോ) (?ഔ ?ൗ) (?് ?്) (?ൠ ?ൄ)) (;; CONSONANTS ?ക ?ഖ ?ഗ ?ഘ ?ങ ;; GUTTRULS ?ച ?ഛ ?ജ ?ഝ ?ഞ ;; PALATALS @@ -244,13 +243,16 @@ ?യ ?ര ?റ ?ല ?ള ?ഴ ?വ ;; SEMIVOWELS ?ശ ?ഷ ?സ ?ഹ ;; SIBILANTS nil nil nil nil nil nil nil nil ;; NUKTAS - "ജ്ഞ" "ക്ഷ") + "ജ്ഞ" "ക്ഷ" + "റ്റ" "ന്റ" "ത്ത" "ത്ഥ" "ഞ്ഞ" "ങ്ങ" "ന്ന" + "ഞ്ച" "ന്ക" "ങ്ക" "ച്ച" "ച്ഛ" "ക്ക" + "ബ്ബ" "ക്ക" "ഗ്ഗ" "ജ്ജ" "മ്മ" "പ്പ" "വ്വ" "ക്സ" "ശ്ശ") (;; Misc Symbols nil ?ം ?ഃ nil ?് nil nil) (;; Digits ?൦ ?൧ ?൨ ?൩ ?൪ ?൫ ?൬ ?൭ ?൮ ?൯) - (;; Inscript-extra (4) (#, $, ^, *, ]) - "്ര" "ര്" "ത്ര" "ശ്ര" nil))) + (;; Chillus + "ണ്" ?ൺ "ന്" ?ൻ "ര്" ?ർ "ല്" ?ൽ "ള്" ?ൾ))) (defvar indian-tml-base-table '( @@ -265,11 +267,34 @@ ?த nil nil nil ?ந ?ன ;; DENTALS ?ப nil nil nil ?ம ;; LABIALS ?ய ?ர ?ற ?ல ?ள ?ழ ?வ ;; SEMIVOWELS - nil ?ஷ ?ஸ ?ஹ ;; SIBILANTS + ?ஶ ?ஷ ?ஸ ?ஹ ;; SIBILANTS + nil nil nil nil nil nil nil nil ;; NUKTAS + "ஜ்ஞ" "க்ஷ" "க்ஷ") + (;; Misc Symbols + nil ?ஂ ?ஃ nil ?் ?ௐ nil) + (;; Digits + nil nil nil nil nil nil nil nil nil nil) + (;; Inscript-extra (4) (#, $, ^, *, ]) + "்ர" "ர்" "த்ர" nil nil))) + +(defvar indian-tml-base-digits-table + '( + (;; VOWELS + (?அ nil) (?ஆ ?ா) (?இ ?ி) (?ஈ ?ீ) (?உ ?ு) (?ஊ ?ூ) + nil nil nil (?ஏ ?ே) (?எ ?ெ) (?ஐ ?ை) + nil (?ஓ ?ோ) (?ஒ ?ொ) (?ஔ ?ௌ) nil nil) + (;; CONSONANTS + ?க nil nil nil ?ங ;; GUTTRULS + ?ச nil ?ஜ nil ?ஞ ;; PALATALS + ?ட nil nil nil ?ண ;; CEREBRALS + ?த nil nil nil ?ந ?ன ;; DENTALS + ?ப nil nil nil ?ம ;; LABIALS + ?ய ?ர ?ற ?ல ?ள ?ழ ?வ ;; SEMIVOWELS + ?ஶ ?ஷ ?ஸ ?ஹ ;; SIBILANTS nil nil nil nil nil nil nil nil ;; NUKTAS - "ஜ்ஞ" "க்ஷ") + "ஜ்ஞ" "க்ஷ" "க்ஷ") (;; Misc Symbols - nil ?ஂ ?ஃ nil ?் nil nil) + nil ?ஂ ?ஃ nil ?் ?ௐ nil) (;; Digits ?௦ ?௧ ?௨ ?௩ ?௪ ?௫ ?௬ ?௭ ?௮ ?௯) (;; Inscript-extra (4) (#, $, ^, *, ]) @@ -290,8 +315,8 @@ '(;; for encode/decode (;; vowels -- 18 "a" ("aa" "A") "i" ("ii" "I") "u" ("uu" "U") - ("RRi" "R^i") ("LLi" "L^i") (".c" "e.c") "E" "e" "ai" - "o.c" "O" "o" "au" ("RRI" "R^I") ("LLI" "L^I")) + ("RRi" "R^i" "RRu" "R^u") ("LLi" "L^i") (".c" "e.c") "E" "e" "ai" + "o.c" "O" "o" "au" ("RRI" "R^I" "RRU" "R^U") ("LLI" "L^I")) (;; consonants -- 40 "k" "kh" "g" "gh" ("~N" "N^") "ch" ("Ch" "chh") "j" "jh" ("~n" "JN") @@ -324,6 +349,29 @@ (;; misc -- 7 ".N" (".n" "M") "H" ".a" ".h" ("AUM" "OM") ".."))) +(defvar indian-mlm-mozhi-table + '(;; for encode/decode + (;; vowels -- 18 + "a" ("aa" "A") "i" ("ii" "I") "u" ("uu" "U") + "R" "Ll" "Lll" ("E" "ae") "e" "ai" + nil "o" "O" "au" "~" "RR") + (;; consonants -- 40 + ("k" "c") "kh" "g" "gh" "ng" + "ch" ("Ch" "chh") "j" "jh" "nj" + "T" "Th" "D" "Dh" "N" + "th" "thh" "d" "dh" "n" nil + "p" ("ph" "f") "b" "bh" "m" + "y" "r" "rr" "l" "L" "zh" ("v" "w") + ("S" "z") "sh" "s" "h" + nil nil nil nil nil nil nil nil + nil "X" + ;; some of these are extra to Mozhi + ("t" "tt") "nt" "tth" "tthh" "nnj" "nng" "nn" + "nch" "nc" "nk" "cch" "cchh" "cc" + "B" ("C" "K" "q") "G" "J" "M" "P" "V" "x" "Z") + (;; misc -- 7 + nil nil "H"))) + (defvar indian-kyoto-harvard-table '(;; for encode/decode (;; vowel @@ -462,7 +510,7 @@ c trans-c)) (defun indian-make-hash (table trans-table) - "Indian Transliteration Hash for decode/encode" + "Indian Transliteration Hash for decode/encode." (let* ((encode-hash (make-hash-table :test 'equal)) (decode-hash (make-hash-table :test 'equal)) (hashtbls (cons encode-hash decode-hash)) @@ -525,9 +573,17 @@ (indian-make-hash indian-mlm-base-table indian-itrans-v5-table)) +(defvar indian-mlm-mozhi-hash + (indian-make-hash indian-mlm-base-table + indian-mlm-mozhi-table)) + (defvar indian-tml-itrans-v5-hash (indian-make-hash indian-tml-base-table indian-itrans-v5-table-for-tamil)) + +(defvar indian-tml-itrans-digits-v5-hash + (indian-make-hash indian-tml-base-digits-table + indian-itrans-v5-table-for-tamil)) ) (defmacro indian-translate-region (from to hashtable encode-p) @@ -536,7 +592,7 @@ (let ((regexp ,(indian-regexp-of-hashtbl-keys (if encode-p (car (eval hashtable)) (cdr (eval hashtable)))))) - (narrow-to-region from to) + (narrow-to-region ,from ,to) (goto-char (point-min)) (while (re-search-forward regexp nil t) (let ((matchstr (gethash (match-string 0) @@ -584,7 +640,7 @@ ;; The followings provide conversion between IS 13194 (ISCII) and UCS. -(let +(dlet ;;Unicode vs IS13194 ;; only Devanagari is supported now. ((ucs-devanagari-to-is13194-alist '((?\x0900 . "[U+0900]") @@ -776,31 +832,31 @@ (defvar is13194-to-ucs-kannada-hashtbl nil) (defvar is13194-to-ucs-kannada-regexp nil) -(defvar ucs-to-is13194-regexp +(defvar indian-ucs-to-is13194-regexp ;; only Devanagari is supported now. (concat "[" (char-to-string #x0900) "-" (char-to-string #x097f) "]") - "Regexp that matches to conversion") + "Regexp that matches to conversion.") -(defun ucs-to-iscii-region (from to) - "Converts the indian UCS characters in the region to ISCII. -Returns new end position." +(defun indian-ucs-to-iscii-region (from to) + "Convert the indian UCS characters in the region to ISCII. +Return new end position." (interactive "r") ;; only Devanagari is supported now. (save-excursion (save-restriction (narrow-to-region from to) (goto-char (point-min)) - (let* ((current-repertory is13194-default-repertory)) - (while (re-search-forward ucs-to-is13194-regexp nil t) + ;; (let* ((current-repertory is13194-default-repertory)) + (while (re-search-forward indian-ucs-to-is13194-regexp nil t) (replace-match (get-char-code-property (string-to-char (match-string 0)) - 'iscii)))) + 'iscii)));; ) (point-max)))) -(defun iscii-to-ucs-region (from to) - "Converts the ISCII characters in the region to UCS. -Returns new end position." +(defun indian-iscii-to-ucs-region (from to) + "Convert the ISCII characters in the region to UCS. +Return new end position." (interactive "r") ;; only Devanagari is supported now. (save-excursion @@ -829,6 +885,9 @@ Returns new end position." (let ((pos from) newpos func (max to)) (narrow-to-region from to) (while (< pos max) + ;; FIXME: The below seems to assume + ;; composition-function-table holds functions? That is no + ;; longer true, since long ago. (setq func (aref composition-function-table (char-after pos))) (if (fboundp func) (setq newpos (funcall func pos nil) @@ -846,7 +905,7 @@ Returns new end position." ;;;###autoload (defun in-is13194-post-read-conversion (len) (let ((pos (point)) endpos) - (setq endpos (iscii-to-ucs-region pos (+ pos len))) + (setq endpos (indian-iscii-to-ucs-region pos (+ pos len))) (- endpos pos))) ;;;###autoload @@ -856,7 +915,7 @@ Returns new end position." (if (stringp from) (insert from) (insert-buffer-substring buf from to)) - (ucs-to-iscii-region (point-min) (point-max)) + (indian-ucs-to-iscii-region (point-min) (point-max)) nil)) @@ -1214,7 +1273,7 @@ Returns new end position." (interactive "r") (save-excursion (save-restriction - (let ((pos from) + (let (;; (pos from) (alist (char-table-extra-slot indian-2-column-to-ucs-chartable 0))) (narrow-to-region from to) (decompose-region from to) |