summaryrefslogtreecommitdiff
path: root/lisp/language
diff options
context:
space:
mode:
authorKenichi Handa <handa@m17n.org>2004-04-16 12:51:06 +0000
committerKenichi Handa <handa@m17n.org>2004-04-16 12:51:06 +0000
commit6b61353c0a0320ee15bb6488149735381fed62ec (patch)
treee69adba60e504a5a37beb556ad70084de88a7aab /lisp/language
parentdc6a28319312fe81f7a1015e363174022313f0bd (diff)
downloademacs-6b61353c0a0320ee15bb6488149735381fed62ec.tar.gz
emacs-6b61353c0a0320ee15bb6488149735381fed62ec.tar.bz2
emacs-6b61353c0a0320ee15bb6488149735381fed62ec.zip
Sync to HEAD
Diffstat (limited to 'lisp/language')
-rw-r--r--lisp/language/china-util.el1
-rw-r--r--lisp/language/chinese.el7
-rw-r--r--lisp/language/cyril-util.el1
-rw-r--r--lisp/language/cyrillic.el29
-rw-r--r--lisp/language/czech.el1
-rw-r--r--lisp/language/devan-util.el23
-rw-r--r--lisp/language/devanagari.el1
-rw-r--r--lisp/language/english.el1
-rw-r--r--lisp/language/ethio-util.el18
-rw-r--r--lisp/language/ethiopic.el1
-rw-r--r--lisp/language/european.el12
-rw-r--r--lisp/language/georgian.el1
-rw-r--r--lisp/language/greek.el1
-rw-r--r--lisp/language/hebrew.el1
-rw-r--r--lisp/language/ind-util.el8
-rw-r--r--lisp/language/indian.el1
-rw-r--r--lisp/language/japan-util.el1
-rw-r--r--lisp/language/japanese.el1
-rw-r--r--lisp/language/kannada.el54
-rw-r--r--lisp/language/knd-util.el541
-rw-r--r--lisp/language/korea-util.el1
-rw-r--r--lisp/language/korean.el1
-rw-r--r--lisp/language/lao-util.el1
-rw-r--r--lisp/language/lao.el1
-rw-r--r--lisp/language/malayalam.el1
-rw-r--r--lisp/language/misc-lang.el1
-rw-r--r--lisp/language/mlm-util.el2
-rw-r--r--lisp/language/romanian.el1
-rw-r--r--lisp/language/slovak.el1
-rw-r--r--lisp/language/tamil.el1
-rw-r--r--lisp/language/thai-util.el1
-rw-r--r--lisp/language/thai.el1
-rw-r--r--lisp/language/tibet-util.el1
-rw-r--r--lisp/language/tibetan.el1
-rw-r--r--lisp/language/tml-util.el2
-rw-r--r--lisp/language/utf-8-lang.el2
-rw-r--r--lisp/language/viet-util.el1
-rw-r--r--lisp/language/vietnamese.el1
38 files changed, 693 insertions, 32 deletions
diff --git a/lisp/language/china-util.el b/lisp/language/china-util.el
index 3f6dee9488f..03b41af7032 100644
--- a/lisp/language/china-util.el
+++ b/lisp/language/china-util.el
@@ -190,4 +190,5 @@ Return the length of resulting text."
;;
(provide 'china-util)
+;;; arch-tag: 5a47b084-b9ac-420e-8191-70c5b3a14836
;;; china-util.el ends here
diff --git a/lisp/language/chinese.el b/lisp/language/chinese.el
index 2b96a9f8f40..7e03ed40dda 100644
--- a/lisp/language/chinese.el
+++ b/lisp/language/chinese.el
@@ -107,7 +107,8 @@
(input-method . "chinese-py-punct")
(features china-util)
(sample-text . "Chinese ($AVPND(B,$AFUM(;0(B,$A::So(B) $ADc:C(B")
- (documentation . "Support for Chinese GB2312 character set."))
+ (documentation . "Support for Chinese GB2312 character set.")
+ (tutorial . "TUTORIAL.cn"))
'("Chinese"))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -131,7 +132,8 @@
(input-method . "chinese-py-punct-b5")
(features china-util)
(sample-text . "Cantonese ($(0GnM$(B,$(0N]0*Hd(B) $(0*/=((B, $(0+$)p(B")
- (documentation . "Support for Chinese Big5 character set."))
+ (documentation . "Support for Chinese Big5 character set.")
+ (tutorial . "TUTORIAL.zh"))
'("Chinese"))
(define-coding-system 'chinese-big5-hkscs
@@ -253,4 +255,5 @@ converted to CNS)."))
(provide 'chinese)
+;;; arch-tag: b82fcf7a-84f6-4e0b-b38c-1742dac0e09f
;;; chinese.el ends here
diff --git a/lisp/language/cyril-util.el b/lisp/language/cyril-util.el
index 5d53f224a0a..1a5c435328d 100644
--- a/lisp/language/cyril-util.el
+++ b/lisp/language/cyril-util.el
@@ -193,4 +193,5 @@ If the argument is nil, we return the display table to its standard state."
;; coding: iso-2022-7bit
;; End:
+;;; arch-tag: f6d9dd5d-685c-45d6-a5d8-1e2178228b7e
;;; cyril-util.el ends here
diff --git a/lisp/language/cyrillic.el b/lisp/language/cyrillic.el
index f95a5427a12..e879b4d0b1a 100644
--- a/lisp/language/cyrillic.el
+++ b/lisp/language/cyrillic.el
@@ -122,6 +122,23 @@ Support for Russian using koi8-r and the russian-computer input method.")
(tutorial . "TUTORIAL.ru"))
'("Cyrillic"))
+(define-coding-system 'koi8-u ; used by the "Ukrainian" language environment defined just below
+ "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)"
+ :coding-type 'charset
+ :mnemonic ?U
+ :charset-list '(koi8-u)
+ :mime-charset 'koi8-u)
+
+(set-language-info-alist
+ "Ukrainian" `((charset koi8-u) ; nothing is unquoted inside this backquoted list
+ (coding-system koi8-u)
+ (coding-priority koi8-u)
+ (nonascii-translation . koi8-u)
+ (input-method . "ukrainian-computer")
+ (documentation
+ . "Support for Ukrainian with KOI8-U character set."))
+ '("Cyrillic")) ; same parent list as the other language environments in this file
+
;;; ALTERNATIVNYJ stuff
(define-coding-system 'cyrillic-alternativnyj
@@ -220,13 +237,20 @@ Support for Russian using koi8-r and the russian-computer input method.")
(documentation . "Support for Tajik using KOI8-T."))
'("Cyrillic"))
+;; Register (or refresh) the "microsoft-cp1251" entry of
+;; `ctext-non-standard-encodings-alist'.  An existing entry keeps its
+;; cons cell and only gets a new cdr; otherwise a new entry is pushed.
+(let* ((name "microsoft-cp1251")
+       (spec (list 'windows-1251 1
+                   (get 'encode-windows-1251 'translation-table)))
+       (existing (assoc name ctext-non-standard-encodings-alist)))
+  (if existing
+      (setcdr existing spec)
+    (push (cons name spec) ctext-non-standard-encodings-alist)))
+
(set-language-info-alist
"Bulgarian" `((coding-system windows-1251)
(coding-priority windows-1251)
(nonascii-translation . windows-1251)
(charset windows-1251)
+ (ctext-non-standard-encodings "microsoft-cp1251")
(input-method . "bulgarian-bds")
- (features code-pages)
(documentation
. "Support for Bulgrian with windows-1251 character set."))
'("Cyrillic"))
@@ -236,8 +260,8 @@ Support for Russian using koi8-r and the russian-computer input method.")
(coding-priority windows-1251)
(nonascii-translation . windows-1251)
(charset windows-1251)
+ (ctext-non-standard-encodings "microsoft-cp1251")
(input-method . "belarusian")
- (features code-pages)
(documentation
. "Support for Belarusian with windows-1251 character set.
\(The name Belarusian replaced Byelorussian in the early 1990s.)"))
@@ -253,4 +277,5 @@ Support for Russian using koi8-r and the russian-computer input method.")
(provide 'cyrillic)
+;;; arch-tag: bda71ae0-ba41-4cb6-a6e0-1dff542313d3
;;; cyrillic.el ends here
diff --git a/lisp/language/czech.el b/lisp/language/czech.el
index 0d3cb342d45..9aca0d16d49 100644
--- a/lisp/language/czech.el
+++ b/lisp/language/czech.el
@@ -46,4 +46,5 @@ and selects the Czech tutorial."))
(provide 'czech)
+;;; arch-tag: 45ac0d83-ca13-4b5e-9e82-821e44080c24
;;; czech.el ends here
diff --git a/lisp/language/devan-util.el b/lisp/language/devan-util.el
index 4e3fbc9a257..64dbf576af3 100644
--- a/lisp/language/devan-util.el
+++ b/lisp/language/devan-util.el
@@ -85,6 +85,7 @@
(devanagari-compose-region (point-min) (point-max))
(buffer-string)))
+;;;###autoload
(defun devanagari-post-read-conversion (len)
(save-excursion
(save-restriction
@@ -586,19 +587,14 @@ preferred rule from the sanskrit fonts." )
(setq preceding-r t)
(goto-char (+ 2 (point))))
;; translate the rest characters into glyphs
- (while (not (eobp))
- (if (looking-at dev-char-glyph-regexp)
- (let ((end (match-end 0)))
- (setq match-str (match-string 0)
- glyph-str
- (concat glyph-str
- (gethash match-str dev-char-glyph-hash)))
- ;; count the number of consonant-glyhs.
- (if (string-match devanagari-consonant match-str)
- (setq cons-num (1+ cons-num)))
- (goto-char end))
- (setq glyph-str (concat glyph-str (string (following-char))))
- (forward-char 1)))
+ (while (re-search-forward dev-char-glyph-regexp nil t)
+ (setq match-str (match-string 0))
+ (setq glyph-str
+ (concat glyph-str
+ (gethash match-str dev-char-glyph-hash)))
+ ;; count the number of consonant-glyphs.
+ (if (string-match devanagari-consonant match-str)
+ (setq cons-num (1+ cons-num))))
;; preceding-r must be attached before the anuswar if exists.
(if preceding-r
(if last-modifier
@@ -652,4 +648,5 @@ preferred rule from the sanskrit fonts." )
(provide 'devan-util)
+;;; arch-tag: 9bc4d6e3-f2b9-4110-886e-ff9b66b7eebc
;;; devan-util.el ends here
diff --git a/lisp/language/devanagari.el b/lisp/language/devanagari.el
index 94f11b403d4..6b1ff46a0af 100644
--- a/lisp/language/devanagari.el
+++ b/lisp/language/devanagari.el
@@ -51,4 +51,5 @@ are supported in this language environment."))
(provide 'devanagari)
+;;; arch-tag: fd13667d-868b-41e8-81ef-79dd28bbfed2
;;; devanagari.el ends here
diff --git a/lisp/language/english.el b/lisp/language/english.el
index 342dea6da98..6d135197460 100644
--- a/lisp/language/english.el
+++ b/lisp/language/english.el
@@ -65,4 +65,5 @@ Nothing special is needed to handle English.")
(set-language-info-alist
"ASCII" (cdr (assoc "English" language-info-alist)))
+;;; arch-tag: e440bdb0-91b0-4fb4-ae38-425780f8f745
;;; english.el ends here
diff --git a/lisp/language/ethio-util.el b/lisp/language/ethio-util.el
index 3b59ddca661..5a81f59f9dc 100644
--- a/lisp/language/ethio-util.el
+++ b/lisp/language/ethio-util.el
@@ -417,6 +417,9 @@ If nil, use uppercases.")
nil nil nil nil nil
])
+;; To avoid byte-compiler warnings. It should never be set globally.
+(defvar ethio-sera-being-called-by-w3)
+
;;;###autoload
(defun ethio-sera-to-fidel-region (beg end &optional secondary force)
"Convert the characters in region from SERA to FIDEL.
@@ -587,8 +590,8 @@ the conversion of \"a\"."
(cond
;; skip from "<" to ">" (or from "&" to ";") if in w3-mode
- ((and (boundp 'sera-being-called-by-w3)
- sera-being-called-by-w3
+ ((and (boundp 'ethio-sera-being-called-by-w3)
+ ethio-sera-being-called-by-w3
(or (= ch ?<) (= ch ?&)))
(search-forward (if (= ch ?<) ">" ";")
nil 0))
@@ -1174,8 +1177,8 @@ See also the descriptions of the variables
(goto-char (1+ (match-end 0)))) ; because we inserted one byte (\)
;; skip from "<" to ">" (or from "&" to ";") if called from w3
- ((and (boundp 'sera-being-called-by-w3)
- sera-being-called-by-w3
+ ((and (boundp 'ethio-sera-being-called-by-w3)
+ ethio-sera-being-called-by-w3
(or (= ch ?<) (= ch ?&)))
(search-forward (if (= ch ?<) ">" ";")
nil 0))
@@ -1826,7 +1829,7 @@ Otherwise, [0-9A-F]."
;;;###autoload
(defun ethio-find-file nil
- "Transcribe file content into Ethiopic dependig on filename suffix."
+ "Transcribe file content into Ethiopic depending on filename suffix."
(cond
((string-match "\\.sera$" (buffer-file-name))
@@ -1835,7 +1838,7 @@ Otherwise, [0-9A-F]."
(set-buffer-modified-p nil)))
((string-match "\\.html$" (buffer-file-name))
- (let ((sera-being-called-by-w3 t))
+ (let ((ethio-sera-being-called-by-w3 t))
(save-excursion
(ethio-sera-to-fidel-marker 'force)
(goto-char (point-min))
@@ -1872,7 +1875,7 @@ Otherwise, [0-9A-F]."
((string-match "\\.html$" (buffer-file-name))
(save-excursion
- (let ((sera-being-called-by-w3 t)
+ (let ((ethio-sera-being-called-by-w3 t)
(lq (aref ethio-fidel-to-sera-map 461))
(rq (aref ethio-fidel-to-sera-map 462)))
(aset ethio-fidel-to-sera-map 461 "&laquote;")
@@ -2005,4 +2008,5 @@ mark."
;;
(provide 'ethio-util)
+;;; arch-tag: c8feb3d6-39bf-4b0a-b6ef-26f03fbc8140
;;; ethio-util.el ends here
diff --git a/lisp/language/ethiopic.el b/lisp/language/ethiopic.el
index b198cf43084..a0140b585fb 100644
--- a/lisp/language/ethiopic.el
+++ b/lisp/language/ethiopic.el
@@ -80,4 +80,5 @@
(provide 'ethiopic)
+;;; arch-tag: e81329d9-1286-43ba-92fd-54ce5c7b213c
;;; ethiopic.el ends here
diff --git a/lisp/language/european.el b/lisp/language/european.el
index 2035d479487..3da1850a718 100644
--- a/lisp/language/european.el
+++ b/lisp/language/european.el
@@ -28,9 +28,8 @@
;;; Commentary:
-;; For European scripts, character sets ISO8859-1,2,3,4,9,10,13,14,15,
-;; windows-1250,2,4,7, mac-roman, adobe-standard-encoding, cp850 and
-;; next are supported.
+;; For European scripts, all the ISO Latin character sets are
+;; supported, along with various others.
;;; Code:
@@ -272,7 +271,7 @@ covered by other ISO-8859 character sets:
(unibyte-display . iso-latin-9)
(input-method . "latin-9-prefix")
(sample-text
- . "AVE. ,b&(48<=>(B ,b$(B")
+ . "AVE. ,B)9.>,b<=,_/(B ,b$(B")
(documentation . "\
This language environment is a generic one for the Latin-9 (ISO-8859-15)
character set which supports the same languages as Latin-1 with the
@@ -484,7 +483,7 @@ and it selects the Spanish tutorial."))
(nonascii-translation . iso-8859-9)
(unibyte-display . iso-latin-5)
(input-method . "turkish-postfix")
- (sample-text . "Turkish (T,M|(Brk,Mg(Be) Merhaba")
+ (sample-text . "Turkish (T,A|(Brk,Ag(Be) Merhaba")
(setup-function
. (lambda ()
(set-case-syntax-pair ?I ?,C9(B (standard-case-table))
@@ -510,7 +509,7 @@ method and applying Turkish case rules for the characters i, I, ,C9(B, ,C)(B
(nonascii-translation . iso-8859-2)
(unibyte-display . iso-8859-2)
(tutorial . "TUTORIAL.pl")
- (sample-text . "P,Bs(Bjd,B<(B, ki,Bq(B-,B?(Be t,Bj(B chmurno,B6f(B w g,B31(Bb flaszy")
+ (sample-text . "P,As(Bjd,B<(B, ki,Bq(B-,B?(Be t,Bj(B chmurno,B6f(B w g,B31(Bb flaszy")
(documentation . t))
'("European"))
@@ -724,4 +723,5 @@ or nil if no characters are composed."
(provide 'european)
+;;; arch-tag: 9e018b12-fb02-4120-907b-9adeaf84b5c2
;;; european.el ends here
diff --git a/lisp/language/georgian.el b/lisp/language/georgian.el
index f38529d20aa..40f84c0dc68 100644
--- a/lisp/language/georgian.el
+++ b/lisp/language/georgian.el
@@ -49,4 +49,5 @@
(provide 'georgian)
+;;; arch-tag: 15499fbb-26d4-4a13-9d78-135eef7d32f5
;;; georgian.el ends here
diff --git a/lisp/language/greek.el b/lisp/language/greek.el
index 6061ed203c8..e3625b4c7c2 100644
--- a/lisp/language/greek.el
+++ b/lisp/language/greek.el
@@ -82,4 +82,5 @@
(provide 'greek)
+;;; arch-tag: 9ba48d79-84bc-45e1-9318-685dc3921410
;;; greek.el ends here
diff --git a/lisp/language/hebrew.el b/lisp/language/hebrew.el
index 871ec1b223e..bc59d23b3ee 100644
--- a/lisp/language/hebrew.el
+++ b/lisp/language/hebrew.el
@@ -85,4 +85,5 @@ Right-to-left writing is not yet supported.")))
(provide 'hebrew)
+;;; arch-tag: 3ca04f32-3f1e-498e-af46-8267498ba5d9
;;; hebrew.el ends here
diff --git a/lisp/language/ind-util.el b/lisp/language/ind-util.el
index 862ebf39e84..80be2a97e17 100644
--- a/lisp/language/ind-util.el
+++ b/lisp/language/ind-util.el
@@ -714,9 +714,10 @@ FUNCTION will be called 15 times."
(ucs-oriya-to-is13194-alist nil)
(ucs-tamil-to-is13194-alist nil)
(ucs-telugu-to-is13194-alist nil)
- (ucs-malayalam-to-is13194-alist nil))
+ (ucs-malayalam-to-is13194-alist nil)
+ (ucs-kannada-to-is13194-alist nil))
(dolist (script '(devanagari bengali assamese gurmukhi gujarati
- oriya tamil telugu malayalam))
+ oriya tamil telugu malayalam kannada))
(let ((hashtable (intern (concat "is13194-to-ucs-"
(symbol-name script) "-hashtbl" )))
(regexp (intern (concat "is13194-to-ucs-"
@@ -764,6 +765,8 @@ FUNCTION will be called 15 times."
(defvar is13194-to-ucs-telugu-regexp nil)
(defvar is13194-to-ucs-malayalam-hashtbl nil)
(defvar is13194-to-ucs-malayalam-regexp nil)
+(defvar is13194-to-ucs-kannada-hashtbl nil)
+(defvar is13194-to-ucs-kannada-regexp nil)
(defvar ucs-to-is13194-regexp
;; only Devanagari is supported now.
@@ -1221,4 +1224,5 @@ Returns new end position."
(provide 'ind-util)
+;;; arch-tag: 59aacd71-46c2-4cb3-bb26-e12bbad55545
;;; ind-util.el ends here
diff --git a/lisp/language/indian.el b/lisp/language/indian.el
index d7b4c365bc8..a15df9c45aa 100644
--- a/lisp/language/indian.el
+++ b/lisp/language/indian.el
@@ -157,4 +157,5 @@ The default value is `devanagari'.")
(provide 'indian)
+;;; arch-tag: 83aa8fc7-7ee2-4364-a6e5-498f5e3b8c2f
;;; indian.el ends here
diff --git a/lisp/language/japan-util.el b/lisp/language/japan-util.el
index 7c2cc069241..e9a80fc99a7 100644
--- a/lisp/language/japan-util.el
+++ b/lisp/language/japan-util.el
@@ -320,4 +320,5 @@ If non-nil, second arg INITIAL-INPUT is a string to insert before reading."
;;
(provide 'japan-util)
+;;; arch-tag: b579595c-c9ad-4b57-9314-98cd8b214f89
;;; japan-util.el ends here
diff --git a/lisp/language/japanese.el b/lisp/language/japanese.el
index 961e724e968..b4ec979d425 100644
--- a/lisp/language/japanese.el
+++ b/lisp/language/japanese.el
@@ -244,4 +244,5 @@ eucJP-ms is defined in <http://www.opengroup.or.jp/jvc/cde/appendix.html>."
(provide 'japanese)
+;;; arch-tag: 450f5537-9d53-4d5e-b731-4cf116d8cbc9
;;; japanese.el ends here
diff --git a/lisp/language/kannada.el b/lisp/language/kannada.el
new file mode 100644
index 00000000000..1a3575f24db
--- /dev/null
+++ b/lisp/language/kannada.el
@@ -0,0 +1,54 @@
+;;; kannada.el --- Support for Kannada -*- coding: iso-2022-7bit; no-byte-compile: t -*-
+
+;; Copyright (C) 2003 Free Software Foundation, Inc.
+
+;; Maintainer: CHOWKSEY, Kailash C. <klchxbec@m-net.arbornet.org>
+;; Keywords: multilingual, Indian, Kannada
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs; see the file COPYING. If not, write to the
+;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+;;; Commentary:
+
+;; This file defines language-info of Kannada script.
+
+;;; Code:
+
+(set-language-info-alist
+ "Kannada" '((charset mule-unicode-0100-24ff indian-is13194
+ indian-2-column indian-glyph ;; comment out later
+ )
+ (coding-system mule-utf-8)
+ (coding-priority mule-utf-8)
+ (input-method . "kannada-itrans")
+ (features knd-util)
+ (sample-text
+ . (kannada-compose-string
+ (copy-sequence "Kannada ($,1>u?(?M?(?!(B) $,1?(?.?8?M>u?>?0(B")))
+ (documentation . "\
+Kannada language and script is supported in this language
+environment."))
+ '("Indian"))
+
+;; For automatic composition of characters in the range #x0c80-#x0cff.
+(set-char-table-range composition-function-table '(#x0c80 . #x0cff)
+ 'kannada-composition-function)
+
+(provide 'kannada)
+
+;;; arch-tag: 880ba90b-f6f5-4131-bc1d-930705b78416
+;;; kannada.el ends here
diff --git a/lisp/language/knd-util.el b/lisp/language/knd-util.el
new file mode 100644
index 00000000000..75e8b93d17a
--- /dev/null
+++ b/lisp/language/knd-util.el
@@ -0,0 +1,541 @@
+;;; knd-util.el --- Support for composing Kannada characters
+
+;; Copyright (C) 2003 Free Software Foundation, Inc.
+
+;; Maintainer: CHOWKSEY, Kailash C. <klchxbec@m-net.arbornet.org>
+;; Keywords: multilingual, Kannada
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs; see the file COPYING. If not, write to the
+;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+;; Created: Jul. 14. 2003
+
+;;; Commentary:
+
+;; This file provides character (Unicode) to glyph (CDAC) conversion and
+;; composition of Kannada script characters.
+
+;;; Code:
+
+;;;###autoload
+
+;; Kannada Composable Pattern
+;; C .. Consonants
+;; V .. Vowel
+;; H .. Virama
+;; M .. Matra
+;; A .. Anusvara (vowel modifier)
+;; (N .. Zerowidth Non Joiner)
+;; (J .. Zerowidth Joiner. )
+;; 1. vowel
+;; V(A)?
+;; 2. syllable : maximum of 5 consecutive consonants. (e.g. kartsnya)
+;; ((CH)?(CH)?(CH)?CH)?C(H|M?)?
+
+(defconst kannada-consonant
+ "[$,1>u(B-$,1?9(B]")
+
+(defconst kannada-consonant-needs-twirl
+ "[$,1>u>w(B-$,1>{>}(B-$,1>~? (B-$,1?"?$(B-$,1?+?-?0?3(B-$,1?9(B]\\($,1?M(B[$,1>u(B-$,1?9(B]\\)*[$,1?A?B?C?D>b(B]?$")
+
+(defconst kannada-composable-pattern
+ (concat
+ "\\([$,1>b(B-$,1>t?`>l(B]\\)\\|[$,1>c(B]"
+ "\\|\\("
+ "\\(?:\\(?:[$,1>u(B-$,1?9(B]$,1?M(B\\)?\\(?:[$,1>u(B-$,1?9(B]$,1?M(B\\)?\\(?:[$,1>u(B-$,1?9(B]$,1?M(B\\)?[$,1>u(B-$,1?9(B]$,1?M(B\\)?"
+ "[$,1>u(B-$,1?9(B]\\(?:$,1?M(B\\|[$,1?>(B-$,1?M?U?C(B]?\\)?"
+ "\\)")
+ "Regexp matching a composable sequence of Kannada characters.")
+
+;;;###autoload
+(defun kannada-compose-region (from to)
+  "Compose the Kannada characters in the region between FROM and TO.
+Every match of `kannada-composable-pattern' inside the region is
+handed to `kannada-compose-syllable-region'.  Point and the current
+restriction are preserved.  Interactively, operate on the region."
+  (interactive "r")
+  (save-excursion
+    (save-restriction
+      (narrow-to-region from to)
+      (goto-char (point-min))
+      (let (start end)
+        (while (re-search-forward kannada-composable-pattern nil t)
+          ;; Capture the match bounds before composing the syllable.
+          (setq start (match-beginning 0)
+                end (match-end 0))
+          (kannada-compose-syllable-region start end))))))
+;;;###autoload
+(defun kannada-compose-string (string)
+  "Return the text of STRING with its Kannada syllables composed.
+STRING is first passed through `decompose-string'; the result is
+inserted into a temporary buffer, composed there with
+`kannada-compose-region', and returned as a new string."
+  (let ((plain (decompose-string string)))
+    (with-temp-buffer
+      (insert plain)
+      (kannada-compose-region (point-min) (point-max))
+      (buffer-string))))
+
+;;;###autoload
+(defun kannada-post-read-conversion (len)
+  "Compose the Kannada text in the LEN characters following point.
+The buffer's modified flag is set back to the value it had on entry.
+Return the length of the text after composition."
+  (let ((was-modified (buffer-modified-p)))
+    (save-excursion
+      (save-restriction
+        (narrow-to-region (point) (+ (point) len))
+        (kannada-compose-region (point-min) (point-max))
+        (set-buffer-modified-p was-modified)
+        ;; Still narrowed here, so this is the size of the converted text.
+        (- (point-max) (point-min))))))
+
+(defun kannada-range (from to)
+  "Return the list of integers from FROM to TO, inclusive, in ascending order.
+Return nil when FROM is greater than TO."
+  (let ((acc nil)
+        (n to))
+    ;; Walk downwards from TO so that consing yields ascending order.
+    (while (>= n from)
+      (push n acc)
+      (setq n (1- n)))
+    acc))
+
+(defun kannada-regexp-of-hashtbl-keys (hashtbl)
+  "Return a regular expression matching any key of hash table HASHTBL.
+The keys are sorted from longest to shortest and then handed to
+`regexp-opt'.  `max-specpdl-size' is bound to 1000 for the duration."
+  (let ((max-specpdl-size 1000)
+        (keys nil))
+    (maphash (lambda (key _val) (push key keys)) hashtbl)
+    (regexp-opt
+     (sort keys (lambda (a b) (> (length a) (length b)))))))
+
+(defun kannada-regexp-of-hashtbl-vals (hashtbl)
+  "Return a regular expression matching any value of hash table HASHTBL.
+The values are sorted from longest to shortest and then handed to
+`regexp-opt'.  `max-specpdl-size' is bound to 1000 for the duration."
+  (let ((max-specpdl-size 1000)
+        (vals nil))
+    (maphash (lambda (_key val) (push val vals)) hashtbl)
+    (regexp-opt
+     (sort vals (lambda (a b) (> (length a) (length b)))))))
+
+;;;###autoload
+(defun kannada-composition-function (pos &optional string)
+  "Compose the Kannada characters that follow position POS.
+If STRING is non-nil, it is a string and POS is an index into it; that
+case is not yet implemented and the function returns nil.
+Otherwise move point to POS and, if the text there matches
+`kannada-composable-pattern', compose it and return the position of
+the end of the match.  Return nil if nothing matches."
+  (unless string
+    (goto-char pos)
+    (when (looking-at kannada-composable-pattern)
+      ;; Record the end of the match before composing, then return it.
+      (let ((end (match-end 0)))
+        (kannada-compose-syllable-region pos end)
+        end))))
+
+;; Notes on conversion steps.
+
+;; 1. chars to glyphs
+;;
+;; Rules are not applied to a virama that appears at the end of the
+;; text. Also, a preceding/following "r" is treated as a special case.
+
+;; 2. glyphs reordering.
+;;
+;; The glyphs are split at each virama, and each glyph group is
+;; re-ordered in the following order.
+;;
+;; Note that `consonant-glyph' mentioned here does not contain the
+;; vertical bar (right modifier) attached at the right of the
+;; consonant.
+;;
+;; If the glyph-group contains right modifier,
+;; (1) consonant-glyphs/vowels
+;; (2) spacing
+;; (3) right modifier (may be matra)
+;; (4) top matra
+;; (5) preceding "r"
+;; (7) following "r"
+;; (8) bottom matra or virama.
+;;
+;; Otherwise,
+;; (1) consonant-glyph/vowels, with nukta sign
+;; (3) left matra
+;; (4) top matra
+;; (5) preceding "r"
+;; (7) following "r"
+;; (8) bottom matra or virama.
+;; (2) spacing
+
+;; 3. glyph to glyph
+;;
+;; For better display, some glyph display would be tuned.
+
+;; 4. Composition.
+;;
+;; left modifiers will be attached at the left.
+;; others will be attached right.
+
+;; Problem::
+;; Can we generalize these methods to other Indian scripts?
+
+(defvar knd-char-glyph
+ '(("$,1>e(B" . "$,43@(B")
+ ("$,1>f(B" . "$,43A(B")
+ ("$,1?>(B" . "$,44{(B")
+ ("$,1>g(B" . "$,43B(B")
+ ("$,1??(B" . nil)
+ ("$,1>h(B" . "$,43C(B")
+ ("$,1?@(B" . nil)
+ ("$,1>i(B" . "$,43D(B")
+ ("$,1?A(B" . "\$,44(B")
+ ("$,1>j(B" . "$,43E(B")
+ ("$,1?B(B" . "\$,45 (B")
+ ("$,1>k(B" . "$,43F4(B")
+ ("$,1?C(B" . "\$,45$(B")
+ ("$,1?`(B" . "$,43F5 (B")
+ ("$,1?D(B" . "\$,45%(B")
+ ;;("$,1>l(B" . nil) ; not implemented.
+ ;;("$,1?a(B" . nil)
+ ("$,1>n(B" . "$,43G(B")
+ ("$,1>o(B" . "$,43H(B")
+ ("$,1>p(B" . "$,43I(B")
+ ("$,1?F(B" . "\$,45&(B")
+ ("$,1?G(B" . "\$,45&4~(B")
+ ("$,1?H(B" . "\$,45&5'(B")
+ ("$,1>r(B" . "$,43J(B")
+ ("$,1?J(B" . "$,45&5 (B")
+ ("$,1>s(B" . "$,43K(B")
+ ("$,1?K(B" . "\$,45&5 4~(B")
+ ("$,1>t(B" . "$,43L(B")
+ ("$,1?L(B" . "\$,45((B")
+ ("$,1>b(B" . "$,43M(B")
+ ("$,1>c(B" . "$,43N(B")
+ ("$,1>u?M(B" . "$,43O5)(B") ("$,1>u(B" . "$,43O(B") ("$,1>u??(B" . "$,43P(B") ("$,1>u?@(B" . "$,43P4~(B")
+ ("$,1>v?M(B" . "$,43S5)(B") ("$,1>v(B" . "$,43S(B") ("$,1>v??(B" . "$,43T(B") ("$,1>v?@(B" . "$,43T4~(B") ("$,1>v?F(B" . "$,43S5&(B") ("$,1>v?G(B" . "$,43S5&4~(B") ("$,1>v?H(B" . "$,43S5&5'(B") ("$,1>v?J(B" . "$,43S5&5&5 (B") ("$,1>v?K(B" . "$,43S5&5&5 4~(B") ("$,1>v?L(B" . "$,43S5((B")
+ ("$,1>w?M(B" . "$,43V5)(B") ("$,1>w(B" . "$,43V(B") ("$,1>w??(B" . "$,43W(B") ("$,1>w?@(B" . "$,43W4~(B")
+ ("$,1>x?M(B" . "$,43Y5)(B") ("$,1>x(B" . "$,43Y(B") ("$,1>x??(B" . "$,43Z(B") ("$,1>x?@(B" . "$,43Z4~(B")
+ ("$,1>y?M(B" . "$,43\5)(B") ("$,1>y(B" . "$,43\(B")
+ ("$,1>z?M(B" . "$,43^5)(B") ("$,1>z(B" . "$,43^(B") ("$,1>z??(B" . "$,43_(B") ("$,1>z?@(B" . "$,43_4~(B")
+ ("$,1>{?M(B" . "$,43a5)(B") ("$,1>{(B" . "$,43a(B") ("$,1>{??(B" . "$,43b(B") ("$,1>{?@(B" . "$,43b4~(B")
+ ("$,1>|?M(B" . "$,43d5)(B") ("$,1>|(B" . "$,43d(B") ("$,1>|??(B" . "$,43f(B") ("$,1>|?@(B" . "$,43f4~(B") ("$,1>|?F(B" . "$,43e5&(B") ("$,1>|?G(B" . "$,43e5&4~(B") ("$,1>|?H(B" . "$,43e5&5'(B") ("$,1>|?J(B" . "$,43e5&5&5 (B") ("$,1>|?K(B" . "$,43e5&5&5 4~(B") ("$,1>|?L(B" . "$,43e5((B")
+ ("$,1>}?M(B" . "$,44a4z3h45)(B") ("$,1>}(B" . "$,44a4z3h4(B") ("$,1>}??(B" . "$,44b3h4(B") ("$,1>}?@(B" . "$,44b3h44~(B") ("$,1>}?B(B". "$,44a4z3h5 (B") ("$,1>}?J(B". "$,44a5&3h5 (B") ("$,1>}?K(B". "$,44a5&3h5 4~(B")
+ ("$,1>~?M(B" . "$,43j5)(B") ("$,1>~(B" . "$,43j(B")
+ ("$,1>?M(B" . "$,43m5)(B") ("$,1>(B" . "$,43l(B") ("$,1?#?>(B" . "$,43m4{(B") ("$,1>??(B" . "$,43n(B") ("$,1>?@(B" . "$,43n4~(B") ("$,1>?F(B" . "$,43m5&(B") ("$,1>?G(B" . "$,43m5&4~(B") ("$,1>?H(B" . "$,43m5&5'(B") ("$,1>?J(B" . "$,43m5&5&5 (B") ("$,1>?K(B" . "$,43m5&5&5 4~(B") ("$,1>?L(B" . "$,43m5((B")
+ ("$,1? ?M(B" . "$,43p5)(B") ("$,1? (B" . "$,43p(B") ("$,1? ??(B" . "$,43q(B") ("$,1? ?@(B" . "$,43q4~(B")
+ ("$,1?!?M(B" . "$,43s5)(B") ("$,1?!(B" . "$,43s(B") ("$,1?!??(B" . "$,43t(B") ("$,1?!?@(B" . "$,43t4~(B")
+ ("$,1?"?M(B" . "$,43v5)(B") ("$,1?"(B" . "$,43v(B") ("$,1?"??(B" . "$,43w(B") ("$,1?"?@(B" . "$,43w4~(B")
+ ("$,1?#?M(B" . "$,43z5)(B") ("$,1?#(B" . "$,43y(B") ("$,1?#?>(B" . "$,43z4{(B") ("$,1?#??(B" . "$,43{(B") ("$,1?#?@(B" . "$,43{4~(B") ("$,1?#?F(B" . "$,43z5&(B") ("$,1?#?G(B" . "$,43z5&4~(B") ("$,1?#?H(B" . "$,43z5&5'(B") ("$,1?#?J(B" . "$,43z5&5&5 (B") ("$,1?#?K(B" . "$,43z5&5&5 4~(B") ("$,1?#?L(B" . "$,43z5((B")
+ ("$,1?$?M(B" . "$,43}5)(B") ("$,1?$(B" . "$,43}(B") ("$,1?$??(B" . "$,43~(B") ("$,1?$?@(B" . "$,43~4~(B")
+ ("$,1?%?M(B" . "$,44B5)(B") ("$,1?%(B" . "$,44B(B") ("$,1?%??(B" . "$,44C(B") ("$,1?%?@(B" . "$,44C4~(B")
+ ("$,1?&?M(B" . "$,44E5)(B") ("$,1?&(B" . "$,44E(B") ("$,1?&??(B" . "$,44F(B") ("$,1?&?@(B" . "$,44F4~(B")
+ ("$,1?'?M(B" . "$,44H5)(B") ("$,1?'(B" . "$,44H(B") ("$,1?'??(B" . "$,44I(B") ("$,1?'?@(B" . "$,44I4~(B")
+ ("$,1?(?M(B" . "$,44K5)(B") ("$,1?((B" . "$,44K(B") ("$,1?(??(B" . "$,44L(B") ("$,1?(?@(B" . "$,44L4~(B")
+ ("$,1?*?M(B" . "$,44N5)(B") ("$,1?*(B" . "$,44N(B") ("$,1?*??(B" . "$,44O(B") ("$,1?*?@(B" . "$,44O4~(B") ("$,1?*?A(B" . "$,44N5"(B") ("$,1?*?B(B" . "$,44N5#(B") ("$,1?*?J(B" . "$,44N5&5#(B") ("$,1?*?K(B" . "$,44N5&5#4~(B")
+ ("$,1?+?M(B" . "$,44Q5)(B") ("$,1?+(B" . "$,44Q(B") ("$,1?+??(B" . "$,44R(B") ("$,1?+?@(B" . "$,44R4~(B") ("$,1?+?A(B" . "$,44Q5"(B") ("$,1?+?B(B" . "$,44Q5#(B") ("$,1?+?J(B" . "$,44Q5&5#(B") ("$,1?+?K(B" . "$,44Q5&5#4~(B")
+ ("$,1?,?M(B" . "$,44W5)(B") ("$,1?,(B" . "$,44V(B") ("$,1?,?>(B". "$,44W4{(B") ("$,1?,??(B" . "$,44X(B") ("$,1?,?@(B" . "$,44X4~(B") ("$,1?,?F(B" . "$,44W5&(B") ("$,1?,?G(B" . "$,44W5&4~(B") ("$,1?,?H(B" . "$,44W5&5'(B") ("$,1?,?J(B" . "$,44W5&5&5 (B") ("$,1?,?K(B" . "$,44W5&5&5 4~(B") ("$,1?,?L(B" . "$,44W5((B")
+ ("$,1?-?M(B" . "$,44Z5)(B") ("$,1?-(B" . "$,44Z(B") ("$,1?-??(B" . "$,44[(B") ("$,1?-?@(B" . "$,44[4~(B")
+ ("$,1?.?M(B" . "$,44h5!5)(B") ("$,1?.(B" . "$,44h4z4(B") ("$,1?.?>(B" . "$,44h4z5!4{(B") ("$,1?.??(B" . "$,44i4(B") ("$,1?.?@(B" . "$,44i44~(B") ("$,1?.?J(B". "$,44h5&5 (B") ("$,1?.?K(B". "$,44h5&5 4~(B")
+ ("$,1?/?M(B" . "$,44^4z5!5)(B") ("$,1?/(B" . "$,44^4z4(B") ("$,1?/?>(B" . "$,44^4z5!4{(B")("$,1?/??(B" . "$,44_4(B") ("$,1?/?@(B" . "$,44_44~(B") ("$,1?/?J(B" . "$,44^5&5 (B") ("$,1?/?K(B" . "$,44^5&5 4~(B")
+ ("$,1?0?M(B" . "$,44a5)(B") ("$,1?0(B" . "$,44a(B") ("$,1?0??(B" . "$,44b(B") ("$,1?0?@(B" . "$,44b4~(B")
+ ("$,1?0?M(B" . "$,44a5)(B") ("$,1?0(B" . "$,44a(B") ("$,1?0??(B" . "$,44b(B") ("$,1?0?@(B" . "$,44b4~(B")
+ ("$,1?2?M(B" . "$,44e5)(B") ("$,1?2(B" . "$,44d(B") ("$,1?2?>(B" . "$,44e4{(B") ("$,1?2??(B" . "$,44f(B") ("$,1?2?@(B" . "$,44f4~(B") ("$,1?2?F(B" . "$,44e5&(B") ("$,1?2?G(B" . "$,44e5&4~(B") ("$,1?2?H(B" . "$,44e5&5'(B") ("$,1?2?J(B" . "$,44e5&5&5 (B") ("$,1?2?K(B" . "$,44e5&5&5 4~(B") ("$,1?2?L(B" . "$,44e5((B")
+ ("$,1?5?M(B" . "$,44h5)(B") ("$,1?5(B" . "$,44h(B") ("$,1?5??(B" . "$,44i(B") ("$,1?5?@(B" . "$,44i4~(B") ("$,1?5?A(B" . "$,44h5"(B") ("$,1?5?B(B" . "$,44h5#(B") ("$,1?5?J(B" . "$,44h5&5#(B") ("$,1?5?K(B" . "$,44h5&5#4~(B")
+ ("$,1?6?M(B" . "$,44k5)(B") ("$,1?6(B" . "$,44k(B") ("$,1?6??(B" . "$,44l(B") ("$,1?6?@(B" . "$,44l4~(B")
+ ("$,1?7?M(B" . "$,44n5)(B") ("$,1?7(B" . "$,44n(B") ("$,1?7??(B" . "$,44o(B") ("$,1?7?@(B" . "$,44o4~(B")
+ ("$,1?8?M(B" . "$,44q5)(B") ("$,1?8(B" . "$,44q(B") ("$,1?8??(B" . "$,44r(B") ("$,1?8?@(B" . "$,44r4~(B")
+ ("$,1?9?M(B" . "$,44t5)(B") ("$,1?9(B" . "$,44t(B") ("$,1?9??(B" . "$,44u(B") ("$,1?9?@(B" . "$,44u4~(B")
+ ("$,1?3?M(B" . "$,44w5)(B") ("$,1?3(B" . "$,44w(B") ("$,1?3??(B" . "$,44x(B") ("$,1?3?@(B" . "$,44x4~(B"))
+"Kannada characters to glyphs conversion table.
+Default value contains only the basic rules.")
+
+(defvar knd-char-glyph-hash
+  (let ((table (make-hash-table :test 'equal)))
+    ;; Entries are stored in list order, so a later duplicate key wins.
+    (dolist (rule knd-char-glyph)
+      (puthash (car rule) (cdr rule) table))
+    table)
+  "Hash table holding the entries of `knd-char-glyph'.
+Each key is a character string; the value is the associated glyph
+string, or nil.  Keys are compared with `equal'.")
+
+(defvar knd-char-glyph-regexp
+  (kannada-regexp-of-hashtbl-keys knd-char-glyph-hash)
+  "Regexp matching any key of `knd-char-glyph-hash'.")
+
+(defvar knd-conjunct-glyph
+ '(("$,1>u(B" . "$,43Q(B") ("$,1>v(B" . "$,43U(B") ("$,1>w(B" . "$,43X(B") ("$,1>x(B" . "$,43[(B") ("$,1>y(B" . "$,43](B")
+ ("$,1>z(B" . "$,43`(B") ("$,1>{(B" . "$,43c(B") ("$,1>|(B" . "$,43g(B") ("$,1>}(B" . "$,43i(B") ("$,1>~(B" . "$,43k(B")
+ ("$,1>(B" . "$,43o(B") ("$,1? (B" . "$,43r(B") ("$,1?!(B" . "$,43u(B") ("$,1?"(B" . "$,43x(B") ("$,1?#(B" . "$,43|(B")
+ ("$,1?$(B" . "$,44A(B") ("$,1?%(B" . "$,44D(B") ("$,1?&(B" . "$,44G(B") ("$,1?'(B" . "$,44J(B") ("$,1?((B" . "$,44M(B")
+ ("$,1?*(B" . "$,44P(B") ("$,1?+(B" . "$,44U(B") ("$,1?,(B" . "$,44Y(B") ("$,1?-(B" . "$,44\(B") ("$,1?.(B" . "$,44](B")
+ ("$,1?/(B" . "$,44`(B") ("$,1?0(B" . "$,44c(B") ("$,1?2(B" . "$,44g(B") ("$,1?3(B" . "$,44y(B") ("$,1?5(B" . "$,44j(B")
+ ("$,1?6(B" . "$,44m(B") ("$,1?7(B" . "$,44p(B") ("$,1?8(B" . "$,44s(B") ("$,1?9(B" . "$,44v(B"))
+"Kannada characters to conjunct glyphs conversion table.")
+
+(defvar knd-conjunct-glyph-hash
+  (let ((table (make-hash-table :test 'equal)))
+    (dolist (rule knd-conjunct-glyph)
+      (puthash (car rule) (cdr rule) table))
+    table)
+  "Hash table holding the entries of `knd-conjunct-glyph'.
+Keys are compared with `equal'.")
+
+(defvar knd-conjunct-glyph-regexp
+  (kannada-regexp-of-hashtbl-vals knd-conjunct-glyph-hash)
+  "Regexp matching any value of `knd-conjunct-glyph-hash'.")
+
+;; Give the first character of every conjunct glyph string the
+;; `reference-point' code property (5 . 3).
+(dolist (rule knd-conjunct-glyph)
+  (put-char-code-property (aref (cdr rule) 0) 'reference-point '(5 . 3)))
+
+;; glyph-to-glyph conversion table.
+;; it is supposed that glyphs are ordered in
+;; [consonant/nukta] - [matra/virama] - [preceding-r] - [anuswar].
+
+(defvar knd-glyph-glyph
+ '(("$,45$4A(B" . "$,45*(B")
+ ("$,45'4A(B" . "$,45+(B")
+ ("$,44A3g(B" . "$,45,(B")
+ ("$,45$3Q(B" . "$,45-(B")))
+
+(defvar knd-glyph-glyph-hash
+  (let ((table (make-hash-table :test 'equal)))
+    ;; Enter each (SEQUENCE . REPLACEMENT) pair of `knd-glyph-glyph'.
+    (dolist (entry knd-glyph-glyph)
+      (puthash (car entry) (cdr entry) table))
+    table)
+  "Hash table form of `knd-glyph-glyph', keyed by the glyph sequence.")
+(defvar knd-glyph-glyph-regexp ; matched against the glyph string during glyph-to-glyph conversion
+ (kannada-regexp-of-hashtbl-keys knd-glyph-glyph-hash))
+
+(defun knd-charseq (from &optional to)
+  "Return the list of characters for `kannada-cdac' codes FROM through TO.
+FROM and TO are code points in the `kannada-cdac' charset; both ends are
+included.  If TO is nil or omitted, only FROM is decoded."
+  (let ((start (decode-char 'kannada-cdac from))
+        (end (decode-char 'kannada-cdac (or to from))))
+    (number-sequence start end)))
+
+(defvar knd-glyph-cv
+  ;; Each range is an inclusive span of `kannada-cdac' code points.  The
+  ;; code points skipped in between are mostly the glyphs listed in
+  ;; `knd-glyph-bottom-modifier', `knd-glyph-space' and
+  ;; `knd-glyph-jha-tail'.
+  ;; NOTE(review): #xc3 is also listed in `knd-glyph-bottom-modifier',
+  ;; whose composition order is applied later by `knd-glyph-order' and
+  ;; therefore wins -- confirm which class #xc3 belongs to.
+  (append
+   (knd-charseq #x40 #x50)
+   (knd-charseq #x52 #x54)
+   (knd-charseq #x56 #x57)
+   (knd-charseq #x59 #x5a)
+   (knd-charseq #x5c)
+   (knd-charseq #x5e #x5f)
+   (knd-charseq #x61 #x62)
+   (knd-charseq #x64 #x66)
+   (knd-charseq #x6a)
+   (knd-charseq #x6c #x6e)
+   (knd-charseq #x70 #x71)
+   (knd-charseq #x73 #x74)
+   (knd-charseq #x76 #x77)
+   (knd-charseq #x79 #x7b)
+   (knd-charseq #x7d #x7e)
+   (knd-charseq #xa2 #xa3)
+   (knd-charseq #xa5 #xa6)
+   (knd-charseq #xa8 #xa9)
+   (knd-charseq #xab #xac)
+   (knd-charseq #xae #xaf)
+   (knd-charseq #xb1 #xb2)
+   (knd-charseq #xb6 #xb8)
+   (knd-charseq #xba #xbb)
+   (knd-charseq #xbe #xbf)
+   (knd-charseq #xc1 #xc2)
+   (knd-charseq #xc4 #xc6)
+   (knd-charseq #xc8 #xc9)
+   (knd-charseq #xcb #xcc)
+   (knd-charseq #xce #xcf)
+   (knd-charseq #xd1 #xd2)
+   (knd-charseq #xd4 #xd5)
+   (knd-charseq #xd7 #xd8)
+   (knd-charseq #xc3))
+  "Kannada Consonants/Vowels/Nukta Glyphs")
+
+(defvar knd-glyph-space ; given composition order 5 by `knd-glyph-order'
+ (knd-charseq #xb3 #xb4)
+ "Kannada Spacing Glyphs")
+
+(defvar knd-glyph-right-modifier ; given composition order 4 by `knd-glyph-order'
+ (append
+ (knd-charseq #xdb #xdd) ; NOTE(review): #xdd is also listed in `knd-glyph-top-matra'
+ (knd-charseq #xdf)
+ (knd-charseq #xe0 #xe3)
+ (knd-charseq #xe9))
+ "Kannada Modifiers attached at the right side.")
+
+(defvar knd-glyph-right-modifier-regexp ; bracket expression over the right-modifier characters
+ (concat "[" knd-glyph-right-modifier "]")) ; `concat' takes the list of characters directly
+
+(defvar knd-glyph-jha-tail ; given composition order 3 by `knd-glyph-order'
+ (knd-charseq #x68) ; one-element list
+ "Kannada tail for jha.")
+
+(defvar knd-glyph-top-matra ; given composition order 2 by `knd-glyph-order'
+ (append
+ (knd-charseq #xda)
+ (knd-charseq #xdd) ; NOTE(review): also in `knd-glyph-right-modifier', whose order 4 is applied later and overrides 2 -- confirm intent
+ (knd-charseq #xe6)
+ (knd-charseq #xe8))
+ "Kannada Matras attached at the top side.")
+
+(defvar knd-glyph-bottom-matra ; given composition order 6 by `knd-glyph-order'
+ (append
+ (knd-charseq #xe4 #xe5)
+ (knd-charseq #xe7))
+ "Kannada Matras attached at the bottom.")
+
+(defvar knd-glyph-end-marks ; given composition order 7 by `knd-glyph-order', the highest, so these sort last within a block
+ (append
+ (knd-charseq #x25)
+ (knd-charseq #x4d #x4e)
+ (knd-charseq #xde))
+ "Kannada end marks: arkavattu, virama, au and diirghaa.")
+
+(defvar knd-glyph-bottom-modifier
+  ;; Each element is an argument list for `knd-charseq': a single code
+  ;; point, or an inclusive (FROM TO) range.  The resulting lists are
+  ;; concatenated in the order given.
+  (apply 'append
+         (mapcar (lambda (range) (apply 'knd-charseq range))
+                 '((#x51) (#x55) (#x58) (#x5b) (#x5d) (#x60) (#x63) (#x67)
+                   (#x69) (#x6b) (#x6f) (#x72) (#x75) (#x78) (#x7c) (#xa1)
+                   (#xa4) (#xa7) (#xaa) (#xad) (#xb0) (#xb5) (#xb9)
+                   (#xbc #xbd)
+                   (#xc0) (#xc3) (#xc7) (#xca) (#xcd) (#xd0) (#xd3) (#xd6)
+                   (#xd9)
+                   (#xea #xef))))
+  "Kannada Modifiers attached at the bottom.")
+
+(defvar knd-glyph-order
+  (list (cons knd-glyph-cv 1)
+        (cons knd-glyph-top-matra 2)
+        (cons knd-glyph-jha-tail 3)
+        (cons knd-glyph-right-modifier 4)
+        (cons knd-glyph-space 5)
+        (cons knd-glyph-bottom-modifier 5)
+        (cons knd-glyph-bottom-matra 6)
+        (cons knd-glyph-end-marks 7))
+  "Alist of (GLYPH-LIST . ORDER) giving each glyph class its composition order.
+Glyphs with a smaller ORDER sort earlier when a syllable is reordered.")
+
+;; Record on every glyph the composition-order rank of its class.
+;; Classes are applied in list order, so a glyph listed in several
+;; classes keeps the rank of the last one.
+(dolist (class knd-glyph-order)
+  (let ((rank (cdr class)))
+    (dolist (glyph (car class))
+      (put-char-code-property glyph 'composition-order rank))))
+
+(defun kannada-compose-syllable-string (string) ; returns a composed copy of STRING, treated as one syllable
+ (with-temp-buffer
+ (insert (decompose-string string)) ; work on the text with any existing composition removed
+ (kannada-compose-syllable-region (point-min) (point-max)) ; compose the whole temporary buffer
+ (buffer-string)))
+
+;; kch
+(defun kannada-compose-syllable-region (from to)
+ "Compose kannada syllable in region FROM to TO."
+ (let ((glyph-str nil) (cons-num 0) (glyph-str-list nil)
+ (last-virama nil) (preceding-r nil) (last-modifier nil) ; NOTE: last-modifier is never read below
+ (last-char (char-before to)) match-str pos
+ glyph-block split-pos (conj nil) (rest nil))
+ (save-excursion
+ (save-restriction
+ ;;; *** char-to-glyph conversion ***
+ ;; Special rule 1. -- Last virama must be preserved.
+ (if (eq last-char ?$,1?M(B)
+ (progn
+ (setq last-virama t)
+ (narrow-to-region from (1- to))) ; leave the final character out of the conversion
+ (narrow-to-region from to))
+ (goto-char (point-min))
+ ;; Special rule 2. -- preceding "r virama" must be modifier.
+ (when (looking-at "$,1?0?M(B.")
+ (setq preceding-r t)
+ (goto-char (+ 2 (point)))) ; step over the two-character prefix
+ ;; remove conjunct consonants
+ (while (re-search-forward knd-char-glyph-regexp nil t)
+ (setq match-str (match-string 0))
+ (if (and (string-match kannada-consonant match-str)
+ (> cons-num 0)) ; only consonants after the first one are handled here
+ (progn
+ (setq conj (concat conj (gethash (match-string 0 match-str)
+ knd-conjunct-glyph-hash))) ; collect the conjunct glyph of this consonant
+ (setq match-str (replace-match "" t nil match-str)) ; drop the consonant from the matched text
+ (if (string-match "$,1?M(B" rest)
+ (setq rest (replace-match "" t nil rest))))) ; remove the first such character from the text kept so far
+ (setq rest (concat rest match-str))
+ ;; count the number of consonant-glyphs.
+ (if (string-match kannada-consonant match-str)
+ (setq cons-num (1+ cons-num))))
+ ;; translate the rest characters into glyphs
+ (setq pos 0)
+ (while (string-match knd-char-glyph-regexp rest pos)
+ (setq match-str (match-string 0 rest))
+ (setq pos (match-end 0))
+ (setq glyph-str
+ (concat glyph-str (gethash match-str knd-char-glyph-hash))))
+
+ (if conj (setq glyph-str (concat glyph-str conj))) ; conjunct glyphs go after the base glyphs
+ (if last-virama (setq glyph-str (concat glyph-str "$,45)(B"))
+ (goto-char (point-min))
+ (if (re-search-forward kannada-consonant-needs-twirl nil t)
+ (progn
+ (setq match-str (match-string 0)) ; NOTE: match-str is not read again after this point
+ (setq glyph-str (concat glyph-str "$,44z(B")))))
+ ;; preceding-r must be attached
+ (if preceding-r
+ (setq glyph-str (concat glyph-str "$,43%(B")))
+ ;;; *** glyph-to-glyph conversion ***
+ (when (string-match knd-glyph-glyph-regexp glyph-str) ; only the first matching sequence is replaced
+ (setq glyph-str
+ (replace-match (gethash (match-string 0 glyph-str)
+ knd-glyph-glyph-hash)
+ nil t glyph-str)))
+ ;;; *** glyph reordering ***
+ (while (setq split-pos (string-match "$,45)(B\\|.$" glyph-str)) ; a block ends at the glyph appended for a final virama, or at the last character
+ (setq glyph-block (substring glyph-str 0 (1+ split-pos)))
+ (setq glyph-str (substring glyph-str (1+ split-pos)))
+ (setq
+ glyph-block
+ (sort (string-to-list glyph-block)
+ (function (lambda (x y)
+ (< (get-char-code-property x 'composition-order) ; NOTE(review): assumes every glyph has a composition-order; a nil property would make `<' signal an error
+ (get-char-code-property y 'composition-order))))))
+ (setq glyph-str-list (nconc glyph-str-list glyph-block)))
+ ;;; *** insert space glyphs for kerning ***
+ (if (> cons-num 0)
+ (let ((curr glyph-str-list) (prev nil) (last-bott nil) bott co)
+ (while curr
+ (setq co (get-char-code-property
+ (car curr) 'composition-order)
+ bott (or (eq co 5) (eq co 6))) ; orders 5 and 6 are the space, bottom-modifier and bottom-matra classes of `knd-glyph-order'
+ (if (and bott last-bott)
+ (setcdr prev (cons ?$,44T(B curr))) ; splice a glyph between two adjacent bottom glyphs
+ (setq last-bott bott prev curr curr (cdr curr)))))
+ ;; concatenate and attach reference-points.
+ (setq glyph-str
+ (cdr ; drop the rule in front of the first glyph
+ (apply
+ 'nconc
+ (mapcar
+ (function (lambda (x)
+ (list
+ (or (get-char-code-property x 'reference-point)
+ '(5 . 3) ;; default reference point.
+ )
+ x)))
+ glyph-str-list))))))
+ (compose-region from to glyph-str))) ; runs after the restriction is restored, so FROM..TO is the caller's full region
+
+(provide 'knd-util)
+
+;;; arch-tag: 78d32230-a960-46a5-b622-61ed6ffcf8fc
+;;; knd-util.el ends here
diff --git a/lisp/language/korea-util.el b/lisp/language/korea-util.el
index 9a945668c76..222832022ac 100644
--- a/lisp/language/korea-util.el
+++ b/lisp/language/korea-util.el
@@ -135,4 +135,5 @@
;;
(provide 'korea-util)
+;;; arch-tag: b17d0981-05da-4577-99f8-1db87fff8b44
;;; korea-util.el ends here
diff --git a/lisp/language/korean.el b/lisp/language/korean.el
index f010de69898..9595ab4ed02 100644
--- a/lisp/language/korean.el
+++ b/lisp/language/korean.el
@@ -73,4 +73,5 @@ The following key bindings are available while using Korean input methods:
(provide 'korean)
+;;; arch-tag: ca7c7348-5ca3-4623-887a-7fd33d725d0e
;;; korean.el ends here
diff --git a/lisp/language/lao-util.el b/lisp/language/lao-util.el
index 4db213dab02..ad0253648ea 100644
--- a/lisp/language/lao-util.el
+++ b/lisp/language/lao-util.el
@@ -518,4 +518,5 @@ syllable. In that case, FROM and TO are indexes to STR."
;;
(provide 'lao-util)
+;;; arch-tag: 1f828781-3cb8-4695-88af-8f33222338ce
;;; lao-util.el ends here
diff --git a/lisp/language/lao.el b/lisp/language/lao.el
index 8edc282a46c..ec5c07342e4 100644
--- a/lisp/language/lao.el
+++ b/lisp/language/lao.el
@@ -53,4 +53,5 @@
(provide 'lao)
+;;; arch-tag: ba540fd9-6352-4449-a9cd-669afd21fa57
;;; lao.el ends here
diff --git a/lisp/language/malayalam.el b/lisp/language/malayalam.el
index 27bf122fd7c..4c8fad08fe4 100644
--- a/lisp/language/malayalam.el
+++ b/lisp/language/malayalam.el
@@ -48,4 +48,5 @@ South Indian language Malayalam is supported in this language environment."))
(provide 'malayalam)
+;;; arch-tag: 5f500e53-1e4f-4bb2-aa93-ad8736f0349f
;;; malayalam.el ends here
diff --git a/lisp/language/misc-lang.el b/lisp/language/misc-lang.el
index a4c72742d78..537267ce9ba 100644
--- a/lisp/language/misc-lang.el
+++ b/lisp/language/misc-lang.el
@@ -43,4 +43,5 @@ and Italian.")))
(provide 'misc-lang)
+;;; arch-tag: 6953585c-1a1a-4c09-be82-a2518afb6074
;;; misc-lang.el ends here
diff --git a/lisp/language/mlm-util.el b/lisp/language/mlm-util.el
index b492d269ff2..16246aaa2ba 100644
--- a/lisp/language/mlm-util.el
+++ b/lisp/language/mlm-util.el
@@ -74,6 +74,7 @@
(malayalam-compose-region (point-min) (point-max))
(buffer-string)))
+;;;###autoload
(defun malayalam-post-read-conversion (len)
(save-excursion
(save-restriction
@@ -409,4 +410,5 @@ In this case, compose characters after POS of the string."
(provide 'mlm-util)
+;;; arch-tag: 7f25ee67-8f9d-49f2-837b-35c412c00eba
;;; devan-util.el ends here
diff --git a/lisp/language/romanian.el b/lisp/language/romanian.el
index a89a9ab777c..ef7e4cf74c4 100644
--- a/lisp/language/romanian.el
+++ b/lisp/language/romanian.el
@@ -53,4 +53,5 @@ An environment for generic Latin-10 encoding is also available."))
(provide 'romanian)
+;;; arch-tag: a0bf93ee-2f02-4678-a477-c08acc35366b
;;; romanian.el ends here
diff --git a/lisp/language/slovak.el b/lisp/language/slovak.el
index 42983b72628..308be4286e3 100644
--- a/lisp/language/slovak.el
+++ b/lisp/language/slovak.el
@@ -47,4 +47,5 @@ and selects the Slovak tutorial."))
(provide 'slovak)
+;;; arch-tag: 1bae098a-33b2-4426-8c29-59e44fe05484
;;; slovak.el ends here
diff --git a/lisp/language/tamil.el b/lisp/language/tamil.el
index 04f3eacc5e5..c62093098e3 100644
--- a/lisp/language/tamil.el
+++ b/lisp/language/tamil.el
@@ -43,4 +43,5 @@ South Indian Language Tamil supported in this language environment."))
'tamil-composition-function)
(provide 'tamil)
+;;; arch-tag: 2201ac78-7d1e-4674-9bcb-9923c7a2bd9c
;;; tamil.el ends here
diff --git a/lisp/language/thai-util.el b/lisp/language/thai-util.el
index 251c1fee5bc..259a102c61e 100644
--- a/lisp/language/thai-util.el
+++ b/lisp/language/thai-util.el
@@ -205,4 +205,5 @@ positions (integers or markers) specifying the region."
;;
(provide 'thai-util)
+;;; arch-tag: 59425d6a-8cf9-4e06-a6ab-8ab7dc7a7a97
;;; thai-util.el ends here
diff --git a/lisp/language/thai.el b/lisp/language/thai.el
index 0723c3d182b..40bd298aeec 100644
--- a/lisp/language/thai.el
+++ b/lisp/language/thai.el
@@ -80,4 +80,5 @@ This is the same as `thai-tis620' with the addition of no-break-space."
(provide 'thai)
+;;; arch-tag: c7eb0e91-4db0-4619-81f8-8762e7d51e15
;;; thai.el ends here
diff --git a/lisp/language/tibet-util.el b/lisp/language/tibet-util.el
index 260cf7efe54..ec53d121f02 100644
--- a/lisp/language/tibet-util.el
+++ b/lisp/language/tibet-util.el
@@ -434,4 +434,5 @@ before writing buffer in Unicode. See also
(provide 'tibet-util)
+;;; arch-tag: 7a7333e8-1584-446c-b39c-a02b9def265d
;;; tibet-util.el ends here
diff --git a/lisp/language/tibetan.el b/lisp/language/tibetan.el
index ab9516f73d1..4b580f4a81c 100644
--- a/lisp/language/tibetan.el
+++ b/lisp/language/tibetan.el
@@ -613,4 +613,5 @@ This also matches some punctuation characters which need conversion.")
(provide 'tibetan)
+;;; arch-tag: 8d37c8d7-f95d-450f-9ec2-819e61fc79a7
;;; tibetan.el ends here
diff --git a/lisp/language/tml-util.el b/lisp/language/tml-util.el
index 34c18741e97..e9670e3d1a6 100644
--- a/lisp/language/tml-util.el
+++ b/lisp/language/tml-util.el
@@ -76,6 +76,7 @@
(tamil-compose-region (point-min) (point-max))
(buffer-string)))
+;;;###autoload
(defun tamil-post-read-conversion (len)
(save-excursion
(save-restriction
@@ -368,4 +369,5 @@ In this case, compose characters after POS of the string."
(provide 'tml-util)
+;;; arch-tag: 4d1c9737-e7b1-44cf-a040-4f64c50e773e
;;; tml-util.el ends here
diff --git a/lisp/language/utf-8-lang.el b/lisp/language/utf-8-lang.el
index 3aa845921ad..70475846dbb 100644
--- a/lisp/language/utf-8-lang.el
+++ b/lisp/language/utf-8-lang.el
@@ -51,4 +51,6 @@ encoded in UTF-8."))
nil)
(provide 'utf-8-lang)
+
+;;; arch-tag: dfa339e1-296f-4b1e-9fe8-2b65279ec813
;;; utf-8-lang.el ends here
diff --git a/lisp/language/viet-util.el b/lisp/language/viet-util.el
index 04c26f14756..c2f1218a536 100644
--- a/lisp/language/viet-util.el
+++ b/lisp/language/viet-util.el
@@ -292,4 +292,5 @@ positions (integers or markers) specifying the stretch of the region."
;;;
(provide 'viet-util)
+;;; arch-tag: 082a4d3b-168f-45b4-b3e1-82bfa1b5a194
;;; viet-util.el ends here
diff --git a/lisp/language/vietnamese.el b/lisp/language/vietnamese.el
index 8f403f112e0..a5360d781f5 100644
--- a/lisp/language/vietnamese.el
+++ b/lisp/language/vietnamese.el
@@ -105,4 +105,5 @@ Telex, VIQR is the default setting.")))
(provide 'vietnamese)
+;;; arch-tag: 5bd4f1aa-2d4e-4f33-b7d8-0679c6a19ee6
;;; vietnamese.el ends here