summaryrefslogtreecommitdiff
path: root/lisp/language/indian.el
diff options
context:
space:
mode:
authorRichard M. Stallman <rms@gnu.org>2001-11-25 02:40:11 +0000
committerRichard M. Stallman <rms@gnu.org>2001-11-25 02:40:11 +0000
commit1bec6fdbd774d8a17b04ed41d7d2ed659e029443 (patch)
treed4652d18b9a9c553f4fd7222bc4c804f78caeecc /lisp/language/indian.el
parent16808556788c40f60b0c330396237dd62ba4de83 (diff)
downloademacs-1bec6fdbd774d8a17b04ed41d7d2ed659e029443.tar.gz
emacs-1bec6fdbd774d8a17b04ed41d7d2ed659e029443.tar.bz2
emacs-1bec6fdbd774d8a17b04ed41d7d2ed659e029443.zip
Completely re-written.
Diffstat (limited to 'lisp/language/indian.el')
-rw-r--r--lisp/language/indian.el376
1 files changed, 90 insertions, 286 deletions
diff --git a/lisp/language/indian.el b/lisp/language/indian.el
index 617c20123be..28d313215db 100644
--- a/lisp/language/indian.el
+++ b/lisp/language/indian.el
@@ -1,10 +1,9 @@
-;;; indian.el --- support for Indian Languages -*- coding: iso-2022-7bit; -*-
+;;; indian.el --- Indian languages support -*- coding: iso-2022-7bit; -*-
-;; Copyright (C) 1995 Free Software Foundation, Inc.
+;; Copyright (C) 1999, 2001 Free Software Foundation, Inc.
-;; Author: KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
-
-;; Keywords: multilingual, Indian
+;; Maintainer: KAWABATA, Taichi <batta@beige.ocn.ne.jp>
+;; Keywords: multilingual, Indian
;; This file is part of GNU Emacs.
@@ -25,290 +24,95 @@
;;; Commentary:
-;; History:
-;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
-
-;; For Indian, the character set IS 13194 is supported.
-;;
-;; IS 13194 does not specifically assign glyphs for each characters.
-;; Following code is not specific to each Indian language.
-;;
-;; Eventually, this code will support generic information about
-;; following scripts.
-;;
-;; Devanagari
-;; Bengali
-;; Gurmukhi
-;; Gujarati
-;; Oriya
-;; Tamil
-;; Telgu
-;; Kannada
-;; Malayalam
-;;
-;; In this file, charsets other than charset-ascii and charset-indian-is13194
-;; should not be used except in the comment.
+;; This file defines in-is13194 coding system and relationship between
+;; indian-glyph character-set and various CDAC fonts.
;;; Code:
-;; Followings are what you see when you refer to the Emacs
-;; representations of IS 13194 charcters. However, this is merely
-;; tentative apperance, and you must convert them by
-;; indian-to-xxxxxx(specific script) function to use them.
-;; Devanagari is not an exception of this rule.
-
-;; 0xa0 //(5!"#$%&'()*+,-./(B
-;; 0xb0 (50123456789:;<=>?(B
-;; 0xc0 (5@ABCDEFGHIJKLMNO(B
-;; 0xd0 (5PQRSTUVWXYZ[\]^_(B
-;; 0xe0 (5`abcdefghijklmno(B
-;; 0xf0 (5pqrstuvwxyz{|}~(B//
-
-;; Note - In IS 13194, several symbols are obtained by special
-;; combination of several characters and Nukta sign.
-;;
-;; Sanskrit Vowel R -> (5*(B + (5i(B
-;; Sanskrit Vowel L -> (5&(B + (5i(B
-;; Sanskrit Vowel LL -> (5'(B + (5i(B
-;; Sanskrit Avagrah -> (5j(B + (5i(B
-;; OM -> (5!(B + (5i(B
-;;
-;; Note - IS 13194 defines ATR(0xEF) and EXT(0xF0), but they are
-;; not used in Emacs.
-;;
-;; Note - the above characters DO NOT represent any script. For
-;; example, if you want to obtain Devanagari character, you must do
-;; something like the following.
-;;
-;; (char-to-string (indian-to-devanagari ?(5$(B))
-;; "$(5!$(B"
-
-;;; ITRANS
-;;
-;; ITRANS is one of the most popular method to exchange indian scripts
-;; electronically. Here is the table to convert between ITRANS code and
-;; IS 13194 code.
-
-(defvar indian-itrans-consonant-alist
- '(
- ("k" . "(53(B")
- ("kh" . "(54(B")
- ("g" . "(55(B")
- ("gh" . "(56(B")
- ("N^" . "(57(B")
- ("ch" . "(58(B")
- ("chh" . "(59(B")
- ("j" . "(5:(B")
- ("jh" . "(5;(B")
- ("JN" . "(5<(B")
- ("T" . "(5=(B")
- ("Th" . "(5>(B")
- ("D" . "(5?(B")
- ("Dh" . "(5@(B")
- ("N" . "(5A(B")
- ("t" . "(5B(B")
- ("th" . "(5C(B")
- ("d" . "(5D(B")
- ("dh" . "(5E(B")
- ("n" . "(5F(B")
- ("nh" . "(5G(B") ; For transcription of non-Devanagari Languages.
- ("p" . "(5H(B")
- ("ph" . "(5I(B")
- ("b" . "(5J(B")
- ("bh" . "(5K(B")
- ("m" . "(5L(B")
- ("y" . "(5M(B")
- ("yh" . "(5N(B") ; For transcription of non-Devanagari Languages.
- ("r" . "(5O(B")
- ("rh" . "(5P(B") ; For transcription of non-Devanagari Languages.
- ("l" . "(5Q(B")
- ("v" . "(5T(B")
- ("sh" . "(5U(B")
- ("shh" . "(5V(B")
- ("s" . "(5W(B")
- ("h" . "(5X(B")
- ("ld" . "(5R(B")
- ("L" . "(5R(B")
- ("ksh" . "$(5!3!h!V(B")
- ("GY" . "***GY***") ; Must check out later.
- ;; special consonants
- ("q" . "(53i(B")
- ("K" . "(54i(B")
- ("G" . "(55i(B")
- ("z" . "(5:i(B")
- ("f" . "(5Ii(B")
- (".D" . "(5?i(B")
- (".Dh" . "(5@i(B")
- ))
-
-(defvar indian-itrans-vowel-sign-alist
- '(
- ;; Special treatment unique to IS 13194 Transliteration
- ("" . "(5h(B")
- ("a" . "")
- ;; Matra (Vowel Sign)
- ("aa" . "(5Z(B")
- ("A" . "(5Z(B")
- ("i" . "(5[(B")
- ("ii" . "(5\(B")
- ("I" . "(5\(B")
- ("u" . "(5](B")
- ("uu" . "(5^(B")
- ("U" . "(5^(B")
- ("R^i" . "(5_(B") ; These must be checked out later.
- ("R^I" . "(5_i(B")
- ("L^i" . "(5[i(B")
- ("L^I" . "(5\i(B")
- ("E" . "(5`(B") ; For transcription of non-Devanangri Languages.
- ("e" . "(5a(B")
- ("ai" . "(5b(B")
- ;; ("e.c" . "(5c(B") ; Tentatively suppressed.
- ("O" . "(5d(B") ; For transcription of non-Devanagari Languages.
- ("o" . "(5e(B")
- ("au" . "(5f(B")
- ;; ("o.c" . "(5g(B") ; Tentatively suppressed.
- ))
-
-;;
-;; Independent vowels and other signs.
-;;
-
-(defvar indian-itrans-other-letters-alist
- '(
- ("a" . "(5$(B")
- ("aa" . "(5%(B")
- ("A" . "(5%(B")
- ("i" . "(5&(B")
- ("ii" . "(5'(B")
- ("I" . "(5'(B")
- ("u" . "(5((B")
- ("uu" . "(5)(B")
- ("U" . "(5)(B")
- ("R^i" . "(5*(B")
- ("R^I" . "(5*i(B")
- ("L^i" . "(5&i(B")
- ("L^I" . "(5'i(B")
- ("E" . "(5+(B") ; For transcription of non-Devanagari Languages.
- ("e" . "(5,(B")
- ("ai" . "(5-(B")
- ;; ("e.c" . "(5.(B") ; Candra E
- ("O" . "(5/(B") ; For transcription of non-Devanagari Languages.
- ("o" . "(50(B")
- ("au" . "(51(B")
- ;; ("o.c" . "(52(B") ; Candra O
- ("M" . "(5$(B")
- ("H" . "(5#(B")
- ("AUM" . "(5!i(B")
- ("OM" . "(5!i(B")
- (".r" . "(5Oh(B")
- (".n" . "(5"(B")
- (".N" . "(5!(B")
- (".h" . "(5h(B") ; Halant
- (".." . "(5j(B")
- (".a" . "(5ji(B") ; Avagrah
- ("0" . "(5q(B")
- ("1" . "(5r(B")
- ("2" . "(5s(B")
- ("3" . "(5t(B")
- ("4" . "(5u(B")
- ("5" . "(5v(B")
- ("6" . "(5w(B")
- ("7" . "(5x(B")
- ("8" . "(5y(B")
- ("9" . "(5z(B")
- ))
-
-;; Regular expression matching single Indian character represented
-;; by ITRANS.
-
-(defvar indian-itrans-regexp
- (let ((consonant "\\([cs]hh?\\)\\|[kgjTDnpbyr]h?\\|\\(N\\^?\\)\\|\\(jN\\)\\|[mvqKGzfs]\\|\\(ld?\\)\\|\\(ksh\\)\\|\\(GY\\)\\|\\(\\.Dh?\\)")
- (vowel "\\(a[aiu]\\)\\|\\(ii\\)\\|\\(uu\\)\\|\\([RL]\\^[iI]\\)\\|[AIEOeoaiu]")
- (misc "[MH0-9]\\|\\(AUM\\)\\|\\(OM\\)\\|\\(\\.[rnNh\\.a]\\)")
- (lpre "\\(") (rpre "\\)") (orre "\\|"))
- (concat lpre misc rpre orre
- lpre lpre consonant rpre "?" lpre vowel rpre rpre orre
- lpre consonant rpre )))
-
-;;
-;; Regular expression matching single ITRANS unit for IS 13194 characters.
-;;
-
-(defvar itrans-indian-regexp
- (let ((vowel "[(5$(B-(52(B]")
- (consonant "[(53(B-(5X(B]")
- (matra "[(5Z(B-(5g(B]")
- (misc "[(5q(B-(5z(B]")
- (lpre "\\(") (rpre "\\)") (orre "\\|"))
- (concat misc orre
- lpre consonant matra "?" rpre orre
- vowel)))
-
-;;
-;; IS13194 - ITRANS conversion table for string matching above regexp.
-;;
-
-(defvar indian-itrans-alist
- (let ((cl indian-itrans-consonant-alist)
- (ml indian-itrans-other-letters-alist) rules)
- (while cl
- (let ((vl indian-itrans-vowel-sign-alist))
- (while vl
- (setq rules
- (cons (cons (concat (car (car cl)) (car (car vl)))
- (concat (cdr (car cl)) (cdr (car vl))))
- rules))
- (setq vl (cdr vl))))
- (setq cl (cdr cl)))
- (while ml
- (setq rules (cons (cons (car (car ml))
- (cdr (car ml)))
- rules))
- (setq ml (cdr ml)))
- rules))
-
-;;
-;; Utility program to convert from ITRANS to IS 13194 in specified region.
-;;
-
-(defun indian-decode-itrans-region (from to)
- "Convert `ITRANS' mnemonics of the current region to Indian characters.
-When called from a program, expects two arguments,
-positions (integers or markers) specifying the stretch of the region."
- (interactive "r")
- (save-restriction
- (narrow-to-region from to)
- (goto-char (point-min))
- (while (re-search-forward indian-itrans-regexp nil t)
- (let* ((itrans (buffer-substring (match-beginning 0) (match-end 0)))
- (ch (cdr (assoc itrans indian-itrans-alist))))
- (if ch
- (progn
- (delete-region (match-beginning 0) (match-end 0))
- (insert ch)))))
- (goto-char (point-min))
- (while (re-search-forward "\\((5h(B\\)[^\\c0]" nil t)
- (delete-region (match-beginning 1) (match-end 1)))))
-
-;;
-;; Utility program to convert from IS 13194 to ITRANS in specified region.
-;;
-
-(defun indian-encode-itrans-region (from to)
- "Convert indian region to ITRANS mnemonics."
- (interactive "r")
- (save-restriction
- (narrow-to-region from to)
- (goto-char (point-min))
- (while (re-search-forward itrans-indian-regexp nil t)
- (let* ((indian (buffer-substring (match-beginning 0) (match-end 0)))
- (ch (car (rassoc indian indian-itrans-alist))))
- (if ch
- (progn
- (delete-region (match-beginning 0) (match-end 0))
- (insert ch)))))
- (goto-char (point-min))))
+(make-coding-system
+ 'in-is13194 2 ?D
+ "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)"
+ '(ascii indian-is13194 nil nil
+ nil ascii-eol)
+ '((safe-charsets ascii indian-is13194)
+ (post-read-conversion . in-is13194-post-read-conversion)
+ (pre-write-conversion . in-is13194-pre-write-conversion)))
+
+(defvar indian-script-table
+ '[
+ devanagari
+ sanskrit
+ bengali
+ tamil
+ telugu
+ assamese
+ oriya
+ kannada
+ malayalam
+ gujarati
+ punjabi
+ ]
+ "Vector of Indian script names.")
+
+(let ((len (length indian-script-table))
+ (i 0))
+ (while (< i len)
+ (put (aref indian-script-table i) 'indian-glyph-code-offset (* 256 i))
+ (setq i (1+ i))))
+
+(defvar indian-default-script 'devanagari
+ "Default script for Indian languages.
+Each Indian language environment sets this value
+to one of `indian-script-table' (which see).
+The default value is `devanagari'.")
+
+(defun indian-glyph-char (index &optional script)
+ "Return character of charset `indian-glyph' made from glyph index INDEX.
+The variable `indian-default-script' specifies the script of the glyph.
+Optional argument SCRIPT, if non-nil, overrides `indian-default-script'.
+See also the function `indian-char-glyph'."
+ (or script
+ (setq script indian-default-script))
+ (let ((offset (get script 'indian-glyph-code-offset)))
+ (or (integerp offset)
+ (error "Invalid script name: %s" script))
+ (or (and (>= index 0) (< index 256))
+ (error "Invalid glyph index: %d" index))
+ (setq index (+ offset index))
+ (make-char 'indian-glyph (+ (/ index 96) 32) (+ (% index 96) 32))))
+
+(defvar indian-glyph-max-char
+ (indian-glyph-char
+ 255 (aref indian-script-table (1- (length indian-script-table))))
+ "The maximum valid code of characters in the charset `indian-glyph'")
+
+(defun indian-char-glyph (char)
+ "Return information about the glphy code for CHAR of `indian-glyph' charset.
+The value is (INDEX . SCRIPT), where INDEX is the glyph index
+in the font that Indian script name SCRIPT specifies.
+See also the function `indian-glyph-char'."
+ (let ((split (split-char char))
+ code)
+ (or (eq (car split) 'indian-glyph)
+ (error "Charset of `%c' is not indian-glyph" char))
+ (or (<= char indian-glyph-max-char)
+ (error "Invalid indian-glyph char: %d" char))
+ (setq code (+ (* (- (nth 1 split) 32) 96) (nth 2 split) -32))
+ (cons (% code 256) (aref indian-script-table (/ code 256)))))
+
+(define-ccl-program ccl-encode-indian-glyph-font
+ `(0
+ ;; Shorten (r1 = (((((r1 - 32) * 96) + r2) - 32) % 256))
+ (r1 = ((((r1 * 96) + r2) - ,(+ (* 32 96) 32)) % 256))))
+
+(setq font-ccl-encoder-alist
+ (cons (cons "-CDAC" 'ccl-encode-indian-glyph-font)
+ font-ccl-encoder-alist))
+
+(setq font-ccl-encoder-alist
+ (cons '("ISO10646.*-1" . ccl-encode-unicode-font)
+ font-ccl-encoder-alist))
(provide 'indian)
-
+
;;; indian.el ends here