diff options
author | Richard M. Stallman <rms@gnu.org> | 2001-11-25 02:40:11 +0000 |
---|---|---|
committer | Richard M. Stallman <rms@gnu.org> | 2001-11-25 02:40:11 +0000 |
commit | 1bec6fdbd774d8a17b04ed41d7d2ed659e029443 (patch) | |
tree | d4652d18b9a9c553f4fd7222bc4c804f78caeecc /lisp/language/indian.el | |
parent | 16808556788c40f60b0c330396237dd62ba4de83 (diff) | |
download | emacs-1bec6fdbd774d8a17b04ed41d7d2ed659e029443.tar.gz emacs-1bec6fdbd774d8a17b04ed41d7d2ed659e029443.tar.bz2 emacs-1bec6fdbd774d8a17b04ed41d7d2ed659e029443.zip |
Completely re-written.
Diffstat (limited to 'lisp/language/indian.el')
-rw-r--r-- | lisp/language/indian.el | 376 |
1 files changed, 90 insertions, 286 deletions
diff --git a/lisp/language/indian.el b/lisp/language/indian.el index 617c20123be..28d313215db 100644 --- a/lisp/language/indian.el +++ b/lisp/language/indian.el @@ -1,10 +1,9 @@ -;;; indian.el --- support for Indian Languages -*- coding: iso-2022-7bit; -*- +;;; indian.el --- Indian languages support -*- coding: iso-2022-7bit; -*- -;; Copyright (C) 1995 Free Software Foundation, Inc. +;; Copyright (C) 1999, 2001 Free Software Foundation, Inc. -;; Author: KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp> - -;; Keywords: multilingual, Indian +;; Maintainer: KAWABATA, Taichi <batta@beige.ocn.ne.jp> +;; Keywords: multilingual, Indian ;; This file is part of GNU Emacs. @@ -25,290 +24,95 @@ ;;; Commentary: -;; History: -;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp> - -;; For Indian, the character set IS 13194 is supported. -;; -;; IS 13194 does not specifically assign glyphs for each characters. -;; Following code is not specific to each Indian language. -;; -;; Eventually, this code will support generic information about -;; following scripts. -;; -;; Devanagari -;; Bengali -;; Gurmukhi -;; Gujarati -;; Oriya -;; Tamil -;; Telgu -;; Kannada -;; Malayalam -;; -;; In this file, charsets other than charset-ascii and charset-indian-is13194 -;; should not be used except in the comment. +;; This file defines in-is13194 coding system and relationship between +;; indian-glyph character-set and various CDAC fonts. ;;; Code: -;; Followings are what you see when you refer to the Emacs -;; representations of IS 13194 charcters. However, this is merely -;; tentative apperance, and you must convert them by -;; indian-to-xxxxxx(specific script) function to use them. -;; Devanagari is not an exception of this rule. - -;; 0xa0 //(5!"#$%&'()*+,-./(B -;; 0xb0 (50123456789:;<=>?(B -;; 0xc0 (5@ABCDEFGHIJKLMNO(B -;; 0xd0 (5PQRSTUVWXYZ[\]^_(B -;; 0xe0 (5`abcdefghijklmno(B -;; 0xf0 (5pqrstuvwxyz{|}~(B// - -;; Note - In IS 13194, several symbols are obtained by special -;; combination of several characters and Nukta sign. -;; -;; Sanskrit Vowel R -> (5*(B + (5i(B -;; Sanskrit Vowel L -> (5&(B + (5i(B -;; Sanskrit Vowel LL -> (5'(B + (5i(B -;; Sanskrit Avagrah -> (5j(B + (5i(B -;; OM -> (5!(B + (5i(B -;; -;; Note - IS 13194 defines ATR(0xEF) and EXT(0xF0), but they are -;; not used in Emacs. -;; -;; Note - the above characters DO NOT represent any script. For -;; example, if you want to obtain Devanagari character, you must do -;; something like the following. -;; -;; (char-to-string (indian-to-devanagari ?(5$(B)) -;; "$(5!$(B" - -;;; ITRANS -;; -;; ITRANS is one of the most popular method to exchange indian scripts -;; electronically. Here is the table to convert between ITRANS code and -;; IS 13194 code. - -(defvar indian-itrans-consonant-alist - '( - ("k" . "(53(B") - ("kh" . "(54(B") - ("g" . "(55(B") - ("gh" . "(56(B") - ("N^" . "(57(B") - ("ch" . "(58(B") - ("chh" . "(59(B") - ("j" . "(5:(B") - ("jh" . "(5;(B") - ("JN" . "(5<(B") - ("T" . "(5=(B") - ("Th" . "(5>(B") - ("D" . "(5?(B") - ("Dh" . "(5@(B") - ("N" . "(5A(B") - ("t" . "(5B(B") - ("th" . "(5C(B") - ("d" . "(5D(B") - ("dh" . "(5E(B") - ("n" . "(5F(B") - ("nh" . "(5G(B") ; For transcription of non-Devanagari Languages. - ("p" . "(5H(B") - ("ph" . "(5I(B") - ("b" . "(5J(B") - ("bh" . "(5K(B") - ("m" . "(5L(B") - ("y" . "(5M(B") - ("yh" . "(5N(B") ; For transcription of non-Devanagari Languages. - ("r" . "(5O(B") - ("rh" . "(5P(B") ; For transcription of non-Devanagari Languages. - ("l" . "(5Q(B") - ("v" . "(5T(B") - ("sh" . "(5U(B") - ("shh" . "(5V(B") - ("s" . "(5W(B") - ("h" . "(5X(B") - ("ld" . "(5R(B") - ("L" . "(5R(B") - ("ksh" . "$(5!3!h!V(B") - ("GY" . "***GY***") ; Must check out later. - ;; special consonants - ("q" . "(53i(B") - ("K" . "(54i(B") - ("G" . "(55i(B") - ("z" . "(5:i(B") - ("f" . "(5Ii(B") - (".D" . "(5?i(B") - (".Dh" . "(5@i(B") - )) - -(defvar indian-itrans-vowel-sign-alist - '( - ;; Special treatment unique to IS 13194 Transliteration - ("" . "(5h(B") - ("a" . "") - ;; Matra (Vowel Sign) - ("aa" . "(5Z(B") - ("A" . "(5Z(B") - ("i" . "(5[(B") - ("ii" . "(5\(B") - ("I" . "(5\(B") - ("u" . "(5](B") - ("uu" . "(5^(B") - ("U" . "(5^(B") - ("R^i" . "(5_(B") ; These must be checked out later. - ("R^I" . "(5_i(B") - ("L^i" . "(5[i(B") - ("L^I" . "(5\i(B") - ("E" . "(5`(B") ; For transcription of non-Devanangri Languages. - ("e" . "(5a(B") - ("ai" . "(5b(B") - ;; ("e.c" . "(5c(B") ; Tentatively suppressed. - ("O" . "(5d(B") ; For transcription of non-Devanagari Languages. - ("o" . "(5e(B") - ("au" . "(5f(B") - ;; ("o.c" . "(5g(B") ; Tentatively suppressed. - )) - -;; -;; Independent vowels and other signs. -;; - -(defvar indian-itrans-other-letters-alist - '( - ("a" . "(5$(B") - ("aa" . "(5%(B") - ("A" . "(5%(B") - ("i" . "(5&(B") - ("ii" . "(5'(B") - ("I" . "(5'(B") - ("u" . "(5((B") - ("uu" . "(5)(B") - ("U" . "(5)(B") - ("R^i" . "(5*(B") - ("R^I" . "(5*i(B") - ("L^i" . "(5&i(B") - ("L^I" . "(5'i(B") - ("E" . "(5+(B") ; For transcription of non-Devanagari Languages. - ("e" . "(5,(B") - ("ai" . "(5-(B") - ;; ("e.c" . "(5.(B") ; Candra E - ("O" . "(5/(B") ; For transcription of non-Devanagari Languages. - ("o" . "(50(B") - ("au" . "(51(B") - ;; ("o.c" . "(52(B") ; Candra O - ("M" . "(5$(B") - ("H" . "(5#(B") - ("AUM" . "(5!i(B") - ("OM" . "(5!i(B") - (".r" . "(5Oh(B") - (".n" . "(5"(B") - (".N" . "(5!(B") - (".h" . "(5h(B") ; Halant - (".." . "(5j(B") - (".a" . "(5ji(B") ; Avagrah - ("0" . "(5q(B") - ("1" . "(5r(B") - ("2" . "(5s(B") - ("3" . "(5t(B") - ("4" . "(5u(B") - ("5" . "(5v(B") - ("6" . "(5w(B") - ("7" . "(5x(B") - ("8" . "(5y(B") - ("9" . "(5z(B") - )) - -;; Regular expression matching single Indian character represented -;; by ITRANS. - -(defvar indian-itrans-regexp - (let ((consonant "\\([cs]hh?\\)\\|[kgjTDnpbyr]h?\\|\\(N\\^?\\)\\|\\(jN\\)\\|[mvqKGzfs]\\|\\(ld?\\)\\|\\(ksh\\)\\|\\(GY\\)\\|\\(\\.Dh?\\)") - (vowel "\\(a[aiu]\\)\\|\\(ii\\)\\|\\(uu\\)\\|\\([RL]\\^[iI]\\)\\|[AIEOeoaiu]") - (misc "[MH0-9]\\|\\(AUM\\)\\|\\(OM\\)\\|\\(\\.[rnNh\\.a]\\)") - (lpre "\\(") (rpre "\\)") (orre "\\|")) - (concat lpre misc rpre orre - lpre lpre consonant rpre "?" lpre vowel rpre rpre orre - lpre consonant rpre ))) - -;; -;; Regular expression matching single ITRANS unit for IS 13194 characters. -;; - -(defvar itrans-indian-regexp - (let ((vowel "[(5$(B-(52(B]") - (consonant "[(53(B-(5X(B]") - (matra "[(5Z(B-(5g(B]") - (misc "[(5q(B-(5z(B]") - (lpre "\\(") (rpre "\\)") (orre "\\|")) - (concat misc orre - lpre consonant matra "?" rpre orre - vowel))) - -;; -;; IS13194 - ITRANS conversion table for string matching above regexp. -;; - -(defvar indian-itrans-alist - (let ((cl indian-itrans-consonant-alist) - (ml indian-itrans-other-letters-alist) rules) - (while cl - (let ((vl indian-itrans-vowel-sign-alist)) - (while vl - (setq rules - (cons (cons (concat (car (car cl)) (car (car vl))) - (concat (cdr (car cl)) (cdr (car vl)))) - rules)) - (setq vl (cdr vl)))) - (setq cl (cdr cl))) - (while ml - (setq rules (cons (cons (car (car ml)) - (cdr (car ml))) - rules)) - (setq ml (cdr ml))) - rules)) - -;; -;; Utility program to convert from ITRANS to IS 13194 in specified region. -;; - -(defun indian-decode-itrans-region (from to) - "Convert `ITRANS' mnemonics of the current region to Indian characters. -When called from a program, expects two arguments, -positions (integers or markers) specifying the stretch of the region." - (interactive "r") - (save-restriction - (narrow-to-region from to) - (goto-char (point-min)) - (while (re-search-forward indian-itrans-regexp nil t) - (let* ((itrans (buffer-substring (match-beginning 0) (match-end 0))) - (ch (cdr (assoc itrans indian-itrans-alist)))) - (if ch - (progn - (delete-region (match-beginning 0) (match-end 0)) - (insert ch))))) - (goto-char (point-min)) - (while (re-search-forward "\\((5h(B\\)[^\\c0]" nil t) - (delete-region (match-beginning 1) (match-end 1))))) - -;; -;; Utility program to convert from IS 13194 to ITRANS in specified region. -;; - -(defun indian-encode-itrans-region (from to) - "Convert indian region to ITRANS mnemonics." - (interactive "r") - (save-restriction - (narrow-to-region from to) - (goto-char (point-min)) - (while (re-search-forward itrans-indian-regexp nil t) - (let* ((indian (buffer-substring (match-beginning 0) (match-end 0))) - (ch (car (rassoc indian indian-itrans-alist)))) - (if ch - (progn - (delete-region (match-beginning 0) (match-end 0)) - (insert ch))))) - (goto-char (point-min)))) +(make-coding-system + 'in-is13194 2 ?D + "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)" + '(ascii indian-is13194 nil nil + nil ascii-eol) + '((safe-charsets ascii indian-is13194) + (post-read-conversion . in-is13194-post-read-conversion) + (pre-write-conversion . in-is13194-pre-write-conversion))) + +(defvar indian-script-table + '[ + devanagari + sanskrit + bengali + tamil + telugu + assamese + oriya + kannada + malayalam + gujarati + punjabi + ] + "Vector of Indian script names.") + +(let ((len (length indian-script-table)) + (i 0)) + (while (< i len) + (put (aref indian-script-table i) 'indian-glyph-code-offset (* 256 i)) + (setq i (1+ i)))) + +(defvar indian-default-script 'devanagari + "Default script for Indian languages. +Each Indian language environment sets this value +to one of `indian-script-table' (which see). +The default value is `devanagari'.") + +(defun indian-glyph-char (index &optional script) + "Return character of charset `indian-glyph' made from glyph index INDEX. +The variable `indian-default-script' specifies the script of the glyph. +Optional argument SCRIPT, if non-nil, overrides `indian-default-script'. +See also the function `indian-char-glyph'." + (or script + (setq script indian-default-script)) + (let ((offset (get script 'indian-glyph-code-offset))) + (or (integerp offset) + (error "Invalid script name: %s" script)) + (or (and (>= index 0) (< index 256)) + (error "Invalid glyph index: %d" index)) + (setq index (+ offset index)) + (make-char 'indian-glyph (+ (/ index 96) 32) (+ (% index 96) 32)))) + +(defvar indian-glyph-max-char + (indian-glyph-char + 255 (aref indian-script-table (1- (length indian-script-table)))) + "The maximum valid code of characters in the charset `indian-glyph'") + +(defun indian-char-glyph (char) + "Return information about the glphy code for CHAR of `indian-glyph' charset. +The value is (INDEX . SCRIPT), where INDEX is the glyph index +in the font that Indian script name SCRIPT specifies. +See also the function `indian-glyph-char'." + (let ((split (split-char char)) + code) + (or (eq (car split) 'indian-glyph) + (error "Charset of `%c' is not indian-glyph" char)) + (or (<= char indian-glyph-max-char) + (error "Invalid indian-glyph char: %d" char)) + (setq code (+ (* (- (nth 1 split) 32) 96) (nth 2 split) -32)) + (cons (% code 256) (aref indian-script-table (/ code 256))))) + +(define-ccl-program ccl-encode-indian-glyph-font + `(0 + ;; Shorten (r1 = (((((r1 - 32) * 96) + r2) - 32) % 256)) + (r1 = ((((r1 * 96) + r2) - ,(+ (* 32 96) 32)) % 256)))) + +(setq font-ccl-encoder-alist + (cons (cons "-CDAC" 'ccl-encode-indian-glyph-font) + font-ccl-encoder-alist)) + +(setq font-ccl-encoder-alist + (cons '("ISO10646.*-1" . ccl-encode-unicode-font) + font-ccl-encoder-alist)) (provide 'indian) - + ;;; indian.el ends here |