summaryrefslogtreecommitdiff
path: root/lisp/language
diff options
context:
space:
mode:
Diffstat (limited to 'lisp/language')
-rw-r--r--lisp/language/ethio-util.el2
-rw-r--r--lisp/language/hebrew.el188
-rw-r--r--lisp/language/misc-lang.el18
-rw-r--r--lisp/language/tai-viet.el10
-rw-r--r--lisp/language/tv-util.el3
5 files changed, 203 insertions, 18 deletions
diff --git a/lisp/language/ethio-util.el b/lisp/language/ethio-util.el
index 6aa316c8820..a2577ced1b0 100644
--- a/lisp/language/ethio-util.el
+++ b/lisp/language/ethio-util.el
@@ -869,7 +869,7 @@ Otherwise, [0-9A-F]."
(goto-char (point-min))
(while (re-search-forward "[ሀ-፼]" nil t)
(setq ucode (preceding-char))
- (delete-backward-char 1)
+ (delete-char -1)
(insert
(format (if ethio-java-save-lowercase "\\u%4x" "\\u%4X")
ucode)))))
diff --git a/lisp/language/hebrew.el b/lisp/language/hebrew.el
index 3ff2538469d..ad079c2d70b 100644
--- a/lisp/language/hebrew.el
+++ b/lisp/language/hebrew.el
@@ -1,4 +1,4 @@
-;;; hebrew.el --- support for Hebrew -*- coding: iso-2022-7bit; no-byte-compile: t -*-
+;;; hebrew.el --- support for Hebrew -*- coding: utf-8 -*-
;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
;; Free Software Foundation, Inc.
@@ -46,28 +46,27 @@
(define-coding-system-alias 'iso-8859-8 'hebrew-iso-8bit)
;; These are for Explicit and Implicit directionality information, as
-;; defined in RFC 1556. We don't yet support directional information
-;; in bidi languages, so these aliases are a lie, especially as far as
-;; iso-8859-8-e is concerned. FIXME.
+;; defined in RFC 1556.
(define-coding-system-alias 'iso-8859-8-e 'hebrew-iso-8bit)
(define-coding-system-alias 'iso-8859-8-i 'hebrew-iso-8bit)
(set-language-info-alist
- "Hebrew" '((charset iso-8859-8)
+ "Hebrew" '((tutorial . "TUTORIAL.he")
+ (charset iso-8859-8)
(coding-priority hebrew-iso-8bit)
(coding-system hebrew-iso-8bit windows-1255 cp862)
(nonascii-translation . iso-8859-8)
(input-method . "hebrew")
(unibyte-display . hebrew-iso-8bit)
- (sample-text . "Hebrew ,Hylem(B")
- (documentation . "Right-to-left writing is not yet supported.")))
+ (sample-text . "Hebrew שלום")
+ (documentation . "Bidirectional editing is supported.")))
(set-language-info-alist
"Windows-1255" '((coding-priority windows-1255)
(coding-system windows-1255)
(documentation . "\
Support for Windows-1255 encoding, e.g. for Yiddish.
-Right-to-left writing is not yet supported.")))
+Bidirectional editing is supported.")))
(define-coding-system 'windows-1255
"windows-1255 (Hebrew) encoding (MIME: WINDOWS-1255)"
@@ -85,6 +84,179 @@ Right-to-left writing is not yet supported.")))
:mime-charset 'cp862)
(define-coding-system-alias 'ibm862 'cp862)
+;; Return a nested alist of Hebrew character sequences vs the
+;; corresponding glyph of FONT-OBJECT.
+(defun hebrew-font-get-precomposed (font-object)
+ (let ((precomposed (font-get font-object 'hebrew-precomposed))
+ ;; Vector of Hebrew precomposed characters.
+ (chars [#xFB2A #xFB2B #xFB2C #xFB2D #xFB2E #xFB2F #xFB30 #xFB31
+ #xFB32 #xFB33 #xFB34 #xFB35 #xFB36 #xFB38 #xFB39 #xFB3A
+ #xFB3B #xFB3C #xFB3E #xFB40 #xFB41 #xFB43 #xFB44 #xFB46
+ #xFB47 #xFB48 #xFB49 #xFB4A #xFB4B #xFB4C #xFB4D #xFB4E])
+ ;; Vector of decomposition character sequences corresponding
+ ;; to the above vector.
+ (decomposed
+ [[#x05E9 #x05C1]
+ [#x05E9 #x05C2]
+ [#x05E9 #x05BC #x05C1]
+ [#x05E9 #x05BC #x05C2]
+ [#x05D0 #x05B7]
+ [#x05D0 #x05B8]
+ [#x05D0 #x05BC]
+ [#x05D1 #x05BC]
+ [#x05D2 #x05BC]
+ [#x05D3 #x05BC]
+ [#x05D4 #x05BC]
+ [#x05D5 #x05BC]
+ [#x05D6 #x05BC]
+ [#x05D8 #x05BC]
+ [#x05D9 #x05BC]
+ [#x05DA #x05BC]
+ [#x05DB #x05BC]
+ [#x05DC #x05BC]
+ [#x05DE #x05BC]
+ [#x05E0 #x05BC]
+ [#x05E1 #x05BC]
+ [#x05E3 #x05BC]
+ [#x05E4 #x05BC]
+ [#x05E6 #x05BC]
+ [#x05E7 #x05BC]
+ [#x05E8 #x05BC]
+ [#x05E9 #x05BC]
+ [#x05EA #x05BC]
+ [#x05D5 #x05B9]
+ [#x05D1 #x05BF]
+ [#x05DB #x05BF]
+ [#x05E4 #x05BF]]))
+ (unless precomposed
+ (setq precomposed (list t))
+ (let ((gvec (font-get-glyphs font-object 0 (length chars) chars)))
+ (dotimes (i (length chars))
+ (if (aref gvec i)
+ (set-nested-alist (aref decomposed i) (aref gvec i)
+ precomposed))))
+ ;; Cache the result in FONT-OBJECT's property.
+ (font-put font-object 'hebrew-precomposed precomposed))
+ precomposed))
+
+;; Composition function for hebrew. GSTRING is made of a Hebrew base
+;; character followed by Hebrew diacritical marks, or is made of
+;; single Hebrew diacritical mark. Adjust GSTRING to display that
+;; sequence properly. The basic strategy is:
+;;
+;; (1) If there's single diacritical, add padding space to the left
+;; and right of the glyph.
+;;
+;; (2) If the font has OpenType features for Hebrew, ask the OTF
+;; driver the whole work.
+;;
+;; (3) If the font has precomposed glyphs, use them as far as
+;; possible. Adjust the remaining glyphs artificially.
+
+(defun hebrew-shape-gstring (gstring)
+ (let* ((font (lgstring-font gstring))
+ (otf (font-get font :otf))
+ (nchars (lgstring-char-len gstring))
+ header nglyphs base-width glyph precomposed val idx)
+ (cond
+ ((= nchars 1)
+ ;; Independent diacritical mark. Add padding space to left or
+ ;; right so that the glyph doesn't overlap with the surrounding
+ ;; chars.
+ (setq glyph (lgstring-glyph gstring 0))
+ (let ((width (lglyph-width glyph))
+ bearing)
+ (if (< (setq bearing (lglyph-lbearing glyph)) 0)
+ (lglyph-set-adjustment glyph bearing 0 (- width bearing)))
+ (if (> (setq bearing (lglyph-rbearing glyph)) width)
+ (lglyph-set-adjustment glyph 0 0 bearing))))
+
+ ((or (assq 'hebr (car otf)) (assq 'hebr (cdr otf)))
+ ;; FONT has OpenType features for Hebrew.
+ (font-shape-gstring gstring))
+
+ (t
+ ;; FONT doesn't have OpenType features for Hebrew.
+ ;; Try a precomposed glyph.
+ ;; Now GSTRING is in this form:
+ ;; [[FONT CHAR1 CHAR2 ... CHARn] nil GLYPH1 GLYPH2 ... GLYPHn nil ...]
+ (setq precomposed (hebrew-font-get-precomposed font)
+ header (lgstring-header gstring)
+ val (lookup-nested-alist header precomposed nil 1))
+ (if (and (consp val) (vectorp (car val)))
+ ;; All characters can be displayed by a single precomposed glyph.
+ ;; Reform GSTRING to [HEADER nil PRECOMPOSED-GLYPH nil ...]
+ (let ((glyph (copy-sequence (car val))))
+ (lglyph-set-from-to glyph 0 (1- nchars))
+ (lgstring-set-glyph gstring 0 glyph)
+ (lgstring-set-glyph gstring 1 nil))
+ (if (and (integerp val) (> val 2)
+ (setq glyph (lookup-nested-alist header precomposed val 1))
+ (consp glyph) (vectorp (car glyph)))
+ ;; The first (1- VAL) characters can be displayed by a
+ ;; precomposed glyph. Provided that VAL is 3, the first
+ ;; two glyphs should be replaced by the precomposed glyph.
+ ;; In that case, reform GSTRING to:
+ ;; [HEADER nil PRECOMPOSED-GLYPH GLYPH3 ... GLYPHn nil ...]
+ (let* ((ncmp (1- val)) ; number of composed glyphs
+ (diff (1- ncmp))) ; number of reduced glyphs
+ (setq glyph (copy-sequence (car glyph)))
+ (lglyph-set-from-to glyph 0 (1- nchars))
+ (lgstring-set-glyph gstring 0 glyph)
+ (setq idx ncmp)
+ (while (< idx nchars)
+ (setq glyph (lgstring-glyph gstring idx))
+ (lglyph-set-from-to glyph 0 (1- nchars))
+ (lgstring-set-glyph gstring (- idx diff) glyph)
+ (setq idx (1+ idx)))
+ (lgstring-set-glyph gstring (- idx diff) nil)
+ (setq idx (- ncmp diff)
+ nglyphs (- nchars diff)))
+ (setq glyph (lgstring-glyph gstring 0))
+ (lglyph-set-from-to glyph 0 (1- nchars))
+ (setq idx 1 nglyphs nchars))
+ ;; Now IDX is an index to the first non-precomposed glyph.
+ ;; Adjust positions of the remaining glyphs artificially.
+ (setq base-width (lglyph-width (lgstring-glyph gstring 0)))
+ (while (< idx nglyphs)
+ (setq glyph (lgstring-glyph gstring idx))
+ (lglyph-set-from-to glyph 0 (1- nchars))
+ (if (>= (lglyph-lbearing glyph) (lglyph-width glyph))
+ ;; It seems that this glyph is designed to be rendered
+ ;; before the base glyph.
+ (lglyph-set-adjustment glyph (- base-width) 0 0)
+ (if (>= (lglyph-lbearing glyph) 0)
+ ;; Align the horizontal center of this glyph to the
+ ;; horizontal center of the base glyph.
+ (let ((width (- (lglyph-rbearing glyph)
+ (lglyph-lbearing glyph))))
+ (lglyph-set-adjustment glyph
+ (- (/ (- base-width width) 2)
+ (lglyph-lbearing glyph)
+ base-width) 0 0))))
+ (setq idx (1+ idx))))))
+ gstring))
+
+(let* ((base "[\u05D0-\u05F2]")
+ (combining "[\u0591-\u05BD\u05BF\u05C1-\u05C2\u05C4-\u05C5\u05C7]+")
+ (pattern1 (concat base combining))
+ (pattern2 (concat base "\u200D" combining)))
+ (set-char-table-range
+ composition-function-table '(#x591 . #x5C7)
+ (list (vector pattern2 3 'hebrew-shape-gstring)
+ (vector pattern2 2 'hebrew-shape-gstring)
+ (vector pattern1 1 'hebrew-shape-gstring)
+ [nil 0 hebrew-shape-gstring]))
+ ;; Exclude non-combining characters.
+ (set-char-table-range
+ composition-function-table #x5BE nil)
+ (set-char-table-range
+ composition-function-table #x5C0 nil)
+ (set-char-table-range
+ composition-function-table #x5C3 nil)
+ (set-char-table-range
+ composition-function-table #x5C6 nil))
+
(provide 'hebrew)
;; arch-tag: 3ca04f32-3f1e-498e-af46-8267498ba5d9
diff --git a/lisp/language/misc-lang.el b/lisp/language/misc-lang.el
index eded44e6613..e099ea542bb 100644
--- a/lisp/language/misc-lang.el
+++ b/lisp/language/misc-lang.el
@@ -40,8 +40,9 @@
IPA is International Phonetic Alphabet for English, French, German
and Italian.")))
-;; This is for Arabic. But, as we still don't have Arabic language
-;; support, we at least define a coding system here.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Arabic
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define-coding-system 'iso-8859-6
"ISO-8859-6 based encoding (MIME:ISO-8859-6)."
@@ -58,6 +59,19 @@ and Italian.")))
:mime-charset 'windows-1256)
(define-coding-system-alias 'cp1256 'windows-1256)
+(set-language-info-alist
+ "Arabic" '((charset unicode)
+ (coding-system utf-8 iso-8859-6 windows-1256)
+ (coding-priority utf-8 iso-8859-6 windows-1256)
+ (input-method . "arabic")
+ (sample-text . "Arabic السّلام عليكم")
+ (documentation . "Bidirectional editing is supported.")))
+
+(set-char-table-range
+ composition-function-table
+ '(#x600 . #x6FF)
+ (list ["[\u0600-\u06FF]+" 0 font-shape-gstring]))
+
(provide 'misc-lang)
;; arch-tag: 6953585c-1a1a-4c09-be82-a2518afb6074
diff --git a/lisp/language/tai-viet.el b/lisp/language/tai-viet.el
index a9b44e49dd6..a7029c659a8 100644
--- a/lisp/language/tai-viet.el
+++ b/lisp/language/tai-viet.el
@@ -37,7 +37,7 @@
(coding-system utf-8)
(coding-priority utf-8)
(input-method . "tai-sonla")
- (sample-text . "TaiViet (ꪁꪫꪱꪣ ꪽꪕ)\t\tꪅꪰꪙ꫃ ꪨꪮ꫃ ꪁꪫꪱ / ꪅꪾ ꪨ� ꪁꪫꪱ")
+ (sample-text . "TaiViet (ꪁꪫꪱꪣ ꪼꪕ)\t\tꪅꪰꪙꫂ ꪨꪮꫂ ꪁꪫꪱ / ꪅꪽ ꪨꪷ ꪁꪫꪱ")
(documentation . "\
TaiViet refers to the Tai language used by Tai people in
Vietnam, and also refers to the script used for this language.
@@ -45,15 +45,15 @@ Both the script and language have the same origin as that of Thai
language/script used in Thailand, but now they differ from each
other in a significant way (especially the scripts are).
-The language name is spelled as \"ꪁꪫꪱꪣ ꪽꪕ\", and the script name is
-spelled as \"ꪎ� ꪽꪕ\" in the modern form, \"ꪎꪴ ꪽꪕ\" in the traditional
-from.
+The language name is spelled as \"ꪁꪫꪱꪣ ꪼꪕ\", and the script name is
+spelled as \"ꪎ ꪼꪕ\" in the modern form, \"ꪎꪳ ꪼꪕ\" in the traditional
+form.
As the proposal for TaiViet script to the Unicode is still on
the progress, we use the Private Use Area for TaiViet
characters (U+F000..U+F07E). A TaiViet font encoded accordingly
is available at this web page:
- http://www.m17n.org/TaiViet/
+ http://www.m17n.org/viettai/
")))
(provide 'tai-viet)
diff --git a/lisp/language/tv-util.el b/lisp/language/tv-util.el
index 1e6a4cee281..15eb51a3c58 100644
--- a/lisp/language/tv-util.el
+++ b/lisp/language/tv-util.el
@@ -24,8 +24,7 @@
;;; Code
;; Regexp matching with a sequence of Tai Viet characters.
-(defconst tai-viet-re
- (format "[\xaa80-\xaac2\xaadb-\xaadf-]+"))
+(defconst tai-viet-re "[\xaa80-\xaac2\xaadb-\xaadf]+")
;; Char-table of information about glyph type of Tai Viet characters.
(defconst tai-viet-glyph-info