summaryrefslogtreecommitdiff
path: root/admin/unidata/emoji-zwj.awk
blob: 7ef0efafb11dd0740c8c73803687ddc6d0cee127 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/awk -f

## Copyright (C) 2020 Free Software Foundation, Inc.

## Author: Robert Pluim <rpluim@gmail.com>

## This file is part of GNU Emacs.

## GNU Emacs is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## GNU Emacs is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.

### Commentary:

## This script takes as input Unicode's emoji-zwj-sequences.txt
## and produces output for Emacs's lisp/international/emoji-zwj.el.
## It also outputs the composition sequences for flags, UK flags, and
## skin tones which have been derived from emoji-sequences.txt by hand.

## For additional details, see <https://debbugs.gnu.org/39799#8>.

## Things to do after installing a new version of
## emoji-zwj-sequences.txt and emoji-sequences.txt:
## Check the output against the old output.  See if there are any new
## composition sequences in emoji-sequences.txt that need to be
## added.  Rebuild Emacs, visit emoji-zwj-sequences.txt and
## emoji-sequences.txt, and check that the various sequences are being
## composed properly.  Don't forget to install an appropriate font,
## such as Noto Color Emoji.

### Code:

## Data lines start with an upper-case hex code point.  Strip everything
## from the first ";" onwards, then append the remaining code points,
## written as one double-quoted string of \N{U+XXXX} escapes, to the
## newline-separated list kept in vec[] under the first code point of
## the sequence.  ch[] records which leading code points have been seen.
/^[0-9A-F]/ {
    sub(/ *;.*/, "")
    count = split($0, cps)
    lead = cps[1]

    if (lead in ch)
    {
        ## Not the first sequence for this lead: separate the entries.
        vec[lead] = vec[lead] "\n"
    }
    else
    {
        ch[lead] = lead
        vec[lead] = ""
    }

    quoted = "\""
    for (i = 1; i <= count; i++)
        quoted = quoted "\\N{U+" cps[i] "}"
    quoted = quoted "\""

    vec[lead] = vec[lead] quoted
}

## Write the generated emoji-zwj.el to standard output once all input
## has been read.  The output consists of:
##   - a header and a `dolist' that, for each leading code point seen in
##     the input, appends a rule built from the regexp-opt of its
##     sequences (vec[]) to composition-function-table;
##   - three hand-maintained rules for flags, UK flags and skin tones.
## The three hand-maintained rules are emitted as top-level forms: the
## `dolist' is closed before them, so they are evaluated exactly once
## instead of once per leading code point.
END {
    print ";;; emoji-zwj.el --- emoji zwj character composition table"
    print ";;; Automatically generated from admin/unidata/emoji-zwj-sequences.txt"
    print "(eval-when-compile (require 'regexp-opt))"
    print "(dolist (elt `("

    ## One (#xLEAD . REGEXP) pair per leading code point.  Note that
    ## awk does not define the order in which "for (elt in ch)" visits
    ## the keys, so the order of the pairs may differ between awks.
    for (elt in ch)
    {
        printf("(#x%s .\n,(eval-when-compile (regexp-opt\n'(\n%s\n))))\n", elt, vec[elt])
    }
    print "))"
    print "  (set-char-table-range composition-function-table"
    print "                        (car elt)"
    print "                        (nconc (char-table-range composition-function-table (car elt))"
    print "                               (list (vector (cdr elt)"
    print "                                             0"
    ## The fifth closing paren ends the dolist itself.
    print "                                             'compose-gstring-for-graphic)))))"

    print ";; The following three blocks are derived by hand from emoji-sequences.txt"
    print ";; FIXME: add support for Emoji_Keycap_Sequence once we learn how to respect FE0F/VS-16"
    print ";; for ASCII characters."

    print ";; Flags"
    print "(set-char-table-range composition-function-table"
    print "                      '(#x1F1E6 . #x1F1FF)"
    print "                      (nconc (char-table-range composition-function-table '(#x1F1E6 . #x1F1FF))"
    print "                             (list (vector \"[\\U0001F1E6-\\U0001F1FF][\\U0001F1E6-\\U0001F1FF]\""
    print "                                           0"
    print "                                    'compose-gstring-for-graphic))))"

    print ";; UK Flags"
    print "(set-char-table-range composition-function-table"
    print "                      #x1F3F4"
    print "                      (nconc (char-table-range composition-function-table #x1F3F4)"
    print "                             (list (vector \"\\U0001F3F4\\U000E0067\\U000E0062\\(?:\\U000E0065\\U000E006E\\U000E0067\\|\\U000E0073\\U000E0063\\U000E0074\\|\\U000E0077\\U000E006C\\U000E0073\\)\\U000E007F\""
    print "                                           0"
    print "                                    'compose-gstring-for-graphic))))"

    print ";; Skin tones"
    print "(set-char-table-range composition-function-table"
    print "                      '(#x1F3FB . #x1F3FF)"
    print "                      (nconc (char-table-range composition-function-table '(#x1F3FB . #x1F3FF))"
    print "                             (list (vector \".[\\U0001F3FB-\\U0001F3FF]\""
    print "                                           1"
    ## Four parens close this form only; the dolist was closed above.
    print "                                    'compose-gstring-for-graphic))))"

    print "\n"
    print "(provide 'emoji-zwj)"
}