aboutsummaryrefslogtreecommitdiffstats
path: root/vncviewer/keysym2ucs.c
blob: 6607e3065c0097e56e163ada323bf5f773ead541 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
/* $XFree86$
 * This module converts keysym values into the corresponding ISO 10646
 * (UCS, Unicode) values.
 *
 * The keysym -> UTF-8 conversion will hopefully one day be provided
 * by Xlib via XmbLookupString() and should ideally not have to be
 * done in X applications. But we are not there yet.
 *
 * We allow to represent any UCS character in the range U-00000000 to
 * U-00FFFFFF by a keysym value in the range 0x01000000 to 0x01ffffff.
 * This admittedly does not cover the entire 31-bit space of UCS, but
 * it does cover all of the characters up to U-10FFFF, which can be
 * represented by UTF-16, and more, and it is very unlikely that higher
 * UCS codes will ever be assigned by ISO. So to get Unicode character
 * U+ABCD you can directly use keysym 0x0100abcd.
 *
 * Author: Markus G. Kuhn <http://www.cl.cam.ac.uk/~mgk25/>,
 *         University of Cambridge, April 2001
 *
 * Special thanks to Richard Verhoeven <river@win.tue.nl> for preparing
 * an initial draft of the mapping table.
 *
 * This software is in the public domain. Share and enjoy!
 */

#include "keysym2ucs.h"
#include "keyucsmap.h"

#define NoSymbol 0

struct combiningpair {
  unsigned short spacing;
  unsigned short combining;
};

static const struct codepair deadtab[] = {
  { 0xfe50, 0x0300 }, /*                  dead_grave ` COMBINING GRAVE ACCENT */
  { 0xfe51, 0x0301 }, /*                  dead_acute ´ COMBINING ACUTE ACCENT */
  { 0xfe52, 0x0302 }, /*             dead_circumflex ^ COMBINING CIRCUMFLEX ACCENT */
  { 0xfe53, 0x0303 }, /*                  dead_tilde ~ COMBINING TILDE */
  { 0xfe54, 0x0304 }, /*                 dead_macron ¯ COMBINING MACRON */
  { 0xfe55, 0x0306 }, /*                  dead_breve ˘ COMBINING BREVE */
  { 0xfe56, 0x0307 }, /*               dead_abovedot ˙ COMBINING DOT ABOVE */
  { 0xfe57, 0x0308 }, /*              dead_diaeresis ¨ COMBINING DIAERESIS */
  { 0xfe58, 0x030a }, /*              dead_abovering ˚ COMBINING RING ABOVE */
  { 0xfe59, 0x030b }, /*            dead_doubleacute ˝ COMBINING DOUBLE ACUTE ACCENT */
  { 0xfe5a, 0x030c }, /*                  dead_caron ˇ COMBINING CARON */
  { 0xfe5b, 0x0327 }, /*                dead_cedilla ¸ COMBINING CEDILLA */
  { 0xfe5c, 0x0328 }, /*                 dead_ogonek ¸ COMBINING OGONEK */
  { 0xfe5d, 0x0345 }, /*                   dead_iota ͺ COMBINING GREEK YPOGEGRAMMENI */
  { 0xfe5e, 0x3099 }, /*           dead_voiced_sound ゛ COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK */
  { 0xfe5f, 0x309a }, /*       dead_semivoiced_sound ゜ COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */
  { 0xfe60, 0x0323 }, /*               dead_belowdot . COMBINING DOT BELOW */
};

static const struct combiningpair combinetab[] = {
  { 0x0060, 0x0300 }, /*                GRAVE ACCENT ` COMBINING GRAVE ACCENT */
  { 0x00b4, 0x0301 }, /*                ACUTE ACCENT ´ COMBINING ACUTE ACCENT */
  { 0x0027, 0x0301 }, /*                  APOSTROPHE ' COMBINING ACUTE ACCENT */
  { 0x0384, 0x0301 }, /*                 GREEK TONOS ΄ COMBINING ACUTE ACCENT */
  { 0x005e, 0x0302 }, /*           CIRCUMFLEX ACCENT ^ COMBINING CIRCUMFLEX ACCENT */
  { 0x007e, 0x0303 }, /*                       TILDE ~ COMBINING TILDE */
  { 0x00af, 0x0304 }, /*                      MACRON ¯ COMBINING MACRON */
  { 0x02d8, 0x0306 }, /*                       BREVE ˘ COMBINING BREVE */
  { 0x02d9, 0x0307 }, /*                   DOT ABOVE ˙ COMBINING DOT ABOVE */
  { 0x00a8, 0x0308 }, /*                   DIAERESIS ¨ COMBINING DIAERESIS */
  { 0x0022, 0x0308 }, /*              QUOTATION MARK " COMBINING DIAERESIS */
  { 0x02da, 0x030a }, /*                  RING ABOVE ˚ COMBINING RING ABOVE */
  { 0x00b0, 0x030a }, /*                 DEGREE SIGN ° COMBINING RING ABOVE */
  { 0x02dd, 0x030b }, /*         DOUBLE ACUTE ACCENT ˝ COMBINING DOUBLE ACUTE ACCENT */
  { 0x02c7, 0x030c }, /*                       CARON ˇ COMBINING CARON */
  { 0x00b8, 0x0327 }, /*                     CEDILLA ¸ COMBINING CEDILLA */
  { 0x02db, 0x0328 }, /*                      OGONEK ¸ COMBINING OGONEK */
  { 0x037a, 0x0345 }, /*         GREEK YPOGEGRAMMENI ͺ COMBINING GREEK YPOGEGRAMMENI */
  { 0x309b, 0x3099 }, /* KATAKANA-HIRAGANA VOICED SOUND MARK ゛COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK */
  { 0x309c, 0x309a }, /* KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK ゜COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */
  { 0x002e, 0x0323 }, /*                   FULL STOP . COMBINING DOT BELOW */
  { 0x0385, 0x0344 }, /*       GREEK DIALYTIKA TONOS ΅ COMBINING GREEK DIALYTIKA TONOS */
};

static unsigned find_ucs(unsigned keysym,
                         const struct codepair *table, int entries)
{
  int min = 0;
  int max = entries - 1;
  int mid;

  /* binary search in table */
  while (max >= min) {
    mid = (min + max) / 2;
    if (table[mid].keysym < keysym)
      min = mid + 1;
    else if (table[mid].keysym > keysym)
      max = mid - 1;
    else {
      /* found it */
      return table[mid].ucs;
    }
  }

  return -1;
}

static unsigned find_sym(unsigned ucs,
                         const struct codepair *table, int entries)
{
  int cur = 0;
  int max = entries - 1;

  /* linear search in table */
  while (cur <= max) {
    if (table[cur].ucs == ucs)
      return table[cur].keysym;
    cur++;
  }

  return NoSymbol;
}

unsigned keysym2ucs(unsigned keysym)
{
  unsigned ucs;

  /* first check for Latin-1 characters (1:1 mapping) */
  if ((keysym >= 0x0020 && keysym <= 0x007e) ||
      (keysym >= 0x00a0 && keysym <= 0x00ff))
    return keysym;

  /* also check for directly encoded 24-bit UCS characters */
  if ((keysym & 0xff000000) == 0x01000000)
    return keysym & 0x00ffffff;

  /* normal key? */
  ucs = find_ucs(keysym, keysymtab,
                 sizeof(keysymtab) / sizeof(struct codepair));
  if (ucs != (unsigned)-1)
    return ucs;

  /* dead key? */
  ucs = find_ucs(keysym, deadtab,
                 sizeof(deadtab) / sizeof(struct codepair));
  if (ucs != (unsigned)-1)
    return ucs;

  /* no matching Unicode value found */
  return -1;
}

unsigned ucs2keysym(unsigned ucs)
{
  unsigned keysym;

  /* first check for Latin-1 characters (1:1 mapping) */
  if ((ucs >= 0x0020 && ucs <= 0x007e) ||
      (ucs >= 0x00a0 && ucs <= 0x00ff))
    return ucs;

  /* normal key? */
  keysym = find_sym(ucs, keysymtab,
                    sizeof(keysymtab) / sizeof(struct codepair));
  if (keysym != NoSymbol)
    return keysym;

  /* dead key? */
  keysym = find_sym(ucs, deadtab,
                    sizeof(deadtab) / sizeof(struct codepair));
  if (keysym != NoSymbol)
    return keysym;

  /* us the directly encoded 24-bit UCS character */
  if ((ucs & 0xff000000) == 0)
    return ucs | 0x01000000;

  /* no matching keysym value found */
  return NoSymbol;
}

unsigned ucs2combining(unsigned spacing)
{
  int cur = 0;
  int max = sizeof(combinetab) / sizeof(struct combiningpair) - 1;

  /* linear search in table */
  while (cur <= max) {
    if (combinetab[cur].spacing == spacing)
      return combinetab[cur].combining;
    cur++;
  }

  /* no matching Unicode value found */
  return -1;
}