/* $XFree86$
* This module converts keysym values into the corresponding ISO 10646
* (UCS, Unicode) values.
*
* The keysym -> UTF-8 conversion will hopefully one day be provided
* by Xlib via XmbLookupString() and should ideally not have to be
* done in X applications. But we are not there yet.
*
* We allow to represent any UCS character in the range U-00000000 to
* U-00FFFFFF by a keysym value in the range 0x01000000 to 0x01ffffff.
* This admittedly does not cover the entire 31-bit space of UCS, but
* it does cover all of the characters up to U-10FFFF, which can be
* represented by UTF-16, and more, and it is very unlikely that higher
* UCS codes will ever be assigned by ISO. So to get Unicode character
* U+ABCD you can directly use keysym 0x0100abcd.
*
* Author: Markus G. Kuhn ,
* University of Cambridge, April 2001
*
* Special thanks to Richard Verhoeven for preparing
* an initial draft of the mapping table.
*
* This software is in the public domain. Share and enjoy!
*/
#include "keysym2ucs.h"
#include "keyucsmap.h"
#define NoSymbol 0
struct combiningpair {
unsigned short spacing;
unsigned short combining;
};
static const struct codepair deadtab[] = {
{ 0xfe50, 0x0300 }, /* dead_grave ` COMBINING GRAVE ACCENT */
{ 0xfe51, 0x0301 }, /* dead_acute ´ COMBINING ACUTE ACCENT */
{ 0xfe52, 0x0302 }, /* dead_circumflex ^ COMBINING CIRCUMFLEX ACCENT */
{ 0xfe53, 0x0303 }, /* dead_tilde ~ COMBINING TILDE */
{ 0xfe54, 0x0304 }, /* dead_macron ¯ COMBINING MACRON */
{ 0xfe55, 0x0306 }, /* dead_breve ˘ COMBINING BREVE */
{ 0xfe56, 0x0307 }, /* dead_abovedot ˙ COMBINING DOT ABOVE */
{ 0xfe57, 0x0308 }, /* dead_diaeresis ¨ COMBINING DIAERESIS */
{ 0xfe58, 0x030a }, /* dead_abovering ˚ COMBINING RING ABOVE */
{ 0xfe59, 0x030b }, /* dead_doubleacute ˝ COMBINING DOUBLE ACUTE ACCENT */
{ 0xfe5a, 0x030c }, /* dead_caron ˇ COMBINING CARON */
{ 0xfe5b, 0x0327 }, /* dead_cedilla ¸ COMBINING CEDILLA */
{ 0xfe5c, 0x0328 }, /* dead_ogonek ¸ COMBINING OGONEK */
{ 0xfe5d, 0x0345 }, /* dead_iota ͺ COMBINING GREEK YPOGEGRAMMENI */
{ 0xfe5e, 0x3099 }, /* dead_voiced_sound ゛ COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK */
{ 0xfe5f, 0x309a }, /* dead_semivoiced_sound ゜ COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */
{ 0xfe60, 0x0323 }, /* dead_belowdot . COMBINING DOT BELOW */
};
static const struct combiningpair combinetab[] = {
{ 0x0060, 0x0300 }, /* GRAVE ACCENT ` COMBINING GRAVE ACCENT */
{ 0x00b4, 0x0301 }, /* ACUTE ACCENT ´ COMBINING ACUTE ACCENT */
{ 0x0027, 0x0301 }, /* APOSTROPHE ' COMBINING ACUTE ACCENT */
{ 0x0384, 0x0301 }, /* GREEK TONOS ΄ COMBINING ACUTE ACCENT */
{ 0x005e, 0x0302 }, /* CIRCUMFLEX ACCENT ^ COMBINING CIRCUMFLEX ACCENT */
{ 0x007e, 0x0303 }, /* TILDE ~ COMBINING TILDE */
{ 0x00af, 0x0304 }, /* MACRON ¯ COMBINING MACRON */
{ 0x02d8, 0x0306 }, /* BREVE ˘ COMBINING BREVE */
{ 0x02d9, 0x0307 }, /* DOT ABOVE ˙ COMBINING DOT ABOVE */
{ 0x00a8, 0x0308 }, /* DIAERESIS ¨ COMBINING DIAERESIS */
{ 0x0022, 0x0308 }, /* QUOTATION MARK " COMBINING DIAERESIS */
{ 0x02da, 0x030a }, /* RING ABOVE ˚ COMBINING RING ABOVE */
{ 0x00b0, 0x030a }, /* DEGREE SIGN ° COMBINING RING ABOVE */
{ 0x02dd, 0x030b }, /* DOUBLE ACUTE ACCENT ˝ COMBINING DOUBLE ACUTE ACCENT */
{ 0x02c7, 0x030c }, /* CARON ˇ COMBINING CARON */
{ 0x00b8, 0x0327 }, /* CEDILLA ¸ COMBINING CEDILLA */
{ 0x02db, 0x0328 }, /* OGONEK ¸ COMBINING OGONEK */
{ 0x037a, 0x0345 }, /* GREEK YPOGEGRAMMENI ͺ COMBINING GREEK YPOGEGRAMMENI */
{ 0x309b, 0x3099 }, /* KATAKANA-HIRAGANA VOICED SOUND MARK ゛COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK */
{ 0x309c, 0x309a }, /* KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK ゜COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */
{ 0x002e, 0x0323 }, /* FULL STOP . COMBINING DOT BELOW */
{ 0x0385, 0x0344 }, /* GREEK DIALYTIKA TONOS ΅ COMBINING GREEK DIALYTIKA TONOS */
};
static unsigned find_ucs(unsigned keysym,
const struct codepair *table, int entries)
{
int min = 0;
int max = entries - 1;
int mid;
/* binary search in table */
while (max >= min) {
mid = (min + max) / 2;
if (table[mid].keysym < keysym)
min = mid + 1;
else if (table[mid].keysym > keysym)
max = mid - 1;
else {
/* found it */
return table[mid].ucs;
}
}
return -1;
}
static unsigned find_sym(unsigned ucs,
const struct codepair *table, int entries)
{
int cur = 0;
int max = entries - 1;
/* linear search in table */
while (cur <= max) {
if (table[cur].ucs == ucs)
return table[cur].keysym;
cur++;
}
return NoSymbol;
}
unsigned keysym2ucs(unsigned keysym)
{
unsigned ucs;
/* first check for Latin-1 characters (1:1 mapping) */
if ((keysym >= 0x0020 && keysym <= 0x007e) ||
(keysym >= 0x00a0 && keysym <= 0x00ff))
return keysym;
/* also check for directly encoded 24-bit UCS characters */
if ((keysym & 0xff000000) == 0x01000000)
return keysym & 0x00ffffff;
/* normal key? */
ucs = find_ucs(keysym, keysymtab,
sizeof(keysymtab) / sizeof(struct codepair));
if (ucs != (unsigned)-1)
return ucs;
/* dead key? */
ucs = find_ucs(keysym, deadtab,
sizeof(deadtab) / sizeof(struct codepair));
if (ucs != (unsigned)-1)
return ucs;
/* no matching Unicode value found */
return -1;
}
unsigned ucs2keysym(unsigned ucs)
{
unsigned keysym;
/* first check for Latin-1 characters (1:1 mapping) */
if ((ucs >= 0x0020 && ucs <= 0x007e) ||
(ucs >= 0x00a0 && ucs <= 0x00ff))
return ucs;
/* normal key? */
keysym = find_sym(ucs, keysymtab,
sizeof(keysymtab) / sizeof(struct codepair));
if (keysym != NoSymbol)
return keysym;
/* dead key? */
keysym = find_sym(ucs, deadtab,
sizeof(deadtab) / sizeof(struct codepair));
if (keysym != NoSymbol)
return keysym;
/* us the directly encoded 24-bit UCS character */
if ((ucs & 0xff000000) == 0)
return ucs | 0x01000000;
/* no matching keysym value found */
return NoSymbol;
}
unsigned ucs2combining(unsigned spacing)
{
int cur = 0;
int max = sizeof(combinetab) / sizeof(struct combiningpair) - 1;
/* linear search in table */
while (cur <= max) {
if (combinetab[cur].spacing == spacing)
return combinetab[cur].combining;
cur++;
}
/* no matching Unicode value found */
return -1;
}