/* $XFree86$
 * This module converts keysym values into the corresponding ISO 10646
 * (UCS, Unicode) values, and back.
 *
 * The keysym -> UTF-8 conversion will hopefully one day be provided
 * by Xlib via XmbLookupString() and should ideally not have to be
 * done in X applications. But we are not there yet.
 *
 * Any UCS character in the range U-00000000 to U-00FFFFFF can be
 * represented by a keysym value in the range 0x01000000 to 0x01ffffff.
 * This admittedly does not cover the entire 31-bit space of UCS, but
 * it does cover all of the characters up to U-10FFFF, which can be
 * represented by UTF-16, and more, and it is very unlikely that higher
 * UCS codes will ever be assigned by ISO. So to get Unicode character
 * U+ABCD you can directly use keysym 0x0100abcd.
 *
 * Author: Markus G. Kuhn,
 *         University of Cambridge, April 2001
 *
 * Special thanks to Richard Verhoeven for preparing
 * an initial draft of the mapping table.
 *
 * This software is in the public domain. Share and enjoy!
 */

#include "keysym2ucs.h"
#include "keyucsmap.h"

#define NoSymbol 0

/* Number of elements in a true array (not a pointer), as an int. */
#define TABLE_ENTRIES(t) ((int)(sizeof(t) / sizeof((t)[0])))

/* One spacing character and the combining character that replaces it. */
struct combiningpair {
    unsigned short spacing;
    unsigned short combining;
};

/*
 * Dead-key keysyms and the combining characters they stand for.
 * keysym2ucs() looks this table up with a binary search, so the
 * entries must stay sorted by ascending keysym (first column).
 */
static const struct codepair deadtab[] = {
    { 0xfe50, 0x0300 }, /* dead_grave            `  COMBINING GRAVE ACCENT */
    { 0xfe51, 0x0301 }, /* dead_acute            ´  COMBINING ACUTE ACCENT */
    { 0xfe52, 0x0302 }, /* dead_circumflex       ^  COMBINING CIRCUMFLEX ACCENT */
    { 0xfe53, 0x0303 }, /* dead_tilde            ~  COMBINING TILDE */
    { 0xfe54, 0x0304 }, /* dead_macron           ¯  COMBINING MACRON */
    { 0xfe55, 0x0306 }, /* dead_breve            ˘  COMBINING BREVE */
    { 0xfe56, 0x0307 }, /* dead_abovedot         ˙  COMBINING DOT ABOVE */
    { 0xfe57, 0x0308 }, /* dead_diaeresis        ¨  COMBINING DIAERESIS */
    { 0xfe58, 0x030a }, /* dead_abovering        ˚  COMBINING RING ABOVE */
    { 0xfe59, 0x030b }, /* dead_doubleacute      ˝  COMBINING DOUBLE ACUTE ACCENT */
    { 0xfe5a, 0x030c }, /* dead_caron            ˇ  COMBINING CARON */
    { 0xfe5b, 0x0327 }, /* dead_cedilla          ¸  COMBINING CEDILLA */
    { 0xfe5c, 0x0328 }, /* dead_ogonek           ˛  COMBINING OGONEK */
    { 0xfe5d, 0x0345 }, /* dead_iota             ͺ  COMBINING GREEK YPOGEGRAMMENI */
    { 0xfe5e, 0x3099 }, /* dead_voiced_sound     ゛ COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK */
    { 0xfe5f, 0x309a }, /* dead_semivoiced_sound ゜ COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */
    { 0xfe60, 0x0323 }, /* dead_belowdot         .  COMBINING DOT BELOW */
};

/*
 * Spacing characters and their combining counterparts. This table is
 * scanned linearly by ucs2combining(), so its order is not significant.
 */
static const struct combiningpair combinetab[] = {
    { 0x0060, 0x0300 }, /* GRAVE ACCENT          `  COMBINING GRAVE ACCENT */
    { 0x00b4, 0x0301 }, /* ACUTE ACCENT          ´  COMBINING ACUTE ACCENT */
    { 0x0027, 0x0301 }, /* APOSTROPHE            '  COMBINING ACUTE ACCENT */
    { 0x0384, 0x0301 }, /* GREEK TONOS           ΄  COMBINING ACUTE ACCENT */
    { 0x005e, 0x0302 }, /* CIRCUMFLEX ACCENT     ^  COMBINING CIRCUMFLEX ACCENT */
    { 0x007e, 0x0303 }, /* TILDE                 ~  COMBINING TILDE */
    { 0x00af, 0x0304 }, /* MACRON                ¯  COMBINING MACRON */
    { 0x02d8, 0x0306 }, /* BREVE                 ˘  COMBINING BREVE */
    { 0x02d9, 0x0307 }, /* DOT ABOVE             ˙  COMBINING DOT ABOVE */
    { 0x00a8, 0x0308 }, /* DIAERESIS             ¨  COMBINING DIAERESIS */
    { 0x0022, 0x0308 }, /* QUOTATION MARK        "  COMBINING DIAERESIS */
    { 0x02da, 0x030a }, /* RING ABOVE            ˚  COMBINING RING ABOVE */
    { 0x00b0, 0x030a }, /* DEGREE SIGN           °  COMBINING RING ABOVE */
    { 0x02dd, 0x030b }, /* DOUBLE ACUTE ACCENT   ˝  COMBINING DOUBLE ACUTE ACCENT */
    { 0x02c7, 0x030c }, /* CARON                 ˇ  COMBINING CARON */
    { 0x00b8, 0x0327 }, /* CEDILLA               ¸  COMBINING CEDILLA */
    { 0x02db, 0x0328 }, /* OGONEK                ˛  COMBINING OGONEK */
    { 0x037a, 0x0345 }, /* GREEK YPOGEGRAMMENI   ͺ  COMBINING GREEK YPOGEGRAMMENI */
    { 0x309b, 0x3099 }, /* KATAKANA-HIRAGANA VOICED SOUND MARK      ゛ COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK */
    { 0x309c, 0x309a }, /* KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK ゜ COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */
    { 0x002e, 0x0323 }, /* FULL STOP             .  COMBINING DOT BELOW */
    { 0x0385, 0x0344 }, /* GREEK DIALYTIKA TONOS ΅  COMBINING GREEK DIALYTIKA TONOS */
};

/*
 * Binary-search the first `entries` elements of `table` for `keysym`.
 * The table must be sorted by ascending keysym.
 *
 * Returns the matching entry's ucs value, or (unsigned)-1 when the
 * keysym is not in the table.
 */
static unsigned find_ucs(unsigned keysym, const struct codepair *table,
                         int entries)
{
    int lo = 0;
    int hi = entries - 1;   /* inclusive upper bound */

    while (lo <= hi) {
        int probe = lo + (hi - lo) / 2;
        const struct codepair *entry = &table[probe];

        if (entry->keysym == keysym) {
            return entry->ucs;
        }
        if (entry->keysym < keysym) {
            lo = probe + 1;
        } else {
            hi = probe - 1;
        }
    }

    return (unsigned)-1;
}

/*
 * Scan the first `entries` elements of `table` front to back for an
 * entry whose ucs field equals `ucs`.
 *
 * Returns the keysym of the first match, or NoSymbol when there is none.
 */
static unsigned find_sym(unsigned ucs, const struct codepair *table,
                         int entries)
{
    int i;

    for (i = 0; i < entries; i++) {
        if (table[i].ucs == ucs) {
            return table[i].keysym;
        }
    }

    return NoSymbol;
}

/*
 * Convert a keysym to its UCS value.
 *
 * Returns the UCS value, or (unsigned)-1 when the keysym has no known
 * Unicode equivalent.
 */
unsigned keysym2ucs(unsigned keysym)
{
    unsigned result;

    /* Latin-1 characters map 1:1. */
    if (keysym >= 0x0020 && keysym <= 0x007e) {
        return keysym;
    }
    if (keysym >= 0x00a0 && keysym <= 0x00ff) {
        return keysym;
    }

    /* Directly encoded 24-bit UCS character: strip the 0x01 tag byte. */
    if ((keysym & 0xff000000) == 0x01000000) {
        return keysym & 0x00ffffff;
    }

    /* Try the normal keys first and fall back to the dead keys. */
    result = find_ucs(keysym, keysymtab, TABLE_ENTRIES(keysymtab));
    if (result == (unsigned)-1) {
        result = find_ucs(keysym, deadtab, TABLE_ENTRIES(deadtab));
    }

    /* Still (unsigned)-1 here if neither table knew the keysym. */
    return result;
}

/*
 * Convert a UCS value to a keysym.
 *
 * Returns the keysym, or NoSymbol when none can be produced (the value
 * is in neither table and does not fit in 24 bits).
 */
unsigned ucs2keysym(unsigned ucs)
{
    unsigned keysym;

    /* Latin-1 characters map 1:1. */
    if (ucs >= 0x0020 && ucs <= 0x007e) {
        return ucs;
    }
    if (ucs >= 0x00a0 && ucs <= 0x00ff) {
        return ucs;
    }

    /* Try the normal keys first and fall back to the dead keys. */
    keysym = find_sym(ucs, keysymtab, TABLE_ENTRIES(keysymtab));
    if (keysym == NoSymbol) {
        keysym = find_sym(ucs, deadtab, TABLE_ENTRIES(deadtab));
    }
    if (keysym != NoSymbol) {
        return keysym;
    }

    /* Otherwise use the directly encoded 24-bit UCS form, if it fits. */
    return ((ucs & 0xff000000) == 0) ? (ucs | 0x01000000) : NoSymbol;
}

/*
 * Map a spacing character to its combining counterpart.
 *
 * Returns the combining character of the first combinetab entry whose
 * spacing field equals `spacing`, or (unsigned)-1 when there is none.
 */
unsigned ucs2combining(unsigned spacing)
{
    int i;

    for (i = 0; i < TABLE_ENTRIES(combinetab); i++) {
        if (combinetab[i].spacing == spacing) {
            return combinetab[i].combining;
        }
    }

    /* no matching Unicode value found */
    return (unsigned)-1;
}