From 56fa7821560a60db39195e8c81d16b46e8f972c2 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Fri, 22 Jan 2016 16:40:59 +0100 Subject: [PATCH] Add UTF-8 to/from ISO 8859-1 conversion routines We convert between UTF-8 and ISO 8859-1 (latin 1) in several places so create some common routines for this. --- common/rfb/util.cxx | 166 +++++++++++++++++++++++++++++ common/rfb/util.h | 9 ++ unix/xserver/hw/vnc/RFBGlue.cc | 18 ++++ unix/xserver/hw/vnc/RFBGlue.h | 4 + unix/xserver/hw/vnc/vncSelection.c | 73 ++----------- vncviewer/Viewport.cxx | 42 +++----- vncviewer/Viewport.h | 4 +- 7 files changed, 220 insertions(+), 96 deletions(-) diff --git a/common/rfb/util.cxx b/common/rfb/util.cxx index f43a9453..deb68ca1 100644 --- a/common/rfb/util.cxx +++ b/common/rfb/util.cxx @@ -163,6 +163,172 @@ namespace rfb { return buffer; } + size_t ucs4ToUTF8(unsigned src, char* dst) { + if (src < 0x80) { + *dst++ = src; + *dst++ = '\0'; + return 1; + } else if (src < 0x800) { + *dst++ = 0xc0 | (src >> 6); + *dst++ = 0x80 | (src & 0x3f); + *dst++ = '\0'; + return 2; + } else if (src < 0x10000) { + *dst++ = 0xe0 | (src >> 12); + *dst++ = 0x80 | ((src >> 6) & 0x3f); + *dst++ = 0x80 | (src & 0x3f); + *dst++ = '\0'; + return 3; + } else if (src < 0x110000) { + *dst++ = 0xf0 | (src >> 18); + *dst++ = 0x80 | ((src >> 12) & 0x3f); + *dst++ = 0x80 | ((src >> 6) & 0x3f); + *dst++ = 0x80 | (src & 0x3f); + *dst++ = '\0'; + return 4; + } else { + return ucs4ToUTF8(0xfffd, dst); + } + } + + size_t utf8ToUCS4(const char* src, size_t max, unsigned* dst) { + size_t count, consumed; + + *dst = 0xfffd; + + if (max == 0) + return 0; + + consumed = 1; + + if ((*src & 0x80) == 0) { + *dst = *src; + count = 0; + } else if ((*src & 0xe0) == 0xc0) { + *dst = *src & 0x1f; + count = 1; + } else if ((*src & 0xf0) == 0xe0) { + *dst = *src & 0x0f; + count = 2; + } else if ((*src & 0xf8) == 0xf0) { + *dst = *src & 0x07; + count = 3; + } else { + // Invalid sequence, consume all continuation characters + src++; + 
max--; + while ((max-- > 0) && ((*src++ & 0xc0) == 0x80)) + consumed++; + return consumed; + } + + src++; + max--; + + while (count--) { + // Invalid or truncated sequence? + if ((max == 0) || ((*src & 0xc0) != 0x80)) { + *dst = 0xfffd; + return consumed; + } + + *dst <<= 6; + *dst |= *src & 0x3f; + + src++; + max--; + } + + return consumed; + } + + char* latin1ToUTF8(const char* src, size_t bytes) { + char* buffer; + size_t sz; + + char* out; + const char* in; + size_t in_len; + + // Always include space for a NULL + sz = 1; + + // Compute output size + in = src; + in_len = bytes; + while ((*in != '\0') && (in_len > 0)) { + char buf[5]; + sz += ucs4ToUTF8(*in, buf); + in++; + in_len--; + } + + // Alloc + buffer = new char[sz]; + memset(buffer, 0, sz); + + // And convert + out = buffer; + in = src; + in_len = bytes; + while ((*in != '\0') && (in_len > 0)) { + out += ucs4ToUTF8(*in, out); + in++; + in_len--; + } + + return buffer; + } + + char* utf8ToLatin1(const char* src, size_t bytes) { + char* buffer; + size_t sz; + + char* out; + const char* in; + size_t in_len; + + // Always include space for a NULL + sz = 1; + + // Compute output size + in = src; + in_len = bytes; + while ((*in != '\0') && (in_len > 0)) { + size_t len; + unsigned ucs; + + len = utf8ToUCS4(in, in_len, &ucs); + in += len; + in_len -= len; + sz++; + } + + // Alloc + buffer = new char[sz]; + memset(buffer, 0, sz); + + // And convert + out = buffer; + in = src; + in_len = bytes; + while ((*in != '\0') && (in_len > 0)) { + size_t len; + unsigned ucs; + + len = utf8ToUCS4(in, in_len, &ucs); + in += len; + in_len -= len; + + if (ucs > 0xff) + *out++ = '?'; + else + *out++ = (unsigned char)ucs; + } + + return buffer; + } + unsigned msBetween(const struct timeval *first, const struct timeval *second) { diff --git a/common/rfb/util.h b/common/rfb/util.h index de096692..7bd5cc01 100644 --- a/common/rfb/util.h +++ b/common/rfb/util.h @@ -88,6 +88,15 @@ namespace rfb { char* convertLF(const char* src, 
size_t bytes = (size_t)-1); + // Conversions between various Unicode formats. The returned strings are + // always null terminated and must be freed using strFree(). + + size_t ucs4ToUTF8(unsigned src, char* dst); + size_t utf8ToUCS4(const char* src, size_t max, unsigned* dst); + + char* latin1ToUTF8(const char* src, size_t bytes = (size_t)-1); + char* utf8ToLatin1(const char* src, size_t bytes = (size_t)-1); + // HELPER functions for timeout handling // soonestTimeout() is a function to help work out the soonest of several diff --git a/unix/xserver/hw/vnc/RFBGlue.cc b/unix/xserver/hw/vnc/RFBGlue.cc index d9c456e8..f108fae4 100644 --- a/unix/xserver/hw/vnc/RFBGlue.cc +++ b/unix/xserver/hw/vnc/RFBGlue.cc @@ -220,6 +220,24 @@ char* vncConvertLF(const char* src, size_t bytes) } } +char* vncLatin1ToUTF8(const char* src, size_t bytes) +{ + try { + return latin1ToUTF8(src, bytes); + } catch (...) 
{ + return NULL; + } +} + void vncStrFree(char* str) { strFree(str); diff --git a/unix/xserver/hw/vnc/RFBGlue.h b/unix/xserver/hw/vnc/RFBGlue.h index 8e70c680..112405b8 100644 --- a/unix/xserver/hw/vnc/RFBGlue.h +++ b/unix/xserver/hw/vnc/RFBGlue.h @@ -50,6 +50,10 @@ int vncGetSocketPort(int fd); int vncIsTCPPortUsed(int port); char* vncConvertLF(const char* src, size_t bytes); + +char* vncLatin1ToUTF8(const char* src, size_t bytes); +char* vncUTF8ToLatin1(const char* src, size_t bytes); + void vncStrFree(char* str); #ifdef __cplusplus diff --git a/unix/xserver/hw/vnc/vncSelection.c b/unix/xserver/hw/vnc/vncSelection.c index 8f4146d9..3438ac86 100644 --- a/unix/xserver/hw/vnc/vncSelection.c +++ b/unix/xserver/hw/vnc/vncSelection.c @@ -247,36 +247,16 @@ static int vncConvertSelection(ClientPtr client, Atom selection, if (rc != Success) return rc; } else if (target == xaUTF8_STRING) { - unsigned char* buffer; - unsigned char* out; - size_t len; + char* buffer; - const unsigned char* in; - size_t in_len; - - buffer = malloc(strlen(clientCutText)*2); + buffer = vncLatin1ToUTF8(clientCutText, (size_t)-1); if (buffer == NULL) return BadAlloc; - out = buffer; - len = 0; - in = clientCutText; - while (*in != '\0') { - if (*in & 0x80) { - *out++ = 0xc0 | (*in >> 6); - *out++ = 0x80 | (*in & 0x3f); - len += 2; - in++; - } else { - *out++ = *in++; - len++; - } - } - rc = dixChangeWindowProperty(serverClient, pWin, realProperty, xaUTF8_STRING, 8, PropModeReplace, - len, buffer, TRUE); - free(buffer); + strlen(buffer), buffer, TRUE); + vncStrFree(buffer); if (rc != Success) return rc; } else { @@ -424,56 +404,19 @@ static void vncHandleSelection(Atom selection, Atom target, vncStrFree(filtered); } else if (target == xaUTF8_STRING) { char *filtered; - unsigned char* buffer; - unsigned char* out; - size_t len; - - const unsigned char* in; - size_t in_len; + char* buffer; if (prop->format != 8) return; if (prop->type != xaUTF8_STRING) return; - buffer = malloc(prop->size); + buffer 
= vncUTF8ToLatin1(prop->data, prop->size); if (buffer == NULL) return; - out = buffer; - len = 0; - in = prop->data; - in_len = prop->size; - while (in_len > 0) { - if ((*in & 0x80) == 0x00) { - *out++ = *in++; - len++; - in_len--; - } else if ((*in & 0xe0) == 0xc0) { - unsigned ucs; - ucs = (*in++ & 0x1f) << 6; - in_len--; - if (in_len > 0) { - ucs |= (*in++ & 0x3f); - in_len--; - } - if (ucs <= 0xff) - *out++ = ucs; - else - *out++ = '?'; - len++; - } else { - *out++ = '?'; - len++; - do { - in++; - in_len--; - } while ((in_len > 0) && ((*in & 0xc0) == 0x80)); - } - } - - filtered = vncConvertLF(buffer, len); - free(buffer); + filtered = vncConvertLF(buffer, (size_t)-1); + vncStrFree(buffer); if (filtered == NULL) return; diff --git a/vncviewer/Viewport.cxx b/vncviewer/Viewport.cxx index 5e495992..151ecb47 100644 --- a/vncviewer/Viewport.cxx +++ b/vncviewer/Viewport.cxx @@ -235,25 +235,17 @@ void Viewport::updateWindow() void Viewport::serverCutText(const char* str) { char *buffer; - int size, ret; + size_t len; clearPendingClipboard(); if (!acceptClipboard) return; - size = fl_utf8froma(NULL, 0, str, strlen(str)); - if (size <= 0) - return; - - size++; - - buffer = new char[size]; + buffer = latin1ToUTF8(str); + len = strlen(buffer); - ret = fl_utf8froma(buffer, size, str, strlen(str)); - assert(ret < size); - - vlog.debug("Got clipboard data (%d bytes)", (int)strlen(buffer)); + vlog.debug("Got clipboard data (%d bytes)", (int)len); if (!hasFocus()) { pendingServerCutText = buffer; @@ -264,11 +256,11 @@ void Viewport::serverCutText(const char* str) // dump the data into both variants. 
#if !defined(WIN32) && !defined(__APPLE__) if (setPrimary) - Fl::copy(buffer, ret, 0); + Fl::copy(buffer, len, 0); #endif - Fl::copy(buffer, ret, 1); + Fl::copy(buffer, len, 1); - delete [] buffer; + strFree(buffer); } static const char * dotcursor_xpm[] = { @@ -550,27 +542,19 @@ void Viewport::resize(int x, int y, int w, int h) int Viewport::handle(int event) { char *buffer, *filtered; - int ret; int buttonMask, wheelMask; DownMap::const_iterator iter; switch (event) { case FL_PASTE: - buffer = new char[Fl::event_length() + 1]; - clearPendingClipboard(); - // This is documented as to ASCII, but actually does to 8859-1 - ret = fl_utf8toa(Fl::event_text(), Fl::event_length(), buffer, - Fl::event_length() + 1); - assert(ret < (Fl::event_length() + 1)); - filtered = convertLF(buffer, ret); - delete [] buffer; + buffer = utf8ToLatin1(Fl::event_text(), Fl::event_length()); + filtered = convertLF(buffer); + strFree(buffer); if (!hasFocus()) { - pendingClientCutText = new char[strlen(filtered) + 1]; - strcpy((char*)pendingClientCutText, filtered); - strFree(filtered); + pendingClientCutText = filtered; return 1; } @@ -747,9 +731,9 @@ void Viewport::handleClipboardChange(int source, void *data) void Viewport::clearPendingClipboard() { - delete [] pendingServerCutText; + strFree(pendingServerCutText); pendingServerCutText = NULL; - delete [] pendingClientCutText; + strFree(pendingClientCutText); pendingClientCutText = NULL; } diff --git a/vncviewer/Viewport.h b/vncviewer/Viewport.h index c2c9872e..8b9b469b 100644 --- a/vncviewer/Viewport.h +++ b/vncviewer/Viewport.h @@ -114,8 +114,8 @@ private: bool firstLEDState; - const char* pendingServerCutText; - const char* pendingClientCutText; + char* pendingServerCutText; + char* pendingClientCutText; rdr::U32 menuKeySym; int menuKeyCode, menuKeyFLTK; -- 2.39.5