From: Pierre Ossman Date: Sat, 18 Mar 2023 12:53:26 +0000 (+0100) Subject: Clean up string encoding handling X-Git-Tag: v1.13.90~70^2~1 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=c061a78dc1f7242cfcaf42049d5248e4eed39ff4;p=tigervnc.git Clean up string encoding handling We should handle this in the low-level protocol code as much as possible to avoid mistakes. This way the rest of the code can assume that strings are always UTF-8 with \n line endings. --- diff --git a/common/rfb/CConnection.cxx b/common/rfb/CConnection.cxx index c07df216..a043f9bd 100644 --- a/common/rfb/CConnection.cxx +++ b/common/rfb/CConnection.cxx @@ -543,7 +543,7 @@ void CConnection::serverCutText(const char* str) { hasLocalClipboard = false; - serverClipboard = latin1ToUTF8(str); + serverClipboard = str; hasRemoteClipboard = true; handleClipboardAnnounce(true); @@ -604,6 +604,11 @@ void CConnection::handleClipboardProvide(uint32_t flags, return; } + // FIXME: This conversion magic should be in CMsgReader + if (!isValidUTF8((const char*)data[0], lengths[0])) { + vlog.error("Invalid UTF-8 sequence in clipboard - ignoring"); + return; + } serverClipboard = convertLF((const char*)data[0], lengths[0]); hasRemoteClipboard = true; @@ -674,6 +679,7 @@ void CConnection::announceClipboard(bool available) void CConnection::sendClipboardData(const char* data) { if (server.clipboardFlags() & rfb::clipboardProvide) { + // FIXME: This conversion magic should be in CMsgWriter std::string filtered(convertCRLF(data)); size_t sizes[1] = { filtered.size() + 1 }; const uint8_t* data[1] = { (const uint8_t*)filtered.c_str() }; @@ -690,9 +696,7 @@ void CConnection::sendClipboardData(const char* data) writer()->writeClipboardProvide(rfb::clipboardUTF8, sizes, data); } else { - std::string latin1(utf8ToLatin1(data)); - - writer()->writeClientCutText(latin1.c_str()); + writer()->writeClientCutText(data); } } diff --git a/common/rfb/CMsgReader.cxx b/common/rfb/CMsgReader.cxx index c0a96690..006645df 100644 --- a/common/rfb/CMsgReader.cxx +++ b/common/rfb/CMsgReader.cxx @@ -76,7 +76,12 @@ bool CMsgReader::readServerInit() std::vector name(len + 1); is->readBytes((uint8_t*)name.data(), len); name[len] = '\0'; - handler->serverInit(width, height, pf, name.data()); + + if (isValidUTF8(name.data())) + handler->serverInit(width, height, pf, name.data()); + else + handler->serverInit(width, height, pf, + latin1ToUTF8(name.data()).c_str()); return true; } @@ -275,9 +280,13 @@ bool CMsgReader::readServerCutText() vlog.error("cut text too long (%d bytes) - ignoring",len); return true; } + std::vector ca(len); is->readBytes((uint8_t*)ca.data(), len); - std::string filtered(convertLF(ca.data(), len)); + + std::string utf8(latin1ToUTF8(ca.data(), ca.size())); + std::string filtered(convertLF(utf8.data(), utf8.size())); + handler->serverCutText(filtered.c_str()); return true; @@ -768,10 +777,16 @@ bool CMsgReader::readSetDesktopName(int x, int y, int w, int h) if (x || y || w || h) { vlog.error("Ignoring DesktopName rect with non-zero position/size"); - } else { - handler->setName(name.data()); + return true; } + if (!isValidUTF8(name.data())) { + vlog.error("Ignoring DesktopName rect with invalid UTF-8 sequence"); + return true; + } + + handler->setName(name.data()); + return true; } diff --git a/common/rfb/CMsgWriter.cxx b/common/rfb/CMsgWriter.cxx index 0ff81926..e941aaa7 100644 --- a/common/rfb/CMsgWriter.cxx +++ b/common/rfb/CMsgWriter.cxx @@ -36,6 +36,7 @@ #include #include #include +#include using namespace rfb; @@ -191,16 +192,15 @@ void CMsgWriter::writePointerEvent(const Point& pos, int buttonMask) void CMsgWriter::writeClientCutText(const char* str) { - size_t len; - if (strchr(str, '\r') != NULL) throw Exception("Invalid carriage return in clipboard data"); - len = strlen(str); + std::string latin1(utf8ToLatin1(str)); + startMsg(msgTypeClientCutText); os->pad(3); - os->writeU32(len); - os->writeBytes((const uint8_t*)str, len); + os->writeU32(latin1.size()); + os->writeBytes((const uint8_t*)latin1.data(), latin1.size()); endMsg(); } diff --git a/common/rfb/SConnection.cxx b/common/rfb/SConnection.cxx index 2b18d8c1..33b2d850 100644 --- a/common/rfb/SConnection.cxx +++ b/common/rfb/SConnection.cxx @@ -383,7 +383,7 @@ void SConnection::clientCutText(const char* str) { hasLocalClipboard = false; - clientClipboard = latin1ToUTF8(str); + clientClipboard = str; hasRemoteClipboard = true; handleClipboardAnnounce(true); @@ -429,6 +429,11 @@ void SConnection::handleClipboardProvide(uint32_t flags, return; } + // FIXME: This conversion magic should be in SMsgReader + if (!isValidUTF8((const char*)data[0], lengths[0])) { + vlog.error("Invalid UTF-8 sequence in clipboard - ignoring"); + return; + } clientClipboard = convertLF((const char*)data[0], lengths[0]); hasRemoteClipboard = true; @@ -592,6 +597,7 @@ void SConnection::sendClipboardData(const char* data) { if (client.supportsEncoding(pseudoEncodingExtendedClipboard) && (client.clipboardFlags() & rfb::clipboardProvide)) { + // FIXME: This conversion magic should be in SMsgWriter std::string filtered(convertCRLF(data)); size_t sizes[1] = { filtered.size() + 1 }; const uint8_t* data[1] = { (const uint8_t*)filtered.c_str() }; @@ -608,9 +614,7 @@ void SConnection::sendClipboardData(const char* data) writer()->writeClipboardProvide(rfb::clipboardUTF8, sizes, data); } else { - std::string latin1(utf8ToLatin1(data)); - - writer()->writeServerCutText(latin1.c_str()); + writer()->writeServerCutText(data); } } diff --git a/common/rfb/SMsgReader.cxx b/common/rfb/SMsgReader.cxx index 68c9365b..0792639a 100644 --- a/common/rfb/SMsgReader.cxx +++ b/common/rfb/SMsgReader.cxx @@ -316,7 +316,10 @@ bool SMsgReader::readClientCutText() std::vector ca(len); is->readBytes((uint8_t*)ca.data(), len); - std::string filtered(convertLF(ca.data(), len)); + + std::string utf8(latin1ToUTF8(ca.data(), ca.size())); + std::string filtered(convertLF(utf8.data(), utf8.size())); + handler->clientCutText(filtered.c_str()); return true; diff --git a/common/rfb/SMsgWriter.cxx b/common/rfb/SMsgWriter.cxx index 95f85352..8592e6f4 100644 --- a/common/rfb/SMsgWriter.cxx +++ b/common/rfb/SMsgWriter.cxx @@ -38,6 +38,7 @@ #include #include #include +#include using namespace rfb; @@ -92,16 +93,15 @@ void SMsgWriter::writeBell() void SMsgWriter::writeServerCutText(const char* str) { - size_t len; - if (strchr(str, '\r') != NULL) throw Exception("Invalid carriage return in clipboard data"); - len = strlen(str); + std::string latin1(utf8ToLatin1(str)); + startMsg(msgTypeServerCutText); os->pad(3); - os->writeU32(len); - os->writeBytes((const uint8_t*)str, len); + os->writeU32(latin1.size()); + os->writeBytes((const uint8_t*)latin1.data(), latin1.size()); endMsg(); } diff --git a/common/rfb/util.cxx b/common/rfb/util.cxx index c5c00bbd..a3f16443 100644 --- a/common/rfb/util.cxx +++ b/common/rfb/util.cxx @@ -564,6 +564,40 @@ namespace rfb { return out; } + bool isValidUTF8(const char* str, size_t bytes) + { + while ((bytes > 0) && (*str != '\0')) { + size_t len; + unsigned ucs; + + len = utf8ToUCS4(str, bytes, &ucs); + str += len; + bytes -= len; + + if (ucs == 0xfffd) + return false; + } + + return true; + } + + bool isValidUTF16(const wchar_t* wstr, size_t units) + { + while ((units > 0) && (*wstr != '\0')) { + size_t len; + unsigned ucs; + + len = utf16ToUCS4(wstr, units, &ucs); + wstr += len; + units -= len; + + if (ucs == 0xfffd) + return false; + } + + return true; + } + unsigned msBetween(const struct timeval *first, const struct timeval *second) { diff --git a/common/rfb/util.h b/common/rfb/util.h index 34811e3f..cafea209 100644 --- a/common/rfb/util.h +++ b/common/rfb/util.h @@ -68,6 +68,9 @@ namespace rfb { std::string utf16ToUTF8(const wchar_t* src, size_t units = (size_t)-1); std::wstring utf8ToUTF16(const char* src, size_t bytes = (size_t)-1); + bool isValidUTF8(const char* str, size_t bytes = (size_t)-1); + bool isValidUTF16(const wchar_t* wstr, size_t units = (size_t)-1); + // HELPER functions for timeout handling // soonestTimeout() is a function to help work out the soonest of several diff --git a/tests/unit/unicode.cxx b/tests/unit/unicode.cxx index 4618135d..d4e567e9 100644 --- a/tests/unit/unicode.cxx +++ b/tests/unit/unicode.cxx @@ -84,6 +84,27 @@ struct _utf8utf16 utf8utf16[] = { { "\xed\xa1\xbf", L"\xfffd" }, }; +const char *validutf8[] = { + "abc", + "\xc3\xa5\xc3\xa4\xc3\xb6", + "\xf0\xad\x80\x86", +}; + +const char *invalidutf8[] = { + "\xe5\xe4\xf6", + "\xf8\xa1\xa1\xa1\xa1", +}; + +const wchar_t *validutf16[] = { + L"abc", + L"\xe5\xe4\xf6", + L"\xd83d\xde38\xd83d\xde41\xd83d\xde42", +}; + +const wchar_t *invalidutf16[] = { + L"\xdc40\xdc12", +}; + #define ARRAY_SIZE(a) (sizeof(a)/sizeof(*a)) int main(int /*argc*/, char** /*argv*/) @@ -196,6 +217,34 @@ int main(int /*argc*/, char** /*argv*/) } } + for (i = 0;i < ARRAY_SIZE(validutf8);i++) { + if (!rfb::isValidUTF8(validutf8[i])) { + printf("FAILED: isValidUTF8() #%d\n", (int)i+1); + failures++; + } + } + + for (i = 0;i < ARRAY_SIZE(invalidutf8);i++) { + if (rfb::isValidUTF8(invalidutf8[i])) { + printf("FAILED: ! isValidUTF8() #%d\n", (int)i+1); + failures++; + } + } + + for (i = 0;i < ARRAY_SIZE(validutf16);i++) { + if (!rfb::isValidUTF16(validutf16[i])) { + printf("FAILED: isValidUTF16() #%d\n", (int)i+1); + failures++; + } + } + + for (i = 0;i < ARRAY_SIZE(invalidutf16);i++) { + if (rfb::isValidUTF16(invalidutf16[i])) { + printf("FAILED: ! isValidUTF16() #%d\n", (int)i+1); + failures++; + } + } + if (failures == 0) { printf("OK\n"); } else { diff --git a/unix/xserver/hw/vnc/RFBGlue.cc b/unix/xserver/hw/vnc/RFBGlue.cc index 2fbc27ee..25431b65 100644 --- a/unix/xserver/hw/vnc/RFBGlue.cc +++ b/unix/xserver/hw/vnc/RFBGlue.cc @@ -248,3 +248,12 @@ char* vncUTF8ToLatin1(const char* src, size_t bytes) return NULL; } } + +int vncIsValidUTF8(const char* str, size_t bytes) +{ + try { + return isValidUTF8(str, bytes); + } catch (...) { + return 0; + } +} diff --git a/unix/xserver/hw/vnc/RFBGlue.h b/unix/xserver/hw/vnc/RFBGlue.h index d2b2e8aa..30c13bd2 100644 --- a/unix/xserver/hw/vnc/RFBGlue.h +++ b/unix/xserver/hw/vnc/RFBGlue.h @@ -53,6 +53,8 @@ char* vncConvertLF(const char* src, size_t bytes); char* vncLatin1ToUTF8(const char* src, size_t bytes); char* vncUTF8ToLatin1(const char* src, size_t bytes); +int vncIsValidUTF8(const char* str, size_t bytes); + #ifdef __cplusplus } #endif diff --git a/unix/xserver/hw/vnc/vncSelection.c b/unix/xserver/hw/vnc/vncSelection.c index 1ed35149..f7f3c51d 100644 --- a/unix/xserver/hw/vnc/vncSelection.c +++ b/unix/xserver/hw/vnc/vncSelection.c @@ -611,6 +611,11 @@ static void vncHandleSelection(Atom selection, Atom target, if (prop->type != xaUTF8_STRING) return; + if (!vncIsValidUTF8(prop->data, prop->size)) { + LOG_ERROR("Invalid UTF-8 sequence in clipboard"); + return; + } + filtered = vncConvertLF(prop->data, prop->size); if (filtered == NULL) return; diff --git a/vncviewer/Viewport.cxx b/vncviewer/Viewport.cxx index c501764a..ff85ee5d 100644 --- a/vncviewer/Viewport.cxx +++ b/vncviewer/Viewport.cxx @@ -567,6 +567,11 @@ int Viewport::handle(int event) switch (event) { case FL_PASTE: + if (!isValidUTF8(Fl::event_text(), Fl::event_length())) { + vlog.error("Invalid UTF-8 sequence in system clipboard"); + return 1; + } + filtered = convertLF(Fl::event_text(), Fl::event_length()); vlog.debug("Sending clipboard data (%d bytes)", (int)filtered.size());