Don't allow surrogate code points in UTF-8

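Surrogate code points (U+D800 through U+DFFF) are not valid outside of UTF-16, so seeing one in a UTF-8 sequence, or being asked to encode one as UTF-8, means that something is wrong with the data. It is best to replace them with the replacement character (U+FFFD) rather than letting them propagate and have unknown effects.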
author: Pierre Ossman <ossman@cendio.se> 2023-03-18 14:59:27 +0100
committer: Pierre Ossman <ossman@cendio.se> 2023-06-30 21:39:52 +0200
commit: 94d6a693eac3401c50723ccc75aa11fc7017782d (patch)
tree: d5a40fe882f2a9d316d0a12b39ec270257543b50
parent: c061a78dc1f7242cfcaf42049d5248e4eed39ff4 (diff)
download: tigervnc-94d6a693eac3401c50723ccc75aa11fc7017782d.tar.gz
tigervnc-94d6a693eac3401c50723ccc75aa11fc7017782d.zip
2 files changed, 11 insertions, 0 deletions
diff --git a/common/rfb/util.cxx b/common/rfb/util.cxx
index a3f16443..d1a8cc33 100644
--- a/common/rfb/util.cxx
+++ b/common/rfb/util.cxx
@@ -266,6 +266,8 @@ namespace rfb {
       *dst++ = 0x80 | (src & 0x3f);
       *dst++ = '\0';
       return 2;
+    } else if ((src >= 0xd800) && (src < 0xe000)) {
+      return ucs4ToUTF8(0xfffd, dst);
     } else if (src < 0x10000) {
       *dst++ = 0xe0 | (src >> 12);
       *dst++ = 0x80 | ((src >> 6) & 0x3f);
@@ -334,6 +336,10 @@ namespace rfb {
       max--;
     }
 
+    // UTF-16 surrogate code point?
+    if ((*dst >= 0xd800) && (*dst < 0xe000))
+      *dst = 0xfffd;
+
     return consumed;
   }
 
diff --git a/tests/unit/unicode.cxx b/tests/unit/unicode.cxx
index d4e567e9..cb15e7e1 100644
--- a/tests/unit/unicode.cxx
+++ b/tests/unit/unicode.cxx
@@ -53,6 +53,10 @@ struct _ucs4utf8 ucs4utf8[] = {
     { 0x1f638, "\xf0\x9f\x98\xb8" },
     { 0x2d006, "\xf0\xad\x80\x86" },
     { 0xfffd, "\xe5\xe4" },
+    { 0xfffd, "\xed\xa2\x80" },
+    { 0xfffd, "\xed\xbb\xbf" },
+    { 0xd880, "\xef\xbf\xbd" },
+    { 0xdeff, "\xef\xbf\xbd" },
     { 0x110200, "\xef\xbf\xbd" },
 };
 
@@ -93,6 +97,7 @@ const char *validutf8[] = {
 const char *invalidutf8[] = {
     "\xe5\xe4\xf6",
     "\xf8\xa1\xa1\xa1\xa1",
+    "\xed\xa2\x80",
 };
 
 const wchar_t *validutf16[] = {
author	Pierre Ossman <ossman@cendio.se>	2023-03-18 14:59:27 +0100
committer	Pierre Ossman <ossman@cendio.se>	2023-06-30 21:39:52 +0200
commit	94d6a693eac3401c50723ccc75aa11fc7017782d (patch)
tree	d5a40fe882f2a9d316d0a12b39ec270257543b50
parent	c061a78dc1f7242cfcaf42049d5248e4eed39ff4 (diff)
download	tigervnc-94d6a693eac3401c50723ccc75aa11fc7017782d.tar.gz tigervnc-94d6a693eac3401c50723ccc75aa11fc7017782d.zip