UTF-16 surrogate code points (0xd800 to 0xdfff) are not valid outside of UTF-16, so seeing them in a UTF-8 sequence means that something is wrong with that sequence. It is best to filter them out rather than letting them propagate and have unknown effects.

1年前 · 94d6a693ea
--- a/common/rfb/util.cxx
+++ b/common/rfb/util.cxx
      *dst++ = 0x80 | (src & 0x3f);
      *dst++ = '\0';
      return 2;
    } else if ((src >= 0xd800) && (src < 0xe000)) {
      return ucs4ToUTF8(0xfffd, dst);
    } else if (src < 0x10000) {
      *dst++ = 0xe0 | (src >> 12);
      *dst++ = 0x80 | ((src >> 6) & 0x3f);
      max--;
    }
    // UTF-16 surrogate code point?
    if ((*dst >= 0xd800) && (*dst < 0xe000))
      *dst = 0xfffd;
    return consumed;
  }
--- a/tests/unit/unicode.cxx
+++ b/tests/unit/unicode.cxx
    { 0x1f638, "\xf0\x9f\x98\xb8" },
    { 0x2d006, "\xf0\xad\x80\x86" },
    { 0xfffd, "\xe5\xe4" },
    { 0xfffd, "\xed\xa2\x80" },
    { 0xfffd, "\xed\xbb\xbf" },
    { 0xd880, "\xef\xbf\xbd" },
    { 0xdeff, "\xef\xbf\xbd" },
    { 0x110200, "\xef\xbf\xbd" },
 };
 // UTF-8 byte sequences that are malformed or encode a disallowed code point.
 const char *invalidutf8[] = {
    // 0xe5 opens a three-byte sequence, but 0xe4 is not a continuation byte
    "\xe5\xe4\xf6",
    // 0xf8 would open a five-byte sequence, which UTF-8 does not permit
    "\xf8\xa1\xa1\xa1\xa1",
    // Well-formed three-byte pattern, but it decodes to the surrogate 0xd880
    "\xed\xa2\x80",
 };
 const wchar_t *validutf16[] = {