Browse Source

Don't allow surrogate code points in UTF-8

These are not valid outside of UTF-16 so seeing them in a UTF-8 sequence
means that something is wrong with that sequence. Best to filter them
out rather than letting them propagate and have unknown effects.
pull/1640/head
Pierre Ossman 1 year ago
parent
commit
94d6a693ea
2 changed files with 11 additions and 0 deletions
  1. 6
    0
      common/rfb/util.cxx
  2. 5
    0
      tests/unit/unicode.cxx

+ 6
- 0
common/rfb/util.cxx View File

@@ -266,6 +266,8 @@ namespace rfb {
*dst++ = 0x80 | (src & 0x3f);
*dst++ = '\0';
return 2;
} else if ((src >= 0xd800) && (src < 0xe000)) {
return ucs4ToUTF8(0xfffd, dst);
} else if (src < 0x10000) {
*dst++ = 0xe0 | (src >> 12);
*dst++ = 0x80 | ((src >> 6) & 0x3f);
@@ -334,6 +336,10 @@ namespace rfb {
max--;
}

// UTF-16 surrogate code point?
if ((*dst >= 0xd800) && (*dst < 0xe000))
*dst = 0xfffd;

return consumed;
}


+ 5
- 0
tests/unit/unicode.cxx View File

@@ -53,6 +53,10 @@ struct _ucs4utf8 ucs4utf8[] = {
{ 0x1f638, "\xf0\x9f\x98\xb8" },
{ 0x2d006, "\xf0\xad\x80\x86" },
{ 0xfffd, "\xe5\xe4" },
{ 0xfffd, "\xed\xa2\x80" },
{ 0xfffd, "\xed\xbb\xbf" },
{ 0xd880, "\xef\xbf\xbd" },
{ 0xdeff, "\xef\xbf\xbd" },
{ 0x110200, "\xef\xbf\xbd" },
};

@@ -93,6 +97,7 @@ const char *validutf8[] = {
// Byte sequences that are not well-formed UTF-8.
// NOTE(review): presumably fed to a validity check that must reject each
// entry; the consuming test is not visible in this hunk.
const char *invalidutf8[] = {
// 0xe5 is a three-byte lead, but 0xe4 and 0xf6 are not continuation bytes
// (continuation bytes have the form 10xxxxxx).
"\xe5\xe4\xf6",
// 0xf8 would start a five-byte sequence, which UTF-8 does not permit.
"\xf8\xa1\xa1\xa1\xa1",
// Structurally a three-byte sequence, but it decodes to U+D880, which lies
// in the UTF-16 surrogate range (U+D800..U+DFFF).
"\xed\xa2\x80",
};

const wchar_t *validutf16[] = {

Loading…
Cancel
Save