From 703eb4d13016090d1b50294faaf9b57c6ed9a738 Mon Sep 17 00:00:00 2001
From: Pierre Ossman
Date: Mon, 5 Oct 2020 16:07:27 +0200
Subject: Correctly handle invalid UTF-16 code points

Some code points are reserved for the UTF-16 coding itself and must
not appear as input data to the algorithm.
---
 common/rfb/util.cxx    | 2 +-
 tests/unit/unicode.cxx | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/common/rfb/util.cxx b/common/rfb/util.cxx
index cc5b7690..ecab8485 100644
--- a/common/rfb/util.cxx
+++ b/common/rfb/util.cxx
@@ -314,7 +314,7 @@ namespace rfb {
       *dst++ = src;
       *dst++ = L'\0';
       return 1;
-    } else if (src < 0x110000) {
+    } else if ((src >= 0x10000) && (src < 0x110000)) {
       src -= 0x10000;
       *dst++ = 0xd800 | ((src >> 10) & 0x03ff);
       *dst++ = 0xdc00 | (src & 0x03ff);
diff --git a/tests/unit/unicode.cxx b/tests/unit/unicode.cxx
index bb2525de..52548e00 100644
--- a/tests/unit/unicode.cxx
+++ b/tests/unit/unicode.cxx
@@ -59,6 +59,7 @@ struct _ucs4utf16 ucs4utf16[] = {
     { 0x2d006, L"\xd874\xdc06" },
     { 0xfffd, L"\xdc40\xdc12" },
     { 0x110200, L"\xfffd" },
+    { 0xd87f, L"\xfffd" },
 };
 
 struct _latin1utf8 latin1utf8[] = {
@@ -75,6 +76,7 @@ struct _utf8utf16 utf8utf16[] = {
     { "\xf0\x9f\x98\xb8\xf0\x9f\x99\x81\xf0\x9f\x99\x82", L"\xd83d\xde38\xd83d\xde41\xd83d\xde42" },
     { "\xf0\xad\x80\x86\xf0\xad\x80\x88", L"\xd874\xdc06\xd874\xdc08" },
     { "\xef\xbf\xbd\xc3\xa5", L"\xd840\xe5" },
+    { "\xed\xa1\xbf", L"\xfffd" },
 };
 
 #define ARRAY_SIZE(a) (sizeof(a)/sizeof(*a))
-- 
cgit v1.2.3