aboutsummaryrefslogtreecommitdiffstats
path: root/tests
diff options
context:
space:
mode:
author Pierre Ossman <ossman@cendio.se> 2023-03-18 14:59:27 +0100
committer Pierre Ossman <ossman@cendio.se> 2023-06-30 21:39:52 +0200
commit 94d6a693eac3401c50723ccc75aa11fc7017782d (patch)
tree d5a40fe882f2a9d316d0a12b39ec270257543b50 /tests
parent c061a78dc1f7242cfcaf42049d5248e4eed39ff4 (diff)
downloadtigervnc-94d6a693eac3401c50723ccc75aa11fc7017782d.tar.gz
tigervnc-94d6a693eac3401c50723ccc75aa11fc7017782d.zip
Don't allow surrogate code points in UTF-8
These are not valid outside of UTF-16 so seeing them in a UTF-8 sequence means that something is wrong with that sequence. Best to filter them out rather than letting them propagate and have unknown effects.
Diffstat (limited to 'tests')
-rw-r--r-- tests/unit/unicode.cxx 5
1 files changed, 5 insertions, 0 deletions
diff --git a/tests/unit/unicode.cxx b/tests/unit/unicode.cxx
index d4e567e9..cb15e7e1 100644
--- a/tests/unit/unicode.cxx
+++ b/tests/unit/unicode.cxx
@@ -53,6 +53,10 @@ struct _ucs4utf8 ucs4utf8[] = {
{ 0x1f638, "\xf0\x9f\x98\xb8" },
{ 0x2d006, "\xf0\xad\x80\x86" },
{ 0xfffd, "\xe5\xe4" },
+ { 0xfffd, "\xed\xa2\x80" },
+ { 0xfffd, "\xed\xbb\xbf" },
+ { 0xd880, "\xef\xbf\xbd" },
+ { 0xdeff, "\xef\xbf\xbd" },
{ 0x110200, "\xef\xbf\xbd" },
};
@@ -93,6 +97,7 @@ const char *validutf8[] = {
const char *invalidutf8[] = {
"\xe5\xe4\xf6",
"\xf8\xa1\xa1\xa1\xa1",
+ "\xed\xa2\x80",
};
const wchar_t *validutf16[] = {