From: Pierre Ossman
Date: Mon, 5 Oct 2020 14:01:55 +0000 (+0200)
Subject: Fix UTF-16 encoding/decoding of high code points
X-Git-Tag: v1.11.90~99^2~3
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=6345c0f60f37f598a40536578938a6cd623b6e7f;p=tigervnc.git

Fix UTF-16 encoding/decoding of high code points

Everything outside of BMP was handled incorrectly and was coded as completely different code points.
---

diff --git a/common/rfb/util.cxx b/common/rfb/util.cxx
index 00e2831c..85b668fc 100644
--- a/common/rfb/util.cxx
+++ b/common/rfb/util.cxx
@@ -315,8 +315,9 @@ namespace rfb {
       *dst++ = L'\0';
       return 1;
     } else if (src < 0x110000) {
-      *dst++ = 0xd800 | ((src >> 10) & 0x07ff);
-      *dst++ = 0xdc00 | (src & 0x07ff);
+      src -= 0x10000;
+      *dst++ = 0xd800 | ((src >> 10) & 0x03ff);
+      *dst++ = 0xdc00 | (src & 0x03ff);
       *dst++ = L'\0';
       return 2;
     } else {
@@ -358,7 +359,7 @@ namespace rfb {
       return 1;
     }
 
-    *dst = 0x10000 | ((*dst & 0x03ff) << 10);
+    *dst = 0x10000 + ((*dst & 0x03ff) << 10);
     *dst |= *src & 0x3ff;
 
     return 2;
diff --git a/tests/unit/unicode.cxx b/tests/unit/unicode.cxx
index 748f4347..4bcb65b3 100644
--- a/tests/unit/unicode.cxx
+++ b/tests/unit/unicode.cxx
@@ -50,6 +50,8 @@ struct _ucs4utf16 ucs4utf16[] = {
   { 0x0061, L"a" },
   { 0x00f6, L"\xf6" },
   { 0x263a, L"\x263a" },
+  { 0x1f638, L"\xd83d\xde38" },
+  { 0x2d006, L"\xd874\xdc06" },
   { 0xfffd, L"\xdc40\xdc12" },
   { 0x110200, L"\xfffd" },
 };
@@ -58,6 +60,8 @@ struct _utf8utf16 utf8utf16[] = {
   { "abc", L"abc" },
   { "\xc3\xa5\xc3\xa4\xc3\xb6", L"\xe5\xe4\xf6" },
   { "\xe2\x98\xb9\xe2\x98\xba\xe2\x98\xbb", L"\x2639\x263a\x263b" },
+  { "\xf0\x9f\x98\xb8\xf0\x9f\x99\x81\xf0\x9f\x99\x82", L"\xd83d\xde38\xd83d\xde41\xd83d\xde42" },
+  { "\xf0\xad\x80\x86\xf0\xad\x80\x88", L"\xd874\xdc06\xd874\xdc08" },
   { "\xef\xbf\xbd\xc3\xa5", L"\xd840\xe5" },
 };
 