From c061a78dc1f7242cfcaf42049d5248e4eed39ff4 Mon Sep 17 00:00:00 2001
From: Pierre Ossman
Date: Sat, 18 Mar 2023 13:53:26 +0100
Subject: Clean up string encoding handling

We should handle this in the low-level protocol code as much as possible
to avoid mistakes. This way the rest of the code can assume that strings
are always UTF-8 with \n line endings.
---
 tests/unit/unicode.cxx | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

(limited to 'tests/unit/unicode.cxx')

diff --git a/tests/unit/unicode.cxx b/tests/unit/unicode.cxx
index 4618135d..d4e567e9 100644
--- a/tests/unit/unicode.cxx
+++ b/tests/unit/unicode.cxx
@@ -84,6 +84,27 @@ struct _utf8utf16 utf8utf16[] = {
   { "\xed\xa1\xbf", L"\xfffd" },
 };
 
+const char *validutf8[] = {
+  "abc",
+  "\xc3\xa5\xc3\xa4\xc3\xb6",
+  "\xf0\xad\x80\x86",
+};
+
+const char *invalidutf8[] = {
+  "\xe5\xe4\xf6",
+  "\xf8\xa1\xa1\xa1\xa1",
+};
+
+const wchar_t *validutf16[] = {
+  L"abc",
+  L"\xe5\xe4\xf6",
+  L"\xd83d\xde38\xd83d\xde41\xd83d\xde42",
+};
+
+const wchar_t *invalidutf16[] = {
+  L"\xdc40\xdc12",
+};
+
 #define ARRAY_SIZE(a) (sizeof(a)/sizeof(*a))
 
 int main(int /*argc*/, char** /*argv*/)
@@ -196,6 +217,34 @@ int main(int /*argc*/, char** /*argv*/)
     }
   }
 
+  for (i = 0;i < ARRAY_SIZE(validutf8);i++) {
+    if (!rfb::isValidUTF8(validutf8[i])) {
+      printf("FAILED: isValidUTF8() #%d\n", (int)i+1);
+      failures++;
+    }
+  }
+
+  for (i = 0;i < ARRAY_SIZE(invalidutf8);i++) {
+    if (rfb::isValidUTF8(invalidutf8[i])) {
+      printf("FAILED: ! isValidUTF8() #%d\n", (int)i+1);
+      failures++;
+    }
+  }
+
+  for (i = 0;i < ARRAY_SIZE(validutf16);i++) {
+    if (!rfb::isValidUTF16(validutf16[i])) {
+      printf("FAILED: isValidUTF16() #%d\n", (int)i+1);
+      failures++;
+    }
+  }
+
+  for (i = 0;i < ARRAY_SIZE(invalidutf16);i++) {
+    if (rfb::isValidUTF16(invalidutf16[i])) {
+      printf("FAILED: ! isValidUTF16() #%d\n", (int)i+1);
+      failures++;
+    }
+  }
+
   if (failures == 0) {
     printf("OK\n");
   } else {
-- 
cgit v1.2.3