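Fix conversion of latin-1 to UTF-8

Read each input byte through an unsigned char pointer before passing it to ucs4ToUTF8(). Where plain char is signed, a latin-1 byte above 0x7f would otherwise be sign-extended and no longer equal its code point.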

author Pierre Ossman <ossman@cendio.se>

Mon, 5 Oct 2020 14:05:15 +0000 (16:05 +0200)

committer Pierre Ossman <ossman@cendio.se>

Mon, 5 Oct 2020 14:05:15 +0000 (16:05 +0200)
author Pierre Ossman <ossman@cendio.se>
Mon, 5 Oct 2020 14:05:15 +0000 (16:05 +0200)
committer Pierre Ossman <ossman@cendio.se>
Mon, 5 Oct 2020 14:05:15 +0000 (16:05 +0200)
diff --git a/common/rfb/util.cxx b/common/rfb/util.cxx

index 85b668fc69aa51a659119eda12ec3ca1995d522f..cc5b76905d527875e14f49b61d09e2afdad2d038 100644 (file)
--- a/common/rfb/util.cxx
+++ b/common/rfb/util.cxx
@@ -381,7 +381,7 @@ namespace rfb {
      in_len = bytes;
      while ((in_len > 0) && (*in != '\0')) {
        char buf[5];
-      sz += ucs4ToUTF8(*in, buf);
+      sz += ucs4ToUTF8(*(const unsigned char*)in, buf);
        in++;
        in_len--;
      }
@@ -395,7 +395,7 @@ namespace rfb {
      in = src;
      in_len = bytes;
      while ((in_len > 0) && (*in != '\0')) {
-      out += ucs4ToUTF8(*in, out);
+      out += ucs4ToUTF8(*(const unsigned char*)in, out);
        in++;
        in_len--;
      }
diff --git a/tests/unit/unicode.cxx b/tests/unit/unicode.cxx

index 4bcb65b3c403675f8c07323b76b0b831ab460018..bb2525de0c52b0800f718844927e9336b1105522 100644 (file)
--- a/tests/unit/unicode.cxx
+++ b/tests/unit/unicode.cxx
@@ -31,6 +31,11 @@ struct _ucs4utf16 {
      const wchar_t *utf16;
  };
  
+struct _latin1utf8 {
+    const char *latin1;
+    const char *utf8;
+};
+
  struct _utf8utf16 {
      const char *utf8;
      const wchar_t *utf16;
@@ -56,6 +61,13 @@ struct _ucs4utf16 ucs4utf16[] = {
      { 0x110200, L"\xfffd" },
  };
  
+struct _latin1utf8 latin1utf8[] = {
+    { "abc",            "abc" },
+    { "\xe5\xe4\xf6",   "\xc3\xa5\xc3\xa4\xc3\xb6" },
+    { "???",            "\xe2\x98\xb9\xe2\x98\xba\xe2\x98\xbb" },
+    { "?",              "\xe5\xe4" },
+};
+
  struct _utf8utf16 utf8utf16[] = {
      { "abc",                                                L"abc" },
      { "\xc3\xa5\xc3\xa4\xc3\xb6",                           L"\xe5\xe4\xf6" },
@@ -133,6 +145,28 @@ int main(int argc, char** argv)
          }
      }
  
+    for (i = 0;i < ARRAY_SIZE(latin1utf8);i++) {
+        /* Expected failure? */
+        if (strchr(latin1utf8[i].latin1, '?') != NULL)
+            continue;
+
+        out = rfb::latin1ToUTF8(latin1utf8[i].latin1);
+        if (strcmp(out, latin1utf8[i].utf8) != 0) {
+            printf("FAILED: latin1ToUTF8() #%d\n", (int)i+1);
+            failures++;
+        }
+        rfb::strFree(out);
+    }
+
+    for (i = 0;i < ARRAY_SIZE(latin1utf8);i++) {
+        out = rfb::utf8ToLatin1(latin1utf8[i].utf8);
+        if (strcmp(out, latin1utf8[i].latin1) != 0) {
+            printf("FAILED: utf8ToLatin1() #%d\n", (int)i+1);
+            failures++;
+        }
+        rfb::strFree(out);
+    }
+
      for (i = 0;i < ARRAY_SIZE(utf8utf16);i++) {
          /* Expected failure? */
          if (wcscmp(utf8utf16[i].utf16, L"\xfffd") == 0)
author	Pierre Ossman <ossman@cendio.se>
	Mon, 5 Oct 2020 14:05:15 +0000 (16:05 +0200)
committer	Pierre Ossman <ossman@cendio.se>
	Mon, 5 Oct 2020 14:05:15 +0000 (16:05 +0200)
common/rfb/util.cxx		patch | blob | history
tests/unit/unicode.cxx		patch | blob | history