*dst++ = 0x80 | (src & 0x3f);
*dst++ = '\0';
return 2;
+ } else if ((src >= 0xd800) && (src < 0xe000)) {
+ return ucs4ToUTF8(0xfffd, dst);
} else if (src < 0x10000) {
*dst++ = 0xe0 | (src >> 12);
*dst++ = 0x80 | ((src >> 6) & 0x3f);
max--;
}
+ // UTF-16 surrogate code point?
+ if ((*dst >= 0xd800) && (*dst < 0xe000))
+ *dst = 0xfffd;
+
return consumed;
}
{ 0x1f638, "\xf0\x9f\x98\xb8" },
{ 0x2d006, "\xf0\xad\x80\x86" },
{ 0xfffd, "\xe5\xe4" },
+ { 0xfffd, "\xed\xa2\x80" },
+ { 0xfffd, "\xed\xbb\xbf" },
+ { 0xd880, "\xef\xbf\xbd" },
+ { 0xdeff, "\xef\xbf\xbd" },
{ 0x110200, "\xef\xbf\xbd" },
};
// Byte strings that are not well-formed UTF-8. Judging by the name these are
// inputs a validity check is expected to reject; the code consuming this
// table is outside this view -- confirm against the caller.
const char *invalidutf8[] = {
"\xe5\xe4\xf6", // 0xe5 opens a 3-byte sequence, but 0xe4 is not a continuation byte (10xxxxxx)
"\xf8\xa1\xa1\xa1\xa1", // 0xf8 would open a 5-byte sequence, which UTF-8 no longer permits
+ "\xed\xa2\x80", // structurally valid 3-byte form, but decodes to U+D880, a UTF-16 surrogate
};
const wchar_t *validutf16[] = {