From d3e506655f0b7335f272c703c070889678bb6718 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Wed, 13 May 2020 16:22:37 +0100 Subject: [PATCH] [Fix] Fix incomplete utf8 sequences handling --- contrib/replxx/src/conversion.cxx | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/contrib/replxx/src/conversion.cxx b/contrib/replxx/src/conversion.cxx index 8d724cc3c..ce9bd932b 100644 --- a/contrib/replxx/src/conversion.cxx +++ b/contrib/replxx/src/conversion.cxx @@ -54,9 +54,21 @@ ConversionResult copyString8to32(char32_t* dst, int dstSize, int& dstCount, cons while (i < slen && j < dstSize) { UChar32 uc; + auto prev_i = i; U8_NEXT (sourceStart, i, slen, uc); if (uc <= 0) { + if (U8_IS_LEAD (sourceStart[prev_i])) { + auto lead_byte = sourceStart[prev_i]; + auto trailing_bytes = (((uint8_t)(lead_byte)>=0xc2)+ + ((uint8_t)(lead_byte)>=0xe0)+ + ((uint8_t)(lead_byte)>=0xf0)); + + if (trailing_bytes + i > slen) { + return ConversionResult::sourceExhausted; + } + } + /* Replace with 0xFFFD */ uc = 0x0000FFFD; } -- 2.39.5