aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/replxx
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2020-05-13 16:22:37 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2020-05-13 16:35:43 +0100
commit d3e506655f0b7335f272c703c070889678bb6718 (patch)
tree a0b80640e700e86157813647f57980a86e8022d0 /contrib/replxx
parent 2c9a6cfca599bba34075baba133edb156a861c07 (diff)
download rspamd-d3e506655f0b7335f272c703c070889678bb6718.tar.gz
rspamd-d3e506655f0b7335f272c703c070889678bb6718.zip
[Fix] Fix incomplete utf8 sequences handling
Diffstat (limited to 'contrib/replxx')
-rw-r--r-- contrib/replxx/src/conversion.cxx 12
1 files changed, 12 insertions, 0 deletions
diff --git a/contrib/replxx/src/conversion.cxx b/contrib/replxx/src/conversion.cxx
index 8d724cc3c..ce9bd932b 100644
--- a/contrib/replxx/src/conversion.cxx
+++ b/contrib/replxx/src/conversion.cxx
@@ -54,9 +54,21 @@ ConversionResult copyString8to32(char32_t* dst, int dstSize, int& dstCount, cons
while (i < slen && j < dstSize) {
UChar32 uc;
+ auto prev_i = i;
U8_NEXT (sourceStart, i, slen, uc);
if (uc <= 0) {
+ if (U8_IS_LEAD (sourceStart[prev_i])) {
+ auto lead_byte = sourceStart[prev_i];
+ auto trailing_bytes = (((uint8_t)(lead_byte)>=0xc2)+
+ ((uint8_t)(lead_byte)>=0xe0)+
+ ((uint8_t)(lead_byte)>=0xf0));
+
+ if (trailing_bytes + i > slen) {
+ return ConversionResult::sourceExhausted;
+ }
+ }
+
/* Replace with 0xFFFD */
uc = 0x0000FFFD;
}