diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-05-13 16:22:37 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-05-13 16:35:43 +0100 |
commit | d3e506655f0b7335f272c703c070889678bb6718 (patch) | |
tree | a0b80640e700e86157813647f57980a86e8022d0 /contrib/replxx/src | |
parent | 2c9a6cfca599bba34075baba133edb156a861c07 (diff) | |
download | rspamd-d3e506655f0b7335f272c703c070889678bb6718.tar.gz rspamd-d3e506655f0b7335f272c703c070889678bb6718.zip |
[Fix] Fix incomplete utf8 sequences handling
Diffstat (limited to 'contrib/replxx/src')
-rw-r--r-- | contrib/replxx/src/conversion.cxx | 12 |
1 file changed, 12 insertions, 0 deletions
```diff
diff --git a/contrib/replxx/src/conversion.cxx b/contrib/replxx/src/conversion.cxx
index 8d724cc3c..ce9bd932b 100644
--- a/contrib/replxx/src/conversion.cxx
+++ b/contrib/replxx/src/conversion.cxx
@@ -54,9 +54,21 @@ ConversionResult copyString8to32(char32_t* dst, int dstSize, int& dstCount, cons
 		while (i < slen && j < dstSize) {
 			UChar32 uc;
+			auto prev_i = i;
 			U8_NEXT (sourceStart, i, slen, uc);
 			if (uc <= 0) {
+				if (U8_IS_LEAD (sourceStart[prev_i])) {
+					auto lead_byte = sourceStart[prev_i];
+					auto trailing_bytes = (((uint8_t)(lead_byte)>=0xc2)+
+							((uint8_t)(lead_byte)>=0xe0)+
+							((uint8_t)(lead_byte)>=0xf0));
+
+					if (trailing_bytes + i > slen) {
+						return ConversionResult::sourceExhausted;
+					}
+				}
+
 				/* Replace with 0xFFFD */
 				uc = 0x0000FFFD;
 			}
```