source.dussan.org Git - rspamd.git/commitdiff
[Fix] Fix incomplete utf8 sequences handling
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 13 May 2020 15:22:37 +0000 (16:22 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 13 May 2020 15:35:43 +0000 (16:35 +0100)
contrib/replxx/src/conversion.cxx

index 8d724cc3cc3faf86c2f0742a1116b0037ebaa7fe..ce9bd932b11081d8c6b01b7bc519b12a0fdd3b25 100644 (file)
@@ -54,9 +54,21 @@ ConversionResult copyString8to32(char32_t* dst, int dstSize, int& dstCount, cons
 
                while (i < slen && j < dstSize) {
                        UChar32 uc;
+                       auto prev_i = i;
                        U8_NEXT (sourceStart, i, slen, uc);
 
                        if (uc <= 0) {
+                               if (U8_IS_LEAD (sourceStart[prev_i])) {
+                                       auto lead_byte = sourceStart[prev_i];
+                                       auto trailing_bytes = (((uint8_t)(lead_byte)>=0xc2)+
+                                                       ((uint8_t)(lead_byte)>=0xe0)+
+                                                       ((uint8_t)(lead_byte)>=0xf0));
+
+                                       if (trailing_bytes + i > slen) {
+                                               return ConversionResult::sourceExhausted;
+                                       }
+                               }
+
                                /* Replace with 0xFFFD */
                                uc = 0x0000FFFD;
                        }