diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-08-24 15:53:32 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-08-24 15:53:32 +0100 |
commit | 94f656018d8e26ffe7b91897ee159001ab4c3a5f (patch) | |
tree | 80b386dc97dd2c4036664065c00190cadddc4cce /contrib/replxx/src/conversion.cxx | |
parent | 4893fc8dc5b54968be8949fe3b45fc7326cbb90f (diff) | |
download | rspamd-94f656018d8e26ffe7b91897ee159001ab4c3a5f.tar.gz rspamd-94f656018d8e26ffe7b91897ee159001ab4c3a5f.zip |
[Minor] Rework replxx to make it compatible with Rspamd again
Diffstat (limited to 'contrib/replxx/src/conversion.cxx')
-rw-r--r-- | contrib/replxx/src/conversion.cxx | 86 |
1 files changed, 56 insertions, 30 deletions
diff --git a/contrib/replxx/src/conversion.cxx b/contrib/replxx/src/conversion.cxx index bcdbe048e..f629f910e 100644 --- a/contrib/replxx/src/conversion.cxx +++ b/contrib/replxx/src/conversion.cxx @@ -2,8 +2,9 @@ #include <string> #include <cstring> #include <cctype> -#include <locale.h> +#include <clocale> +#include "unicode/utf8.h" #include "conversion.hxx" #ifdef _WIN32 @@ -44,20 +45,38 @@ bool is8BitEncoding( is_8bit_encoding() ); ConversionResult copyString8to32(char32_t* dst, int dstSize, int& dstCount, const char* src) { ConversionResult res = ConversionResult::conversionOK; if ( ! locale::is8BitEncoding ) { - const UTF8* sourceStart = reinterpret_cast<const UTF8*>(src); - const UTF8* sourceEnd = sourceStart + strlen(src); - UTF32* targetStart = reinterpret_cast<UTF32*>(dst); - UTF32* targetEnd = targetStart + dstSize; - - res = ConvertUTF8toUTF32( - &sourceStart, sourceEnd, &targetStart, targetEnd, lenientConversion); + auto sourceStart = reinterpret_cast<const unsigned char*>(src); + auto slen = strlen(src); + auto targetStart = reinterpret_cast<UChar32*>(dst); + int i = 0, j = 0; + + while (i < slen && j < dstSize) { + UChar32 uc; + auto prev_i = i; + U8_NEXT (sourceStart, i, slen, uc); + + if (uc <= 0) { + if (U8_IS_LEAD (sourceStart[prev_i])) { + auto lead_byte = sourceStart[prev_i]; + auto trailing_bytes = (((uint8_t)(lead_byte)>=0xc2)+ + ((uint8_t)(lead_byte)>=0xe0)+ + ((uint8_t)(lead_byte)>=0xf0)); + + if (trailing_bytes + i > slen) { + return ConversionResult::sourceExhausted; + } + } + + /* Replace with 0xFFFD */ + uc = 0x0000FFFD; + } + targetStart[j++] = uc; + } - if (res == conversionOK) { - dstCount = static_cast<int>( targetStart - reinterpret_cast<UTF32*>( dst ) ); + dstCount = j; - if (dstCount < dstSize) { - *targetStart = 0; - } + if (j < dstSize) { + targetStart[j] = 0; } } else { for ( dstCount = 0; ( dstCount < dstSize ) && src[dstCount]; ++ dstCount ) { @@ -69,26 +88,32 @@ ConversionResult copyString8to32(char32_t* dst, int dstSize, 
int& dstCount, cons ConversionResult copyString8to32(char32_t* dst, int dstSize, int& dstCount, const char8_t* src) { return copyString8to32( - dst, dstSize, dstCount, reinterpret_cast<const char*>(src) + dst, dstSize, dstCount, reinterpret_cast<const char*>(src) ); } -int copyString32to8( char* dst, int dstSize, const char32_t* src, int srcSize ) { - int resCount( 0 ); +int copyString32to8( + char* dst, int dstSize, const char32_t* src, int srcSize +) { + int resCount = 0; + if ( ! locale::is8BitEncoding ) { - const UTF32* sourceStart = reinterpret_cast<const UTF32*>(src); - const UTF32* sourceEnd = sourceStart + srcSize; - UTF8* targetStart = reinterpret_cast<UTF8*>(dst); - UTF8* targetEnd = targetStart + dstSize; - - ConversionResult res = ConvertUTF32toUTF8( - &sourceStart, sourceEnd, &targetStart, targetEnd, lenientConversion - ); - - if ( res == conversionOK ) { - resCount = static_cast<int>( targetStart - reinterpret_cast<UTF8*>( dst ) ); - if ( resCount < dstSize ) { - *targetStart = 0; + int j = 0; + UBool is_error = 0; + + for (auto i = 0; i < srcSize; i ++) { + U8_APPEND ((uint8_t *)dst, j, dstSize, src[i], is_error); + + if (is_error) { + break; + } + } + + if (!is_error) { + resCount = j; + + if (j < dstSize) { + dst[j] = '\0'; } } } else { @@ -101,7 +126,8 @@ int copyString32to8( char* dst, int dstSize, const char32_t* src, int srcSize ) dst[i] = 0; } } - return ( resCount ); + + return resCount; } } |