aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/replxx/src/conversion.cxx
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2021-08-24 15:53:32 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2021-08-24 15:53:32 +0100
commit94f656018d8e26ffe7b91897ee159001ab4c3a5f (patch)
tree80b386dc97dd2c4036664065c00190cadddc4cce /contrib/replxx/src/conversion.cxx
parent4893fc8dc5b54968be8949fe3b45fc7326cbb90f (diff)
downloadrspamd-94f656018d8e26ffe7b91897ee159001ab4c3a5f.tar.gz
rspamd-94f656018d8e26ffe7b91897ee159001ab4c3a5f.zip
[Minor] Rework replxx to make it compatible with Rspamd again
Diffstat (limited to 'contrib/replxx/src/conversion.cxx')
-rw-r--r--contrib/replxx/src/conversion.cxx86
1 files changed, 56 insertions, 30 deletions
diff --git a/contrib/replxx/src/conversion.cxx b/contrib/replxx/src/conversion.cxx
index bcdbe048e..f629f910e 100644
--- a/contrib/replxx/src/conversion.cxx
+++ b/contrib/replxx/src/conversion.cxx
@@ -2,8 +2,9 @@
#include <string>
#include <cstring>
#include <cctype>
-#include <locale.h>
+#include <clocale>
+#include "unicode/utf8.h"
#include "conversion.hxx"
#ifdef _WIN32
@@ -44,20 +45,38 @@ bool is8BitEncoding( is_8bit_encoding() );
ConversionResult copyString8to32(char32_t* dst, int dstSize, int& dstCount, const char* src) {
ConversionResult res = ConversionResult::conversionOK;
if ( ! locale::is8BitEncoding ) {
- const UTF8* sourceStart = reinterpret_cast<const UTF8*>(src);
- const UTF8* sourceEnd = sourceStart + strlen(src);
- UTF32* targetStart = reinterpret_cast<UTF32*>(dst);
- UTF32* targetEnd = targetStart + dstSize;
-
- res = ConvertUTF8toUTF32(
- &sourceStart, sourceEnd, &targetStart, targetEnd, lenientConversion);
+ auto sourceStart = reinterpret_cast<const unsigned char*>(src);
+ auto slen = strlen(src);
+ auto targetStart = reinterpret_cast<UChar32*>(dst);
+ int i = 0, j = 0;
+
+ while (i < slen && j < dstSize) {
+ UChar32 uc;
+ auto prev_i = i;
+ U8_NEXT (sourceStart, i, slen, uc);
+
+ if (uc <= 0) {
+ if (U8_IS_LEAD (sourceStart[prev_i])) {
+ auto lead_byte = sourceStart[prev_i];
+ auto trailing_bytes = (((uint8_t)(lead_byte)>=0xc2)+
+ ((uint8_t)(lead_byte)>=0xe0)+
+ ((uint8_t)(lead_byte)>=0xf0));
+
+ if (trailing_bytes + i > slen) {
+ return ConversionResult::sourceExhausted;
+ }
+ }
+
+ /* Replace with 0xFFFD */
+ uc = 0x0000FFFD;
+ }
+ targetStart[j++] = uc;
+ }
- if (res == conversionOK) {
- dstCount = static_cast<int>( targetStart - reinterpret_cast<UTF32*>( dst ) );
+ dstCount = j;
- if (dstCount < dstSize) {
- *targetStart = 0;
- }
+ if (j < dstSize) {
+ targetStart[j] = 0;
}
} else {
for ( dstCount = 0; ( dstCount < dstSize ) && src[dstCount]; ++ dstCount ) {
@@ -69,26 +88,32 @@ ConversionResult copyString8to32(char32_t* dst, int dstSize, int& dstCount, cons
ConversionResult copyString8to32(char32_t* dst, int dstSize, int& dstCount, const char8_t* src) {
return copyString8to32(
- dst, dstSize, dstCount, reinterpret_cast<const char*>(src)
+ dst, dstSize, dstCount, reinterpret_cast<const char*>(src)
);
}
-int copyString32to8( char* dst, int dstSize, const char32_t* src, int srcSize ) {
- int resCount( 0 );
+int copyString32to8(
+ char* dst, int dstSize, const char32_t* src, int srcSize
+) {
+ int resCount = 0;
+
if ( ! locale::is8BitEncoding ) {
- const UTF32* sourceStart = reinterpret_cast<const UTF32*>(src);
- const UTF32* sourceEnd = sourceStart + srcSize;
- UTF8* targetStart = reinterpret_cast<UTF8*>(dst);
- UTF8* targetEnd = targetStart + dstSize;
-
- ConversionResult res = ConvertUTF32toUTF8(
- &sourceStart, sourceEnd, &targetStart, targetEnd, lenientConversion
- );
-
- if ( res == conversionOK ) {
- resCount = static_cast<int>( targetStart - reinterpret_cast<UTF8*>( dst ) );
- if ( resCount < dstSize ) {
- *targetStart = 0;
+ int j = 0;
+ UBool is_error = 0;
+
+ for (auto i = 0; i < srcSize; i ++) {
+ U8_APPEND ((uint8_t *)dst, j, dstSize, src[i], is_error);
+
+ if (is_error) {
+ break;
+ }
+ }
+
+ if (!is_error) {
+ resCount = j;
+
+ if (j < dstSize) {
+ dst[j] = '\0';
}
}
} else {
@@ -101,7 +126,8 @@ int copyString32to8( char* dst, int dstSize, const char32_t* src, int srcSize )
dst[i] = 0;
}
}
- return ( resCount );
+
+ return resCount;
}
}