From: Vsevolod Stakhov Date: Fri, 15 Nov 2019 18:25:15 +0000 (+0000) Subject: [Fix] Fix issues found X-Git-Tag: 2.2~32 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=4d436a67f1c15b4c1e47864a7d7f267162a7da79;p=rspamd.git [Fix] Fix issues found --- diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c index 942358d11..17da31f30 100644 --- a/src/libmime/mime_encoding.c +++ b/src/libmime/mime_encoding.c @@ -477,30 +477,29 @@ rspamd_mime_charset_utf_enforce (gchar *in, gsize len) p = in; end = in + len; - while (p < end && len > 0 && (err_offset = rspamd_fast_utf8_validate (p, len) > 0)) { + while (p < end && len > 0 && (err_offset = rspamd_fast_utf8_validate (p, len)) > 0) { + err_offset --; /* As it returns it 1 indexed */ goffset cur_offset = err_offset; while (cur_offset < len) { goffset tmp = cur_offset; - U8_NEXT (in, cur_offset, len, uc); + U8_NEXT (p, cur_offset, len, uc); if (uc > 0) { /* Fill string between err_offset and tmp with `?` character */ - memset (in + err_offset, '?', - tmp - err_offset); + memset (p + err_offset - 1, '?', tmp - err_offset); break; } } if (uc < 0) { /* Fill till the end */ - memset (p + err_offset, '?', - len - err_offset); + memset (p + err_offset, '?', len - err_offset); break; } - p = in + cur_offset; + p += cur_offset; len = end - p; } } diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c index c457fc455..e66101bb4 100644 --- a/src/libserver/protocol.c +++ b/src/libserver/protocol.c @@ -925,11 +925,11 @@ urls_protocol_cb (gpointer key, gpointer value, gpointer ud) goffset err_offset; - if ((err_offset = rspamd_fast_utf8_validate (url->host, url->hostlen) == 0)) { + if ((err_offset = rspamd_fast_utf8_validate (url->host, url->hostlen)) == 0) { obj = ucl_object_fromlstring (url->host, url->hostlen); } else { - obj = ucl_object_fromlstring (url->host, err_offset); + obj = ucl_object_fromlstring (url->host, err_offset - 1); } } else { diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c index dd1b139d8..9f4ad1cb0 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -3071,30 +3071,31 @@ rspamd_str_make_utf_valid (const guchar *src, gsize slen, } p = src; - dlen = slen; + dlen = slen + 1; /* As we add '\0' */ /* Check space required */ - while (remain > 0 && (err_offset = rspamd_fast_utf8_validate (p, remain) > 0)) { + while (remain > 0 && (err_offset = rspamd_fast_utf8_validate (p, remain)) > 0) { gint i = 0; + err_offset --; /* As it returns it 1 indexed */ p += err_offset; remain -= err_offset; dlen += err_offset; - /* Each invalid character of input requires 3 bytes of output */ + /* Each invalid character of input requires 3 bytes of output (+2 bytes) */ while (i < remain) { - gint old_i = i; U8_NEXT (p, i, remain, uc); if (uc < 0) { - dlen += 3; + dlen += 2; } else { - p += old_i; - remain -= old_i; break; } } + + p += i; + remain -= i; } if (pool) { @@ -3108,8 +3109,9 @@ rspamd_str_make_utf_valid (const guchar *src, gsize slen, d = dst; remain = slen; - while (remain > 0 && (err_offset = rspamd_fast_utf8_validate (p, remain) > 0)) { + while (remain > 0 && (err_offset = rspamd_fast_utf8_validate (p, remain)) > 0) { /* Copy valid */ + err_offset --; /* As it returns it 1 indexed */ memcpy (d, p, err_offset); d += err_offset; @@ -3130,8 +3132,7 @@ rspamd_str_make_utf_valid (const guchar *src, gsize slen, } else { /* Adjust p and remaining stuff and go to the outer cycle */ - p += old_i; - remain -= old_i; + i = old_i; break; } } @@ -3139,6 +3140,8 @@ rspamd_str_make_utf_valid (const guchar *src, gsize slen, * Now p is the first valid utf8 character and remain is the rest of the string * so we can continue our loop */ + p += i; + remain -= i; } if (err_offset == 0 && remain > 0) { diff --git a/test/lua/unit/utf.lua b/test/lua/unit/utf.lua index 75dd33977..2d2c77f67 100644 --- a/test/lua/unit/utf.lua +++ b/test/lua/unit/utf.lua @@ -5,7 +5,7 @@ context("UTF8 check functions", function() ffi.cdef[[ unsigned int rspamd_str_lc_utf8 (char *str, unsigned int size); unsigned int rspamd_str_lc (char *str, unsigned int size); - char * rspamd_str_make_utf_valid (const char *src, size_t slen, size_t *dstlen); + char * rspamd_str_make_utf_valid (const char *src, size_t slen, size_t *dstlen, void *); ]] local cases = { @@ -58,7 +58,7 @@ context("UTF8 check functions", function() local buf = ffi.new("char[?]", #c[1] + 1) ffi.copy(buf, c[1]) - local s = ffi.string(ffi.C.rspamd_str_make_utf_valid(buf, #c[1], NULL)) + local s = ffi.string(ffi.C.rspamd_str_make_utf_valid(buf, #c[1], NULL, NULL)) local function to_hex(s) return (s:gsub('.', function (c) return string.format('%02X', string.byte(c))