Просмотр исходного кода

[Project] Use own utf8 validation instead of glib

tags/2.2
Vsevolod Stakhov 4 лет назад
Родитель
Коммит
803a906206

+ 23
- 19
src/libmime/mime_encoding.c Просмотреть файл

@@ -22,6 +22,7 @@
#include "libserver/task.h"
#include "mime_encoding.h"
#include "message.h"
#include "contrib/fastutf8/fastutf8.h"
#include <unicode/ucnv.h>
#include <unicode/ucsdet.h>
#if U_ICU_VERSION_MAJOR_NUM >= 44
@@ -468,36 +469,39 @@ rspamd_mime_to_utf8_byte_array (GByteArray *in,
/**
 * Force a buffer to be valid UTF-8 in place.
 *
 * Every run of bytes that does not form valid UTF-8 is overwritten with
 * '?' characters; valid sequences are left untouched. The buffer length
 * never changes.
 *
 * @param in  buffer to sanitise (modified in place)
 * @param len number of bytes in `in`
 */
void
rspamd_mime_charset_utf_enforce (gchar *in, gsize len)
{
	gchar *p, *end;
	goffset err_offset;
	UChar32 uc = 0;

	/* Now we validate input and replace bad characters with '?' symbol */
	p = in;
	end = in + len;

	/*
	 * The assignment must be parenthesised on its own: `a = f () > 0`
	 * stores the comparison result (0 or 1) in `a`, not the offset.
	 */
	while (p < end && len > 0 &&
			(err_offset = rspamd_fast_utf8_validate (p, len)) > 0) {
		/*
		 * NOTE(review): the validator uses 0 for "valid", so a non-zero
		 * result is treated here as a 1-based position of the first bad
		 * byte - confirm against the fastutf8 header.
		 */
		err_offset --;

		/* All offsets below are relative to `p`, not to `in` */
		goffset cur_offset = err_offset;

		while (cur_offset < (goffset)len) {
			goffset tmp = cur_offset;

			/* Decodes one code point at cur_offset and advances it */
			U8_NEXT (p, cur_offset, len, uc);

			if (uc > 0) {
				/* Fill string between err_offset and tmp with `?` character */
				memset (p + err_offset, '?', tmp - err_offset);
				break;
			}
		}

		if (uc <= 0) {
			/*
			 * No valid character was found before the end of the input
			 * (the scan ran off the buffer): fill till the end.
			 */
			memset (p + err_offset, '?', len - err_offset);
			break;
		}

		/* Resume validation right after the valid character just decoded */
		p += cur_offset;
		len = end - p;
	}
}


+ 4
- 6
src/libserver/protocol.c Просмотреть файл

@@ -26,6 +26,7 @@
#include "unix-std.h"
#include "protocol_internal.h"
#include "libserver/mempool_vars_internal.h"
#include "contrib/fastutf8/fastutf8.h"
#include "task.h"
#include <math.h>

@@ -922,16 +923,13 @@ urls_protocol_cb (gpointer key, gpointer value, gpointer ud)
return;
}

const gchar *end = NULL;
goffset err_offset;

if (g_utf8_validate (url->host, url->hostlen, &end)) {
if ((err_offset = rspamd_fast_utf8_validate (url->host, url->hostlen) == 0)) {
obj = ucl_object_fromlstring (url->host, url->hostlen);
}
else if (end - url->host > 0) {
obj = ucl_object_fromlstring (url->host, end - url->host);
}
else {
return;
obj = ucl_object_fromlstring (url->host, err_offset);
}
}
else {

+ 3
- 1
src/libserver/re_cache.c Просмотреть файл

@@ -41,6 +41,8 @@
#include <pcre2.h>
#endif

#include "contrib/fastutf8/fastutf8.h"

#ifdef HAVE_SYS_WAIT_H
#include <sys/wait.h>
#endif
@@ -988,7 +990,7 @@ rspamd_re_cache_process_headers_list (struct rspamd_task *task,
in = (const guchar *)cur->value;
lenvec[i] = strlen (cur->value);

if (!g_utf8_validate (in, lenvec[i], NULL)) {
if (rspamd_fast_utf8_validate (in, lenvec[i]) != 0) {
raw = TRUE;
}
}

+ 3
- 2
src/libutil/map_helpers.c Просмотреть файл

@@ -20,6 +20,7 @@
#include "radix.h"
#include "rspamd.h"
#include "cryptobox.h"
#include "contrib/fastutf8/fastutf8.h"

#ifdef WITH_HYPERSCAN
#include "hs.h"
@@ -1189,7 +1190,7 @@ rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map,
}

if (map->map_flags & RSPAMD_REGEXP_MAP_FLAG_UTF) {
if (g_utf8_validate (in, len, NULL)) {
if (rspamd_fast_utf8_validate (in, len) == 0) {
validated = TRUE;
}
}
@@ -1280,7 +1281,7 @@ rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map,
g_assert (in != NULL);

if (map->map_flags & RSPAMD_REGEXP_MAP_FLAG_UTF) {
if (g_utf8_validate (in, len, NULL)) {
if (rspamd_fast_utf8_validate (in, len) == 0) {
validated = TRUE;
}
}

+ 3
- 1
src/libutil/str_util.c Просмотреть файл

@@ -27,6 +27,8 @@
#endif
#include <math.h>

#include "contrib/fastutf8/fastutf8.h"

const guchar lc_map[256] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
@@ -2932,7 +2934,7 @@ rspamd_str_regexp_escape (const gchar *pattern, gsize slen,
}

if (flags & RSPAMD_REGEXP_ESCAPE_UTF) {
if (!g_utf8_validate (pattern, slen, NULL)) {
if (rspamd_fast_utf8_validate (pattern, slen) != 0) {
tmp_utf = rspamd_str_make_utf_valid (pattern, slen, NULL);
}
}

+ 26
- 2
src/lua/lua_util.c Просмотреть файл

@@ -34,6 +34,7 @@

#include "unicode/uspoof.h"
#include "unicode/uscript.h"
#include "contrib/fastutf8/fastutf8.h"

/***
* @module rspamd_util
@@ -2855,10 +2856,33 @@ lua_util_is_valid_utf8 (lua_State *L)
const gchar *str;
gsize len;

str = lua_tolstring (L, 1, &len);
if (lua_isstring (L, 1)) {
str = lua_tolstring (L, 1, &len);
}
else {
struct rspamd_lua_text *t = lua_check_text (L, 1);

if (t) {
str = t->start;
len = t->len;
}
else {
return luaL_error (L, "invalid arguments (text expected)");
}
}

if (str) {
lua_pushboolean (L, g_utf8_validate (str, len, NULL));
goffset error_offset = rspamd_fast_utf8_validate (str, len);

if (error_offset == 0) {
lua_pushboolean (L, true);
}
else {
lua_pushboolean (L, false);
lua_pushnumber (L, error_offset);

return 2;
}
}
else {
return luaL_error (L, "invalid arguments");

Загрузка…
Отмена
Сохранить