#include "libserver/task.h"
#include "mime_encoding.h"
#include "message.h"
+#include "contrib/fastutf8/fastutf8.h"
#include <unicode/ucnv.h>
#include <unicode/ucsdet.h>
#if U_ICU_VERSION_MAJOR_NUM >= 44
/**
 * Makes a buffer valid UTF-8 in place: every run of bytes that does not
 * decode as UTF-8 is overwritten with '?' characters. The buffer length
 * never changes.
 *
 * @param in buffer to fix (modified in place)
 * @param len length of the buffer in bytes
 */
void
rspamd_mime_charset_utf_enforce (gchar *in, gsize len)
{
	gchar *p, *end;
	goffset err_offset;
	UChar32 uc = 0;

	/* Now we validate input and replace bad characters with '?' symbol */
	p = in;
	end = in + len;

	/*
	 * The assignment must be parenthesised on its own: without that
	 * `err_offset` would receive the result of the `> 0` comparison (0 or 1)
	 * instead of the offset returned by the validator.
	 */
	while (p < end && len > 0 &&
			(err_offset = rspamd_fast_utf8_validate (p, len)) > 0) {
		/* U8_NEXT is documented to work with int32_t offsets */
		gint32 cur_offset;
		gboolean resynced = FALSE;

		/*
		 * Zero means "valid", so a failure position is reported 1-based;
		 * convert it to an index relative to `p`.
		 */
		err_offset --;
		cur_offset = (gint32) err_offset;

		/*
		 * Scan forward from the first bad byte until something decodes
		 * again. All offsets here are relative to `p` (the start of the
		 * chunk that has just been validated), not to `in`.
		 */
		while ((gsize) cur_offset < len) {
			gint32 tmp = cur_offset;

			U8_NEXT (p, cur_offset, len, uc);

			if (uc > 0) {
				/* Fill string between err_offset and tmp with `?` character */
				memset (p + err_offset, '?', tmp - err_offset);
				resynced = TRUE;
				break;
			}
		}

		if (!resynced) {
			/*
			 * Nothing decodable was found up to the end of the buffer:
			 * fill till the end. An explicit flag is used rather than the
			 * sign of `uc`, so that a tail ending in a zero code point does
			 * not leave the bad bytes in place.
			 */
			memset (p + err_offset, '?', len - err_offset);
			break;
		}

		/* Continue validation right after the character that decoded fine */
		p += cur_offset;
		len = end - p;
	}
}
#include "unix-std.h"
#include "protocol_internal.h"
#include "libserver/mempool_vars_internal.h"
+#include "contrib/fastutf8/fastutf8.h"
#include "task.h"
#include <math.h>
return;
}
- const gchar *end = NULL;
+ goffset err_offset;
- if (g_utf8_validate (url->host, url->hostlen, &end)) {
+ if ((err_offset = rspamd_fast_utf8_validate (url->host, url->hostlen) == 0)) {
obj = ucl_object_fromlstring (url->host, url->hostlen);
}
- else if (end - url->host > 0) {
- obj = ucl_object_fromlstring (url->host, end - url->host);
- }
else {
- return;
+ obj = ucl_object_fromlstring (url->host, err_offset);
}
}
else {
#include <pcre2.h>
#endif
+#include "contrib/fastutf8/fastutf8.h"
+
#ifdef HAVE_SYS_WAIT_H
#include <sys/wait.h>
#endif
in = (const guchar *)cur->value;
lenvec[i] = strlen (cur->value);
- if (!g_utf8_validate (in, lenvec[i], NULL)) {
+ if (rspamd_fast_utf8_validate (in, lenvec[i]) != 0) {
raw = TRUE;
}
}
#include "radix.h"
#include "rspamd.h"
#include "cryptobox.h"
+#include "contrib/fastutf8/fastutf8.h"
#ifdef WITH_HYPERSCAN
#include "hs.h"
}
if (map->map_flags & RSPAMD_REGEXP_MAP_FLAG_UTF) {
- if (g_utf8_validate (in, len, NULL)) {
+ if (rspamd_fast_utf8_validate (in, len) == 0) {
validated = TRUE;
}
}
g_assert (in != NULL);
if (map->map_flags & RSPAMD_REGEXP_MAP_FLAG_UTF) {
- if (g_utf8_validate (in, len, NULL)) {
+ if (rspamd_fast_utf8_validate (in, len) == 0) {
validated = TRUE;
}
}
#endif
#include <math.h>
+#include "contrib/fastutf8/fastutf8.h"
+
const guchar lc_map[256] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
}
if (flags & RSPAMD_REGEXP_ESCAPE_UTF) {
- if (!g_utf8_validate (pattern, slen, NULL)) {
+ if (rspamd_fast_utf8_validate (pattern, slen) != 0) {
tmp_utf = rspamd_str_make_utf_valid (pattern, slen, NULL);
}
}
#include "unicode/uspoof.h"
#include "unicode/uscript.h"
+#include "contrib/fastutf8/fastutf8.h"
/***
* @module rspamd_util
const gchar *str;
gsize len;
- str = lua_tolstring (L, 1, &len);
+ if (lua_isstring (L, 1)) {
+ str = lua_tolstring (L, 1, &len);
+ }
+ else {
+ struct rspamd_lua_text *t = lua_check_text (L, 1);
+
+ if (t) {
+ str = t->start;
+ len = t->len;
+ }
+ else {
+ return luaL_error (L, "invalid arguments (text expected)");
+ }
+ }
if (str) {
- lua_pushboolean (L, g_utf8_validate (str, len, NULL));
+ goffset error_offset = rspamd_fast_utf8_validate (str, len);
+
+ if (error_offset == 0) {
+ lua_pushboolean (L, true);
+ }
+ else {
+ lua_pushboolean (L, false);
+ lua_pushnumber (L, error_offset);
+
+ return 2;
+ }
}
else {
return luaL_error (L, "invalid arguments");