diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-11-20 20:44:49 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-11-20 20:45:22 +0000 |
commit | a45141c003e065341474de0ec0b4310b2f4437c8 (patch) | |
tree | 2e9851e15290df805793cf6b51d0a0ae0753c195 /src/libutil/str_util.c | |
parent | dc506fc54b60f4bcc7390447a0d80bfd6f799e54 (diff) | |
download | rspamd-a45141c003e065341474de0ec0b4310b2f4437c8.tar.gz rspamd-a45141c003e065341474de0ec0b4310b2f4437c8.zip |
[Fix] Properly escape utf8 regexps in hyperscan mode
Diffstat (limited to 'src/libutil/str_util.c')
-rw-r--r-- | src/libutil/str_util.c | 46 |
1 files changed, 38 insertions, 8 deletions
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c
index f798d9eeb..be7323df3 100644
--- a/src/libutil/str_util.c
+++ b/src/libutil/str_util.c
@@ -2327,10 +2327,10 @@ out:
 
 gchar *
 rspamd_str_regexp_escape (const gchar *pattern, gsize slen,
-		gsize *dst_len, gboolean allow_glob)
+		gsize *dst_len, enum rspamd_regexp_escape_flags flags)
 {
 	const gchar *p, *end = pattern + slen;
-	gchar *res, *d, t;
+	gchar *res, *d, t, *tmp_utf = NULL;
 	gsize len;
 	static const gchar hexdigests[16] = "0123456789abcdef";
 
@@ -2365,20 +2365,46 @@ rspamd_str_regexp_escape (const gchar *pattern, gsize slen,
 			if (g_ascii_isspace (t)) {
 				len ++;
 			}
-			else if (!g_ascii_isprint (t)) {
-				/* \\xHH -> 4 symbols */
-				len += 3;
+			else {
+				if (!(flags & RSPAMD_REGEXP_ESCAPE_UTF)) {
+					if (!g_ascii_isprint (t)) {
+						/* \\xHH -> 4 symbols */
+						len += 3;
+					}
+				}
 			}
 			break;
 		}
 	}
 
 
+	if (flags & RSPAMD_REGEXP_ESCAPE_UTF) {
+		if (!g_utf8_validate (pattern, slen, NULL)) {
+			tmp_utf = g_utf8_make_valid (pattern, slen);
+		}
+	}
+
 	if (slen == len) {
 		if (dst_len) {
+
+			if (tmp_utf) {
+				slen = strlen (tmp_utf);
+			}
+
 			*dst_len = slen;
 		}
-		return g_strdup (pattern);
+
+
+		if (tmp_utf) {
+			return tmp_utf;
+		}
+		else {
+			return g_strdup (pattern);
+		}
+	}
+
+	if (tmp_utf) {
+		pattern = tmp_utf;
 	}
 
 	res = g_malloc (len + 1);
@@ -2408,7 +2434,7 @@ rspamd_str_regexp_escape (const gchar *pattern, gsize slen,
 		case '*':
 		case '?':
 		case '+':
-			if (allow_glob) {
+			if (flags & RSPAMD_REGEXP_ESCAPE_GLOB) {
 				/* Treat * as .* and ? as .? */
 				*d++ = '.';
 			}
@@ -2420,7 +2446,7 @@ rspamd_str_regexp_escape (const gchar *pattern, gsize slen,
 			if (g_ascii_isspace (t)) {
 				*d++ = '\\';
 			}
-			else if (!g_ascii_isgraph (t)) {
+			else if (!(flags & RSPAMD_REGEXP_ESCAPE_UTF) && !g_ascii_isgraph (t)) {
 				*d++ = '\\';
 				*d++ = 'x';
 				*d++ = hexdigests[((t >> 4) & 0xF)];
@@ -2439,5 +2465,9 @@ rspamd_str_regexp_escape (const gchar *pattern, gsize slen,
 		*dst_len = d - res;
 	}
 
+	if (tmp_utf) {
+		g_free (tmp_utf);
+	}
+
 	return res;
 }