summary | refs | log | tree | commit | diff | stats
path: root/src/libutil/str_util.c
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-11-20 20:44:49 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-11-20 20:45:22 +0000
commit a45141c003e065341474de0ec0b4310b2f4437c8 (patch)
tree 2e9851e15290df805793cf6b51d0a0ae0753c195 /src/libutil/str_util.c
parent dc506fc54b60f4bcc7390447a0d80bfd6f799e54 (diff)
download rspamd-a45141c003e065341474de0ec0b4310b2f4437c8.tar.gz
download rspamd-a45141c003e065341474de0ec0b4310b2f4437c8.zip
[Fix] Properly escape utf8 regexps in hyperscan mode
Diffstat (limited to 'src/libutil/str_util.c')
-rw-r--r-- src/libutil/str_util.c 46
1 files changed, 38 insertions, 8 deletions
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c
index f798d9eeb..be7323df3 100644
--- a/src/libutil/str_util.c
+++ b/src/libutil/str_util.c
@@ -2327,10 +2327,10 @@ out:
gchar *
rspamd_str_regexp_escape (const gchar *pattern, gsize slen,
- gsize *dst_len, gboolean allow_glob)
+ gsize *dst_len, enum rspamd_regexp_escape_flags flags)
{
const gchar *p, *end = pattern + slen;
- gchar *res, *d, t;
+ gchar *res, *d, t, *tmp_utf = NULL;
gsize len;
static const gchar hexdigests[16] = "0123456789abcdef";
@@ -2365,20 +2365,46 @@ rspamd_str_regexp_escape (const gchar *pattern, gsize slen,
if (g_ascii_isspace (t)) {
len ++;
}
- else if (!g_ascii_isprint (t)) {
- /* \\xHH -> 4 symbols */
- len += 3;
+ else {
+ if (!(flags & RSPAMD_REGEXP_ESCAPE_UTF)) {
+ if (!g_ascii_isprint (t)) {
+ /* \\xHH -> 4 symbols */
+ len += 3;
+ }
+ }
}
break;
}
}
+ if (flags & RSPAMD_REGEXP_ESCAPE_UTF) {
+ if (!g_utf8_validate (pattern, slen, NULL)) {
+ tmp_utf = g_utf8_make_valid (pattern, slen);
+ }
+ }
+
if (slen == len) {
if (dst_len) {
+
+ if (tmp_utf) {
+ slen = strlen (tmp_utf);
+ }
+
*dst_len = slen;
}
- return g_strdup (pattern);
+
+
+ if (tmp_utf) {
+ return tmp_utf;
+ }
+ else {
+ return g_strdup (pattern);
+ }
+ }
+
+ if (tmp_utf) {
+ pattern = tmp_utf;
}
res = g_malloc (len + 1);
@@ -2408,7 +2434,7 @@ rspamd_str_regexp_escape (const gchar *pattern, gsize slen,
case '*':
case '?':
case '+':
- if (allow_glob) {
+ if (flags & RSPAMD_REGEXP_ESCAPE_GLOB) {
/* Treat * as .* and ? as .? */
*d++ = '.';
}
@@ -2420,7 +2446,7 @@ rspamd_str_regexp_escape (const gchar *pattern, gsize slen,
if (g_ascii_isspace (t)) {
*d++ = '\\';
}
- else if (!g_ascii_isgraph (t)) {
+ else if (!(flags & RSPAMD_REGEXP_ESCAPE_UTF) && !g_ascii_isgraph (t)) {
*d++ = '\\';
*d++ = 'x';
*d++ = hexdigests[((t >> 4) & 0xF)];
@@ -2439,5 +2465,9 @@ rspamd_str_regexp_escape (const gchar *pattern, gsize slen,
*dst_len = d - res;
}
+ if (tmp_utf) {
+ g_free (tmp_utf);
+ }
+
return res;
}