diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-04-12 17:08:52 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-04-12 17:08:52 +0100 |
commit | f80d26f1384232ac4d3d4923173a2ca3a73a125f (patch) | |
tree | fc077bc0e9507e8ea34efcaf5c87dc9c727e638b /src/libutil/multipattern.c | |
parent | ec0e4b788949b555d0fbd52c8a0b8f970a8ba4b0 (diff) | |
download | rspamd-f80d26f1384232ac4d3d4923173a2ca3a73a125f.tar.gz rspamd-f80d26f1384232ac4d3d4923173a2ca3a73a125f.zip |
[Feature] Add escape functions for hyperscan
Diffstat (limited to 'src/libutil/multipattern.c')
-rw-r--r-- | src/libutil/multipattern.c | 239 |
1 files changed, 234 insertions, 5 deletions
diff --git a/src/libutil/multipattern.c b/src/libutil/multipattern.c
index 967a5115a..54ed13292 100644
--- a/src/libutil/multipattern.c
+++ b/src/libutil/multipattern.c
@@ -15,7 +15,8 @@
  */
 
 #include "config.h"
-#include "multipattern.h"
+#include "libutil/multipattern.h"
+#include "libutil/str_util.h"
 
 #ifdef WITH_HYPERSCAN
 #include "hs.h"
@@ -45,6 +46,219 @@ rspamd_multipattern_quark (void)
 	return g_quark_from_static_string ("multipattern");
 }
 
+#ifdef WITH_HYPERSCAN
+/*
+ * Checks whether a character must be escaped to be matched literally by
+ * hyperscan, i.e. whether it belongs to the class [-[\]{}()*+?.,\\^$|#\s]
+ */
+static gboolean
+rspamd_multipattern_is_special_hyperscan (gchar t)
+{
+	switch (t) {
+	case '[':
+	case ']':
+	case '-':
+	case '\\':
+	case '{':
+	case '}':
+	case '(':
+	case ')':
+	case '*':
+	case '+':
+	case '?':
+	case '.':
+	case ',':
+	case '^':
+	case '$':
+	case '|':
+	case '#':
+		return TRUE;
+	default:
+		break;
+	}
+
+	return g_ascii_isspace (t) ? TRUE : FALSE;
+}
+
+/*
+ * Converts a TLD pattern to a hyperscan expression; returns a newly
+ * allocated string
+ */
+static gchar *
+rspamd_multipattern_escape_tld_hyperscan (const gchar *pattern)
+{
+	gsize len, plen;
+	const gchar *p, *prefix;
+	gchar *res;
+
+	/*
+	 * We understand the following cases
+	 * 1) blah -> \\.blah
+	 * 2) *.blah -> \\..*\\.blah
+	 * 3) ???
+	 */
+	if (pattern[0] == '*') {
+		p = strchr (pattern, '.');
+
+		if (p == NULL) {
+			/* XXX: bad */
+			p = pattern;
+		}
+		else {
+			p ++;
+		}
+
+		prefix = "\\..*\\.";
+	}
+	else {
+		prefix = "\\.";
+		p = pattern;
+	}
+
+	/*
+	 * Size the buffer by what is really written, so nothing is truncated
+	 * even if '*' is not followed by a dot and the whole pattern is copied
+	 */
+	len = strlen (prefix) + strlen (p);
+	res = g_malloc (len + 1);
+	plen = rspamd_strlcpy (res, prefix, len + 1);
+	rspamd_strlcpy (res + plen, p, len + 1 - plen);
+
+	return res;
+}
+
+/*
+ * Escapes all hyperscan special characters, so a pattern is matched as a
+ * plain string; returns a newly allocated string
+ */
+static gchar *
+rspamd_multipattern_escape_generic_hyperscan (const gchar *pattern)
+{
+	const gchar *p;
+	gchar *res, *d;
+	gsize len, slen;
+
+	slen = strlen (pattern);
+	len = slen;
+
+	/* Each special character needs one more byte for a backslash */
+	for (p = pattern; *p != '\0'; p ++) {
+		if (rspamd_multipattern_is_special_hyperscan (*p)) {
+			len ++;
+		}
+	}
+
+	if (slen == len) {
+		/* Nothing to escape */
+		return g_strdup (pattern);
+	}
+
+	res = g_malloc (len + 1);
+	d = res;
+
+	for (p = pattern; *p != '\0'; p ++) {
+		if (rspamd_multipattern_is_special_hyperscan (*p)) {
+			*d++ = '\\';
+		}
+
+		*d++ = *p;
+	}
+
+	*d = '\0';
+
+	return res;
+}
+
+/*
+ * Converts a glob pattern to a hyperscan expression: wildcards are
+ * expanded and other special characters are escaped; returns a newly
+ * allocated string
+ */
+static gchar *
+rspamd_multipattern_escape_glob_hyperscan (const gchar *pattern)
+{
+	const gchar *p;
+	gchar *res, *d;
+	gsize len, slen;
+
+	slen = strlen (pattern);
+	len = slen;
+
+	/* Wildcards grow by one byte as well: they get a leading dot */
+	for (p = pattern; *p != '\0'; p ++) {
+		if (rspamd_multipattern_is_special_hyperscan (*p)) {
+			len ++;
+		}
+	}
+
+	if (slen == len) {
+		/* Neither wildcards nor special characters */
+		return g_strdup (pattern);
+	}
+
+	res = g_malloc (len + 1);
+	d = res;
+
+	for (p = pattern; *p != '\0'; p ++) {
+		if (*p == '*' || *p == '?') {
+			/* Treat * as .* and ? as .? */
+			*d++ = '.';
+		}
+		else if (rspamd_multipattern_is_special_hyperscan (*p)) {
+			*d++ = '\\';
+		}
+
+		*d++ = *p;
+	}
+
+	*d = '\0';
+
+	return res;
+}
+
+#else
+/*
+ * Converts a TLD pattern to a plain string used when hyperscan is not
+ * available; returns a newly allocated string
+ */
+static gchar *
+rspamd_multipattern_escape_tld_acism (const gchar *pattern)
+{
+	gsize len, plen;
+	const gchar *p, *prefix;
+	gchar *res;
+
+	/*
+	 * We understand the following cases
+	 * 1) blah -> .blah
+	 * 2) *.blah -> .blah
+	 * 3) ???
+	 */
+	if (pattern[0] == '*') {
+		p = strchr (pattern, '.');
+
+		if (p == NULL) {
+			/* XXX: bad */
+			p = pattern;
+		}
+		else {
+			p ++;
+		}
+	}
+	else {
+		p = pattern;
+	}
+
+	/* Size the buffer by what is really written to avoid truncation */
+	prefix = ".";
+	len = strlen (prefix) + strlen (p);
+	res = g_malloc (len + 1);
+	plen = rspamd_strlcpy (res, prefix, len + 1);
+	rspamd_strlcpy (res + plen, p, len + 1 - plen);
+
+	return res;
+}
+#endif
 /*
  * Escapes special characters from specific pattern
  */
@@ -52,10 +266,25 @@ static gchar *
 rspamd_multipattern_pattern_filter (const gchar *pattern,
 		enum rspamd_multipattern_flags flags)
 {
-	/*
-	 * TODO: implement patterns filtering
-	 */
-	return strdup (pattern);
+#ifdef WITH_HYPERSCAN
+	if (flags & RSPAMD_MULTIPATTERN_TLD) {
+		return rspamd_multipattern_escape_tld_hyperscan (pattern);
+	}
+	else if (flags & RSPAMD_MULTIPATTERN_RE) {
+		return g_strdup (pattern);
+	}
+	else if (flags & RSPAMD_MULTIPATTERN_GLOB) {
+		return rspamd_multipattern_escape_glob_hyperscan (pattern);
+	}
+
+	return rspamd_multipattern_escape_generic_hyperscan (pattern);
+#else
+	if (flags & RSPAMD_MULTIPATTERN_TLD) {
+		return rspamd_multipattern_escape_tld_acism (pattern);
+	}
+
+	return g_strdup (pattern);
+#endif
 }
 
 struct rspamd_multipattern *