From: Vsevolod Stakhov Date: Sat, 23 Apr 2016 13:42:07 +0000 (+0100) Subject: [Feature] Allow non zero terminated patterns in multipattern X-Git-Tag: 1.3.0~658 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=db2aed4685476f5d34c783dd4d21d46ef3312026;p=rspamd.git [Feature] Allow non zero terminated patterns in multipattern --- diff --git a/src/libutil/multipattern.c b/src/libutil/multipattern.c index 092e9f889..b08ed70a5 100644 --- a/src/libutil/multipattern.c +++ b/src/libutil/multipattern.c @@ -64,9 +64,10 @@ rspamd_multipattern_library_init (const gchar *cache_dir) #ifdef WITH_HYPERSCAN static gchar * -rspamd_multipattern_escape_tld_hyperscan (const gchar *pattern) +rspamd_multipattern_escape_tld_hyperscan (const gchar *pattern, gsize slen, + gsize *dst_len) { - gsize len, slen; + gsize len; const gchar *p, *prefix; gchar *res; @@ -76,7 +77,6 @@ rspamd_multipattern_escape_tld_hyperscan (const gchar *pattern) * 2) *.blah -> \\..*\\.blah * 3) ??? */ - slen = strlen (pattern); if (pattern[0] == '*') { len = slen + 4; @@ -100,115 +100,27 @@ rspamd_multipattern_escape_tld_hyperscan (const gchar *pattern) res = g_malloc (len + 1); slen = rspamd_strlcpy (res, prefix, len + 1); - rspamd_strlcpy (res + slen, p, len + 1 - slen); - - return res; -} - -static gchar * -rspamd_multipattern_escape_generic_hyperscan (const gchar *pattern) -{ - const gchar *p; - gchar *res, *d, t; - gsize len, slen; - - slen = strlen (pattern); - len = slen; - - p = pattern; - - /* [-[\]{}()*+?.,\\^$|#\s] need to be escaped */ - while (*p) { - t = *p ++; - - switch (t) { - case '[': - case ']': - case '-': - case '\\': - case '{': - case '}': - case '(': - case ')': - case '*': - case '+': - case '?': - case '.': - case ',': - case '^': - case '$': - case '|': - case '#': - len ++; - break; - default: - if (g_ascii_isspace (t)) { - len ++; - } - break; - } - } - - if (slen == len) { - return g_strdup (pattern); - } - - res = g_malloc (len + 1); - p = pattern; - d = res; - - while (*p) { - t = *p ++; - - switch (t) { - case '[': - case ']': - case '-': - case '\\': - case '{': - case '}': - case '(': - case ')': - case '*': - case '+': - case '?': - case '.': - case ',': - case '^': - case '$': - case '|': - case '#': - *d++ = '\\'; - break; - default: - if (g_ascii_isspace (t)) { - *d++ = '\\'; - } - break; - } + slen += rspamd_strlcpy (res + slen, p, len + 1 - slen); - *d++ = t; - } - - *d = '\0'; + *dst_len = slen; return res; } static gchar * -rspamd_multipattern_escape_glob_hyperscan (const gchar *pattern) +rspamd_multipattern_escape_hyperscan (const gchar *pattern, gsize slen, + gsize *dst_len, gboolean allow_glob) { - const gchar *p; + const gchar *p, *end = pattern + slen; gchar *res, *d, t; - gsize len, slen; + gsize len; + static const gchar hexdigests[16] = "0123456789abcdef"; - slen = strlen (pattern); len = slen; - p = pattern; /* [-[\]{}()*+?.,\\^$|#\s] need to be escaped */ - while (*p) { + while (p < end) { t = *p ++; switch (t) { @@ -235,11 +147,16 @@ rspamd_multipattern_escape_glob_hyperscan (const gchar *pattern) if (g_ascii_isspace (t)) { len ++; } + else if (!g_ascii_isprint (t)) { + /* \\xHH -> 4 symbols */ + len += 3; + } break; } } if (slen == len) { + *dst_len = slen; return g_strdup (pattern); } @@ -259,7 +176,6 @@ rspamd_multipattern_escape_glob_hyperscan (const gchar *pattern) case '}': case '(': case ')': - case '+': case '.': case ',': case '^': @@ -270,13 +186,26 @@ rspamd_multipattern_escape_glob_hyperscan (const gchar *pattern) break; case '*': case '?': - /* Treat * as .* and ? as .? */ - *d++ = '.'; + case '+': + if (allow_glob) { + /* Treat * as .* and ? as .? */ + *d++ = '.'; + } + else { + *d++ = '\\'; + } break; default: if (g_ascii_isspace (t)) { *d++ = '\\'; } + else if (!g_ascii_isgraph (t)) { + *d++ = '\\'; + *d++ = 'x'; + *d++ = hexdigests[((t >> 4) & 0xF)]; + *d++ = hexdigests[((t) & 0xF)]; + continue; /* To avoid *d++ = t; */ + } break; } @@ -284,15 +213,17 @@ rspamd_multipattern_escape_glob_hyperscan (const gchar *pattern) } *d = '\0'; + *dst_len = d - res; return res; } #else static gchar * -rspamd_multipattern_escape_tld_acism (const gchar *pattern) +rspamd_multipattern_escape_tld_acism (const gchar *pattern, gsize len, + gsize *dst_len) { - gsize len, slen; + gsize dlen, slen; const gchar *p, *prefix; gchar *res; @@ -302,11 +233,11 @@ rspamd_multipattern_escape_tld_acism (const gchar *pattern) * 2) *.blah -> \\..*\\.blah * 3) ??? */ - slen = strlen (pattern); + slen = len; if (pattern[0] == '*') { - len = slen; - p = strchr (pattern, '.'); + dlen = slen; + p = memchr (pattern, '.', len); if (p == NULL) { /* XXX: bad */ @@ -316,17 +247,22 @@ rspamd_multipattern_escape_tld_acism (const gchar *pattern) p ++; } + dlen -= p - pattern; prefix = "."; + dlen ++; } else { - len = slen + 1; + dlen = slen + 1; prefix = "."; p = pattern; } - res = g_malloc (len + 1); - slen = rspamd_strlcpy (res, prefix, len + 1); - rspamd_strlcpy (res + slen, p, len + 1 - slen); + res = g_malloc (dlen + 1); + slen = strlen (prefix); + memcpy (res, prefix, slen); + memcpy (res + slen, p, dlen - slen); + + *dst_len = dlen; return res; } @@ -335,28 +271,37 @@ rspamd_multipattern_escape_tld_acism (const gchar *pattern) * Escapes special characters from specific pattern */ static gchar * -rspamd_multipattern_pattern_filter (const gchar *pattern, - enum rspamd_multipattern_flags flags) +rspamd_multipattern_pattern_filter (const gchar *pattern, gsize len, + enum rspamd_multipattern_flags flags, + gsize *dst_len) { + gchar *ret = NULL; #ifdef WITH_HYPERSCAN if (flags & RSPAMD_MULTIPATTERN_TLD) { - return rspamd_multipattern_escape_tld_hyperscan (pattern); + ret = rspamd_multipattern_escape_tld_hyperscan (pattern, len, dst_len); } else if (flags & RSPAMD_MULTIPATTERN_RE) { - return g_strdup (pattern); + ret = malloc (len + 1); + *dst_len = rspamd_strlcpy (ret, pattern, len + 1); } else if (flags & RSPAMD_MULTIPATTERN_GLOB) { - return rspamd_multipattern_escape_glob_hyperscan (pattern); + ret = rspamd_multipattern_escape_hyperscan (pattern, len, dst_len, TRUE); + } + else { + ret = rspamd_multipattern_escape_hyperscan (pattern, len, dst_len, FALSE); } - - return rspamd_multipattern_escape_generic_hyperscan (pattern); #else if (flags & RSPAMD_MULTIPATTERN_TLD) { - return rspamd_multipattern_escape_tld_acism (pattern); + ret = rspamd_multipattern_escape_tld_acism (pattern, len, dst_len); + } + else { + ret = malloc (len); + memcpy (ret, pattern, len); + *dst_len = len; } - - return g_strdup (pattern); #endif + + return ret; } struct rspamd_multipattern * @@ -404,6 +349,17 @@ void rspamd_multipattern_add_pattern (struct rspamd_multipattern *mp, const gchar *pattern, gint flags) { + g_assert (pattern != NULL); + + rspamd_multipattern_add_pattern_len (mp, pattern, strlen (pattern), flags); +} + +void +rspamd_multipattern_add_pattern_len (struct rspamd_multipattern *mp, + const gchar *pattern, gsize patlen, gint flags) +{ + gsize dlen; + g_assert (pattern != NULL); g_assert (mp != NULL); g_assert (!mp->compiled); @@ -420,16 +376,16 @@ rspamd_multipattern_add_pattern (struct rspamd_multipattern *mp, } g_array_append_val (mp->hs_flags, fl); - np = rspamd_multipattern_pattern_filter (pattern, flags); + np = rspamd_multipattern_pattern_filter (pattern, patlen, flags, &dlen); g_array_append_val (mp->hs_pats, np); fl = mp->cnt; g_array_append_val (mp->hs_ids, fl); - rspamd_cryptobox_hash_update (&mp->hash_state, np, strlen (np)); + rspamd_cryptobox_hash_update (&mp->hash_state, np, dlen); #else ac_trie_pat_t pat; - pat.ptr = rspamd_multipattern_pattern_filter (pattern, flags); - pat.len = strlen (pat.ptr); + pat.ptr = rspamd_multipattern_pattern_filter (pattern, patlen, flags, &dlen); + pat.len = dlen; g_array_append_val (mp->pats, pat); #endif diff --git a/src/libutil/multipattern.h b/src/libutil/multipattern.h index d8f534b54..ef9f17583 100644 --- a/src/libutil/multipattern.h +++ b/src/libutil/multipattern.h @@ -92,13 +92,23 @@ struct rspamd_multipattern *rspamd_multipattern_create_full ( enum rspamd_multipattern_flags flags); /** - * Adds new pattern to match engine + * Adds new pattern to match engine from zero-terminated string * @param mp * @param pattern */ void rspamd_multipattern_add_pattern (struct rspamd_multipattern *mp, const gchar *pattern, gint flags); +/** + * Adds new pattern from arbitrary string + * @param mp + * @param pattern + * @param patlen + * @param flags + */ +void rspamd_multipattern_add_pattern_len (struct rspamd_multipattern *mp, + const gchar *pattern, gsize patlen, gint flags); + /** * Compiles multipattern structure * @param mp