Browse Source

[Feature] Allow non zero terminated patterns in multipattern

tags/1.3.0
Vsevolod Stakhov 8 years ago
parent
commit
db2aed4685
2 changed files with 92 additions and 126 deletions
  1. 81
    125
      src/libutil/multipattern.c
  2. 11
    1
      src/libutil/multipattern.h

+ 81
- 125
src/libutil/multipattern.c View File

@@ -64,9 +64,10 @@ rspamd_multipattern_library_init (const gchar *cache_dir)

#ifdef WITH_HYPERSCAN
static gchar *
rspamd_multipattern_escape_tld_hyperscan (const gchar *pattern)
rspamd_multipattern_escape_tld_hyperscan (const gchar *pattern, gsize slen,
gsize *dst_len)
{
gsize len, slen;
gsize len;
const gchar *p, *prefix;
gchar *res;

@@ -76,7 +77,6 @@ rspamd_multipattern_escape_tld_hyperscan (const gchar *pattern)
* 2) *.blah -> \\..*\\.blah
* 3) ???
*/
slen = strlen (pattern);

if (pattern[0] == '*') {
len = slen + 4;
@@ -100,115 +100,27 @@ rspamd_multipattern_escape_tld_hyperscan (const gchar *pattern)

res = g_malloc (len + 1);
slen = rspamd_strlcpy (res, prefix, len + 1);
rspamd_strlcpy (res + slen, p, len + 1 - slen);

return res;
}

static gchar *
rspamd_multipattern_escape_generic_hyperscan (const gchar *pattern)
{
const gchar *p;
gchar *res, *d, t;
gsize len, slen;

slen = strlen (pattern);
len = slen;

p = pattern;

/* [-[\]{}()*+?.,\\^$|#\s] need to be escaped */
while (*p) {
t = *p ++;

switch (t) {
case '[':
case ']':
case '-':
case '\\':
case '{':
case '}':
case '(':
case ')':
case '*':
case '+':
case '?':
case '.':
case ',':
case '^':
case '$':
case '|':
case '#':
len ++;
break;
default:
if (g_ascii_isspace (t)) {
len ++;
}
break;
}
}

if (slen == len) {
return g_strdup (pattern);
}

res = g_malloc (len + 1);
p = pattern;
d = res;

while (*p) {
t = *p ++;

switch (t) {
case '[':
case ']':
case '-':
case '\\':
case '{':
case '}':
case '(':
case ')':
case '*':
case '+':
case '?':
case '.':
case ',':
case '^':
case '$':
case '|':
case '#':
*d++ = '\\';
break;
default:
if (g_ascii_isspace (t)) {
*d++ = '\\';
}
break;
}
slen += rspamd_strlcpy (res + slen, p, len + 1 - slen);

*d++ = t;
}

*d = '\0';
*dst_len = slen;

return res;
}

static gchar *
rspamd_multipattern_escape_glob_hyperscan (const gchar *pattern)
rspamd_multipattern_escape_hyperscan (const gchar *pattern, gsize slen,
gsize *dst_len, gboolean allow_glob)
{
const gchar *p;
const gchar *p, *end = pattern + slen;
gchar *res, *d, t;
gsize len, slen;
gsize len;
static const gchar hexdigests[16] = "0123456789abcdef";

slen = strlen (pattern);
len = slen;

p = pattern;

/* [-[\]{}()*+?.,\\^$|#\s] need to be escaped */
while (*p) {
while (p < end) {
t = *p ++;

switch (t) {
@@ -235,11 +147,16 @@ rspamd_multipattern_escape_glob_hyperscan (const gchar *pattern)
if (g_ascii_isspace (t)) {
len ++;
}
else if (!g_ascii_isprint (t)) {
/* \\xHH -> 4 symbols */
len += 3;
}
break;
}
}

if (slen == len) {
*dst_len = slen;
return g_strdup (pattern);
}

@@ -259,7 +176,6 @@ rspamd_multipattern_escape_glob_hyperscan (const gchar *pattern)
case '}':
case '(':
case ')':
case '+':
case '.':
case ',':
case '^':
@@ -270,13 +186,26 @@ rspamd_multipattern_escape_glob_hyperscan (const gchar *pattern)
break;
case '*':
case '?':
/* Treat * as .* and ? as .? */
*d++ = '.';
case '+':
if (allow_glob) {
/* Treat * as .* and ? as .? */
*d++ = '.';
}
else {
*d++ = '\\';
}
break;
default:
if (g_ascii_isspace (t)) {
*d++ = '\\';
}
else if (!g_ascii_isgraph (t)) {
*d++ = '\\';
*d++ = 'x';
*d++ = hexdigests[((t >> 4) & 0xF)];
*d++ = hexdigests[((t) & 0xF)];
continue; /* To avoid *d++ = t; */
}
break;
}

@@ -284,15 +213,17 @@ rspamd_multipattern_escape_glob_hyperscan (const gchar *pattern)
}

*d = '\0';
*dst_len = d - res;

return res;
}

#else
static gchar *
rspamd_multipattern_escape_tld_acism (const gchar *pattern)
rspamd_multipattern_escape_tld_acism (const gchar *pattern, gsize len,
gsize *dst_len)
{
gsize len, slen;
gsize dlen, slen;
const gchar *p, *prefix;
gchar *res;

@@ -302,11 +233,11 @@ rspamd_multipattern_escape_tld_acism (const gchar *pattern)
* 2) *.blah -> \\..*\\.blah
* 3) ???
*/
slen = strlen (pattern);
slen = len;

if (pattern[0] == '*') {
len = slen;
p = strchr (pattern, '.');
dlen = slen;
p = memchr (pattern, '.', len);

if (p == NULL) {
/* XXX: bad */
@@ -316,17 +247,22 @@ rspamd_multipattern_escape_tld_acism (const gchar *pattern)
p ++;
}

dlen -= p - pattern;
prefix = ".";
dlen ++;
}
else {
len = slen + 1;
dlen = slen + 1;
prefix = ".";
p = pattern;
}

res = g_malloc (len + 1);
slen = rspamd_strlcpy (res, prefix, len + 1);
rspamd_strlcpy (res + slen, p, len + 1 - slen);
res = g_malloc (dlen + 1);
slen = strlen (prefix);
memcpy (res, prefix, slen);
memcpy (res + slen, p, dlen - slen);

*dst_len = dlen;

return res;
}
@@ -335,28 +271,37 @@ rspamd_multipattern_escape_tld_acism (const gchar *pattern)
* Escapes special characters from specific pattern
*/
static gchar *
rspamd_multipattern_pattern_filter (const gchar *pattern,
enum rspamd_multipattern_flags flags)
rspamd_multipattern_pattern_filter (const gchar *pattern, gsize len,
enum rspamd_multipattern_flags flags,
gsize *dst_len)
{
gchar *ret = NULL;
#ifdef WITH_HYPERSCAN
if (flags & RSPAMD_MULTIPATTERN_TLD) {
return rspamd_multipattern_escape_tld_hyperscan (pattern);
ret = rspamd_multipattern_escape_tld_hyperscan (pattern, len, dst_len);
}
else if (flags & RSPAMD_MULTIPATTERN_RE) {
return g_strdup (pattern);
ret = malloc (len + 1);
*dst_len = rspamd_strlcpy (ret, pattern, len + 1);
}
else if (flags & RSPAMD_MULTIPATTERN_GLOB) {
return rspamd_multipattern_escape_glob_hyperscan (pattern);
ret = rspamd_multipattern_escape_hyperscan (pattern, len, dst_len, TRUE);
}
else {
ret = rspamd_multipattern_escape_hyperscan (pattern, len, dst_len, FALSE);
}

return rspamd_multipattern_escape_generic_hyperscan (pattern);
#else
if (flags & RSPAMD_MULTIPATTERN_TLD) {
return rspamd_multipattern_escape_tld_acism (pattern);
ret = rspamd_multipattern_escape_tld_acism (pattern, len, dst_len);
}
else {
ret = malloc (len);
memcpy (ret, pattern, len);
*dst_len = len;
}

return g_strdup (pattern);
#endif

return ret;
}

struct rspamd_multipattern *
@@ -404,6 +349,17 @@ void
rspamd_multipattern_add_pattern (struct rspamd_multipattern *mp,
const gchar *pattern, gint flags)
{
g_assert (pattern != NULL);

rspamd_multipattern_add_pattern_len (mp, pattern, strlen (pattern), flags);
}

void
rspamd_multipattern_add_pattern_len (struct rspamd_multipattern *mp,
const gchar *pattern, gsize patlen, gint flags)
{
gsize dlen;

g_assert (pattern != NULL);
g_assert (mp != NULL);
g_assert (!mp->compiled);
@@ -420,16 +376,16 @@ rspamd_multipattern_add_pattern (struct rspamd_multipattern *mp,
}

g_array_append_val (mp->hs_flags, fl);
np = rspamd_multipattern_pattern_filter (pattern, flags);
np = rspamd_multipattern_pattern_filter (pattern, patlen, flags, &dlen);
g_array_append_val (mp->hs_pats, np);
fl = mp->cnt;
g_array_append_val (mp->hs_ids, fl);
rspamd_cryptobox_hash_update (&mp->hash_state, np, strlen (np));
rspamd_cryptobox_hash_update (&mp->hash_state, np, dlen);
#else
ac_trie_pat_t pat;

pat.ptr = rspamd_multipattern_pattern_filter (pattern, flags);
pat.len = strlen (pat.ptr);
pat.ptr = rspamd_multipattern_pattern_filter (pattern, patlen, flags, &dlen);
pat.len = dlen;

g_array_append_val (mp->pats, pat);
#endif

+ 11
- 1
src/libutil/multipattern.h View File

@@ -92,13 +92,23 @@ struct rspamd_multipattern *rspamd_multipattern_create_full (
enum rspamd_multipattern_flags flags);

/**
* Adds new pattern to match engine
* Adds new pattern to match engine from zero-terminated string
* @param mp
* @param pattern
*/
void rspamd_multipattern_add_pattern (struct rspamd_multipattern *mp,
const gchar *pattern, gint flags);

/**
* Adds new pattern from arbitrary string
* @param mp
* @param pattern
* @param patlen
* @param flags
*/
void rspamd_multipattern_add_pattern_len (struct rspamd_multipattern *mp,
const gchar *pattern, gsize patlen, gint flags);

/**
* Compiles multipattern structure
* @param mp

Loading…
Cancel
Save