diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/libstat/tokenizers/tokenizers.c | 55 | ||||
-rw-r--r-- | src/lua/lua_util.c | 2 | ||||
-rw-r--r-- | src/rspamd.h | 1 |
3 files changed, 41 insertions, 17 deletions
diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c index dcd556910..19a5dba98 100644 --- a/src/libstat/tokenizers/tokenizers.c +++ b/src/libstat/tokenizers/tokenizers.c @@ -245,6 +245,43 @@ rspamd_utf_word_valid (const gchar *text, const gchar *end, } \ } while(0) +static inline void +rspamd_tokenize_exception (struct rspamd_process_exception *ex, GArray *res) +{ + rspamd_stat_token_t token; + + memset (&token, 0, sizeof (token)); + + if (ex->type == RSPAMD_EXCEPTION_GENERIC) { + token.original.begin = "!!EX!!"; + token.original.len = sizeof ("!!EX!!") - 1; + token.flags = RSPAMD_STAT_TOKEN_FLAG_EXCEPTION; + + g_array_append_val (res, token); + token.flags = 0; + } + else if (ex->type == RSPAMD_EXCEPTION_URL) { + struct rspamd_url *uri; + + uri = ex->ptr; + + if (uri && uri->tldlen > 0) { + token.original.begin = uri->tld; + token.original.len = uri->tldlen; + + } + else { + token.original.begin = "!!EX!!"; + token.original.len = sizeof ("!!EX!!") - 1; + } + + token.flags = RSPAMD_STAT_TOKEN_FLAG_EXCEPTION; + g_array_append_val (res, token); + token.flags = 0; + } +} + + GArray * rspamd_tokenize_text (const gchar *text, gsize len, const UText *utxt, @@ -347,15 +384,7 @@ start_over: while (cur && ex->pos <= last) { /* We have an exception at the beginning, skip those */ last += ex->len; - - if (ex->type == RSPAMD_EXCEPTION_URL) { - token.original.begin = "!!EX!!"; - token.original.len = sizeof ("!!EX!!") - 1; - token.flags = RSPAMD_STAT_TOKEN_FLAG_EXCEPTION; - - g_array_append_val (res, token); - token.flags = 0; - } + rspamd_tokenize_exception (ex, res); if (last > p) { /* Exception spread over the boundaries */ @@ -385,13 +414,7 @@ start_over: /* Process the current exception */ last += ex->len + (ex->pos - last); - if (ex->type == RSPAMD_EXCEPTION_URL) { - token.original.begin = "!!EX!!"; - token.original.len = sizeof ("!!EX!!") - 1; - token.flags = RSPAMD_STAT_TOKEN_FLAG_EXCEPTION; - - g_array_append_val (res, token); - } + rspamd_tokenize_exception (ex, res); if (last > p) { /* Exception spread over the boundaries */ diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c index a064bce5b..377e08f70 100644 --- a/src/lua/lua_util.c +++ b/src/lua/lua_util.c @@ -1120,7 +1120,7 @@ lua_util_tokenize_text (lua_State *L) ex = g_malloc0 (sizeof (*ex)); ex->pos = pos; ex->len = ex_len; - ex->type = RSPAMD_EXCEPTION_URL; + ex->type = RSPAMD_EXCEPTION_GENERIC; exceptions = g_list_prepend (exceptions, ex); } } diff --git a/src/rspamd.h b/src/rspamd.h index 80149a8e0..74f08c2d3 100644 --- a/src/rspamd.h +++ b/src/rspamd.h @@ -282,6 +282,7 @@ struct rspamd_main { enum rspamd_exception_type { RSPAMD_EXCEPTION_NEWLINE = 0, RSPAMD_EXCEPTION_URL, + RSPAMD_EXCEPTION_GENERIC, }; /** * Structure to point exception in text from processing |