aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/libstat/tokenizers/tokenizers.c | 55
-rw-r--r-- src/lua/lua_util.c | 2
-rw-r--r-- src/rspamd.h | 1
3 files changed, 41 insertions, 17 deletions
diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c
index dcd556910..19a5dba98 100644
--- a/src/libstat/tokenizers/tokenizers.c
+++ b/src/libstat/tokenizers/tokenizers.c
@@ -245,6 +245,43 @@ rspamd_utf_word_valid (const gchar *text, const gchar *end,
} \
} while(0)
+/**
+ * Append the token that stands in for a text exception to `res`.
+ * Generic exceptions yield "!!EX!!"; URL exceptions yield the URL's TLD when
+ * it is non-empty and "!!EX!!" otherwise; other types append nothing.
+ */
+static inline void
+rspamd_tokenize_exception (struct rspamd_process_exception *ex, GArray *res)
+{
+	static const gchar placeholder[] = "!!EX!!";
+	rspamd_stat_token_t tok;
+	struct rspamd_url *url;
+
+	memset (&tok, 0, sizeof (tok));
+	tok.flags = RSPAMD_STAT_TOKEN_FLAG_EXCEPTION;
+	tok.original.begin = placeholder;
+	tok.original.len = sizeof (placeholder) - 1;
+
+	switch (ex->type) {
+	case RSPAMD_EXCEPTION_GENERIC:
+		break;
+	case RSPAMD_EXCEPTION_URL:
+		url = ex->ptr;
+
+		if (url != NULL && url->tldlen > 0) {
+			tok.original.begin = url->tld;
+			tok.original.len = url->tldlen;
+		}
+		break;
+	default:
+		/* No token is emitted for the remaining exception types */
+		return;
+	}
+
+	g_array_append_val (res, tok);
+}
+
+
GArray *
rspamd_tokenize_text (const gchar *text, gsize len,
const UText *utxt,
@@ -347,15 +384,7 @@ start_over:
while (cur && ex->pos <= last) {
/* We have an exception at the beginning, skip those */
last += ex->len;
-
- if (ex->type == RSPAMD_EXCEPTION_URL) {
- token.original.begin = "!!EX!!";
- token.original.len = sizeof ("!!EX!!") - 1;
- token.flags = RSPAMD_STAT_TOKEN_FLAG_EXCEPTION;
-
- g_array_append_val (res, token);
- token.flags = 0;
- }
+ rspamd_tokenize_exception (ex, res);
if (last > p) {
/* Exception spread over the boundaries */
@@ -385,13 +414,7 @@ start_over:
/* Process the current exception */
last += ex->len + (ex->pos - last);
- if (ex->type == RSPAMD_EXCEPTION_URL) {
- token.original.begin = "!!EX!!";
- token.original.len = sizeof ("!!EX!!") - 1;
- token.flags = RSPAMD_STAT_TOKEN_FLAG_EXCEPTION;
-
- g_array_append_val (res, token);
- }
+ rspamd_tokenize_exception (ex, res);
if (last > p) {
/* Exception spread over the boundaries */
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c
index a064bce5b..377e08f70 100644
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -1120,7 +1120,7 @@ lua_util_tokenize_text (lua_State *L)
ex = g_malloc0 (sizeof (*ex));
ex->pos = pos;
ex->len = ex_len;
- ex->type = RSPAMD_EXCEPTION_URL;
+ ex->type = RSPAMD_EXCEPTION_GENERIC;
exceptions = g_list_prepend (exceptions, ex);
}
}
diff --git a/src/rspamd.h b/src/rspamd.h
index 80149a8e0..74f08c2d3 100644
--- a/src/rspamd.h
+++ b/src/rspamd.h
@@ -282,6 +282,7 @@ struct rspamd_main {
enum rspamd_exception_type {
RSPAMD_EXCEPTION_NEWLINE = 0,
RSPAMD_EXCEPTION_URL,
+ RSPAMD_EXCEPTION_GENERIC, /* tokenized as a plain "!!EX!!" placeholder token */
};
/**
* Structure to point exception in text from processing