about summary refs log tree commit diff stats
path: root/src/libstat
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-11-26 16:58:25 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-11-26 16:58:25 +0000
commit 5e9cd04af5b9314defdf5bdc6b014b7cfefa896e (patch)
tree 91623cd594c61de361227cdfe386329fed3b18eb /src/libstat
parent 2dcb09b6eae0b21d4da68f9323be4cbfeb2b4237 (diff)
download rspamd-5e9cd04af5b9314defdf5bdc6b014b7cfefa896e.tar.gz
rspamd-5e9cd04af5b9314defdf5bdc6b014b7cfefa896e.zip
[Project] Use URLs TLDs instead of !!EX!! in stat tokens
Diffstat (limited to 'src/libstat')
-rw-r--r-- src/libstat/tokenizers/tokenizers.c 55
1 file changed, 39 insertions, 16 deletions
diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c
index dcd556910..19a5dba98 100644
--- a/src/libstat/tokenizers/tokenizers.c
+++ b/src/libstat/tokenizers/tokenizers.c
@@ -245,6 +245,43 @@ rspamd_utf_word_valid (const gchar *text, const gchar *end,
} \
} while(0)
+/**
+ * Append a statistics token describing a text exception to `res`.
+ *
+ * - RSPAMD_EXCEPTION_GENERIC: the token text is the literal "!!EX!!".
+ * - RSPAMD_EXCEPTION_URL: the token text is the URL's TLD (uri->tld,
+ *   uri->tldlen) when ex->ptr is non-NULL and the TLD is non-empty;
+ *   otherwise it falls back to "!!EX!!".
+ * - Any other exception type: nothing is appended.
+ *
+ * Every appended token has RSPAMD_STAT_TOKEN_FLAG_EXCEPTION as its flags
+ * and all remaining fields zeroed.
+ *
+ * @param ex exception to convert; must not be NULL (it is dereferenced)
+ * @param res array that receives the token by value
+ */
+static inline void
+rspamd_tokenize_exception (struct rspamd_process_exception *ex, GArray *res)
+{
+	rspamd_stat_token_t tok;
+
+	if (ex->type != RSPAMD_EXCEPTION_GENERIC &&
+			ex->type != RSPAMD_EXCEPTION_URL) {
+		/* Other kinds of exceptions do not produce a token */
+		return;
+	}
+
+	memset (&tok, 0, sizeof (tok));
+
+	/* Start from the placeholder; URLs with a known TLD override it below */
+	tok.original.begin = "!!EX!!";
+	tok.original.len = sizeof ("!!EX!!") - 1;
+	tok.flags = RSPAMD_STAT_TOKEN_FLAG_EXCEPTION;
+
+	if (ex->type == RSPAMD_EXCEPTION_URL) {
+		struct rspamd_url *url = ex->ptr;
+
+		if (url != NULL && url->tldlen > 0) {
+			tok.original.begin = url->tld;
+			tok.original.len = url->tldlen;
+		}
+	}
+
+	g_array_append_val (res, tok);
+}
+
+
GArray *
rspamd_tokenize_text (const gchar *text, gsize len,
const UText *utxt,
@@ -347,15 +384,7 @@ start_over:
while (cur && ex->pos <= last) {
/* We have an exception at the beginning, skip those */
last += ex->len;
-
- if (ex->type == RSPAMD_EXCEPTION_URL) {
- token.original.begin = "!!EX!!";
- token.original.len = sizeof ("!!EX!!") - 1;
- token.flags = RSPAMD_STAT_TOKEN_FLAG_EXCEPTION;
-
- g_array_append_val (res, token);
- token.flags = 0;
- }
+ rspamd_tokenize_exception (ex, res);
if (last > p) {
/* Exception spread over the boundaries */
@@ -385,13 +414,7 @@ start_over:
/* Process the current exception */
last += ex->len + (ex->pos - last);
- if (ex->type == RSPAMD_EXCEPTION_URL) {
- token.original.begin = "!!EX!!";
- token.original.len = sizeof ("!!EX!!") - 1;
- token.flags = RSPAMD_STAT_TOKEN_FLAG_EXCEPTION;
-
- g_array_append_val (res, token);
- }
+ rspamd_tokenize_exception (ex, res);
if (last > p) {
/* Exception spread over the boundaries */