source.dussan.org Git - rspamd.git/commitdiff
[Project] Use URLs TLDs instead of !!EX!! in stat tokens
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 26 Nov 2018 16:58:25 +0000 (16:58 +0000)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 26 Nov 2018 16:58:25 +0000 (16:58 +0000)
src/libstat/tokenizers/tokenizers.c
src/lua/lua_util.c
src/rspamd.h

index dcd5569109c4997a320ae17872fd714b9ce6c149..19a5dba98c63fd49f72e05ac182cf08294e4d087 100644 (file)
@@ -245,6 +245,43 @@ rspamd_utf_word_valid (const gchar *text, const gchar *end,
     } \
 } while(0)
 
+/*
+ * Append the statistics token for one text exception to res.
+ * URL exceptions with a known TLD yield the TLD itself (the token points
+ * into uri->tld, nothing is copied); generic exceptions and URLs without a
+ * TLD yield the "!!EX!!" placeholder. Other exception types add nothing.
+ */
+static inline void
+rspamd_tokenize_exception (struct rspamd_process_exception *ex, GArray *res)
+{
+       rspamd_stat_token_t token;
+
+       /* Only generic and URL exceptions produce a token */
+       if (ex->type != RSPAMD_EXCEPTION_GENERIC &&
+                       ex->type != RSPAMD_EXCEPTION_URL) {
+               return;
+       }
+
+       /* Start from the placeholder; a URL with a TLD overrides it below */
+       memset (&token, 0, sizeof (token));
+       token.original.begin = "!!EX!!";
+       token.original.len = sizeof ("!!EX!!") - 1;
+       token.flags = RSPAMD_STAT_TOKEN_FLAG_EXCEPTION;
+
+       if (ex->type == RSPAMD_EXCEPTION_URL) {
+               struct rspamd_url *uri = ex->ptr;
+
+               /* Use the TLD only when the URL pointer is set and has one */
+               if (uri && uri->tldlen > 0) {
+                       token.original.begin = uri->tld;
+                       token.original.len = uri->tldlen;
+               }
+       }
+
+       g_array_append_val (res, token);
+}
+
+
 GArray *
 rspamd_tokenize_text (const gchar *text, gsize len,
                                          const UText *utxt,
@@ -347,15 +384,7 @@ start_over:
                                                while (cur && ex->pos <= last) {
                                                        /* We have an exception at the beginning, skip those */
                                                        last += ex->len;
-
-                                                       if (ex->type == RSPAMD_EXCEPTION_URL) {
-                                                               token.original.begin = "!!EX!!";
-                                                               token.original.len = sizeof ("!!EX!!") - 1;
-                                                               token.flags = RSPAMD_STAT_TOKEN_FLAG_EXCEPTION;
-
-                                                               g_array_append_val (res, token);
-                                                               token.flags = 0;
-                                                       }
+                                                       rspamd_tokenize_exception (ex, res);
 
                                                        if (last > p) {
                                                                /* Exception spread over the boundaries */
@@ -385,13 +414,7 @@ start_over:
                                                        /* Process the current exception */
                                                        last += ex->len + (ex->pos - last);
 
-                                                       if (ex->type == RSPAMD_EXCEPTION_URL) {
-                                                               token.original.begin = "!!EX!!";
-                                                               token.original.len = sizeof ("!!EX!!") - 1;
-                                                               token.flags = RSPAMD_STAT_TOKEN_FLAG_EXCEPTION;
-
-                                                               g_array_append_val (res, token);
-                                                       }
+                                                       rspamd_tokenize_exception (ex, res);
 
                                                        if (last > p) {
                                                                /* Exception spread over the boundaries */
index a064bce5b10b6bc2612c409c813bf26deaa33e27..377e08f707ea12f04a6ef6a91145270a53e6a1fb 100644 (file)
@@ -1120,7 +1120,7 @@ lua_util_tokenize_text (lua_State *L)
                                        ex = g_malloc0 (sizeof (*ex));
                                        ex->pos = pos;
                                        ex->len = ex_len;
-                                       ex->type = RSPAMD_EXCEPTION_URL;
+                                       ex->type = RSPAMD_EXCEPTION_GENERIC;
                                        exceptions = g_list_prepend (exceptions, ex);
                                }
                        }
index 80149a8e0be9c4078706d5e386ff48a9878b2d6e..74f08c2d38368f6e8c27d29ba2ea71ca6ae129cf 100644 (file)
@@ -282,6 +282,7 @@ struct rspamd_main {
 enum rspamd_exception_type {
        RSPAMD_EXCEPTION_NEWLINE = 0,
        RSPAMD_EXCEPTION_URL,
+       RSPAMD_EXCEPTION_GENERIC, /* set by lua_util_tokenize_text; tokenized as "!!EX!!" */
 };
 /**
  * Structure to point exception in text from processing