source.dussan.org Git - rspamd.git/commitdiff
[Feature] Split message parsing and processing
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 6 Jun 2018 15:35:57 +0000 (16:35 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 6 Jun 2018 15:35:57 +0000 (16:35 +0100)
src/libmime/message.c
src/libmime/message.h
src/libserver/task.c
src/libserver/task.h
src/lua/lua_task.c
src/plugins/fuzzy_check.c
src/rspamadm/lua_repl.c

index 5681c3d001c9f8a43dcd6fc8e741e59f2209c65c..e5e43c5bec139160335ec73e9ee8131db22bb7df 100644 (file)
@@ -907,13 +907,10 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start,
 gboolean
 rspamd_message_parse (struct rspamd_task *task)
 {
-       struct rspamd_mime_text_part *p1, *p2;
        struct received_header *recv, *trecv;
        const gchar *p;
        gsize len;
        guint i;
-       gdouble diff, *pdiff;
-       guint tw, *ptw, dw;
        GError *err = NULL;
        rspamd_cryptobox_hash_state_t st;
        guchar digest_out[rspamd_cryptobox_HASHBYTES];
@@ -1016,16 +1013,6 @@ rspamd_message_parse (struct rspamd_task *task)
                task->queue_id = "undef";
        }
 
-       for (i = 0; i < task->parts->len; i ++) {
-               struct rspamd_mime_part *part;
-
-               part = g_ptr_array_index (task->parts, i);
-               rspamd_message_process_text_part (task, part);
-       }
-
-       rspamd_images_process (task);
-       rspamd_archives_process (task);
-
        if (task->received->len > 0) {
                gboolean need_recv_correction = FALSE;
                rspamd_inet_addr_t *raddr;
@@ -1130,6 +1117,50 @@ rspamd_message_parse (struct rspamd_task *task)
                                rspamd_url_task_subject_callback, task);
        }
 
+       for (i = 0; i < task->parts->len; i ++) {
+               struct rspamd_mime_part *part;
+
+               part = g_ptr_array_index (task->parts, i);
+               rspamd_cryptobox_hash_update (&st, part->digest, sizeof (part->digest));
+       }
+
+       rspamd_cryptobox_hash_final (&st, digest_out);
+       memcpy (task->digest, digest_out, sizeof (task->digest));
+
+       if (task->queue_id) {
+               msg_info_task ("loaded message; id: <%s>; queue-id: <%s>; size: %z; "
+                               "checksum: <%*xs>",
+                               task->message_id, task->queue_id, task->msg.len,
+                               (gint)sizeof (task->digest), task->digest);
+       }
+       else {
+               msg_info_task ("loaded message; id: <%s>; size: %z; "
+                               "checksum: <%*xs>",
+                               task->message_id, task->msg.len,
+                               (gint)sizeof (task->digest), task->digest);
+       }
+
+       return TRUE;
+}
+
+void
+rspamd_message_process (struct rspamd_task *task)
+{
+       guint i;
+       struct rspamd_mime_text_part *p1, *p2;
+       gdouble diff, *pdiff;
+       guint tw, *ptw, dw;
+
+       for (i = 0; i < task->parts->len; i ++) {
+               struct rspamd_mime_part *part;
+
+               part = g_ptr_array_index (task->parts, i);
+               rspamd_message_process_text_part (task, part);
+       }
+
+       rspamd_images_process (task);
+       rspamd_archives_process (task);
+
        /* Calculate distance for 2-parts messages */
        if (task->text_parts->len == 2) {
                p1 = g_ptr_array_index (task->text_parts, 0);
@@ -1144,7 +1175,7 @@ rspamd_message_parse (struct rspamd_task *task)
 
                        if (rspamd_ftok_cmp (&p1->mime_part->parent_part->ct->subtype, &srch) == 0) {
                                if (!IS_PART_EMPTY (p1) && !IS_PART_EMPTY (p2) &&
-                                               p1->normalized_hashes && p2->normalized_hashes) {
+                                       p1->normalized_hashes && p2->normalized_hashes) {
                                        /*
                                         * We also detect language on one part and propagate it to
                                         * another one
@@ -1219,13 +1250,6 @@ rspamd_message_parse (struct rspamd_task *task)
                }
        }
 
-       for (i = 0; i < task->parts->len; i ++) {
-               struct rspamd_mime_part *part;
-
-               part = g_ptr_array_index (task->parts, i);
-               rspamd_cryptobox_hash_update (&st, part->digest, sizeof (part->digest));
-       }
-
        /* Calculate average words length and number of short words */
        struct rspamd_mime_text_part *text_part;
        gdouble *var;
@@ -1258,24 +1282,6 @@ rspamd_message_parse (struct rspamd_task *task)
                        *var /= (double)total_words;
                }
        }
-
-       rspamd_cryptobox_hash_final (&st, digest_out);
-       memcpy (task->digest, digest_out, sizeof (task->digest));
-
-       if (task->queue_id) {
-               msg_info_task ("loaded message; id: <%s>; queue-id: <%s>; size: %z; "
-                               "checksum: <%*xs>",
-                               task->message_id, task->queue_id, task->msg.len,
-                               (gint)sizeof (task->digest), task->digest);
-       }
-       else {
-               msg_info_task ("loaded message; id: <%s>; size: %z; "
-                               "checksum: <%*xs>",
-                               task->message_id, task->msg.len,
-                               (gint)sizeof (task->digest), task->digest);
-       }
-
-       return TRUE;
 }
 
 
index 0ed2a5c663765936dec38728200ca9b67bf0429b..b16011666cb42f71c6c120c2d1a5f601774a80b1 100644 (file)
@@ -142,6 +142,12 @@ struct received_header {
  */
 gboolean rspamd_message_parse (struct rspamd_task *task);
 
+/**
+ * Process content in task (e.g. HTML parsing)
+ * @param task
+ */
+void rspamd_message_process (struct rspamd_task *task);
+
 /**
  * Get an array of header's values with specified header's name using raw headers
  * @param task worker task structure
index b5594816bf3def05ce8bde7b203e74196f93f4ab..9be780b1b44a1ec474dd3a9d7076e20f10aada99 100644 (file)
@@ -738,6 +738,12 @@ rspamd_task_process (struct rspamd_task *task, guint stages)
                                RSPAMD_TASK_STAGE_PRE_FILTERS);
                break;
 
+       case RSPAMD_TASK_STAGE_PROCESS_MESSAGE:
+               if (!(task->flags & RSPAMD_TASK_FLAG_SKIP_PROCESS)) {
+                       rspamd_message_process (task);
+               }
+               break;
+
        case RSPAMD_TASK_STAGE_FILTERS:
                rspamd_symbols_cache_process_symbols (task, task->cfg->cache,
                                RSPAMD_TASK_STAGE_FILTERS);
index e3c0492f6477f2f979bb65d31dbc86a1ddec461c..b9fd2f811d9ef652d9e9ce49edd79fed02ae11a6 100644 (file)
@@ -41,25 +41,27 @@ enum rspamd_task_stage {
        RSPAMD_TASK_STAGE_ENVELOPE = (1 << 1),
        RSPAMD_TASK_STAGE_READ_MESSAGE = (1 << 2),
        RSPAMD_TASK_STAGE_PRE_FILTERS = (1 << 3),
-       RSPAMD_TASK_STAGE_FILTERS = (1 << 4),
-       RSPAMD_TASK_STAGE_CLASSIFIERS_PRE = (1 << 5),
-       RSPAMD_TASK_STAGE_CLASSIFIERS = (1 << 6),
-       RSPAMD_TASK_STAGE_CLASSIFIERS_POST = (1 << 7),
-       RSPAMD_TASK_STAGE_COMPOSITES = (1 << 8),
-       RSPAMD_TASK_STAGE_POST_FILTERS = (1 << 9),
-       RSPAMD_TASK_STAGE_LEARN_PRE = (1 << 10),
-       RSPAMD_TASK_STAGE_LEARN = (1 << 11),
-       RSPAMD_TASK_STAGE_LEARN_POST = (1 << 12),
-       RSPAMD_TASK_STAGE_COMPOSITES_POST = (1 << 13),
-       RSPAMD_TASK_STAGE_IDEMPOTENT = (1 << 14),
-       RSPAMD_TASK_STAGE_DONE = (1 << 15),
-       RSPAMD_TASK_STAGE_REPLIED = (1 << 16)
+       RSPAMD_TASK_STAGE_PROCESS_MESSAGE = (1 << 4),
+       RSPAMD_TASK_STAGE_FILTERS = (1 << 5),
+       RSPAMD_TASK_STAGE_CLASSIFIERS_PRE = (1 << 6),
+       RSPAMD_TASK_STAGE_CLASSIFIERS = (1 << 7),
+       RSPAMD_TASK_STAGE_CLASSIFIERS_POST = (1 << 8),
+       RSPAMD_TASK_STAGE_COMPOSITES = (1 << 9),
+       RSPAMD_TASK_STAGE_POST_FILTERS = (1 << 10),
+       RSPAMD_TASK_STAGE_LEARN_PRE = (1 << 11),
+       RSPAMD_TASK_STAGE_LEARN = (1 << 12),
+       RSPAMD_TASK_STAGE_LEARN_POST = (1 << 13),
+       RSPAMD_TASK_STAGE_COMPOSITES_POST = (1 << 14),
+       RSPAMD_TASK_STAGE_IDEMPOTENT = (1 << 15),
+       RSPAMD_TASK_STAGE_DONE = (1 << 16),
+       RSPAMD_TASK_STAGE_REPLIED = (1 << 17)
 };
 
 #define RSPAMD_TASK_PROCESS_ALL (RSPAMD_TASK_STAGE_CONNECT | \
                RSPAMD_TASK_STAGE_ENVELOPE | \
                RSPAMD_TASK_STAGE_READ_MESSAGE | \
                RSPAMD_TASK_STAGE_PRE_FILTERS | \
+               RSPAMD_TASK_STAGE_PROCESS_MESSAGE | \
                RSPAMD_TASK_STAGE_FILTERS | \
                RSPAMD_TASK_STAGE_CLASSIFIERS_PRE | \
                RSPAMD_TASK_STAGE_CLASSIFIERS | \
@@ -75,6 +77,7 @@ enum rspamd_task_stage {
 #define RSPAMD_TASK_PROCESS_LEARN (RSPAMD_TASK_STAGE_CONNECT | \
                RSPAMD_TASK_STAGE_ENVELOPE | \
                RSPAMD_TASK_STAGE_READ_MESSAGE | \
+               RSPAMD_TASK_STAGE_PROCESS_MESSAGE | \
                RSPAMD_TASK_STAGE_CLASSIFIERS_PRE | \
                RSPAMD_TASK_STAGE_CLASSIFIERS | \
                RSPAMD_TASK_STAGE_CLASSIFIERS_POST | \
@@ -85,7 +88,7 @@ enum rspamd_task_stage {
 
 #define RSPAMD_TASK_FLAG_MIME (1 << 0)
 #define RSPAMD_TASK_FLAG_JSON (1 << 1)
-#define RSPAMD_TASK_FLAG_SKIP_EXTRA (1 << 2)
+#define RSPAMD_TASK_FLAG_SKIP_PROCESS (1 << 2)
 #define RSPAMD_TASK_FLAG_SKIP (1 << 3)
 #define RSPAMD_TASK_FLAG_EXT_URLS (1 << 4)
 #define RSPAMD_TASK_FLAG_SPAMC (1 << 5)
index 3aa24e88b731f630a8d26d7fa11e6aedadd7bdae..85459d44027e2a8be380d08c53f20fca1d936444 100644 (file)
@@ -1096,8 +1096,9 @@ lua_task_process_message (lua_State *L)
 
        if (task != NULL) {
                if (task->msg.len > 0) {
-                       if (rspamd_message_parse (task) == 0) {
+                       if (rspamd_message_parse (task)) {
                                lua_pushboolean (L, TRUE);
+                               rspamd_message_process (task);
                        }
                        else {
                                lua_pushboolean (L, FALSE);
index 0873d3c7addf98e0c41a0c1d7f2d6717708ae59f..c4318777f1b7c61a9ef3618a8c1860167a31ad6d 100644 (file)
@@ -3019,6 +3019,8 @@ fuzzy_process_handler (struct rspamd_http_connection_entry *conn_ent,
                                        "Message processing error");
                        return;
                }
+
+               rspamd_message_process (task);
        }
 
        PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
index e2f3a858129a1846c1124d697adc1f930e38a78a..a807101e0b7b15c5461deb6671c8466859a28b85 100644 (file)
@@ -390,6 +390,7 @@ rspamadm_lua_message_handler (lua_State *L, gint argc, gchar **argv)
                                continue;
                        }
 
+                       rspamd_message_process (task);
                        lua_pushcfunction (L, &rspamd_lua_traceback);
                        err_idx = lua_gettop (L);