From d2a938d398d31fae55e709a4e219b5acfa5c7622 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 13 Jul 2015 17:46:50 +0100 Subject: [PATCH] Rework parts and task structure: - Now text_parts, parts and received are arrays - Pre-allocate arrays with some reasonable defaults - Use arrays instead of lists in plugins and checks - Remove unused fields from task structure - Rework mime_foreach callback function - Remove deprecated scan_milliseconds field --- src/libmime/message.c | 74 +++---- src/libmime/mime_expressions.c | 262 +++++++++--------------- src/libmime/smtp_utils.c | 5 +- src/libserver/protocol.c | 15 +- src/libserver/roll_history.c | 2 +- src/libserver/task.c | 51 +++-- src/libserver/task.h | 108 +++++----- src/libstat/learn_cache/sqlite3_cache.c | 14 +- src/libstat/stat_process.c | 10 +- src/libutil/util.c | 5 +- src/libutil/util.h | 3 +- src/lua/lua_task.c | 40 ++-- src/lua/lua_trie.c | 11 +- src/plugins/chartable.c | 9 +- src/plugins/fuzzy_check.c | 27 ++- 15 files changed, 273 insertions(+), 363 deletions(-) diff --git a/src/libmime/message.c b/src/libmime/message.c index 7fbb46e58..8143469a3 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -1334,7 +1334,7 @@ process_text_part (struct rspamd_task *task, text_part->flags |= RSPAMD_MIME_PART_FLAG_EMPTY; text_part->orig = NULL; text_part->content = NULL; - task->text_parts = g_list_prepend (task->text_parts, text_part); + g_ptr_array_add (task->text_parts, text_part); return; } text_part->orig = part_content; @@ -1363,7 +1363,7 @@ process_text_part (struct rspamd_task *task, rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) free_byte_array_callback, text_part->content); - task->text_parts = g_list_prepend (task->text_parts, text_part); + g_ptr_array_add (task->text_parts, text_part); } else if (g_mime_content_type_is_type (type, "text", "*")) { @@ -1377,7 +1377,7 @@ process_text_part (struct rspamd_task *task, text_part->flags |= 
RSPAMD_MIME_PART_FLAG_EMPTY; text_part->orig = NULL; text_part->content = NULL; - task->text_parts = g_list_prepend (task->text_parts, text_part); + g_ptr_array_add (task->text_parts, text_part); return; } @@ -1388,7 +1388,7 @@ process_text_part (struct rspamd_task *task, text_part->orig = part_content; rspamd_url_text_extract (task->task_pool, task, text_part, FALSE); rspamd_fuzzy_from_text_part (text_part, task->task_pool, task->cfg->max_diff); - task->text_parts = g_list_prepend (task->text_parts, text_part); + g_ptr_array_add (task->text_parts, text_part); } else { return; @@ -1434,6 +1434,12 @@ process_text_part (struct rspamd_task *task, } } +struct mime_foreach_data { + struct rspamd_task *task; + guint parser_recursion; + GMimeObject *parent; +}; + #ifdef GMIME24 static void mime_foreach_callback (GMimeObject * parent, @@ -1444,15 +1450,15 @@ static void mime_foreach_callback (GMimeObject * part, gpointer user_data) #endif { - struct rspamd_task *task = (struct rspamd_task *)user_data; + struct mime_foreach_data *md = user_data; + struct rspamd_task *task; struct mime_part *mime_part; GMimeContentType *type; GMimeDataWrapper *wrapper; GMimeStream *part_stream; GByteArray *part_content; - task->parts_count++; - + task = md->task; /* 'part' points to the current part node that g_mime_message_foreach_part() is iterating over */ /* find out what class 'part' is... */ @@ -1466,15 +1472,15 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data) g_mime_message_foreach_part() again here. 
*/ message = g_mime_message_part_get_message ((GMimeMessagePart *) part); - if (task->scan_milliseconds++ < RECURSION_LIMIT) { + if (md->parser_recursion++ < RECURSION_LIMIT) { #ifdef GMIME24 - g_mime_message_foreach (message, mime_foreach_callback, task); + g_mime_message_foreach (message, mime_foreach_callback, md); #else - g_mime_message_foreach_part (message, mime_foreach_callback, task); + g_mime_message_foreach_part (message, mime_foreach_callback, md); #endif } else { - msg_err ("too deep mime recursion detected: %d", task->scan_milliseconds); + msg_err ("too deep mime recursion detected: %d", md->parser_recursion); return; } #ifndef GMIME24 @@ -1493,14 +1499,14 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data) } else if (GMIME_IS_MULTIPART (part)) { /* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */ - task->parser_parent_part = part; + md->parent = part; #ifndef GMIME24 debug_task ("detected multipart part"); /* we'll get to finding out if this is a signed/encrypted multipart later... 
*/ if (task->parser_recursion++ < RECURSION_LIMIT) { g_mime_multipart_foreach ((GMimeMultipart *) part, mime_foreach_callback, - task); + md); } else { msg_err ("endless recursion detected: %d", task->parser_recursion); @@ -1563,7 +1569,7 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data) mime_part->type = type; mime_part->content = part_content; - mime_part->parent = task->parser_parent_part; + mime_part->parent = md->parent; mime_part->filename = g_mime_part_get_filename (GMIME_PART ( part)); mime_part->mime = part; @@ -1571,13 +1577,13 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data) debug_task ("found part with content-type: %s/%s", type->type, type->subtype); - task->parts = g_list_prepend (task->parts, mime_part); + g_ptr_array_add (task->parts, mime_part); /* Skip empty parts */ process_text_part (task, part_content, type, mime_part, - task->parser_parent_part, + md->parent, (part_content->len <= 0)); } else { @@ -1618,6 +1624,7 @@ rspamd_message_parse (struct rspamd_task *task) GList *first, *cur; GMimePart *part; GMimeDataWrapper *wrapper; + struct mime_foreach_data md; struct received_header *recv; gchar *mid, *url_str; const gchar *url_end, *p, *end; @@ -1675,25 +1682,21 @@ rspamd_message_parse (struct rspamd_task *task) task->message_id = "undef"; } - /* - * XXX: we use this strange value to save bytes in the task for - * saving foreach recursion - */ - task->scan_milliseconds = 0; + memset (&md, 0, sizeof (md)); + md.task = task; #ifdef GMIME24 - g_mime_message_foreach (message, mime_foreach_callback, task); + g_mime_message_foreach (message, mime_foreach_callback, &md); #else /* * This is rather strange, but gmime 2.2 do NOT pass top-level part to foreach callback * so we need to set up parent part by hands */ - task->parser_parent_part = g_mime_message_get_mime_part (message); - g_object_unref (task->parser_parent_part); - g_mime_message_foreach_part (message, mime_foreach_callback, task); + md.parent = 
g_mime_message_get_mime_part (message); + g_object_unref (md.parent); + g_mime_message_foreach_part (message, mime_foreach_callback, &md); #endif - task->scan_milliseconds = 0; - debug_task ("found %d parts in message", task->parts_count); + debug_task ("found %ud parts in message", task->parts->len); if (task->queue_id == NULL) { task->queue_id = "undef"; } @@ -1721,13 +1724,13 @@ rspamd_message_parse (struct rspamd_task *task) rspamd_mempool_alloc0 (task->task_pool, sizeof (struct received_header)); parse_recv_header (task->task_pool, cur->data, recv); - task->received = g_list_prepend (task->received, recv); + g_ptr_array_add (task->received, recv); cur = g_list_next (cur); } /* Extract data from received header if we were not given IP */ if (task->received && (task->flags & RSPAMD_TASK_FLAG_NO_IP)) { - recv = task->received->data; + recv = g_ptr_array_index (task->received, 0); if (recv->real_ip) { if (!rspamd_parse_inet_address (&task->from_addr, recv->real_ip)) { msg_warn ("cannot get IP from received header: '%s'", @@ -1771,18 +1774,14 @@ rspamd_message_parse (struct rspamd_task *task) rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) destroy_message, task->message); - /* - * XXX: we use this strange value to save bytes in the task for - * saving foreach recursion - */ - task->scan_milliseconds = 0; + memset (&md, 0, sizeof (md)); + md.task = task; #ifdef GMIME24 - g_mime_message_foreach (task->message, mime_foreach_callback, task); + g_mime_message_foreach (task->message, mime_foreach_callback, &md); #else g_mime_message_foreach_part (task->message, mime_foreach_callback, - task); + &md); #endif - task->scan_milliseconds = 0; /* Generate message ID */ mid = g_mime_utils_generate_message_id ("localhost.localdomain"); rspamd_mempool_add_destructor (task->task_pool, @@ -1790,6 +1789,7 @@ rspamd_message_parse (struct rspamd_task *task) g_mime_message_set_message_id (task->message, mid); task->message_id = mid; task->queue_id = mid; + 
/* Set headers for message */ if (task->subject) { g_mime_message_set_subject (task->message, task->subject); diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c index fd7e2e216..592667346 100644 --- a/src/libmime/mime_expressions.c +++ b/src/libmime/mime_expressions.c @@ -790,6 +790,7 @@ rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re, const gchar *in; gint ret = 0; GList *cur, *headerlist; + guint i; rspamd_regexp_t *regexp; struct url_regexp_param callback_param = { .task = task, @@ -888,12 +889,11 @@ rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re, break; case REGEXP_MIME: /* Iterate throught text parts */ - cur = g_list_first (task->text_parts); - while (cur) { - part = (struct mime_text_part *)cur->data; + for (i = 0; i < task->text_parts->len; i ++) { + part = g_ptr_array_index (task->text_parts, i); + /* Skip empty parts */ if (IS_PART_EMPTY (part)) { - cur = g_list_next (cur); continue; } @@ -918,11 +918,8 @@ rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re, if (!re->is_multiple && ret) { break; } - - cur = g_list_next (cur); } - break; case REGEXP_MESSAGE: raw = TRUE; @@ -1179,7 +1176,6 @@ rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused) { gint threshold, threshold2 = -1, diff; struct mime_text_part *p1, *p2; - GList *cur; struct expression_argument *arg; GMimeObject *parent; const GMimeContentType *ct; @@ -1244,18 +1240,12 @@ rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused) } } - if (g_list_length (task->text_parts) == 2) { - cur = g_list_first (task->text_parts); - p1 = cur->data; - cur = g_list_next (cur); + if (task->text_parts->len == 2) { + p1 = g_ptr_array_index (task->text_parts, 0); + p2 = g_ptr_array_index (task->text_parts, 1); pdiff = rspamd_mempool_alloc (task->task_pool, sizeof (gint)); *pdiff = -1; - if (cur == NULL) { - msg_info ("bad parts list"); - return FALSE; - } - p2 = cur->data; /* First of all check 
parent object */ if (p1->parent && p1->parent == p2->parent) { parent = p1->parent; @@ -1462,20 +1452,17 @@ rspamd_has_only_html_part (struct rspamd_task * task, GArray * args, void *unused) { struct mime_text_part *p; - GList *cur; gboolean res = FALSE; - cur = g_list_first (task->text_parts); - while (cur) { - p = cur->data; + if (task->text_parts->len == 1) { + p = g_ptr_array_index (task->text_parts, 0); + if (IS_PART_HTML (p)) { res = TRUE; } else { res = FALSE; - break; } - cur = g_list_next (cur); } return res; @@ -1633,12 +1620,12 @@ gboolean rspamd_is_html_balanced (struct rspamd_task * task, GArray * args, void *unused) { struct mime_text_part *p; - GList *cur; + guint i; gboolean res = TRUE; - cur = g_list_first (task->text_parts); - while (cur) { - p = cur->data; + for (i = 0; i < task->text_parts->len; i ++) { + + p = g_ptr_array_index (task->text_parts, i); if (!IS_PART_EMPTY (p) && IS_PART_HTML (p)) { if (p->flags & RSPAMD_MIME_PART_FLAG_BALANCED) { res = TRUE; @@ -1648,7 +1635,6 @@ rspamd_is_html_balanced (struct rspamd_task * task, GArray * args, void *unused) break; } } - cur = g_list_next (cur); } return res; @@ -1681,9 +1667,9 @@ gboolean rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused) { struct mime_text_part *p; - GList *cur; struct expression_argument *arg; struct html_tag *tag; + guint i; gboolean res = FALSE; struct html_callback_data cd; @@ -1705,12 +1691,12 @@ rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused) return FALSE; } - cur = g_list_first (task->text_parts); cd.res = &res; cd.tag = tag; - while (cur && res == FALSE) { - p = cur->data; + for (i = 0; i < task->text_parts->len && res; i ++) { + p = g_ptr_array_index (task->text_parts, i); + if (!IS_PART_EMPTY (p) && IS_PART_HTML (p) && p->html_nodes) { g_node_traverse (p->html_nodes, G_PRE_ORDER, @@ -1719,7 +1705,6 @@ rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused) search_html_node_callback, &cd); } - 
cur = g_list_next (cur); } return res; @@ -1730,17 +1715,15 @@ gboolean rspamd_has_fake_html (struct rspamd_task * task, GArray * args, void *unused) { struct mime_text_part *p; - GList *cur; + guint i; gboolean res = FALSE; - cur = g_list_first (task->text_parts); + for (i = 0; i < task->text_parts->len && res; i ++) { + p = g_ptr_array_index (task->text_parts, i); - while (cur && res == FALSE) { - p = cur->data; if (!IS_PART_EMPTY (p) && IS_PART_HTML (p) && p->html_nodes == NULL) { res = TRUE; } - cur = g_list_next (cur); } return res; @@ -1921,8 +1904,8 @@ rspamd_content_type_compare_param (struct rspamd_task * task, GMimeObject *part; GMimeContentType *ct; gint r; + guint i; gboolean recursive = FALSE, result = FALSE; - GList *cur = NULL; struct mime_part *cur_part; if (args == NULL || args->len < 2) { @@ -1935,14 +1918,15 @@ rspamd_content_type_compare_param (struct rspamd_task * task, param_name = arg->data; arg_pattern = &g_array_index (args, struct expression_argument, 1); - - part = g_mime_message_get_mime_part (task->message); - if (part) { + for (i = 0; i < task->text_parts->len; i ++) { + cur_part = g_ptr_array_index (task->text_parts, i); + part = cur_part->mime; ct = (GMimeContentType *)g_mime_object_get_content_type (part); + if (args->len >= 3) { arg1 = &g_array_index (args, struct expression_argument, 2); if (g_ascii_strncasecmp (arg1->data, "true", - sizeof ("true") - 1) == 0) { + sizeof ("true") - 1) == 0) { recursive = TRUE; } } @@ -1955,55 +1939,38 @@ rspamd_content_type_compare_param (struct rspamd_task * task, recursive = TRUE; } } - - if (recursive) { - cur = task->parts; - } - #ifndef GMIME24 g_object_unref (part); #endif - for (;; ) { - if ((param_data = + + if ((param_data = g_mime_content_type_get_parameter ((GMimeContentType *)ct, - param_name)) == NULL) { - result = FALSE; - } - else { - if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) { - re = arg_pattern->data; - - if ((r = rspamd_task_re_cache_check (task, - 
rspamd_regexp_get_pattern (re))) == -1) { - r = rspamd_regexp_search (re, param_data, 0, - NULL, NULL, FALSE, NULL); - rspamd_task_re_cache_add (task, - rspamd_regexp_get_pattern (re), r); - } - } - else { - /* Just do strcasecmp */ - if (g_ascii_strcasecmp (param_data, arg_pattern->data) == 0) { - return TRUE; - } - } - } - /* Get next part */ - if (!recursive) { - return result; - } - else if (cur != NULL) { - cur_part = cur->data; - if (cur_part->type != NULL) { - ct = cur_part->type; + param_name)) == NULL) { + result = FALSE; + } + else { + if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) { + re = arg_pattern->data; + + if ((r = rspamd_task_re_cache_check (task, + rspamd_regexp_get_pattern (re))) == -1) { + r = rspamd_regexp_search (re, param_data, 0, + NULL, NULL, FALSE, NULL); + rspamd_task_re_cache_add (task, + rspamd_regexp_get_pattern (re), r); } - cur = g_list_next (cur); } else { - /* All is done */ - return result; + /* Just do strcasecmp */ + if (g_ascii_strcasecmp (param_data, arg_pattern->data) == 0) { + return TRUE; + } } } + /* Get next part */ + if (!recursive) { + break; + } } return FALSE; @@ -2020,7 +1987,7 @@ rspamd_content_type_has_param (struct rspamd_task * task, GMimeObject *part; GMimeContentType *ct; gboolean recursive = FALSE, result = FALSE; - GList *cur = NULL; + guint i; struct mime_part *cur_part; if (args == NULL || args->len < 1) { @@ -2032,9 +1999,11 @@ rspamd_content_type_has_param (struct rspamd_task * task, g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL); param_name = arg->data; - part = g_mime_message_get_mime_part (task->message); - if (part) { + for (i = 0; i < task->text_parts->len; i ++) { + cur_part = g_ptr_array_index (task->text_parts, i); + part = cur_part->mime; ct = (GMimeContentType *)g_mime_object_get_content_type (part); + if (args->len >= 2) { arg1 = &g_array_index (args, struct expression_argument, 2); if (g_ascii_strncasecmp (arg1->data, "true", @@ -2052,39 +2021,21 @@ rspamd_content_type_has_param 
(struct rspamd_task * task, } } - if (recursive) { - cur = task->parts; - } - #ifndef GMIME24 g_object_unref (part); #endif - for (;; ) { - if ((param_data = + if ((param_data = g_mime_content_type_get_parameter ((GMimeContentType *)ct, - param_name)) != NULL) { - return TRUE; - } - /* Get next part */ - if (!recursive) { - return result; - } - else if (cur != NULL) { - cur_part = cur->data; - if (cur_part->type != NULL) { - ct = cur_part->type; - } - cur = g_list_next (cur); - } - else { - /* All is done */ - return result; - } + param_name)) != NULL) { + return TRUE; + } + /* Get next part */ + if (!recursive) { + break; } - } - return TRUE; + return result; } static gboolean @@ -2098,19 +2049,22 @@ rspamd_content_type_check (struct rspamd_task *task, GMimeObject *part; GMimeContentType *ct; gint r; - gboolean recursive = FALSE, result = FALSE; - GList *cur = NULL; + guint i; + gboolean recursive = FALSE; struct mime_part *cur_part; - if (args == NULL) { + if (args == NULL || args->len < 1) { msg_warn ("no parameters to function"); return FALSE; } + arg_pattern = &g_array_index (args, struct expression_argument, 0); - part = g_mime_message_get_mime_part (task->message); - if (part) { + for (i = 0; i < task->text_parts->len; i ++) { + cur_part = g_ptr_array_index (task->text_parts, i); + part = cur_part->mime; ct = (GMimeContentType *)g_mime_object_get_content_type (part); + if (args->len >= 2) { arg1 = &g_array_index (args, struct expression_argument, 1); if (g_ascii_strncasecmp (arg1->data, "true", @@ -2128,56 +2082,38 @@ rspamd_content_type_check (struct rspamd_task *task, } } - if (recursive) { - cur = task->parts; - } - #ifndef GMIME24 g_object_unref (part); #endif - for (;;) { - - if (check_subtype) { - param_data = ct->subtype; - } - else { - param_data = ct->type; - } + if (check_subtype) { + param_data = ct->subtype; + } + else { + param_data = ct->type; + } - if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) { - re = arg_pattern->data; + if 
(arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) { + re = arg_pattern->data; - if ((r = rspamd_task_re_cache_check (task, - rspamd_regexp_get_pattern (re))) == -1) { - r = rspamd_regexp_search (re, param_data, 0, - NULL, NULL, FALSE, NULL); - rspamd_task_re_cache_add (task, - rspamd_regexp_get_pattern (re), r); - } - } - else { - /* Just do strcasecmp */ - if (g_ascii_strcasecmp (param_data, arg_pattern->data) == 0) { - return TRUE; - } - } - /* Get next part */ - if (!recursive) { - return result; - } - else if (cur != NULL) { - cur_part = cur->data; - if (cur_part->type != NULL) { - ct = cur_part->type; - } - cur = g_list_next (cur); + if ((r = rspamd_task_re_cache_check (task, + rspamd_regexp_get_pattern (re))) == -1) { + r = rspamd_regexp_search (re, param_data, 0, + NULL, NULL, FALSE, NULL); + rspamd_task_re_cache_add (task, + rspamd_regexp_get_pattern (re), r); } - else { - /* All is done */ - return result; + } + else { + /* Just do strcasecmp */ + if (g_ascii_strcasecmp (param_data, arg_pattern->data) == 0) { + return TRUE; } } + /* Get next part */ + if (!recursive) { + break; + } } return FALSE; @@ -2258,16 +2194,15 @@ common_has_content_part (struct rspamd_task * task, { rspamd_regexp_t *re; struct mime_part *part; - GList *cur; GMimeContentType *ct; gint r; + guint i; - cur = g_list_first (task->parts); - while (cur) { - part = cur->data; + for (i = 0; i < task->text_parts->len; i ++) { + part = g_ptr_array_index (task->text_parts, i); ct = part->type; + if (ct == NULL) { - cur = g_list_next (cur); continue; } @@ -2304,7 +2239,6 @@ common_has_content_part (struct rspamd_task * task, } } } - cur = g_list_next (cur); } return FALSE; diff --git a/src/libmime/smtp_utils.c b/src/libmime/smtp_utils.c index 71f57a5b4..b3abe0302 100644 --- a/src/libmime/smtp_utils.c +++ b/src/libmime/smtp_utils.c @@ -212,9 +212,8 @@ smtp_metric_callback (gpointer key, gpointer value, gpointer ud) cd->log_size - cd->log_offset, "]), len: %z, time: %s,", task->msg.len, - 
calculate_check_time (task->time_real, task->time_virtual, - task->cfg->clock_res, - &task->scan_milliseconds)); + rspamd_log_check_time (task->time_real, task->time_virtual, + task->cfg->clock_res)); } gboolean diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c index 761b04a30..a4d78427f 100644 --- a/src/libserver/protocol.c +++ b/src/libserver/protocol.c @@ -642,15 +642,14 @@ rspamd_protocol_handle_request (struct rspamd_task *task, static void write_hashes_to_log (struct rspamd_task *task, GString *logbuf) { - GList *cur; struct mime_text_part *text_part; + guint i; - cur = task->text_parts; + for (i = 0; i < task->text_parts->len; i ++) { + text_part = g_ptr_array_index (task->text_parts, i); - while (cur) { - text_part = cur->data; if (text_part->fuzzy) { - if (cur->next != NULL) { + if (i != task->text_parts->len - 1) { rspamd_printf_gstring (logbuf, " part: %Xd,", text_part->fuzzy->h); @@ -660,7 +659,6 @@ write_hashes_to_log (struct rspamd_task *task, GString *logbuf) text_part->fuzzy->h); } } - cur = g_list_next (cur); } } @@ -923,10 +921,9 @@ rspamd_metric_result_ucl (struct rspamd_task *task, rspamd_printf_gstring (logbuf, "]), len: %z, time: %s, dns req: %d,", task->msg.len, - calculate_check_time (task->time_real, + rspamd_log_check_time (task->time_real, task->time_virtual, - task->cfg->clock_res, - &task->scan_milliseconds), + task->cfg->clock_res), task->dns_requests); } diff --git a/src/libserver/roll_history.c b/src/libserver/roll_history.c index c494ce74c..9effcfc3a 100644 --- a/src/libserver/roll_history.c +++ b/src/libserver/roll_history.c @@ -154,7 +154,7 @@ rspamd_roll_history_update (struct roll_history *history, } } - row->scan_time = task->scan_milliseconds; + row->scan_time = rspamd_get_ticks () - task->time_real; row->len = task->msg.len; row->completed = TRUE; } diff --git a/src/libserver/task.c b/src/libserver/task.c index 236f2918b..8dd682a1e 100644 --- a/src/libserver/task.c +++ b/src/libserver/task.c @@ -94,6 +94,16 @@ 
rspamd_task_new (struct rspamd_worker *worker) rspamd_mempool_add_destructor (new_task->task_pool, (rspamd_mempool_destruct_t) g_hash_table_unref, new_task->urls); + new_task->parts = g_ptr_array_sized_new (4); + rspamd_mempool_add_destructor (new_task->task_pool, + rspamd_ptr_array_free_hard, new_task->parts); + new_task->text_parts = g_ptr_array_sized_new (2); + rspamd_mempool_add_destructor (new_task->task_pool, + rspamd_ptr_array_free_hard, new_task->text_parts); + new_task->received = g_ptr_array_sized_new (8); + rspamd_mempool_add_destructor (new_task->task_pool, + rspamd_ptr_array_free_hard, new_task->received); + new_task->sock = -1; new_task->flags |= (RSPAMD_TASK_FLAG_MIME|RSPAMD_TASK_FLAG_JSON); new_task->pre_result.action = METRIC_ACTION_NOACTION; @@ -159,57 +169,56 @@ rspamd_task_restore (void *arg) void rspamd_task_free (struct rspamd_task *task, gboolean is_soft) { - GList *part; struct mime_part *p; struct mime_text_part *tp; + guint i; if (task) { debug_task ("free pointer %p", task); - while ((part = g_list_first (task->parts))) { - task->parts = g_list_remove_link (task->parts, part); - p = (struct mime_part *) part->data; + + for (i = 0; i < task->parts->len; i ++) { + p = g_ptr_array_index (task->parts, i); g_byte_array_free (p->content, TRUE); - g_list_free_1 (part); } - if (task->text_parts) { - part = task->text_parts; - while (part) { - tp = (struct mime_text_part *)part->data; - if (tp->words) { - g_array_free (tp->words, TRUE); - } - if (tp->normalized_words) { - g_array_free (tp->normalized_words, TRUE); - } - part = g_list_next (part); - } - g_list_free (task->text_parts); + for (i = 0; i < task->text_parts->len; i ++) { + tp = g_ptr_array_index (task->text_parts, i); + if (tp->words) { + g_array_free (tp->words, TRUE); + } + if (tp->normalized_words) { + g_array_free (tp->normalized_words, TRUE); + } } + if (task->images) { g_list_free (task->images); } + if (task->messages) { g_list_free (task->messages); } - if (task->received) { - 
g_list_free (task->received); - } + if (task->http_conn != NULL) { rspamd_http_connection_unref (task->http_conn); } + if (task->sock != -1) { close (task->sock); } + if (task->settings != NULL) { ucl_object_unref (task->settings); } + if (task->client_addr) { rspamd_inet_address_destroy (task->client_addr); } + if (task->from_addr) { rspamd_inet_address_destroy (task->from_addr); } + if (task->err) { g_error_free (task->err); } diff --git a/src/libserver/task.h b/src/libserver/task.h index 79aa19fb5..e6894bea9 100644 --- a/src/libserver/task.h +++ b/src/libserver/task.h @@ -107,78 +107,72 @@ struct custom_command { * Worker task structure */ struct rspamd_task { - struct rspamd_worker *worker; /**< pointer to worker object */ - struct custom_command *custom_cmd; /**< custom command if any */ - guint processed_stages; /**< bits of stages that are processed */ - enum rspamd_command cmd; /**< command */ - gint sock; /**< socket descriptor */ - guint flags; /**< Bit flags */ - guint message_len; /**< Message length */ - - gchar *helo; /**< helo header value */ - gchar *queue_id; /**< queue id if specified */ - const gchar *message_id; /**< message id */ - - rspamd_inet_addr_t *from_addr; /**< from addr for a task */ - rspamd_inet_addr_t *client_addr; /**< address of connected socket */ - gchar *deliver_to; /**< address to deliver */ - gchar *user; /**< user to deliver */ - gchar *subject; /**< subject (for non-mime) */ - gchar *hostname; /**< hostname reported by MTA */ - GHashTable *request_headers; /**< HTTP headers in a request */ - GHashTable *reply_headers; /**< Custom reply headers */ + struct rspamd_worker *worker; /**< pointer to worker object */ + struct custom_command *custom_cmd; /**< custom command if any */ + guint processed_stages; /**< bits of stages that are processed */ + enum rspamd_command cmd; /**< command */ + gint sock; /**< socket descriptor */ + guint flags; /**< Bit flags */ + guint message_len; /**< Message length */ + guint32 dns_requests; 
/**< number of DNS requests per this task */ + gchar *helo; /**< helo header value */ + gchar *queue_id; /**< queue id if specified */ + const gchar *message_id; /**< message id */ + rspamd_inet_addr_t *from_addr; /**< from addr for a task */ + rspamd_inet_addr_t *client_addr; /**< address of connected socket */ + gchar *deliver_to; /**< address to deliver */ + gchar *user; /**< user to deliver */ + gchar *subject; /**< subject (for non-mime) */ + gchar *hostname; /**< hostname reported by MTA */ + GHashTable *request_headers; /**< HTTP headers in a request */ + GHashTable *reply_headers; /**< Custom reply headers */ struct { const gchar *start; gsize len; - } msg; /**< message buffer */ - struct rspamd_http_connection *http_conn; /**< HTTP server connection */ - struct rspamd_async_session * s; /**< async session object */ - gint parts_count; /**< mime parts count */ - GMimeMessage *message; /**< message, parsed with GMime */ - GMimeObject *parser_parent_part; /**< current parent part */ - GList *parts; /**< list of parsed parts */ - GList *text_parts; /**< list of text parts */ - rspamd_fstring_t raw_headers_content; /**< list of raw headers */ - GList *received; /**< list of received headers */ - GHashTable *urls; /**< list of parsed urls */ - GHashTable *emails; /**< list of parsed emails */ - GList *images; /**< list of images */ - GHashTable *raw_headers; /**< list of raw headers */ - GHashTable *results; /**< hash table of metric_result indexed by - * metric's name */ - GHashTable *tokens; /**< hash table of tokens indexed by tokenizer - * pointer */ - - InternetAddressList *rcpt_mime; /**< list of all recipients */ - InternetAddressList *rcpt_envelope; /**< list of all recipients */ + } msg; /**< message buffer */ + struct rspamd_http_connection *http_conn; /**< HTTP server connection */ + struct rspamd_async_session * s; /**< async session object */ + GMimeMessage *message; /**< message, parsed with GMime */ + GPtrArray *parts; /**< list of parsed parts */ 
+ GPtrArray *text_parts; /**< list of text parts */ + rspamd_fstring_t raw_headers_content; /**< list of raw headers */ + GPtrArray *received; /**< list of received headers */ + GHashTable *urls; /**< list of parsed urls */ + GHashTable *emails; /**< list of parsed emails */ + GList *images; /**< list of images */ + GHashTable *raw_headers; /**< list of raw headers */ + GHashTable *results; /**< hash table of metric_result indexed by + * metric's name */ + GHashTable *tokens; /**< hash table of tokens indexed by tokenizer + * pointer */ + InternetAddressList *rcpt_mime; /**< list of all recipients */ + InternetAddressList *rcpt_envelope; /**< list of all recipients */ InternetAddressList *from_mime; InternetAddressList *from_envelope; - GList *messages; /**< list of messages that would be reported */ - GHashTable *re_cache; /**< cache for matched or not matched regexps */ - struct rspamd_config *cfg; /**< pointer to config object */ + GList *messages; /**< list of messages that would be reported */ + GHashTable *re_cache; /**< cache for matched or not matched regexps */ + struct rspamd_config *cfg; /**< pointer to config object */ GError *err; - rspamd_mempool_t *task_pool; /**< memory pool for task */ + rspamd_mempool_t *task_pool; /**< memory pool for task */ double time_real; double time_virtual; struct timeval tv; - guint32 scan_milliseconds; /**< how much milliseconds passed */ - gboolean (*fin_callback)(struct rspamd_task *task, void *arg); /**< calback for filters finalizing */ - void *fin_arg; /**< argument for fin callback */ - - guint32 dns_requests; /**< number of DNS requests per this task */ + gboolean (*fin_callback)(struct rspamd_task *task, void *arg); + /**< calback for filters finalizing */ + void *fin_arg; /**< argument for fin callback */ - struct rspamd_dns_resolver *resolver; /**< DNS resolver */ - struct event_base *ev_base; /**< Event base */ + struct rspamd_dns_resolver *resolver; /**< DNS resolver */ + struct event_base *ev_base; /**< 
Event base */ - gpointer checkpoint; /**< Opaque checkpoint data */ + gpointer checkpoint; /**< Opaque checkpoint data */ struct { - enum rspamd_metric_action action; /**< Action of pre filters */ - gchar *str; /**< String describing action */ - } pre_result; /**< Result of pre-filters */ + enum rspamd_metric_action action; /**< Action of pre filters */ + gchar *str; /**< String describing action */ + } pre_result; /**< Result of pre-filters */ - ucl_object_t *settings; /**< Settings applied to task */ + ucl_object_t *settings; /**< Settings applied to task */ }; /** diff --git a/src/libstat/learn_cache/sqlite3_cache.c b/src/libstat/learn_cache/sqlite3_cache.c index fa366877e..ebfb5510c 100644 --- a/src/libstat/learn_cache/sqlite3_cache.c +++ b/src/libstat/learn_cache/sqlite3_cache.c @@ -247,24 +247,20 @@ rspamd_stat_cache_sqlite3_process (struct rspamd_task *task, blake2b_state st; rspamd_fstring_t *word; guchar out[BLAKE2B_OUTBYTES]; - GList *cur; - guint i; + guint i, j; if (ctx != NULL && ctx->db != NULL) { blake2b_init (&st, sizeof (out)); - cur = task->text_parts; - while (cur) { - part = (struct mime_text_part *)cur->data; + for (i = 0; i < task->text_parts->len; i ++) { + part = g_ptr_array_index (task->text_parts, i); if (part->words != NULL) { - for (i = 0; i < part->words->len; i ++) { - word = &g_array_index (part->words, rspamd_fstring_t, i); + for (j = 0; j < part->words->len; j ++) { + word = &g_array_index (part->words, rspamd_fstring_t, j); blake2b_update (&st, word->begin, word->len); } } - - cur = g_list_next (cur); } blake2b_final (&st, out, sizeof (out)); diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c index 9c261eccd..d147a29d3 100644 --- a/src/libstat/stat_process.c +++ b/src/libstat/stat_process.c @@ -308,12 +308,10 @@ rspamd_stat_process_tokenize (struct rspamd_tokenizer_config *cf, struct mime_text_part *part; GArray *words; gchar *sub; - GList *cur; - - cur = task->text_parts; + guint i; - while (cur != NULL) { - part 
= (struct mime_text_part *)cur->data; + for (i = 0; i < task->text_parts->len; i ++) { + part = g_ptr_array_index (task->text_parts, i); if (!IS_PART_EMPTY (part) && part->words != NULL) { if (compat) { @@ -325,8 +323,6 @@ rspamd_stat_process_tokenize (struct rspamd_tokenizer_config *cf, part->normalized_words, tok->tokens, IS_PART_UTF (part)); } } - - cur = g_list_next (cur); } if (task->subject != NULL) { diff --git a/src/libutil/util.c b/src/libutil/util.c index 8be3fc597..bc58fc70a 100644 --- a/src/libutil/util.c +++ b/src/libutil/util.c @@ -1191,8 +1191,7 @@ resolve_stat_filename (rspamd_mempool_t * pool, } const gchar * -calculate_check_time (gdouble start_real, gdouble start_virtual, gint resolution, - guint32 *scan_time) +rspamd_log_check_time (gdouble start_real, gdouble start_virtual, gint resolution) { double vdiff, diff, end_real, end_virtual; static gchar res[64]; @@ -1203,8 +1202,6 @@ calculate_check_time (gdouble start_real, gdouble start_virtual, gint resolution vdiff = (end_virtual - start_virtual) * 1000; diff = (end_real - start_real) * 1000; - *scan_time = diff; - sprintf (fmt, "%%.%dfms real, %%.%dfms virtual", resolution, resolution); snprintf (res, sizeof (res), fmt, diff, vdiff); diff --git a/src/libutil/util.h b/src/libutil/util.h index abc949dfc..d3dcdbcf7 100644 --- a/src/libutil/util.h +++ b/src/libutil/util.h @@ -158,8 +158,7 @@ gchar * resolve_stat_filename (rspamd_mempool_t *pool, gchar *from); const gchar * -calculate_check_time (gdouble start_real, gdouble start_virtual, gint resolution, - guint32 *scan_time); +rspamd_log_check_time (gdouble start_real, gdouble start_virtual, gint resolution); /* * File locking functions diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index e40fe0197..90aea155d 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -886,23 +886,22 @@ lua_task_get_emails (lua_State * L) static gint lua_task_get_text_parts (lua_State * L) { - gint i = 1; + guint i; struct rspamd_task *task = lua_check_task 
(L, 1); - GList *cur; struct mime_text_part *part, **ppart; if (task != NULL) { lua_newtable (L); - cur = task->text_parts; - while (cur) { - part = cur->data; + + for (i = 0; i < task->text_parts->len; i ++) { + part = g_ptr_array_index (task->text_parts, i); ppart = lua_newuserdata (L, sizeof (struct mime_text_part *)); *ppart = part; rspamd_lua_setclass (L, "rspamd{textpart}", -1); /* Make it array */ - lua_rawseti (L, -2, i++); - cur = g_list_next (cur); + lua_rawseti (L, -2, i + 1); } + return 1; } lua_pushnil (L); @@ -912,22 +911,20 @@ lua_task_get_text_parts (lua_State * L) static gint lua_task_get_parts (lua_State * L) { - gint i = 1; + guint i; struct rspamd_task *task = lua_check_task (L, 1); - GList *cur; struct mime_part *part, **ppart; if (task != NULL) { lua_newtable (L); - cur = task->parts; - while (cur) { - part = cur->data; + + for (i = 0; i < task->parts->len; i ++) { + part = g_ptr_array_index (task->parts, i); ppart = lua_newuserdata (L, sizeof (struct mime_part *)); *ppart = part; rspamd_lua_setclass (L, "rspamd{mimepart}", -1); /* Make it array */ - lua_rawseti (L, -2, i++); - cur = g_list_next (cur); + lua_rawseti (L, -2, i + 1); } return 1; } @@ -1153,23 +1150,23 @@ static gint lua_task_get_received_headers (lua_State * L) { struct rspamd_task *task = lua_check_task (L, 1); - GList *cur; struct received_header *rh; - gint i = 1; + guint i, k = 1; if (task) { lua_newtable (L); - cur = g_list_first (task->received); - while (cur) { - rh = cur->data; + + for (i = 0; i < task->received->len; i ++) { + rh = g_ptr_array_index (task->received, i); + if (rh->is_error || G_UNLIKELY ( rh->from_ip == NULL && rh->real_ip == NULL && rh->real_hostname == NULL && rh->by_hostname == NULL)) { - cur = g_list_next (cur); continue; } + lua_newtable (L); rspamd_lua_table_set (L, "from_hostname", rh->from_hostname); lua_pushstring (L, "from_ip"); @@ -1180,8 +1177,7 @@ lua_task_get_received_headers (lua_State * L) rspamd_lua_ip_push_fromstring (L, rh->real_ip);
lua_settable (L, -3); rspamd_lua_table_set (L, "by_hostname", rh->by_hostname); - lua_rawseti (L, -2, i++); - cur = g_list_next (cur); + lua_rawseti (L, -2, k++); } } else { diff --git a/src/lua/lua_trie.c b/src/lua/lua_trie.c index f1b9088db..bceda4502 100644 --- a/src/lua/lua_trie.c +++ b/src/lua/lua_trie.c @@ -260,17 +260,14 @@ lua_trie_search_mime (lua_State *L) ac_trie_t *trie = lua_check_trie (L, 1); struct rspamd_task *task = lua_check_task (L, 2); struct mime_text_part *part; - GList *cur; const gchar *text; gint state = 0; - gsize len; + gsize len, i; gboolean found = FALSE; if (trie) { - cur = task->text_parts; - - while (cur) { - part = cur->data; + for (i = 0; i < task->text_parts->len; i ++) { + part = g_ptr_array_index (task->text_parts, i); if (!IS_PART_EMPTY (part) && part->content != NULL) { text = part->content->data; @@ -280,8 +277,6 @@ lua_trie_search_mime (lua_State *L) found = TRUE; } } - - cur = g_list_next (cur); } } diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c index cbf23c336..cafef6d40 100644 --- a/src/plugins/chartable.c +++ b/src/plugins/chartable.c @@ -205,16 +205,15 @@ check_part (struct mime_text_part *part, gboolean raw_mode) static void chartable_symbol_callback (struct rspamd_task *task, void *unused) { - GList *cur; + guint i; struct mime_text_part *part; - cur = g_list_first (task->text_parts); - while (cur) { - part = cur->data; + for (i = 0; i < task->text_parts->len; i ++) { + part = g_ptr_array_index (task->text_parts, i); + if (!IS_PART_EMPTY (part) && check_part (part, task->cfg->raw_mode)) { rspamd_task_insert_result (task, chartable_module_ctx->symbol, 1, NULL); } - cur = g_list_next (cur); } } diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c index c585df7a4..088a31979 100644 --- a/src/plugins/fuzzy_check.c +++ b/src/plugins/fuzzy_check.c @@ -960,16 +960,15 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule, struct rspamd_image *image; struct
rspamd_fuzzy_cmd *cmd; gsize hashlen; - GList *cur; + guint i; GPtrArray *res; - cur = task->text_parts; res = g_ptr_array_new (); - while (cur) { - part = cur->data; + for (i = 0; i < task->text_parts->len; i ++) { + part = g_ptr_array_index (task->text_parts, i); + if (IS_PART_EMPTY (part)) { - cur = g_list_next (cur); continue; } @@ -977,17 +976,17 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule, if (fuzzy_module_ctx->min_bytes > part->content->len) { msg_info ("<%s>, part is shorter than %d symbols, skip fuzzy check", task->message_id, fuzzy_module_ctx->min_bytes); - cur = g_list_next (cur); continue; } /* Check length of hash */ hashlen = strlen (part->fuzzy->hash_pipe); + if (hashlen == 0) { msg_info ("<%s>, part hash empty, skip fuzzy check", task->message_id, fuzzy_module_ctx->min_hash_len); - cur = g_list_next (cur); continue; } + if (fuzzy_module_ctx->min_hash_len != 0 && hashlen * part->fuzzy->block_size < fuzzy_module_ctx->min_hash_len) { @@ -995,7 +994,6 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule, "<%s>, part hash is shorter than %d symbols, skip fuzzy check", task->message_id, fuzzy_module_ctx->min_hash_len); - cur = g_list_next (cur); continue; } @@ -1012,10 +1010,11 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule, if (cmd) { g_ptr_array_add (res, cmd); } - - cur = g_list_next (cur); } + /* Process images */ + GList *cur; + cur = task->images; while (cur) { image = cur->data; @@ -1045,10 +1044,11 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule, } cur = g_list_next (cur); } + /* Process other parts */ - cur = task->parts; - while (cur) { - mime_part = cur->data; + for (i = 0; i < task->parts->len; i ++) { + mime_part = g_ptr_array_index (task->parts, i); + if (mime_part->content->len > 0 && fuzzy_check_content_type (rule, mime_part->type)) { if (fuzzy_module_ctx->min_bytes <= 0 || mime_part->content->len >= @@ -1071,7 +1071,6 @@ 
fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule, } } } - cur = g_list_next (cur); } if (res->len == 0) { -- 2.39.5