diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-07-12 17:36:22 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-07-12 17:36:22 +0100 |
commit | 3a0e37802d0b77b11d0e4d1b0c988a1f7680d527 (patch) | |
tree | 846737c9d30a0a199dfe32f5963ea1e2f5330f3d | |
parent | 4c1013fbdd26d37a31ac5895e0ecefb248fa4c6d (diff) | |
parent | 5ccaf0c78a1b8ebcb75956941dcd648cf5614fe9 (diff) | |
download | rspamd-3a0e37802d0b77b11d0e4d1b0c988a1f7680d527.tar.gz rspamd-3a0e37802d0b77b11d0e4d1b0c988a1f7680d527.zip |
Merge pull request #2976 from rspamd/mime-rework
[Project] Mime rework #2970
44 files changed, 1188 insertions, 1154 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 06cc9fd30..88aea0b62 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -771,6 +771,7 @@ IF(NOT CMAKE_C_OPT_FLAGS) ENDIF() ELSE(ENABLE_OPTIMIZATION MATCHES "ON") IF(ENABLE_FULL_DEBUG MATCHES "ON") + ADD_DEFINITIONS(-DFULL_DEBUG) SET(CMAKE_C_OPT_FLAGS "-g -O0") ELSE(ENABLE_FULL_DEBUG MATCHES "ON") SET(CMAKE_C_OPT_FLAGS "-g -O2") diff --git a/contrib/uthash/utlist.h b/contrib/uthash/utlist.h index c82dd916e..6c72b9fb2 100644 --- a/contrib/uthash/utlist.h +++ b/contrib/uthash/utlist.h @@ -28,7 +28,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include <assert.h> -/* +/* * This file contains macros to manipulate singly and doubly-linked lists. * * 1. LL_ macros: singly-linked lists. @@ -38,7 +38,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * To use singly-linked lists, your structure must have a "next" pointer. * To use doubly-linked lists, your structure must "prev" and "next" pointers. * Either way, the pointer to the head of the list must be initialized to NULL. - * + * * ----------------.EXAMPLE ------------------------- * struct item { * int id; @@ -88,7 +88,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define _PREVASGN(elt,list,to,prev) { char **_alias = (char**)&((list)->prev); *_alias=(char*)(to); } #define _RS(list) { char **_alias = (char**)&(list); *_alias=_tmp; } #define _CASTASGN(a,b) { char **_alias = (char**)&(a); *_alias=(char*)(b); } -#else +#else #define _SV(elt,list) #define _NEXT(elt,list,next) ((elt)->next) #define _NEXTASGN(elt,list,to,next) ((elt)->next)=(to) @@ -312,7 +312,7 @@ do { #define LL_PREPEND2(head,add,next) \ do { \ (add)->next = head; \ - head = add; \ + (head) = (add); \ } while (0) #define LL_CONCAT(head1,head2) \ @@ -365,6 +365,15 @@ do { } \ } while (0) +#define LL_REVERSE2(head,next) do { \ + LDECLTYPE(head) _cur = (head), _p = NULL, _n = NULL; \ + while(_cur != NULL) { _n = _cur->next; _cur->next = _p; _p = _cur; _cur = _n; } \ + (head) = _p; \ +} while (0) + +#define LL_REVERSE(head) \ + LL_REVERSE2(head,next) + /* Here are VS2008 replacements for LL_APPEND and LL_DELETE */ #define LL_APPEND_VS2008(head,add) \ LL_APPEND2_VS2008(head,add,next) @@ -445,7 +454,7 @@ do { LL_FOREACH2(head,out,next) { \ if ((out)->field == (val)) break; \ } \ -} while(0) +} while(0) #define LL_SEARCH(head,out,elt,cmp) \ LL_SEARCH2(head,out,elt,cmp,next) @@ -455,7 +464,7 @@ do { LL_FOREACH2(head,out,next) { \ if ((cmp(out,elt))==0) break; \ } \ -} while(0) +} while(0) #define LL_REPLACE_ELEM(head, el, add) \ do { \ @@ -531,7 +540,7 @@ do { (head)->prev = (head); \ (head)->next = NULL; \ } \ -} while (0) +} while (0) #define DL_CONCAT(head1,head2) \ DL_CONCAT2(head1,head2,prev,next) @@ -549,7 +558,7 @@ do { (head1)=(head2); \ } \ } \ -} while (0) +} while (0) #define DL_DELETE(head,del) \ DL_DELETE2(head,del,prev,next) @@ -570,7 +579,7 @@ do { (head)->prev = (del)->prev; \ } \ } \ -} while (0) +} while (0) #define DL_COUNT(head,el,counter) \ DL_COUNT2(head,el,counter,next) \ @@ -674,7 +683,7 @@ do { (del)->prev->next = (del)->next; \ if ((del) == (head)) (head)=(del)->next; \ } \ -} while (0) +} while (0) #define CDL_COUNT(head,el,counter) \ CDL_COUNT2(head,el,counter,next) \ @@ -689,7 +698,7 @@ do { CDL_FOREACH2(head,el,next) #define CDL_FOREACH2(head,el,next) \ - for(el=head;el;el=((el)->next==head ? 0L : (el)->next)) + for(el=head;el;el=((el)->next==head ? 0L : (el)->next)) #define CDL_FOREACH_SAFE(head,el,tmp1,tmp2) \ CDL_FOREACH_SAFE2(head,el,tmp1,tmp2,prev,next) @@ -707,7 +716,7 @@ do { CDL_FOREACH2(head,out,next) { \ if ((out)->field == (val)) break; \ } \ -} while(0) +} while(0) #define CDL_SEARCH(head,out,elt,cmp) \ CDL_SEARCH2(head,out,elt,cmp,next) @@ -717,7 +726,7 @@ do { CDL_FOREACH2(head,out,next) { \ if ((cmp(out,elt))==0) break; \ } \ -} while(0) +} while(0) #define CDL_REPLACE_ELEM(head, el, add) \ do { \ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 03fa61600..19945f62d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -164,10 +164,10 @@ ELSE() ADD_LIBRARY(rspamd-server SHARED ${RSPAMD_CRYPTOBOX} ${RSPAMD_UTIL} - ${RSPAMD_LUA} ${RSPAMD_SERVER} ${RSPAMD_STAT} ${RSPAMD_MIME} + ${RSPAMD_LUA} ${CMAKE_CURRENT_BINARY_DIR}/modules.c ${PLUGINSSRC} "${RAGEL_ragel_smtp_addr_OUTPUTS}" diff --git a/src/controller.c b/src/controller.c index 46c02d47c..bb8c67e6f 100644 --- a/src/controller.c +++ b/src/controller.c @@ -1870,7 +1870,8 @@ rspamd_controller_learn_fin_task (void *ud) session = conn_ent->ud; if (task->err != NULL) { - msg_info_session ("cannot learn <%s>: %e", task->message_id, task->err); + msg_info_session ("cannot learn <%s>: %e", + MESSAGE_FIELD (task, message_id), task->err); rspamd_controller_send_error (conn_ent, task->err->code, "%s", task->err->message); @@ -1882,13 +1883,14 @@ rspamd_controller_learn_fin_task (void *ud) msg_info_task ("<%s> learned message as %s: %s", rspamd_inet_address_to_string (session->from_addr), session->is_spam ? "spam" : "ham", - task->message_id); + MESSAGE_FIELD (task, message_id)); rspamd_controller_send_string (conn_ent, "{\"success\":true}"); return TRUE; } if (!rspamd_task_process (task, RSPAMD_TASK_PROCESS_LEARN)) { - msg_info_task ("cannot learn <%s>: %e", task->message_id, task->err); + msg_info_task ("cannot learn <%s>: %e", + MESSAGE_FIELD (task, message_id), task->err); if (task->err) { rspamd_controller_send_error (conn_ent, task->err->code, "%s", @@ -1909,7 +1911,7 @@ rspamd_controller_learn_fin_task (void *ud) msg_info_task ("<%s> learned message as %s: %s", rspamd_inet_address_to_string (session->from_addr), session->is_spam ? "spam" : "ham", - task->message_id); + MESSAGE_FIELD (task, message_id)); rspamd_controller_send_string (conn_ent, "{\"success\":true}"); } @@ -1948,7 +1950,8 @@ rspamd_controller_check_fin_task (void *ud) conn_ent = task->fin_arg; if (task->err) { - msg_info_task ("cannot check <%s>: %e", task->message_id, task->err); + msg_info_task ("cannot check <%s>: %e", + MESSAGE_FIELD (task, message_id), task->err); rspamd_controller_send_error (conn_ent, task->err->code, "%s", task->err->message); return TRUE; @@ -2027,7 +2030,8 @@ rspamd_controller_handle_learn_common ( rspamd_learn_task_spam (task, is_spam, session->classifier, NULL); if (!rspamd_task_process (task, RSPAMD_TASK_PROCESS_LEARN)) { - msg_warn_session ("<%s> message cannot be processed", task->message_id); + msg_warn_session ("<%s> message cannot be processed", + MESSAGE_FIELD (task, message_id)); goto end; } diff --git a/src/libcryptobox/cryptobox.c b/src/libcryptobox/cryptobox.c index 0a3939e54..0b4ebe614 100644 --- a/src/libcryptobox/cryptobox.c +++ b/src/libcryptobox/cryptobox.c @@ -440,7 +440,7 @@ rspamd_cryptobox_nm (rspamd_nm_t nm, g_assert (len == sizeof (s)); /* Still do hchacha iteration since we are not using SHA1 KDF */ - crypto_core_hchacha20 (nm, n0, s, NULL); + hchacha (s, n0, nm, 20); EC_KEY_free (lk); EC_POINT_free (ec_pub); diff --git a/src/libmime/archives.c b/src/libmime/archives.c index c19991eb6..b1c1624a4 100644 --- a/src/libmime/archives.c +++ b/src/libmime/archives.c @@ -1906,9 +1906,7 @@ rspamd_archives_process (struct rspamd_task *task) const guchar sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C}; const guchar gz_magic[] = {0x1F, 0x8B}; - for (i = 0; i < task->parts->len; i ++) { - part = g_ptr_array_index (task->parts, i); - + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) { if (!(part->flags & (RSPAMD_MIME_PART_TEXT|RSPAMD_MIME_PART_IMAGE))) { if (part->parsed_data.len > 0) { if (rspamd_archive_cheat_detect (part, "zip", diff --git a/src/libmime/email_addr.h b/src/libmime/email_addr.h index 69aa2aa00..13e94f7cc 100644 --- a/src/libmime/email_addr.h +++ b/src/libmime/email_addr.h @@ -58,7 +58,7 @@ struct rspamd_email_address { guint flags; }; -struct received_header; +struct rspamd_received_header; struct rspamd_task; /** diff --git a/src/libmime/filter.c b/src/libmime/filter.c index a040cda1d..a193f5f78 100644 --- a/src/libmime/filter.c +++ b/src/libmime/filter.c @@ -139,14 +139,14 @@ rspamd_add_passthrough_result (struct rspamd_task *task, if (!isnan (target_score)) { msg_info_task ("<%s>: set pre-result to '%s' %s(%.2f): '%s' from %s(%d)", - task->message_id, action->name, + MESSAGE_FIELD_CHECK (task, message_id), action->name, flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "", target_score, message, module, priority); } else { msg_info_task ("<%s>: set pre-result to '%s' %s(no score): '%s' from %s(%d)", - task->message_id, action->name, + MESSAGE_FIELD_CHECK (task, message_id), action->name, flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "", message, module, priority); } diff --git a/src/libmime/images.c b/src/libmime/images.c index 787417ab3..cb59bc88e 100644 --- a/src/libmime/images.c +++ b/src/libmime/images.c @@ -54,9 +54,7 @@ rspamd_images_process (struct rspamd_task *task) RSPAMD_FTOK_ASSIGN (&srch, "image"); - for (i = 0; i < task->parts->len; i ++) { - part = g_ptr_array_index (task->parts, i); - + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) { if (!(part->flags & (RSPAMD_MIME_PART_TEXT|RSPAMD_MIME_PART_ARCHIVE))) { if (rspamd_ftok_cmp (&part->ct->type, &srch) == 0 && part->parsed_data.len > 0) { @@ -603,17 +601,15 @@ process_image (struct rspamd_task *task, struct rspamd_mime_part *part) struct html_image *himg; const gchar *cid, *html_cid; guint cid_len, i, j; - GPtrArray *ar; struct rspamd_image *img; img = rspamd_maybe_process_image (task->task_pool, &part->parsed_data); if (img != NULL) { - msg_debug_images ("detected %s image of size %ud x %ud in message <%s>", + msg_debug_images ("detected %s image of size %ud x %ud", rspamd_image_type_str (img->type), - img->width, img->height, - task->message_id); + img->width, img->height); if (part->cd) { img->filename = &part->cd->filename; @@ -625,11 +621,10 @@ process_image (struct rspamd_task *task, struct rspamd_mime_part *part) part->specific.img = img; /* Check Content-Id */ - ar = rspamd_message_get_header_from_hash (part->raw_headers, - task->task_pool, "Content-Id", FALSE); + rh = rspamd_message_get_header_from_hash (part->raw_headers, + "Content-Id"); - if (ar != NULL && ar->len > 0) { - rh = g_ptr_array_index (ar, 0); + if (rh) { cid = rh->decoded; if (*cid == '<') { @@ -643,9 +638,8 @@ process_image (struct rspamd_task *task, struct rspamd_mime_part *part) cid_len --; } - for (i = 0; i < task->text_parts->len; i ++) { - tp = g_ptr_array_index (task->text_parts, i); + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, tp) { if (IS_PART_HTML (tp) && tp->html != NULL && tp->html->images != NULL) { for (j = 0; j < tp->html->images->len; j ++) { diff --git a/src/libmime/message.c b/src/libmime/message.c index 482287769..3efd30da0 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -34,8 +34,10 @@ #include <math.h> #include <unicode/uchar.h> +#include "sodium.h" #include "libserver/cfg_file_private.h" #include "lua/lua_common.h" +#include "contrib/uthash/utlist.h" #define GTUBE_SYMBOL "GTUBE" @@ -656,8 +658,8 @@ rspamd_check_gtube (struct rspamd_task *task, struct rspamd_mime_text_part *part task->flags |= RSPAMD_TASK_FLAG_SKIP; task->flags |= RSPAMD_TASK_FLAG_GTUBE; msg_info_task ( - "<%s>: gtube %s pattern has been found in part of length %ud", - task->message_id, rspamd_action_to_str (act), + "gtube %s pattern has been found in part of length %ud", + rspamd_action_to_str (act), part->utf_content->len); } } @@ -791,8 +793,8 @@ rspamd_message_process_html_text_part (struct rspamd_task *task, text_part->html, text_part->utf_raw_content, &text_part->exceptions, - task->urls, - task->emails); + MESSAGE_FIELD (task, urls), + MESSAGE_FIELD (task, emails)); if (text_part->utf_content->len == 0) { text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_EMPTY; @@ -942,7 +944,7 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task, } } - g_ptr_array_add (task->text_parts, text_part); + g_ptr_array_add (MESSAGE_FIELD (task, text_parts), text_part); mime_part->flags |= RSPAMD_MIME_PART_TEXT; mime_part->specific.txt = text_part; @@ -1061,12 +1063,9 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start, part->raw_data.len = len; part->parsed_data.begin = start; part->parsed_data.len = len; - part->id = task->parts->len; - part->raw_headers = g_hash_table_new_full (rspamd_strcase_hash, - rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard); - part->headers_order = g_queue_new (); - - + part->id = MESSAGE_FIELD (task, parts)->len; + part->raw_headers = rspamd_message_headers_new (); + part->headers_order = NULL; tok = rspamd_task_get_request_header (task, "Filename"); @@ -1080,30 +1079,94 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start, part->cd = rspamd_content_disposition_parse (cdbuf, strlen (cdbuf), task->task_pool); - g_ptr_array_add (task->parts, part); + g_ptr_array_add (MESSAGE_FIELD (task, parts), part); rspamd_mime_parser_calc_digest (part); /* Generate message ID */ mid = rspamd_mime_message_id_generate ("localhost.localdomain"); rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_free, mid); - task->message_id = mid; + MESSAGE_FIELD (task, message_id) = mid; task->queue_id = mid; } +static void +rspamd_message_dtor (struct rspamd_message *msg) +{ + guint i; + struct rspamd_mime_part *p; + struct rspamd_mime_text_part *tp; + + + PTR_ARRAY_FOREACH (msg->parts, i, p) { + if (p->raw_headers) { + rspamd_message_headers_destroy (p->raw_headers); + } + + if (IS_CT_MULTIPART (p->ct)) { + if (p->specific.mp->children) { + g_ptr_array_free (p->specific.mp->children, TRUE); + } + } + } + + PTR_ARRAY_FOREACH (msg->text_parts, i, tp) { + if (tp->utf_words) { + g_array_free (tp->utf_words, TRUE); + } + if (tp->normalized_hashes) { + g_array_free (tp->normalized_hashes, TRUE); + } + if (tp->languages) { + g_ptr_array_unref (tp->languages); + } + } + + g_ptr_array_unref (msg->text_parts); + g_ptr_array_unref (msg->parts); + + g_ptr_array_unref (msg->from_mime); + g_ptr_array_unref (msg->rcpt_mime); + + g_hash_table_unref (msg->urls); + g_hash_table_unref (msg->emails); +} + +struct rspamd_message* +rspamd_message_new (struct rspamd_task *task) +{ + struct rspamd_message *msg; + + msg = rspamd_mempool_alloc0 (task->task_pool, sizeof (*msg)); + + msg->raw_headers = rspamd_message_headers_new (); + + msg->emails = g_hash_table_new (rspamd_email_hash, rspamd_emails_cmp); + msg->urls = g_hash_table_new (rspamd_url_hash, rspamd_urls_cmp); + + msg->parts = g_ptr_array_sized_new (4); + msg->text_parts = g_ptr_array_sized_new (2); + + msg->from_mime = g_ptr_array_sized_new (1); + msg->rcpt_mime = g_ptr_array_sized_new (1); + + REF_INIT_RETAIN (msg, rspamd_message_dtor); + + return msg; +} + gboolean rspamd_message_parse (struct rspamd_task *task) { - struct received_header *recv, *trecv; + struct rspamd_received_header *recv, *trecv; const gchar *p; gsize len; guint i; GError *err = NULL; - rspamd_cryptobox_hash_state_t st; - guchar digest_out[rspamd_cryptobox_HASHBYTES]; if (RSPAMD_TASK_IS_EMPTY (task)) { /* Don't do anything with empty task */ + task->flags |= RSPAMD_TASK_FLAG_SKIP_PROCESS; return TRUE; } @@ -1146,7 +1209,13 @@ rspamd_message_parse (struct rspamd_task *task) task->msg.begin = p; task->msg.len = len; - rspamd_cryptobox_hash_init (&st, NULL, 0); + + /* Cleanup old message */ + if (task->message) { + rspamd_message_unref (task->message); + } + + task->message = rspamd_message_new (task); if (task->flags & RSPAMD_TASK_FLAG_MIME) { enum rspamd_mime_parse_error ret; @@ -1190,20 +1259,20 @@ rspamd_message_parse (struct rspamd_task *task) } - if (task->message_id == NULL) { - task->message_id = "undef"; + if (MESSAGE_FIELD (task, message_id) == NULL) { + MESSAGE_FIELD (task, message_id) = "undef"; } - debug_task ("found %ud parts in message", task->parts->len); + debug_task ("found %ud parts in message", MESSAGE_FIELD (task, parts)->len); if (task->queue_id == NULL) { task->queue_id = "undef"; } - if (task->received->len > 0) { + if (MESSAGE_FIELD (task, received)) { gboolean need_recv_correction = FALSE; rspamd_inet_addr_t *raddr; - recv = g_ptr_array_index (task->received, 0); + recv = MESSAGE_FIELD (task, received); /* * For the first header we must ensure that * received is consistent with the IP that we obtain through @@ -1231,7 +1300,7 @@ rspamd_message_parse (struct rspamd_task *task) " not ours, prepend it with fake one"); trecv = rspamd_mempool_alloc0 (task->task_pool, - sizeof (struct received_header)); + sizeof (struct rspamd_received_header)); trecv->flags |= RSPAMD_RECEIVED_FLAG_ARTIFICIAL; if (task->flags & RSPAMD_TASK_FLAG_SSL) { @@ -1257,30 +1326,14 @@ rspamd_message_parse (struct rspamd_task *task) trecv->from_hostname = trecv->real_hostname; } -#ifdef GLIB_VERSION_2_40 - g_ptr_array_insert (task->received, 0, trecv); -#else - /* - * Unfortunately, before glib 2.40 we cannot insert element into a - * ptr array - */ - GPtrArray *nar = g_ptr_array_sized_new (task->received->len + 1); - - g_ptr_array_add (nar, trecv); - PTR_ARRAY_FOREACH (task->received, i, recv) { - g_ptr_array_add (nar, recv); - } - rspamd_mempool_add_destructor (task->task_pool, - rspamd_ptr_array_free_hard, nar); - task->received = nar; -#endif + DL_PREPEND (MESSAGE_FIELD (task, received), trecv); } } /* Extract data from received header if we were not given IP */ - if (task->received->len > 0 && (task->flags & RSPAMD_TASK_FLAG_NO_IP) && + if (MESSAGE_FIELD (task, received) && (task->flags & RSPAMD_TASK_FLAG_NO_IP) && (task->cfg && !task->cfg->ignore_received)) { - recv = g_ptr_array_index (task->received, 0); + recv = MESSAGE_FIELD (task, received); if (recv->real_ip) { if (!rspamd_parse_inet_address (&task->from_addr, recv->real_ip, @@ -1295,37 +1348,48 @@ rspamd_message_parse (struct rspamd_task *task) } } + struct rspamd_mime_part *part; + + /* Blake2b applied to string 'rspamd' */ + static const guchar RSPAMD_ALIGNED(32) hash_key[] = { + 0xef,0x43,0xae,0x80,0xcc,0x8d,0xc3,0x4c, + 0x6f,0x1b,0xd6,0x18,0x1b,0xae,0x87,0x74, + 0x0c,0xca,0xf7,0x8e,0x5f,0x2e,0x54,0x32, + 0xf6,0x79,0xb9,0x27,0x26,0x96,0x20,0x92, + 0x70,0x07,0x85,0xeb,0x83,0xf7,0x89,0xe0, + 0xd7,0x32,0x2a,0xd2,0x1a,0x64,0x41,0xef, + 0x49,0xff,0xc3,0x8c,0x54,0xf9,0x67,0x74, + 0x30,0x1e,0x70,0x2e,0xb7,0x12,0x09,0xfe, + }; + + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) { + crypto_shorthash_siphashx24 (MESSAGE_FIELD (task, digest), + part->digest, sizeof (part->digest), + i == 0 ? hash_key : MESSAGE_FIELD (task, digest)); + } + /* Parse urls inside Subject header */ - if (task->subject) { - p = task->subject; + if (MESSAGE_FIELD (task, subject)) { + p = MESSAGE_FIELD (task, subject); len = strlen (p); - rspamd_cryptobox_hash_update (&st, p, len); + crypto_shorthash_siphashx24 (MESSAGE_FIELD (task, digest), p, len, + MESSAGE_FIELD (task, digest)); rspamd_url_find_multiple (task->task_pool, p, len, RSPAMD_URL_FIND_STRICT, NULL, rspamd_url_task_subject_callback, task); } - for (i = 0; i < task->parts->len; i ++) { - struct rspamd_mime_part *part; - - part = g_ptr_array_index (task->parts, i); - rspamd_cryptobox_hash_update (&st, part->digest, sizeof (part->digest)); - } - - rspamd_cryptobox_hash_final (&st, digest_out); - memcpy (task->digest, digest_out, sizeof (task->digest)); - if (task->queue_id) { msg_info_task ("loaded message; id: <%s>; queue-id: <%s>; size: %z; " "checksum: <%*xs>", - task->message_id, task->queue_id, task->msg.len, - (gint)sizeof (task->digest), task->digest); + MESSAGE_FIELD (task, message_id), task->queue_id, task->msg.len, + (gint)sizeof (MESSAGE_FIELD (task, digest)), MESSAGE_FIELD (task, digest)); } else { msg_info_task ("loaded message; id: <%s>; size: %z; " "checksum: <%*xs>", - task->message_id, task->msg.len, - (gint)sizeof (task->digest), task->digest); + MESSAGE_FIELD (task, message_id), task->msg.len, + (gint)sizeof (MESSAGE_FIELD (task, digest)), MESSAGE_FIELD (task, digest)); } return TRUE; @@ -1338,13 +1402,9 @@ rspamd_message_process (struct rspamd_task *task) struct rspamd_mime_text_part *p1, *p2; gdouble diff, *pdiff; guint tw, *ptw, dw; + struct rspamd_mime_part *part; - for (i = 0; i < task->parts->len; i ++) { - struct rspamd_mime_part *part; - - part = g_ptr_array_index (task->parts, i); - - + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) { if (!rspamd_message_process_text_part_maybe (task, part) && part->parsed_data.len > 0) { const gchar *mb = magic_buffer (task->cfg->libs_ctx->libmagic, @@ -1372,7 +1432,7 @@ rspamd_message_process (struct rspamd_task *task) gdouble *var; guint total_words = 0; - PTR_ARRAY_FOREACH (task->text_parts, i, text_part) { + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, text_part) { if (!text_part->language) { rspamd_mime_part_detect_language (task, text_part); } @@ -1385,9 +1445,9 @@ rspamd_message_process (struct rspamd_task *task) } /* Calculate distance for 2-parts messages */ - if (task->text_parts->len == 2) { - p1 = g_ptr_array_index (task->text_parts, 0); - p2 = g_ptr_array_index (task->text_parts, 1); + if (i == 2) { + p1 = g_ptr_array_index (MESSAGE_FIELD (task, text_parts), 0); + p2 = g_ptr_array_index (MESSAGE_FIELD (task, text_parts), 1); /* First of all check parent object */ if (p1->mime_part->parent_part) { @@ -1499,97 +1559,31 @@ rspamd_message_process (struct rspamd_task *task) } -GPtrArray * -rspamd_message_get_header_from_hash (GHashTable *htb, - rspamd_mempool_t *pool, - const gchar *field, - gboolean strong) +struct rspamd_message * +rspamd_message_ref (struct rspamd_message *msg) { - GPtrArray *ret, *ar; - struct rspamd_mime_header *cur; - guint i; - - ar = g_hash_table_lookup (htb, field); - - if (ar == NULL) { - return NULL; - } - - if (strong && pool != NULL) { - /* Need to filter what we have */ - ret = g_ptr_array_sized_new (ar->len); + REF_RETAIN (msg); - PTR_ARRAY_FOREACH (ar, i, cur) { - if (strcmp (cur->name, field) != 0) { - continue; - } - - g_ptr_array_add (ret, cur); - } - - rspamd_mempool_add_destructor (pool, - (rspamd_mempool_destruct_t)rspamd_ptr_array_free_hard, ret); - } - else { - ret = ar; - } - - return ret; + return msg; } -GPtrArray * -rspamd_message_get_header_array (struct rspamd_task *task, - const gchar *field, - gboolean strong) +void rspamd_message_unref (struct rspamd_message *msg) { - return rspamd_message_get_header_from_hash (task->raw_headers, - task->task_pool, field, strong); + if (msg) { + REF_RELEASE (msg); + } } -GPtrArray * -rspamd_message_get_mime_header_array (struct rspamd_task *task, - const gchar *field, - gboolean strong) +void rspamd_message_update_digest (struct rspamd_message *msg, + const void *input, gsize len) { - GPtrArray *ret, *ar; - struct rspamd_mime_header *cur; - guint nelems = 0, i, j; - struct rspamd_mime_part *mp; - - for (i = 0; i < task->parts->len; i ++) { - mp = g_ptr_array_index (task->parts, i); - ar = g_hash_table_lookup (mp->raw_headers, field); - - if (ar == NULL) { - continue; - } - - nelems += ar->len; - } - - if (nelems == 0) { - return NULL; - } + guchar RSPAMD_ALIGNED(32) ex_key[crypto_shorthash_siphashx24_KEYBYTES]; - ret = g_ptr_array_sized_new (nelems); + /* Sanity */ + G_STATIC_ASSERT (sizeof (ex_key) == sizeof (msg->digest)); + G_STATIC_ASSERT (crypto_shorthash_siphashx24_BYTES == sizeof (msg->digest)); - for (i = 0; i < task->parts->len; i ++) { - mp = g_ptr_array_index (task->parts, i); - ar = g_hash_table_lookup (mp->raw_headers, field); + memcpy (ex_key, msg->digest, sizeof (msg->digest)); - PTR_ARRAY_FOREACH (ar, j, cur) { - if (strong) { - if (strcmp (cur->name, field) != 0) { - continue; - } - } - - g_ptr_array_add (ret, cur); - } - } - - rspamd_mempool_add_destructor (task->task_pool, - (rspamd_mempool_destruct_t)rspamd_ptr_array_free_hard, ret); - - return ret; -} + crypto_shorthash_siphashx24 (msg->digest, input, len, ex_key); +}
\ No newline at end of file diff --git a/src/libmime/message.h b/src/libmime/message.h index 17c4ec5b9..a6b6f1022 100644 --- a/src/libmime/message.h +++ b/src/libmime/message.h @@ -7,11 +7,14 @@ #define RSPAMD_MESSAGE_H #include "config.h" -#include "email_addr.h" -#include "addr.h" -#include "cryptobox.h" -#include "mime_headers.h" -#include "content_type.h" + +#include "libmime/email_addr.h" +#include "libutil/addr.h" +#include "libcryptobox/cryptobox.h" +#include "libmime/mime_headers.h" +#include "libmime/content_type.h" +#include "libutil/ref.h" +#include "libutil/str_util.h" #include <unicode/uchar.h> #include <unicode/utext.h> @@ -58,8 +61,8 @@ struct rspamd_mime_part { rspamd_ftok_t parsed_data; struct rspamd_mime_part *parent_part; - GQueue *headers_order; - GHashTable *raw_headers; + struct rspamd_mime_header *headers_order; + khash_t(rspamd_mime_headers_htb) *raw_headers; gchar *raw_headers_str; gsize raw_headers_len; @@ -129,6 +132,36 @@ struct rspamd_mime_text_part { guint unicode_scripts; }; +struct rspamd_message_raw_headers_content { + const gchar *begin; + gsize len; + const gchar *body_start; +}; + +struct rspamd_message { + const gchar *message_id; + gchar *subject; + + GPtrArray *parts; /**< list of parsed parts */ + GPtrArray *text_parts; /**< list of text parts */ + struct rspamd_message_raw_headers_content raw_headers_content; + struct rspamd_received_header *received; /**< list of received headers */ + GHashTable *urls; /**< list of parsed urls */ + GHashTable *emails; /**< list of parsed emails */ + khash_t(rspamd_mime_headers_htb) *raw_headers; /**< list of raw headers */ + struct rspamd_mime_header *headers_order; /**< order of raw headers */ + GPtrArray *rcpt_mime; + GPtrArray *from_mime; + guchar digest[16]; + enum rspamd_newlines_type nlines_type; /**< type of newlines (detected on most of headers */ + ref_entry_t ref; +}; + +#define MESSAGE_FIELD(task, field) ((task)->message->field) +#define MESSAGE_FIELD_CHECK(task, field) ((task)->message ? \ + (task)->message->field : \ + (__typeof__((task)->message->field))NULL) + /** * Parse and pre-process mime message * @param task worker_task object @@ -142,40 +175,6 @@ gboolean rspamd_message_parse (struct rspamd_task *task); */ void rspamd_message_process (struct rspamd_task *task); -/** - * Get an array of header's values with specified header's name using raw headers - * @param task worker task structure - * @param field header's name - * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not - * @return An array of header's values or NULL. It is NOT permitted to free array or values. - */ -GPtrArray *rspamd_message_get_header_array (struct rspamd_task *task, - const gchar *field, - gboolean strong); - -/** - * Get an array of mime parts header's values with specified header's name using raw headers - * @param task worker task structure - * @param field header's name - * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not - * @return An array of header's values or NULL. It is NOT permitted to free array or values. - */ -GPtrArray *rspamd_message_get_mime_header_array (struct rspamd_task *task, - const gchar *field, - gboolean strong); - -/** - * Get an array of header's values with specified header's name using raw headers - * @param htb hash table indexed by header name (caseless) with ptr arrays as elements - * @param field header's name - * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not - * @return An array of header's values or NULL. It is NOT permitted to free array or values. - */ -GPtrArray *rspamd_message_get_header_from_hash (GHashTable *htb, - rspamd_mempool_t *pool, - const gchar *field, - gboolean strong); - /** * Converts string to cte @@ -191,6 +190,21 @@ enum rspamd_cte rspamd_cte_from_string (const gchar *str); */ const gchar *rspamd_cte_to_string (enum rspamd_cte ct); +struct rspamd_message* rspamd_message_new (struct rspamd_task *task); + +struct rspamd_message *rspamd_message_ref (struct rspamd_message *msg); + +void rspamd_message_unref (struct rspamd_message *msg); + +/** + * Updates digest of the message if modified + * @param msg + * @param input + * @param len + */ +void rspamd_message_update_digest (struct rspamd_message *msg, + const void *input, gsize len); + #ifdef __cplusplus } #endif diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c index 8dc7da12e..0fbba54b2 100644 --- a/src/libmime/mime_encoding.c +++ b/src/libmime/mime_encoding.c @@ -667,7 +667,8 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, } if (charset == NULL) { - msg_info_task ("<%s>: has invalid charset", task->message_id); + msg_info_task ("<%s>: has invalid charset", + MESSAGE_FIELD_CHECK (task, message_id)); SET_PART_RAW (text_part); text_part->utf_raw_content = part_content; @@ -690,7 +691,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, if (!rspamd_mime_text_part_utf8_convert (task, text_part, part_content, charset, &err)) { msg_warn_task ("<%s>: cannot convert from %s to utf8: %s", - task->message_id, + MESSAGE_FIELD (task, message_id), charset, err ? err->message : "unknown problem"); SET_PART_RAW (text_part); diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c index d7622376c..975a112bc 100644 --- a/src/libmime/mime_expressions.c +++ b/src/libmime/mime_expressions.c @@ -1200,7 +1200,7 @@ gboolean rspamd_header_exists (struct rspamd_task * task, GArray * args, void *unused) { struct expression_argument *arg; - GPtrArray *headerlist; + struct rspamd_mime_header *rh; if (args == NULL || task == NULL) { return FALSE; @@ -1212,14 +1212,13 @@ rspamd_header_exists (struct rspamd_task * task, GArray * args, void *unused) return FALSE; } - headerlist = rspamd_message_get_header_array (task, - (gchar *)arg->data, - FALSE); + rh = rspamd_message_get_header_array (task, + (gchar *)arg->data); debug_task ("try to get header %s: %d", (gchar *)arg->data, - (headerlist != NULL)); + (rh != NULL)); - if (headerlist) { + if (rh) { return TRUE; } @@ -1344,11 +1343,11 @@ rspamd_recipients_distance (struct rspamd_task *task, GArray * args, return FALSE; } - if (!task->rcpt_mime) { + if (!MESSAGE_FIELD (task, rcpt_mime)) { return FALSE; } - num = task->rcpt_mime->len; + num = MESSAGE_FIELD (task, rcpt_mime)->len; if (num < MIN_RCPT_TO_COMPARE) { return FALSE; @@ -1357,7 +1356,7 @@ rspamd_recipients_distance (struct rspamd_task *task, GArray * args, ar = rspamd_mempool_alloc0 (task->task_pool, num * sizeof (struct addr_list)); /* Fill array */ - PTR_ARRAY_FOREACH (task->rcpt_mime, i, cur) { + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, rcpt_mime), i, cur) { ar[i].name = cur->addr; ar[i].namelen = cur->addr_len; ar[i].addr = cur->domain; @@ -1391,8 +1390,8 @@ rspamd_has_only_html_part (struct rspamd_task * task, GArray * args, struct rspamd_mime_text_part *p; gboolean res = FALSE; - if (task->text_parts->len == 1) { - p = g_ptr_array_index (task->text_parts, 0); + if (MESSAGE_FIELD (task, text_parts)->len == 1) { + p = g_ptr_array_index (MESSAGE_FIELD (task, text_parts), 0); if (IS_PART_HTML (p)) { res = TRUE; @@ -1445,8 +1444,8 @@ rspamd_is_recipients_sorted (struct rspamd_task * task, { /* Check all types of addresses */ - if (task->rcpt_mime) { - return is_recipient_list_sorted (task->rcpt_mime); + if (MESSAGE_FIELD (task, rcpt_mime)) { + return is_recipient_list_sorted (MESSAGE_FIELD (task, rcpt_mime)); } return FALSE; @@ -1480,7 +1479,7 @@ rspamd_compare_transfer_encoding (struct rspamd_task * task, return FALSE; } - PTR_ARRAY_FOREACH (task->parts, i, part) { + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) { if (IS_CT_TEXT (part->ct)) { if (part->cte == cte) { return TRUE; @@ -1498,9 +1497,7 @@ rspamd_is_html_balanced (struct rspamd_task * task, GArray * args, void *unused) guint i; gboolean res = TRUE; - for (i = 0; i < task->text_parts->len; i ++) { - - p = g_ptr_array_index (task->text_parts, i); + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) { if (IS_PART_HTML (p)) { if (p->flags & RSPAMD_MIME_TEXT_PART_FLAG_BALANCED) { res = TRUE; @@ -1535,9 +1532,7 @@ rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused) return FALSE; } - for (i = 0; i < task->text_parts->len; i ++) { - p = g_ptr_array_index (task->text_parts, i); - + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) { if (IS_PART_HTML (p) && p->html) { res = rspamd_html_tag_seen (p->html, arg->data); } @@ -1558,9 +1553,7 @@ rspamd_has_fake_html (struct rspamd_task * task, GArray * args, void *unused) guint i; gboolean res = FALSE; - for (i = 0; i < task->text_parts->len; i ++) { - p = g_ptr_array_index (task->text_parts, i); - + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) { if (IS_PART_HTML (p) && (p->html == NULL || p->html->html_tags == NULL)) { res = TRUE; } @@ -1589,7 +1582,7 @@ rspamd_raw_header_exists (struct rspamd_task *task, GArray * args, void *unused) return FALSE; } - return g_hash_table_lookup (task->raw_headers, arg->data) != NULL; + return rspamd_message_get_header_array (task, arg->data) != NULL; } static gboolean @@ -1679,7 +1672,7 @@ rspamd_check_smtp_data (struct rspamd_task *task, GArray * args, void *unused) case 's': case 'S': if (g_ascii_strcasecmp (type, "subject") == 0) { - str = task->subject; + str = MESSAGE_FIELD (task, subject); } else { msg_warn_task ("bad argument to function: %s", type); @@ -1794,9 +1787,7 @@ rspamd_content_type_compare_param (struct rspamd_task * task, param_name = arg->data; arg_pattern = &g_array_index (args, struct expression_argument, 1); - for (i = 0; i < task->parts->len; i ++) { - cur_part = g_ptr_array_index (task->parts, i); - + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, cur_part) { if (args->len >= 3) { arg1 = &g_array_index (args, struct expression_argument, 2); if (g_ascii_strncasecmp (arg1->data, "true", @@ -1876,9 +1867,7 @@ rspamd_content_type_has_param (struct rspamd_task * task, g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL); param_name = arg->data; - for (i = 0; i < task->parts->len; i ++) { - cur_part = g_ptr_array_index (task->parts, i); - + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, cur_part) { if (args->len >= 2) { arg1 = &g_array_index (args, struct expression_argument, 1); if (g_ascii_strncasecmp (arg1->data, "true", @@ -1951,8 +1940,7 @@ rspamd_content_type_check (struct rspamd_task *task, arg_pattern = &g_array_index (args, struct expression_argument, 0); - for (i = 0; i < task->parts->len; i ++) { - cur_part = g_ptr_array_index (task->parts, i); + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, cur_part) { ct = cur_part->ct; if (args->len >= 2) { @@ -2091,8 +2079,7 @@ common_has_content_part (struct rspamd_task * task, gint r = 0; guint i; - for (i = 0; i < task->parts->len; i ++) { - part = g_ptr_array_index (task->parts, i); + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) { ct = part->ct; if (ct == NULL) { @@ -2216,7 +2203,7 @@ rspamd_is_empty_body (struct rspamd_task *task, struct rspamd_mime_part *part; guint i; - PTR_ARRAY_FOREACH (task->parts, i, part) { + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) { if (part->parsed_data.len > 0) { return FALSE; } diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c index cf6d0f763..7c6c02709 100644 --- a/src/libmime/mime_headers.c +++ b/src/libmime/mime_headers.c @@ -20,14 +20,18 @@ #include "contrib/uthash/utlist.h" #include "libserver/mempool_vars_internal.h" #include "libserver/url.h" +#include "libutil/util.h" #include <unicode/utf8.h> +__KHASH_IMPL (rspamd_mime_headers_htb, static inline, gchar *, + struct rspamd_mime_header *, 1, rspamd_strcase_hash, rspamd_strcase_equal); + static void rspamd_mime_header_check_special (struct rspamd_task *task, struct rspamd_mime_header *rh) { guint64 h; - struct received_header *recv; + struct rspamd_received_header *recv; const gchar *p, *end; gchar *id; @@ -36,39 +40,43 @@ rspamd_mime_header_check_special (struct rspamd_task *task, switch (h) { case 0x88705DC4D9D61ABULL: /* received */ recv = rspamd_mempool_alloc0 (task->task_pool, - sizeof (struct received_header)); + sizeof (struct rspamd_received_header)); recv->hdr = rh; if (rspamd_smtp_received_parse (task, rh->decoded, strlen (rh->decoded), recv) != -1) { - g_ptr_array_add (task->received, recv); + DL_APPEND (MESSAGE_FIELD (task, received), recv); } - rh->type = RSPAMD_HEADER_RECEIVED; + rh->flags |= RSPAMD_HEADER_RECEIVED; break; case 0x76F31A09F4352521ULL: /* to */ - task->rcpt_mime = rspamd_email_address_from_mime (task->task_pool, - rh->decoded, strlen (rh->decoded), task->rcpt_mime); - rh->type = RSPAMD_HEADER_TO|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE; + MESSAGE_FIELD (task, rcpt_mime) = rspamd_email_address_from_mime (task->task_pool, + rh->decoded, strlen (rh->decoded), + MESSAGE_FIELD (task, rcpt_mime)); + rh->flags |= RSPAMD_HEADER_TO|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE; break; case 0x7EB117C1480B76ULL: /* cc */ - task->rcpt_mime = rspamd_email_address_from_mime (task->task_pool, - rh->decoded, strlen (rh->decoded), task->rcpt_mime); - rh->type = RSPAMD_HEADER_CC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE; + MESSAGE_FIELD (task, rcpt_mime) = rspamd_email_address_from_mime (task->task_pool, + rh->decoded, strlen (rh->decoded), + MESSAGE_FIELD (task, rcpt_mime)); + rh->flags |= RSPAMD_HEADER_CC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE; break; case 0xE4923E11C4989C8DULL: /* bcc */ - task->rcpt_mime = rspamd_email_address_from_mime (task->task_pool, - rh->decoded, strlen (rh->decoded), task->rcpt_mime); - rh->type = RSPAMD_HEADER_BCC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE; + MESSAGE_FIELD (task, rcpt_mime) = rspamd_email_address_from_mime (task->task_pool, + rh->decoded, strlen (rh->decoded), + MESSAGE_FIELD (task, rcpt_mime)); + rh->flags |= RSPAMD_HEADER_BCC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE; break; case 0x41E1985EDC1CBDE4ULL: /* from */ - task->from_mime = rspamd_email_address_from_mime (task->task_pool, - rh->decoded, strlen (rh->decoded), task->from_mime); - rh->type = RSPAMD_HEADER_FROM|RSPAMD_HEADER_SENDER|RSPAMD_HEADER_UNIQUE; + MESSAGE_FIELD (task, from_mime) = rspamd_email_address_from_mime (task->task_pool, + rh->decoded, strlen (rh->decoded), + MESSAGE_FIELD (task, from_mime)); + rh->flags |= RSPAMD_HEADER_FROM|RSPAMD_HEADER_SENDER|RSPAMD_HEADER_UNIQUE; break; case 0x43A558FC7C240226ULL: /* message-id */ { - rh->type = RSPAMD_HEADER_MESSAGE_ID|RSPAMD_HEADER_UNIQUE; + rh->flags = RSPAMD_HEADER_MESSAGE_ID|RSPAMD_HEADER_UNIQUE; p = rh->decoded; end = p + strlen (p); @@ -98,29 +106,29 @@ rspamd_mime_header_check_special (struct rspamd_task *task, *d = '\0'; - task->message_id = id; + MESSAGE_FIELD (task, message_id) = id; } break; } case 0xB91D3910358E8212ULL: /* subject */ - if (task->subject == NULL) { - task->subject = rh->decoded; + if (MESSAGE_FIELD (task, subject) == NULL) { + MESSAGE_FIELD (task, subject) = rh->decoded; } - rh->type = RSPAMD_HEADER_SUBJECT|RSPAMD_HEADER_UNIQUE; + rh->flags = RSPAMD_HEADER_SUBJECT|RSPAMD_HEADER_UNIQUE; break; case 0xEE4AA2EAAC61D6F4ULL: /* return-path */ if (task->from_envelope == NULL) { task->from_envelope = rspamd_email_address_from_smtp (rh->decoded, strlen (rh->decoded)); } - rh->type = RSPAMD_HEADER_RETURN_PATH|RSPAMD_HEADER_UNIQUE; + rh->flags = RSPAMD_HEADER_RETURN_PATH|RSPAMD_HEADER_UNIQUE; break; case 0xB9EEFAD2E93C2161ULL: /* delivered-to */ if (task->deliver_to == NULL) { task->deliver_to = rh->decoded; } - rh->type = RSPAMD_HEADER_DELIVERED_TO; + rh->flags = RSPAMD_HEADER_DELIVERED_TO; break; case 0x2EC3BFF3C393FC10ULL: /* date */ case 0xAC0DDB1A1D214CAULL: /* sender */ @@ -128,31 +136,37 @@ rspamd_mime_header_check_special (struct rspamd_task *task, case 0x81CD9E9131AB6A9AULL: /* content-type */ case 0xC39BD9A75AA25B60ULL: /* content-transfer-encoding */ case 0xB3F6704CB3AD6589ULL: /* references */ - rh->type = RSPAMD_HEADER_UNIQUE; + rh->flags = RSPAMD_HEADER_UNIQUE; break; } } static void rspamd_mime_header_add (struct rspamd_task *task, - GHashTable *target, GQueue *order, - struct rspamd_mime_header *rh, - gboolean check_special) + khash_t(rspamd_mime_headers_htb) *target, + struct rspamd_mime_header **order_ptr, + struct rspamd_mime_header *rh, + gboolean check_special) { - GPtrArray *ar; + khiter_t k; + struct rspamd_mime_header *ex; + int res; + + k = kh_put (rspamd_mime_headers_htb, target, rh->name, &res); - if ((ar = g_hash_table_lookup (target, rh->name)) != NULL) { - g_ptr_array_add (ar, rh); + if (res == 0) { + ex = kh_value (target, k); + DL_APPEND (ex, rh); msg_debug_task ("append raw header %s: %s", rh->name, rh->value); } else { - ar = g_ptr_array_sized_new (2); - g_ptr_array_add (ar, rh); - g_hash_table_insert (target, rh->name, ar); + kh_value (target, k) = rh; + rh->prev = rh; + rh->next = NULL; msg_debug_task ("add new raw header %s: %s", rh->name, rh->value); } - g_queue_push_tail (order, rh); + LL_PREPEND2 (*order_ptr, rh, ord_next); if (check_special) { rspamd_mime_header_check_special (task, rh); @@ -162,8 +176,9 @@ rspamd_mime_header_add (struct rspamd_task *task, /* Convert raw headers to a list of struct raw_header * */ void -rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target, - GQueue *order, +rspamd_mime_headers_process (struct rspamd_task *task, + khash_t(rspamd_mime_headers_htb) *target, + struct rspamd_mime_header **order_ptr, const gchar *in, gsize len, gboolean check_newlines) { @@ -205,7 +220,7 @@ rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target, tmp = rspamd_mempool_alloc (task->task_pool, l + 1); rspamd_null_safe_copy (c, l, tmp, l + 1); nh->name = tmp; - nh->empty_separator = TRUE; + nh->flags |= RSPAMD_HEADER_EMPTY_SEPARATOR; nh->raw_value = c; nh->raw_len = p - c; /* Including trailing ':' */ p++; @@ -225,12 +240,12 @@ rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target, case 2: /* We got header's name, so skip any \t or spaces */ if (*p == '\t') { - nh->tab_separated = TRUE; - nh->empty_separator = FALSE; + nh->flags &= ~RSPAMD_HEADER_EMPTY_SEPARATOR; + nh->flags |= RSPAMD_HEADER_TAB_SEPARATED; p++; } else if (*p == ' ') { - nh->empty_separator = FALSE; + nh->flags &= ~RSPAMD_HEADER_EMPTY_SEPARATOR; p++; } else if (*p == '\n' || *p == '\r') { @@ -377,7 +392,7 @@ rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target, /* We also validate utf8 and replace all non-valid utf8 chars */ rspamd_mime_charset_utf_enforce (nh->decoded, strlen (nh->decoded)); nh->order = norder ++; - rspamd_mime_header_add (task, target, order, nh, check_newlines); + rspamd_mime_header_add (task, target, order_ptr, nh, check_newlines); nh = NULL; state = 0; break; @@ -387,7 +402,7 @@ rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target, nh->decoded = ""; nh->raw_len = p - nh->raw_value; nh->order = norder ++; - rspamd_mime_header_add (task, target, order, nh, check_newlines); + rspamd_mime_header_add (task, target, order_ptr, nh, check_newlines); nh = NULL; state = 0; break; @@ -450,10 +465,12 @@ rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target, } } + /* Since we have prepended headers, we need to reverse the list to get the actual order */ + LL_REVERSE (*order_ptr); + if (check_newlines) { guint max_cnt = 0; gint sel = 0; - GList *cur; rspamd_cryptobox_hash_state_t hs; guchar hout[rspamd_cryptobox_HASHBYTES], *hexout; @@ -464,19 +481,14 @@ rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target, } } - task->nlines_type = sel; + MESSAGE_FIELD (task, nlines_type) = sel; - cur = order->head; rspamd_cryptobox_hash_init (&hs, NULL, 0); - while (cur) { - nh = cur->data; - - if (nh->name && nh->type != RSPAMD_HEADER_RECEIVED) { + LL_FOREACH (*order_ptr, nh) { + if (nh->name && nh->flags != RSPAMD_HEADER_RECEIVED) { rspamd_cryptobox_hash_update (&hs, nh->name, strlen (nh->name)); } - - cur = g_list_next (cur); } rspamd_cryptobox_hash_final (&hs, hout); @@ -1287,7 +1299,7 @@ rspamd_smtp_received_process_rdns (struct rspamd_task *task, static gboolean rspamd_smtp_received_process_host_tcpinfo (struct rspamd_task *task, - struct received_header *rh, + struct rspamd_received_header *rh, const gchar *data, gsize len) { @@ -1373,7 +1385,7 @@ rspamd_smtp_received_process_host_tcpinfo (struct rspamd_task *task, static void rspamd_smtp_received_process_from (struct rspamd_task *task, struct rspamd_received_part *rpart, - struct received_header *rh) + struct rspamd_received_header *rh) { if (rpart->dlen > 0) { /* We have seen multiple cases: @@ -1457,7 +1469,7 @@ int rspamd_smtp_received_parse (struct rspamd_task *task, const char *data, size_t len, - struct received_header *rh) + struct rspamd_received_header *rh) { goffset date_pos = -1; struct rspamd_received_part *head, *cur; @@ -1469,7 +1481,7 @@ rspamd_smtp_received_parse (struct rspamd_task *task, return -1; } - rh->type = RSPAMD_RECEIVED_UNKNOWN; + rh->flags = RSPAMD_RECEIVED_UNKNOWN; DL_FOREACH (head, cur) { switch (cur->type) { @@ -1490,7 +1502,7 @@ rspamd_smtp_received_parse (struct rspamd_task *task, RSPAMD_FTOK_ASSIGN (&t2, "smtp"); if (rspamd_ftok_cmp (&t1, &t2) == 0) { - rh->type = RSPAMD_RECEIVED_SMTP; + rh->flags = RSPAMD_RECEIVED_SMTP; } RSPAMD_FTOK_ASSIGN (&t2, "esmtp"); @@ -1501,11 +1513,11 @@ rspamd_smtp_received_parse (struct rspamd_task *task, */ if (t1.len == t2.len + 1) { if (t1.begin[t2.len] == 'a') { - rh->type = RSPAMD_RECEIVED_ESMTPA; + rh->flags = RSPAMD_RECEIVED_ESMTPA; rh->flags |= RSPAMD_RECEIVED_FLAG_AUTHENTICATED; } else if (t1.begin[t2.len] == 's') { - rh->type = RSPAMD_RECEIVED_ESMTPS; + rh->flags = RSPAMD_RECEIVED_ESMTPS; rh->flags |= RSPAMD_RECEIVED_FLAG_SSL; } continue; @@ -1513,14 +1525,14 @@ rspamd_smtp_received_parse (struct rspamd_task *task, else if (t1.len == t2.len + 2) { if (t1.begin[t2.len] == 's' && t1.begin[t2.len + 1] == 'a') { - rh->type = RSPAMD_RECEIVED_ESMTPSA; + rh->flags = RSPAMD_RECEIVED_ESMTPSA; rh->flags |= RSPAMD_RECEIVED_FLAG_AUTHENTICATED; rh->flags |= RSPAMD_RECEIVED_FLAG_SSL; } continue; } else if (t1.len == t2.len) { - rh->type = RSPAMD_RECEIVED_ESMTP; + rh->flags = RSPAMD_RECEIVED_ESMTP; continue; } } @@ -1528,21 +1540,21 @@ rspamd_smtp_received_parse (struct rspamd_task *task, RSPAMD_FTOK_ASSIGN (&t2, "lmtp"); if (rspamd_ftok_cmp (&t1, &t2) == 0) { - rh->type = RSPAMD_RECEIVED_LMTP; + rh->flags = RSPAMD_RECEIVED_LMTP; continue; } RSPAMD_FTOK_ASSIGN (&t2, "imap"); if (rspamd_ftok_cmp (&t1, &t2) == 0) { - rh->type = RSPAMD_RECEIVED_IMAP; + rh->flags = RSPAMD_RECEIVED_IMAP; continue; } RSPAMD_FTOK_ASSIGN (&t2, "local"); if (rspamd_ftok_cmp (&t1, &t2) == 0) { - rh->type = RSPAMD_RECEIVED_LOCAL; + rh->flags = RSPAMD_RECEIVED_LOCAL; continue; } @@ -1551,12 +1563,12 @@ rspamd_smtp_received_parse (struct rspamd_task *task, if (rspamd_ftok_starts_with (&t1, &t2)) { if (t1.len == t2.len + 1) { if (t1.begin[t2.len] == 's') { - rh->type = RSPAMD_RECEIVED_HTTP; + rh->flags = RSPAMD_RECEIVED_HTTP; rh->flags |= RSPAMD_RECEIVED_FLAG_SSL; } } else if (t1.len == t2.len) { - rh->type = RSPAMD_RECEIVED_HTTP; + rh->flags = RSPAMD_RECEIVED_HTTP; } continue; @@ -1584,4 +1596,43 @@ rspamd_smtp_received_parse (struct rspamd_task *task, } return 0; +} + +struct rspamd_mime_header * +rspamd_message_get_header_from_hash (khash_t(rspamd_mime_headers_htb) *htb, + const gchar *field) +{ + khiter_t k; + + if (htb) { + k = kh_get (rspamd_mime_headers_htb, htb, (gchar *) field); + + if (k == kh_end (htb)) { + return NULL; + } + + return kh_value (htb, k); + } + + return NULL; +} + +struct rspamd_mime_header * +rspamd_message_get_header_array (struct rspamd_task *task, + const gchar *field) +{ + return rspamd_message_get_header_from_hash (MESSAGE_FIELD_CHECK (task, raw_headers), + field); +} + +void +rspamd_message_headers_destroy (khash_t(rspamd_mime_headers_htb) *htb) +{ + kh_destroy (rspamd_mime_headers_htb, htb); +} + +khash_t(rspamd_mime_headers_htb) * +rspamd_message_headers_new (void) +{ + return kh_init (rspamd_mime_headers_htb); }
\ No newline at end of file diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h index 5cb300978..f9aa555c5 100644 --- a/src/libmime/mime_headers.h +++ b/src/libmime/mime_headers.h @@ -19,6 +19,7 @@ #include "config.h" #include "libutil/mem_pool.h" #include "libutil/addr.h" +#include "khash.h" #ifdef __cplusplus extern "C" { @@ -31,54 +32,71 @@ enum rspamd_rfc2047_encoding { RSPAMD_RFC2047_BASE64, }; -enum rspamd_mime_header_special_type { - RSPAMD_HEADER_GENERIC = 0, - RSPAMD_HEADER_RECEIVED = 1 << 0, - RSPAMD_HEADER_TO = 1 << 2, - RSPAMD_HEADER_CC = 1 << 3, - RSPAMD_HEADER_BCC = 1 << 4, - RSPAMD_HEADER_FROM = 1 << 5, - RSPAMD_HEADER_MESSAGE_ID = 1 << 6, - RSPAMD_HEADER_SUBJECT = 1 << 7, - RSPAMD_HEADER_RETURN_PATH = 1 << 8, - RSPAMD_HEADER_DELIVERED_TO = 1 << 9, - RSPAMD_HEADER_SENDER = 1 << 10, - RSPAMD_HEADER_RCPT = 1 << 11, - RSPAMD_HEADER_UNIQUE = 1 << 12 +enum rspamd_mime_header_flags { + RSPAMD_HEADER_GENERIC = 0u, + RSPAMD_HEADER_RECEIVED = 1u << 0u, + RSPAMD_HEADER_TO = 1u << 2u, + RSPAMD_HEADER_CC = 1u << 3u, + RSPAMD_HEADER_BCC = 1u << 4u, + RSPAMD_HEADER_FROM = 1u << 5u, + RSPAMD_HEADER_MESSAGE_ID = 1u << 6u, + RSPAMD_HEADER_SUBJECT = 1u << 7u, + RSPAMD_HEADER_RETURN_PATH = 1u << 8u, + RSPAMD_HEADER_DELIVERED_TO = 1u << 9u, + RSPAMD_HEADER_SENDER = 1u << 10u, + RSPAMD_HEADER_RCPT = 1u << 11u, + RSPAMD_HEADER_UNIQUE = 1u << 12u, + RSPAMD_HEADER_EMPTY_SEPARATOR = 1u << 13u, + RSPAMD_HEADER_TAB_SEPARATED = 1u << 14u, }; struct rspamd_mime_header { - gchar *name; - gchar *value; const gchar *raw_value; /* As it is in the message (unfolded and unparsed) */ gsize raw_len; - gboolean tab_separated; - gboolean empty_separator; guint order; - enum rspamd_mime_header_special_type type; + int flags; /* see enum rspamd_mime_header_flags */ + /* These are zero terminated (historically) */ + gchar *name; /* Also used for key */ + gchar *value; gchar *separator; gchar *decoded; + struct rspamd_mime_header *prev, *next; /* Headers with the same name */ + struct rspamd_mime_header *ord_next; /* Overall order of headers, slist */ }; +/* Define hash type */ +__KHASH_TYPE (rspamd_mime_headers_htb, gchar *, struct rspamd_mime_header *) + enum rspamd_received_type { - RSPAMD_RECEIVED_SMTP = 0, - RSPAMD_RECEIVED_ESMTP, - RSPAMD_RECEIVED_ESMTPA, - RSPAMD_RECEIVED_ESMTPS, - RSPAMD_RECEIVED_ESMTPSA, - RSPAMD_RECEIVED_LMTP, - RSPAMD_RECEIVED_IMAP, - RSPAMD_RECEIVED_LOCAL, - RSPAMD_RECEIVED_HTTP, - RSPAMD_RECEIVED_MAPI, - RSPAMD_RECEIVED_UNKNOWN + RSPAMD_RECEIVED_SMTP = 1u << 0u, + RSPAMD_RECEIVED_ESMTP = 1u << 1u, + RSPAMD_RECEIVED_ESMTPA = 1u << 2u, + RSPAMD_RECEIVED_ESMTPS = 1u << 3u, + RSPAMD_RECEIVED_ESMTPSA = 1u << 4u, + RSPAMD_RECEIVED_LMTP = 1u << 5u, + RSPAMD_RECEIVED_IMAP = 1u << 6u, + RSPAMD_RECEIVED_LOCAL = 1u << 7u, + RSPAMD_RECEIVED_HTTP = 1u << 8u, + RSPAMD_RECEIVED_MAPI = 1u << 9u, + RSPAMD_RECEIVED_UNKNOWN = 1u << 10u, + RSPAMD_RECEIVED_FLAG_ARTIFICIAL = (1u << 11u), + RSPAMD_RECEIVED_FLAG_SSL = (1u << 12u), + RSPAMD_RECEIVED_FLAG_AUTHENTICATED = (1u << 13u), }; -#define RSPAMD_RECEIVED_FLAG_ARTIFICIAL (1 << 0) -#define RSPAMD_RECEIVED_FLAG_SSL (1 << 1) -#define RSPAMD_RECEIVED_FLAG_AUTHENTICATED (1 << 2) +#define RSPAMD_RECEIVED_FLAG_TYPE_MASK (RSPAMD_RECEIVED_SMTP| \ + RSPAMD_RECEIVED_ESMTP| \ + RSPAMD_RECEIVED_ESMTPA| \ + RSPAMD_RECEIVED_ESMTPS| \ + RSPAMD_RECEIVED_ESMTPSA| \ + RSPAMD_RECEIVED_LMTP| \ + RSPAMD_RECEIVED_IMAP| \ + RSPAMD_RECEIVED_LOCAL| \ + RSPAMD_RECEIVED_HTTP| \ + RSPAMD_RECEIVED_MAPI| \ + RSPAMD_RECEIVED_UNKNOWN) -struct received_header { +struct rspamd_received_header { const gchar *from_hostname; const gchar *from_ip; const gchar *real_hostname; @@ -88,8 +106,8 @@ struct received_header { rspamd_inet_addr_t *addr; struct rspamd_mime_header *hdr; time_t timestamp; - enum rspamd_received_type type; - gint flags; + gint flags; /* See enum rspamd_received_type */ + struct rspamd_received_header *prev, *next; }; /** @@ -100,8 +118,9 @@ struct received_header { * @param len * @param check_newlines */ -void rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target, - GQueue *order, +void rspamd_mime_headers_process (struct rspamd_task *task, + khash_t(rspamd_mime_headers_htb) *target, + struct rspamd_mime_header **order_ptr, const gchar *in, gsize len, gboolean check_newlines); @@ -130,6 +149,38 @@ gchar *rspamd_mime_header_encode (const gchar *in, gsize len); */ gchar *rspamd_mime_message_id_generate (const gchar *fqdn); +/** + * Get an array of header's values with specified header's name using raw headers + * @param task worker task structure + * @param field header's name + * @return An array of header's values or NULL. It is NOT permitted to free array or values. + */ +struct rspamd_mime_header * +rspamd_message_get_header_array (struct rspamd_task *task, + const gchar *field); + +/** + * Get an array of header's values with specified header's name using raw headers + * @param htb hash table indexed by header name (caseless) with ptr arrays as elements + * @param field header's name + * @return An array of header's values or NULL. It is NOT permitted to free array or values. + */ +struct rspamd_mime_header * +rspamd_message_get_header_from_hash (khash_t(rspamd_mime_headers_htb) *htb, + const gchar *field); + +/** + * Cleans up hash table of the headers + * @param htb + */ +void rspamd_message_headers_destroy (khash_t(rspamd_mime_headers_htb) *htb); + +/** + * Init headers hash + * @return + */ +khash_t(rspamd_mime_headers_htb)* rspamd_message_headers_new (void); + #ifdef __cplusplus } #endif diff --git a/src/libmime/mime_parser.c b/src/libmime/mime_parser.c index 6572f4e88..c075857b4 100644 --- a/src/libmime/mime_parser.c +++ b/src/libmime/mime_parser.c @@ -14,6 +14,7 @@ * limitations under the License. */ + #include "config.h" #include "task.h" #include "mime_parser.h" @@ -21,6 +22,7 @@ #include "message.h" #include "multipattern.h" #include "contrib/libottery/ottery.h" +#include "contrib/uthash/utlist.h" struct rspamd_mime_parser_lib_ctx { struct rspamd_multipattern *mp_boundary; @@ -256,21 +258,16 @@ rspamd_mime_part_get_cte_heuristic (struct rspamd_task *task, static void rspamd_mime_part_get_cte (struct rspamd_task *task, - GHashTable *hdrs, - struct rspamd_mime_part *part, - gboolean apply_heuristic) + khash_t(rspamd_mime_headers_htb) *hdrs, + struct rspamd_mime_part *part, + gboolean apply_heuristic) { - struct rspamd_mime_header *hdr; - guint i; - GPtrArray *hdrs_cte; + struct rspamd_mime_header *hdr, *cur; enum rspamd_cte cte = RSPAMD_CTE_UNKNOWN; - hdrs_cte = rspamd_message_get_header_from_hash (hdrs, - task->task_pool, - "Content-Transfer-Encoding", FALSE); - - if (hdrs_cte == NULL) { + hdr = rspamd_message_get_header_from_hash (hdrs, "Content-Transfer-Encoding"); + if (hdr == NULL) { if (part->parent_part && part->parent_part->cte != RSPAMD_CTE_UNKNOWN && !(part->parent_part->flags & RSPAMD_MIME_PART_MISSING_CTE)) { part->cte = part->parent_part->cte; @@ -287,12 +284,11 @@ rspamd_mime_part_get_cte (struct rspamd_task *task, part->flags |= RSPAMD_MIME_PART_MISSING_CTE; } else { - for (i = 0; i < hdrs_cte->len; i ++) { + DL_FOREACH (hdr, cur) { gsize hlen; gchar lc_buf[128]; - hdr = g_ptr_array_index (hdrs_cte, i); - hlen = rspamd_snprintf (lc_buf, sizeof (lc_buf), "%s", hdr->value); + hlen = rspamd_snprintf (lc_buf, sizeof (lc_buf), "%s", cur->value); rspamd_str_lc (lc_buf, hlen); cte = rspamd_mime_parse_cte (lc_buf, hlen); @@ -337,19 +333,16 @@ check_cte: static void rspamd_mime_part_get_cd (struct rspamd_task *task, struct rspamd_mime_part *part) { - struct rspamd_mime_header *hdr; - guint i; - GPtrArray *hdrs; + struct rspamd_mime_header *hdr, *cur; struct rspamd_content_disposition *cd = NULL; rspamd_ftok_t srch; struct rspamd_content_type_param *found; - hdrs = rspamd_message_get_header_from_hash (part->raw_headers, - task->task_pool, - "Content-Disposition", FALSE); + hdr = rspamd_message_get_header_from_hash (part->raw_headers, + "Content-Disposition"); - if (hdrs == NULL) { + if (hdr == NULL) { cd = rspamd_mempool_alloc0 (task->task_pool, sizeof (*cd)); cd->type = RSPAMD_CT_INLINE; @@ -370,15 +363,13 @@ rspamd_mime_part_get_cd (struct rspamd_task *task, struct rspamd_mime_part *part } } else { - for (i = 0; i < hdrs->len; i ++) { + DL_FOREACH (hdr, cur) { gsize hlen; - - hdr = g_ptr_array_index (hdrs, i); cd = NULL; - if (hdr->decoded) { - hlen = strlen (hdr->decoded); - cd = rspamd_content_disposition_parse (hdr->decoded, hlen, + if (cur->decoded) { + hlen = strlen (cur->decoded); + cd = rspamd_content_disposition_parse (cur->decoded, hlen, task->task_pool); } @@ -517,8 +508,8 @@ rspamd_mime_parse_normal_part (struct rspamd_task *task, g_assert_not_reached (); } - part->id = task->parts->len; - g_ptr_array_add (task->parts, part); + part->id = MESSAGE_FIELD (task, parts)->len; + g_ptr_array_add (MESSAGE_FIELD (task, parts), part); msg_debug_mime ("parsed data part %T/%T of length %z (%z orig), %s cte", &part->ct->type, &part->ct->subtype, part->parsed_data.len, part->raw_data.len, rspamd_cte_to_string (part->cte)); @@ -546,12 +537,10 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task, GError **err) { struct rspamd_content_type *ct, *sel = NULL; - struct rspamd_mime_header *hdr; - GPtrArray *hdrs = NULL; + struct rspamd_mime_header *hdr = NULL, *cur; struct rspamd_mime_part *npart; GString str; goffset hdr_pos, body_pos; - guint i; enum rspamd_mime_parse_error ret = RSPAMD_MIME_PARSE_FATAL; @@ -592,9 +581,8 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task, npart = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct rspamd_mime_part)); npart->parent_part = multipart; - npart->raw_headers = g_hash_table_new_full (rspamd_strcase_hash, - rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard); - npart->headers_order = g_queue_new (); + npart->raw_headers = rspamd_message_headers_new (); + npart->headers_order = NULL; if (multipart) { if (multipart->specific.mp->children == NULL) { @@ -612,15 +600,14 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task, if (npart->raw_headers_len > 0) { rspamd_mime_headers_process (task, npart->raw_headers, - npart->headers_order, + &npart->headers_order, npart->raw_headers_str, npart->raw_headers_len, FALSE); } - hdrs = rspamd_message_get_header_from_hash (npart->raw_headers, - task->task_pool, - "Content-Type", FALSE); + hdr = rspamd_message_get_header_from_hash (npart->raw_headers, + "Content-Type"); } else { @@ -631,11 +618,10 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task, } - if (hdrs != NULL) { + if (hdr != NULL) { - for (i = 0; i < hdrs->len; i ++) { - hdr = g_ptr_array_index (hdrs, i); - ct = rspamd_content_type_parse (hdr->decoded, strlen (hdr->decoded), + DL_FOREACH (hdr, cur) { + ct = rspamd_content_type_parse (cur->decoded, strlen (cur->decoded), task->task_pool); /* Here we prefer multipart content-type or any content-type */ @@ -848,8 +834,8 @@ rspamd_mime_parse_multipart_part (struct rspamd_task *task, return RSPAMD_MIME_PARSE_NESTING; } - part->id = task->parts->len; - g_ptr_array_add (task->parts, part); + part->id = MESSAGE_FIELD (task, parts)->len; + g_ptr_array_add (MESSAGE_FIELD (task, parts), part); st->nesting ++; rspamd_mime_part_get_cte (task, part->raw_headers, part, FALSE); @@ -1098,8 +1084,7 @@ rspamd_mime_parse_message (struct rspamd_task *task, GError **err) { struct rspamd_content_type *ct, *sel = NULL; - struct rspamd_mime_header *hdr; - GPtrArray *hdrs = NULL; + struct rspamd_mime_header *hdr = NULL, *cur; const gchar *pbegin, *p; gsize plen, len; struct rspamd_mime_part *npart; @@ -1159,42 +1144,45 @@ rspamd_mime_parse_message (struct rspamd_task *task, if (hdr_pos > 0 && hdr_pos < str.len) { - task->raw_headers_content.begin = str.str; - task->raw_headers_content.len = hdr_pos; - task->raw_headers_content.body_start = str.str + body_pos; + MESSAGE_FIELD (task, raw_headers_content).begin = str.str; + MESSAGE_FIELD (task, raw_headers_content).len = hdr_pos; + MESSAGE_FIELD (task, raw_headers_content).body_start = str.str + body_pos; - if (task->raw_headers_content.len > 0) { - rspamd_mime_headers_process (task, task->raw_headers, - task->headers_order, - task->raw_headers_content.begin, - task->raw_headers_content.len, + if (MESSAGE_FIELD (task, raw_headers_content).len > 0) { + rspamd_mime_headers_process (task, + MESSAGE_FIELD (task, raw_headers), + &MESSAGE_FIELD (task, headers_order), + MESSAGE_FIELD (task, raw_headers_content).begin, + MESSAGE_FIELD (task, raw_headers_content).len, TRUE); } - hdrs = rspamd_message_get_header_from_hash (task->raw_headers, - task->task_pool, - "Content-Type", FALSE); + hdr = rspamd_message_get_header_from_hash ( + MESSAGE_FIELD (task, raw_headers), + "Content-Type"); } else { /* First apply heuristic, maybe we have just headers */ hdr_pos = rspamd_mime_parser_headers_heuristic (&str, &body_pos); if (hdr_pos > 0 && hdr_pos <= str.len) { - task->raw_headers_content.begin = str.str; - task->raw_headers_content.len = hdr_pos; - task->raw_headers_content.body_start = str.str + body_pos; - - if (task->raw_headers_content.len > 0) { - rspamd_mime_headers_process (task, task->raw_headers, - task->headers_order, - task->raw_headers_content.begin, - task->raw_headers_content.len, + MESSAGE_FIELD (task, raw_headers_content).begin = str.str; + MESSAGE_FIELD (task, raw_headers_content).len = hdr_pos; + MESSAGE_FIELD (task, raw_headers_content).body_start = str.str + + body_pos; + + if (MESSAGE_FIELD (task, raw_headers_content).len > 0) { + rspamd_mime_headers_process (task, + MESSAGE_FIELD (task, raw_headers), + &MESSAGE_FIELD (task, headers_order), + MESSAGE_FIELD (task, raw_headers_content).begin, + MESSAGE_FIELD (task, raw_headers_content).len, TRUE); } - hdrs = rspamd_message_get_header_from_hash (task->raw_headers, - task->task_pool, - "Content-Type", FALSE); + hdr = rspamd_message_get_header_from_hash ( + MESSAGE_FIELD (task, raw_headers), + "Content-Type"); task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS; } else { @@ -1204,7 +1192,8 @@ rspamd_mime_parse_message (struct rspamd_task *task, pbegin = st->start + body_pos; plen = st->end - pbegin; - npart->raw_headers = g_hash_table_ref (task->raw_headers); + /* TODO: check if it is correct */ + npart->raw_headers = NULL; npart->headers_order = NULL; } else { @@ -1227,9 +1216,8 @@ rspamd_mime_parse_message (struct rspamd_task *task, str.len = part->parsed_data.len; hdr_pos = rspamd_string_find_eoh (&str, &body_pos); - npart->raw_headers = g_hash_table_new_full (rspamd_strcase_hash, - rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard); - npart->headers_order = g_queue_new (); + npart->raw_headers = rspamd_message_headers_new (); + npart->headers_order = NULL; if (hdr_pos > 0 && hdr_pos < str.len) { npart->raw_headers_str = str.str; @@ -1237,16 +1225,16 @@ rspamd_mime_parse_message (struct rspamd_task *task, npart->raw_data.begin = str.str + body_pos; if (npart->raw_headers_len > 0) { - rspamd_mime_headers_process (task, npart->raw_headers, - npart->headers_order, + rspamd_mime_headers_process (task, + npart->raw_headers, + &npart->headers_order, npart->raw_headers_str, npart->raw_headers_len, FALSE); } - hdrs = rspamd_message_get_header_from_hash (npart->raw_headers, - task->task_pool, - "Content-Type", FALSE); + hdr = rspamd_message_get_header_from_hash (npart->raw_headers, + "Content-Type"); } else { body_pos = 0; @@ -1260,13 +1248,12 @@ rspamd_mime_parse_message (struct rspamd_task *task, npart->raw_data.len = plen; npart->parent_part = part; - if (hdrs == NULL) { + if (hdr == NULL) { sel = NULL; } else { - for (i = 0; i < hdrs->len; i ++) { - hdr = g_ptr_array_index (hdrs, i); - ct = rspamd_content_type_parse (hdr->decoded, strlen (hdr->decoded), + DL_FOREACH (hdr, cur) { + ct = rspamd_content_type_parse (cur->decoded, strlen (cur->decoded), task->task_pool); /* Here we prefer multipart content-type or any content-type */ @@ -1408,7 +1395,7 @@ rspamd_mime_parse_task (struct rspamd_task *task, GError **err) st = g_malloc0 (sizeof (*st)); st->stack = g_ptr_array_sized_new (4); - st->pos = task->raw_headers_content.body_start; + st->pos = MESSAGE_FIELD (task, raw_headers_content).body_start; st->end = task->msg.begin + task->msg.len; st->boundaries = g_array_sized_new (FALSE, FALSE, sizeof (struct rspamd_mime_boundary), 8); diff --git a/src/libmime/smtp_parsers.h b/src/libmime/smtp_parsers.h index b4fb825b4..3a52f1dd6 100644 --- a/src/libmime/smtp_parsers.h +++ b/src/libmime/smtp_parsers.h @@ -29,7 +29,7 @@ extern "C" { int rspamd_smtp_received_parse (struct rspamd_task *task, const char *data, size_t len, - struct received_header *rh); + struct rspamd_received_header *rh); int rspamd_smtp_addr_parse (const char *data, size_t len, struct rspamd_email_address *addr); diff --git a/src/libserver/dkim.c b/src/libserver/dkim.c index 9386c5cdc..69226e595 100644 --- a/src/libserver/dkim.c +++ b/src/libserver/dkim.c @@ -2165,23 +2165,34 @@ rspamd_dkim_canonize_header (struct rspamd_dkim_common_ctx *ctx, const gchar *dkim_header, const gchar *dkim_domain) { - struct rspamd_mime_header *rh; - gint rh_num = 0; - GPtrArray *ar; + struct rspamd_mime_header *rh, *cur, *sel = NULL; + gint hdr_cnt = 0; if (dkim_header == NULL) { - ar = g_hash_table_lookup (task->raw_headers, header_name); + rh = rspamd_message_get_header_array (task, header_name); - if (ar) { - /* Check uniqueness of the header */ - rh = g_ptr_array_index (ar, 0); - if ((rh->type & RSPAMD_HEADER_UNIQUE) && ar->len > 1) { + if (rh) { + /* Check uniqueness of the header but we count from the bottom to top */ + for (cur = rh->prev; ; cur = cur->prev) { + if (hdr_cnt == count) { + sel = cur; + } + + hdr_cnt ++; + + if (cur->next == NULL) { + /* Cycle */ + break; + } + } + + if ((rh->flags & RSPAMD_HEADER_UNIQUE) && hdr_cnt > 1) { guint64 random_cookie = ottery_rand_uint64 (); msg_warn_dkim ("header %s is intended to be unique by" " email standards, but we have %d headers of this" " type, artificially break DKIM check", header_name, - ar->len); + hdr_cnt); rspamd_dkim_hash_update (ctx->headers_hash, (const gchar *)&random_cookie, sizeof (random_cookie)); @@ -2189,11 +2200,7 @@ rspamd_dkim_canonize_header (struct rspamd_dkim_common_ctx *ctx, return FALSE; } - if (ar->len > count) { - /* Set skip count */ - rh_num = ar->len - count - 1; - } - else { + if (hdr_cnt <= count) { /* * If DKIM has less headers requested than there are in a * message, then it's fine, it allows adding extra headers @@ -2201,22 +2208,23 @@ rspamd_dkim_canonize_header (struct rspamd_dkim_common_ctx *ctx, return TRUE; } - rh = g_ptr_array_index (ar, rh_num); + /* Selected header must be non-null if previous condition is false */ + g_assert (sel != NULL); if (ctx->header_canon_type == DKIM_CANON_SIMPLE) { - rspamd_dkim_hash_update (ctx->headers_hash, rh->raw_value, - rh->raw_len); + rspamd_dkim_hash_update (ctx->headers_hash, sel->raw_value, + sel->raw_len); msg_debug_dkim ("update signature with header: %*s", - (gint)rh->raw_len, rh->raw_value); + (gint)sel->raw_len, sel->raw_value); } else { - if (ctx->is_sign && (rh->type & RSPAMD_HEADER_FROM)) { + if (ctx->is_sign && (sel->flags & RSPAMD_HEADER_FROM)) { /* Special handling of the From handling when rewrite is done */ gboolean has_rewrite = FALSE; guint i; struct rspamd_email_address *addr; - PTR_ARRAY_FOREACH (task->from_mime, i, addr) { + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, from_mime), i, addr) { if ((addr->flags & RSPAMD_EMAIL_ADDR_ORIGINAL) && !(addr->flags & RSPAMD_EMAIL_ADDR_ALIASED)) { has_rewrite = TRUE; @@ -2224,7 +2232,7 @@ rspamd_dkim_canonize_header (struct rspamd_dkim_common_ctx *ctx, } if (has_rewrite) { - PTR_ARRAY_FOREACH (task->from_mime, i, addr) { + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, from_mime), i, addr) { if (!(addr->flags & RSPAMD_EMAIL_ADDR_ORIGINAL)) { if (!rspamd_dkim_canonize_header_relaxed (ctx, addr->raw, header_name, FALSE)) { @@ -2237,7 +2245,7 @@ rspamd_dkim_canonize_header (struct rspamd_dkim_common_ctx *ctx, } } - if (!rspamd_dkim_canonize_header_relaxed (ctx, rh->value, + if (!rspamd_dkim_canonize_header_relaxed (ctx, sel->value, header_name, FALSE)) { return FALSE; } @@ -2248,17 +2256,15 @@ rspamd_dkim_canonize_header (struct rspamd_dkim_common_ctx *ctx, /* For signature check just use the saved dkim header */ if (ctx->header_canon_type == DKIM_CANON_SIMPLE) { /* We need to find our own signature and use it */ - guint i; + rh = rspamd_message_get_header_array (task, header_name); - ar = g_hash_table_lookup (task->raw_headers, header_name); - - if (ar) { + if (rh) { /* We need to find our own signature */ if (!dkim_domain) { return FALSE; } - PTR_ARRAY_FOREACH (ar, i, rh) { + DL_FOREACH (rh, cur) { guint64 th = rspamd_cryptobox_fast_hash (rh->decoded, strlen (rh->decoded), rspamd_hash_seed ()); @@ -2348,7 +2354,8 @@ rspamd_dkim_check (rspamd_dkim_context_t *ctx, /* First of all find place of body */ body_end = task->msg.begin + task->msg.len; - body_start = task->raw_headers_content.body_start; + + body_start = MESSAGE_FIELD (task, raw_headers_content).body_start; res = rspamd_mempool_alloc0 (task->task_pool, sizeof (*res)); res->ctx = ctx; @@ -2948,7 +2955,7 @@ rspamd_dkim_sign (struct rspamd_task *task, const gchar *selector, /* First of all find place of body */ body_end = task->msg.begin + task->msg.len; - body_start = task->raw_headers_content.body_start; + body_start = MESSAGE_FIELD (task, raw_headers_content).body_start; if (len > 0) { ctx->common.len = len; @@ -3020,6 +3027,8 @@ rspamd_dkim_sign (struct rspamd_task *task, const gchar *selector, /* Now canonize headers */ for (i = 0; i < ctx->common.hlist->len; i++) { + struct rspamd_mime_header *rh, *cur; + dh = g_ptr_array_index (ctx->common.hlist, i); /* We allow oversigning if dh->count > number of headers with this name */ @@ -3027,25 +3036,25 @@ rspamd_dkim_sign (struct rspamd_task *task, const gchar *selector, if (hstat.s.flags & RSPAMD_DKIM_FLAG_OVERSIGN) { /* Do oversigning */ - GPtrArray *ar; guint count = 0; - ar = g_hash_table_lookup (task->raw_headers, dh->name); - - if (ar) { - count = ar->len; - } + rh = rspamd_message_get_header_array (task, dh->name); - for (j = 0; j < count; j ++) { - /* Sign all existing headers */ - rspamd_dkim_canonize_header (&ctx->common, task, dh->name, j, - NULL, NULL); + if (rh) { + DL_FOREACH (rh, cur) { + /* Sign all existing headers */ + rspamd_dkim_canonize_header (&ctx->common, task, dh->name, + count, + NULL, NULL); + count++; + } } /* Now add one more entry to oversign */ if (count > 0 || !(hstat.s.flags & RSPAMD_DKIM_FLAG_OVERSIGN_EXISTING)) { cur_len = (strlen (dh->name) + 1) * (count + 1); headers_len += cur_len; + if (headers_len > 70 && i > 0 && i < ctx->common.hlist->len - 1) { rspamd_printf_gstring (hdr, " "); headers_len = cur_len; @@ -3057,7 +3066,9 @@ rspamd_dkim_sign (struct rspamd_task *task, const gchar *selector, } } else { - if (g_hash_table_lookup (task->raw_headers, dh->name)) { + rh = rspamd_message_get_header_array (task, dh->name); + + if (rh) { if (hstat.s.count > 0) { cur_len = (strlen (dh->name) + 1) * (hstat.s.count); @@ -3166,7 +3177,7 @@ rspamd_dkim_sign (struct rspamd_task *task, const gchar *selector, } else { b64_data = rspamd_encode_base64_fold (sig_buf, sig_len, 70, NULL, - task->nlines_type); + MESSAGE_FIELD (task, nlines_type)); } rspamd_printf_gstring (hdr, "%s", b64_data); diff --git a/src/libserver/dns.c b/src/libserver/dns.c index 5277e2f6c..3b8770a7b 100644 --- a/src/libserver/dns.c +++ b/src/libserver/dns.c @@ -206,8 +206,8 @@ make_dns_request_task_common (struct rspamd_task *task, } if (!forced && task->dns_requests >= task->cfg->dns_max_requests) { - msg_info_task ("<%s> stop resolving on reaching %ud requests", - task->message_id, task->dns_requests); + msg_info_task ("stop resolving on reaching %ud requests", + task->dns_requests); } return TRUE; diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c index bef7a0452..8834529ee 100644 --- a/src/libserver/protocol.c +++ b/src/libserver/protocol.c @@ -478,10 +478,6 @@ rspamd_protocol_handle_headers (struct rspamd_task *task, break; case 's': case 'S': - IF_HEADER (SUBJECT_HEADER) { - msg_debug_protocol ("read subject header, value: %V", hv); - task->subject = rspamd_mempool_ftokdup (task->task_pool, hv_tok); - } IF_HEADER (SETTINGS_ID_HEADER) { msg_debug_protocol ("read settings-id header, value: %V", hv); task->settings_elt = rspamd_config_find_settings_name_ref ( @@ -862,7 +858,7 @@ urls_protocol_cb (gpointer key, gpointer value, gpointer ud) } msg_notice_task_encrypted ("<%s> %s: %*s; ip: %s; URL: %*s", - task->message_id, + MESSAGE_FIELD_CHECK (task, message_id), has_user ? "user" : "from", len, user_field, rspamd_inet_address_to_string (task->from_addr), @@ -939,7 +935,7 @@ rspamd_protocol_rewrite_subject (struct rspamd_task *task) } p = c; - s = task->subject; + s = MESSAGE_FIELD_CHECK (task, subject); if (s) { slen = strlen (s); @@ -1112,7 +1108,7 @@ rspamd_metric_result_ucl (struct rspamd_task *task, sobj = rspamd_metric_symbol_ucl (task, sym); ucl_object_insert_key (obj, sobj, sym->name, 0, false); } - }); + }) if (task->cmd == CMD_CHECK_V2) { ucl_object_insert_key (top, obj, "symbols", 0, false); @@ -1289,16 +1285,16 @@ rspamd_protocol_write_ucl (struct rspamd_task *task, } } - if (flags & RSPAMD_PROTOCOL_URLS) { - if (g_hash_table_size (task->urls) > 0) { + if (flags & RSPAMD_PROTOCOL_URLS && task->message) { + if (g_hash_table_size (MESSAGE_FIELD (task, urls)) > 0) { ucl_object_insert_key (top, - rspamd_urls_tree_ucl (task->urls, task), + rspamd_urls_tree_ucl (MESSAGE_FIELD (task, urls), task), "urls", 0, false); } - if (g_hash_table_size (task->emails) > 0) { + if (g_hash_table_size (MESSAGE_FIELD (task, emails)) > 0) { ucl_object_insert_key (top, - rspamd_emails_tree_ucl (task->emails, task), + rspamd_emails_tree_ucl (MESSAGE_FIELD (task, emails), task), "emails", 0, false); } } @@ -1310,7 +1306,8 @@ rspamd_protocol_write_ucl (struct rspamd_task *task, } if (flags & RSPAMD_PROTOCOL_BASIC) { - ucl_object_insert_key (top, ucl_object_fromstring (task->message_id), + ucl_object_insert_key (top, + ucl_object_fromstring (MESSAGE_FIELD_CHECK (task, message_id)), "message-id", 0, false); ucl_object_insert_key (top, ucl_object_fromdouble (task->time_real_finish - task->task_timestamp), @@ -1334,13 +1331,15 @@ rspamd_protocol_write_ucl (struct rspamd_task *task, GString *folded_header; dkim_sig = (GString *) dkim_sigs->data; - if (task->flags & RSPAMD_TASK_FLAG_MILTER) { + if (task->flags & RSPAMD_TASK_FLAG_MILTER || !task->message) { folded_header = rspamd_header_value_fold ("DKIM-Signature", dkim_sig->str, 80, RSPAMD_TASK_NEWLINES_LF, NULL); } else { folded_header = rspamd_header_value_fold ("DKIM-Signature", - dkim_sig->str, 80, task->nlines_type, NULL); + dkim_sig->str, 80, + MESSAGE_FIELD (task, nlines_type), + NULL); } ucl_array_append (ar, @@ -1365,7 +1364,8 @@ rspamd_protocol_write_ucl (struct rspamd_task *task, } else { folded_header = rspamd_header_value_fold ("DKIM-Signature", - dkim_sig->str, 80, task->nlines_type, NULL); + dkim_sig->str, 80, MESSAGE_FIELD (task, nlines_type), + NULL); } ucl_object_insert_key (top, @@ -1401,14 +1401,17 @@ rspamd_protocol_http_reply (struct rspamd_http_message *msg, struct rspamd_task *task, ucl_object_t **pobj) { struct rspamd_metric_result *metric_res; - GHashTableIter hiter; const struct rspamd_re_cache_stat *restat; - gpointer h, v; + ucl_object_t *top = NULL; rspamd_fstring_t *reply; gint flags = RSPAMD_PROTOCOL_DEFAULT; struct rspamd_action *action; + /* Removed in 2.0 */ +#if 0 + GHashTableIter hiter; + gpointer h, v; /* Write custom headers */ g_hash_table_iter_init (&hiter, task->reply_headers); while (g_hash_table_iter_next (&hiter, &h, &v)) { @@ -1416,6 +1419,7 @@ rspamd_protocol_http_reply (struct rspamd_http_message *msg, rspamd_http_message_add_header (msg, hn->begin, hv->begin); } +#endif flags |= RSPAMD_PROTOCOL_URLS; @@ -1787,7 +1791,8 @@ rspamd_protocol_write_reply (struct rspamd_task *task, ev_tstamp timeout) msg = rspamd_http_new_message (HTTP_RESPONSE); if (rspamd_http_connection_is_encrypted (task->http_conn)) { - msg_info_protocol ("<%s> writing encrypted reply", task->message_id); + msg_info_protocol ("<%s> writing encrypted reply", + MESSAGE_FIELD_CHECK (task, message_id)); } if (!RSPAMD_TASK_IS_JSON (task)) { diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c index 5517ad875..592cc31d4 100644 --- a/src/libserver/re_cache.c +++ b/src/libserver/re_cache.c @@ -24,6 +24,7 @@ #include "libutil/regexp.h" #include "lua/lua_common.h" #include "libstat/stat_api.h" +#include "contrib/uthash/utlist.h" #include "khash.h" @@ -950,6 +951,72 @@ rspamd_process_words_vector (GArray *words, return cnt; } +static guint +rspamd_re_cache_process_headers_list (struct rspamd_task *task, + struct rspamd_re_runtime *rt, + rspamd_regexp_t *re, + struct rspamd_re_class *re_class, + struct rspamd_mime_header *rh, + gboolean is_strong) +{ + const guchar **scvec, *in; + gboolean raw = FALSE; + guint *lenvec; + struct rspamd_mime_header *cur; + guint cnt = 0, i = 0, ret = 0; + + DL_COUNT (rh, cur, cnt); + + scvec = g_malloc (sizeof (*scvec) * cnt); + lenvec = g_malloc (sizeof (*lenvec) * cnt); + + DL_FOREACH (rh, cur) { + + if (is_strong && strcmp (cur->name, re_class->type_data) != 0) { + /* Skip a different case */ + continue; + } + + if (re_class->type == RSPAMD_RE_RAWHEADER) { + in = (const guchar *)cur->value; + lenvec[i] = strlen (cur->value); + + if (!g_utf8_validate (in, lenvec[i], NULL)) { + raw = TRUE; + } + } + else { + in = (const guchar *)cur->decoded; + /* Validate input^W^WNo need to validate as it is already valid */ + if (!in) { + lenvec[i] = 0; + scvec[i] = (guchar *)""; + continue; + } + + lenvec[i] = strlen (in); + } + + scvec[i] = in; + + i ++; + } + + if (i > 0) { + ret = rspamd_re_cache_process_regexp_data (rt, re, + task, scvec, lenvec, i, raw); + msg_debug_re_task ("checking header %s regexp: %s=%*s -> %d", + re_class->type_data, + rspamd_regexp_get_pattern (re), + (int) lenvec[0], scvec[0], ret); + } + + g_free (scvec); + g_free (lenvec); + + return ret; +} + /* * Calculates the specified regexp for the specified class if it's not calculated */ @@ -961,14 +1028,14 @@ rspamd_re_cache_exec_re (struct rspamd_task *task, gboolean is_strong) { guint ret = 0, i, re_id; - GPtrArray *headerlist; GHashTableIter it; struct rspamd_mime_header *rh; - const gchar *in, *end; + const gchar *in; const guchar **scvec; guint *lenvec; gboolean raw = FALSE; - struct rspamd_mime_text_part *part; + struct rspamd_mime_text_part *text_part; + struct rspamd_mime_part *mime_part; struct rspamd_url *url; gpointer k, v; guint len, cnt; @@ -982,140 +1049,70 @@ rspamd_re_cache_exec_re (struct rspamd_task *task, case RSPAMD_RE_HEADER: case RSPAMD_RE_RAWHEADER: /* Get list of specified headers */ - headerlist = rspamd_message_get_header_array (task, - re_class->type_data, - is_strong); + rh = rspamd_message_get_header_array (task, + re_class->type_data); - if (headerlist && headerlist->len > 0) { - scvec = g_malloc (sizeof (*scvec) * headerlist->len); - lenvec = g_malloc (sizeof (*lenvec) * headerlist->len); - - for (i = 0; i < headerlist->len; i ++) { - rh = g_ptr_array_index (headerlist, i); - - if (re_class->type == RSPAMD_RE_RAWHEADER) { - in = rh->value; - lenvec[i] = strlen (rh->value); - - if (!g_utf8_validate (in, lenvec[i], NULL)) { - raw = TRUE; - } - } - else { - in = rh->decoded; - /* Validate input */ - if (!in || !g_utf8_validate (in, -1, &end)) { - lenvec[i] = 0; - scvec[i] = (guchar *)""; - continue; - } - lenvec[i] = end - in; - } - - scvec[i] = (guchar *)in; - } - - ret = rspamd_re_cache_process_regexp_data (rt, re, - task, scvec, lenvec, headerlist->len, raw); - msg_debug_re_task ("checking header %s regexp: %s=%*s -> %d", - re_class->type_data, - rspamd_regexp_get_pattern (re), - (int)lenvec[0], scvec[0], ret); - g_free (scvec); - g_free (lenvec); + if (rh) { + ret = rspamd_re_cache_process_headers_list (task, rt, re, + re_class, rh, is_strong); } break; case RSPAMD_RE_ALLHEADER: raw = TRUE; - in = task->raw_headers_content.begin; - len = task->raw_headers_content.len; + in = MESSAGE_FIELD (task, raw_headers_content).begin; + len = MESSAGE_FIELD (task, raw_headers_content).len; ret = rspamd_re_cache_process_regexp_data (rt, re, task, (const guchar **)&in, &len, 1, raw); msg_debug_re_task ("checking allheader regexp: %s -> %d", rspamd_regexp_get_pattern (re), ret); break; case RSPAMD_RE_MIMEHEADER: - headerlist = rspamd_message_get_mime_header_array (task, - re_class->type_data, - is_strong); - - if (headerlist && headerlist->len > 0) { - scvec = g_malloc (sizeof (*scvec) * headerlist->len); - lenvec = g_malloc (sizeof (*lenvec) * headerlist->len); - - for (i = 0; i < headerlist->len; i ++) { - rh = g_ptr_array_index (headerlist, i); - - if (re_class->type == RSPAMD_RE_RAWHEADER) { - in = rh->value; - lenvec[i] = strlen (rh->value); + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, mime_part) { + rh = rspamd_message_get_header_from_hash (mime_part->raw_headers, + re_class->type_data); - if (!g_utf8_validate (in, lenvec[i], NULL)) { - raw = TRUE; - } - } - else { - in = rh->decoded; - /* Validate input */ - if (!in || !g_utf8_validate (in, -1, &end)) { - lenvec[i] = 0; - scvec[i] = (guchar *)""; - continue; - } - - lenvec[i] = end - in; - } - - scvec[i] = (guchar *)in; + if (rh) { + ret += rspamd_re_cache_process_headers_list (task, rt, re, + re_class, rh, is_strong); } - - ret = rspamd_re_cache_process_regexp_data (rt, re, - task, scvec, lenvec, headerlist->len, raw); - msg_debug_re_task ("checking mime header %s regexp: %s -> %d", - re_class->type_data, - rspamd_regexp_get_pattern (re), ret); - g_free (scvec); - g_free (lenvec); } break; case RSPAMD_RE_MIME: case RSPAMD_RE_RAWMIME: /* Iterate through text parts */ - if (task->text_parts->len > 0) { - cnt = task->text_parts->len; + if (MESSAGE_FIELD (task, text_parts)->len > 0) { + cnt = MESSAGE_FIELD (task, text_parts)->len; scvec = g_malloc (sizeof (*scvec) * cnt); lenvec = g_malloc (sizeof (*lenvec) * cnt); - for (i = 0; i < task->text_parts->len; i++) { - part = g_ptr_array_index (task->text_parts, i); - + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, text_part) { /* Select data for regexp */ if (re_class->type == RSPAMD_RE_RAWMIME) { - if (part->raw.len == 0) { + if (text_part->raw.len == 0) { len = 0; in = ""; } else { - in = part->raw.begin; - len = part->raw.len; + in = text_part->raw.begin; + len = text_part->raw.len; } raw = TRUE; } else { /* Skip empty parts */ - if (IS_PART_EMPTY (part)) { + if (IS_PART_EMPTY (text_part)) { len = 0; in = ""; } else { /* Check raw flags */ - if (!IS_PART_UTF (part)) { + if (!IS_PART_UTF (text_part)) { raw = TRUE; } - in = part->utf_content->data; - len = part->utf_content->len; + in = text_part->utf_content->data; + len = text_part->utf_content->len; } } @@ -1132,12 +1129,13 @@ rspamd_re_cache_exec_re (struct rspamd_task *task, } break; case RSPAMD_RE_URL: - cnt = g_hash_table_size (task->urls) + g_hash_table_size (task->emails); + cnt = g_hash_table_size (MESSAGE_FIELD (task, urls)) + + g_hash_table_size (MESSAGE_FIELD (task, emails)); if (cnt > 0) { scvec = g_malloc (sizeof (*scvec) * cnt); lenvec = g_malloc (sizeof (*lenvec) * cnt); - g_hash_table_iter_init (&it, task->urls); + g_hash_table_iter_init (&it, MESSAGE_FIELD (task, urls)); i = 0; while (g_hash_table_iter_next (&it, &k, &v)) { @@ -1150,7 +1148,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task, lenvec[i++] = len; } - g_hash_table_iter_init (&it, task->emails); + g_hash_table_iter_init (&it, MESSAGE_FIELD (task, emails)); while (g_hash_table_iter_next (&it, &k, &v)) { url = v; @@ -1191,7 +1189,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task, * paragraph when running the rules. All HTML tags and line breaks will * be removed before matching. */ - cnt = task->text_parts->len + 1; + cnt = MESSAGE_FIELD (task, text_parts)->len + 1; scvec = g_malloc (sizeof (*scvec) * cnt); lenvec = g_malloc (sizeof (*lenvec) * cnt); @@ -1200,11 +1198,9 @@ rspamd_re_cache_exec_re (struct rspamd_task *task, * of the body content. */ - headerlist = rspamd_message_get_header_array (task, "Subject", FALSE); - - if (headerlist && headerlist->len > 0) { - rh = g_ptr_array_index (headerlist, 0); + rh = rspamd_message_get_header_array (task, "Subject"); + if (rh) { scvec[0] = (guchar *)rh->decoded; lenvec[0] = strlen (rh->decoded); } @@ -1212,14 +1208,13 @@ rspamd_re_cache_exec_re (struct rspamd_task *task, scvec[0] = (guchar *)""; lenvec[0] = 0; } - for (i = 0; i < task->text_parts->len; i++) { - part = g_ptr_array_index (task->text_parts, i); - if (part->utf_stripped_content) { - scvec[i + 1] = (guchar *)part->utf_stripped_content->data; - lenvec[i + 1] = part->utf_stripped_content->len; + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, text_part) { + if (text_part->utf_stripped_content) { + scvec[i + 1] = (guchar *)text_part->utf_stripped_content->data; + lenvec[i + 1] = text_part->utf_stripped_content->len; - if (!IS_PART_UTF (part)) { + if (!IS_PART_UTF (text_part)) { raw = TRUE; } } @@ -1244,19 +1239,19 @@ rspamd_re_cache_exec_re (struct rspamd_task *task, * Multiline expressions will need to be used to match strings that are * broken by line breaks. */ - if (task->text_parts->len > 0) { - cnt = task->text_parts->len; + if (MESSAGE_FIELD (task, text_parts)->len > 0) { + cnt = MESSAGE_FIELD (task, text_parts)->len; scvec = g_malloc (sizeof (*scvec) * cnt); lenvec = g_malloc (sizeof (*lenvec) * cnt); - for (i = 0; i < task->text_parts->len; i++) { - part = g_ptr_array_index (task->text_parts, i); + for (i = 0; i < cnt; i++) { + text_part = g_ptr_array_index (MESSAGE_FIELD (task, text_parts), i); - if (part->parsed.len > 0) { - scvec[i] = (guchar *)part->parsed.begin; - lenvec[i] = part->parsed.len; + if (text_part->parsed.len > 0) { + scvec[i] = (guchar *)text_part->parsed.begin; + lenvec[i] = text_part->parsed.len; - if (!IS_PART_UTF (part)) { + if (!IS_PART_UTF (text_part)) { raw = TRUE; } } @@ -1277,13 +1272,13 @@ rspamd_re_cache_exec_re (struct rspamd_task *task, case RSPAMD_RE_WORDS: case RSPAMD_RE_STEMWORDS: case RSPAMD_RE_RAWWORDS: - if (task->text_parts->len > 0) { + if (MESSAGE_FIELD (task, text_parts)->len > 0) { cnt = 0; raw = FALSE; - PTR_ARRAY_FOREACH (task->text_parts, i, part) { - if (part->utf_words) { - cnt += part->utf_words->len; + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, text_part) { + if (text_part->utf_words) { + cnt += text_part->utf_words->len; } } @@ -1297,9 +1292,9 @@ rspamd_re_cache_exec_re (struct rspamd_task *task, cnt = 0; - PTR_ARRAY_FOREACH (task->text_parts, i, part) { - if (part->utf_words) { - cnt = rspamd_process_words_vector (part->utf_words, + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, text_part) { + if (text_part->utf_words) { + cnt = rspamd_process_words_vector (text_part->utf_words, scvec, lenvec, re_class, cnt, &raw); } } @@ -1522,6 +1517,7 @@ rspamd_re_cache_type_to_string (enum rspamd_re_type type) ret = "stem_words"; break; case RSPAMD_RE_MAX: + default: ret = "invalid class"; break; } diff --git a/src/libserver/roll_history.c b/src/libserver/roll_history.c index c70246383..ceed8d04f 100644 --- a/src/libserver/roll_history.c +++ b/src/libserver/roll_history.c @@ -15,6 +15,7 @@ */ #include "config.h" #include "rspamd.h" +#include "libmime/message.h" #include "lua/lua_common.h" #include "unix-std.h" #include "cfg_file_private.h" @@ -139,8 +140,10 @@ rspamd_roll_history_update (struct roll_history *history, row->timestamp = task->task_timestamp; /* Strings */ - rspamd_strlcpy (row->message_id, task->message_id, - sizeof (row->message_id)); + if (task->message) { + rspamd_strlcpy (row->message_id, MESSAGE_FIELD (task, message_id), + sizeof (row->message_id)); + } if (task->user) { rspamd_strlcpy (row->user, task->user, sizeof (row->user)); } diff --git a/src/libserver/rspamd_symcache.c b/src/libserver/rspamd_symcache.c index 3de339654..2acff7e6d 100644 --- a/src/libserver/rspamd_symcache.c +++ b/src/libserver/rspamd_symcache.c @@ -1808,7 +1808,7 @@ rspamd_symcache_process_settings (struct rspamd_task *task, wl = ucl_object_lookup (task->settings, "whitelist"); if (wl != NULL) { - msg_info_task ("<%s> is whitelisted", task->message_id); + msg_info_task ("task is whitelisted"); task->flags |= RSPAMD_TASK_FLAG_SKIP; return TRUE; } @@ -2015,9 +2015,9 @@ rspamd_symcache_process_symbols (struct rspamd_task *task, if (!(item->type & SYMBOL_TYPE_FINE)) { if (rspamd_symcache_metric_limit (task, checkpoint)) { - msg_info_task ("<%s> has already scored more than %.2f, so do " + msg_info_task ("task has already scored more than %.2f, so do " "not " - "plan more checks", task->message_id, + "plan more checks", checkpoint->rs->score); all_done = TRUE; break; diff --git a/src/libserver/spf.c b/src/libserver/spf.c index 70db4adbd..220e7adfa 100644 --- a/src/libserver/spf.c +++ b/src/libserver/spf.c @@ -121,8 +121,8 @@ struct spf_dns_cb { do { \ if ((rec)->nested > SPF_MAX_NESTING || \ (rec)->dns_requests > SPF_MAX_DNS_REQUESTS) { \ - msg_info_spf ("<%s> spf recursion limit %d is reached, domain: %s", \ - (rec)->task->message_id, (rec)->dns_requests, \ + msg_info_spf ("spf recursion limit %d is reached, domain: %s", \ + (rec)->dns_requests, \ (rec)->sender_domain); \ return FALSE; \ } \ @@ -823,9 +823,8 @@ spf_record_dns_callback (struct rdns_reply *reply, gpointer arg) if (!(cb->addr->flags & RSPAMD_SPF_FLAG_RESOLVED)) { cb->addr->flags |= RSPAMD_SPF_FLAG_PERMFAIL; msg_debug_spf ( - "<%s>: spf error for domain %s: cannot find MX" + "spf error for domain %s: cannot find MX" " record for %s: %s", - task->message_id, cb->rec->sender_domain, cb->resolved->cur_domain, rdns_strerror (reply->code)); @@ -836,9 +835,8 @@ spf_record_dns_callback (struct rdns_reply *reply, gpointer arg) if (!(cb->addr->flags & RSPAMD_SPF_FLAG_RESOLVED)) { cb->addr->flags |= RSPAMD_SPF_FLAG_PERMFAIL; msg_debug_spf ( - "<%s>: spf error for domain %s: cannot resolve A" + "spf error for domain %s: cannot resolve A" " record for %s: %s", - task->message_id, cb->rec->sender_domain, cb->resolved->cur_domain, rdns_strerror (reply->code)); @@ -852,9 +850,8 @@ spf_record_dns_callback (struct rdns_reply *reply, gpointer arg) if (!(cb->addr->flags & RSPAMD_SPF_FLAG_RESOLVED)) { cb->addr->flags |= RSPAMD_SPF_FLAG_PERMFAIL; msg_debug_spf ( - "<%s>: spf error for domain %s: cannot resolve AAAA" + "spf error for domain %s: cannot resolve AAAA" " record for %s: %s", - task->message_id, cb->rec->sender_domain, cb->resolved->cur_domain, rdns_strerror (reply->code)); @@ -866,9 +863,8 @@ spf_record_dns_callback (struct rdns_reply *reply, gpointer arg) case SPF_RESOLVE_PTR: if (!(cb->addr->flags & RSPAMD_SPF_FLAG_RESOLVED)) { msg_debug_spf ( - "<%s>: spf error for domain %s: cannot resolve PTR" + "spf error for domain %s: cannot resolve PTR" " record for %s: %s", - task->message_id, cb->rec->sender_domain, cb->resolved->cur_domain, rdns_strerror (reply->code)); @@ -881,9 +877,8 @@ spf_record_dns_callback (struct rdns_reply *reply, gpointer arg) if (!(cb->addr->flags & RSPAMD_SPF_FLAG_RESOLVED)) { cb->addr->flags |= RSPAMD_SPF_FLAG_PERMFAIL; msg_debug_spf ( - "<%s>: spf error for domain %s: cannot resolve REDIRECT" + "spf error for domain %s: cannot resolve REDIRECT" " record for %s: %s", - task->message_id, cb->rec->sender_domain, cb->resolved->cur_domain, rdns_strerror (reply->code)); @@ -893,9 +888,8 @@ spf_record_dns_callback (struct rdns_reply *reply, gpointer arg) case SPF_RESOLVE_INCLUDE: if (!(cb->addr->flags & RSPAMD_SPF_FLAG_RESOLVED)) { msg_debug_spf ( - "<%s>: spf error for domain %s: cannot resolve INCLUDE" + "spf error for domain %s: cannot resolve INCLUDE" " record for %s: %s", - task->message_id, cb->rec->sender_domain, cb->resolved->cur_domain, rdns_strerror (reply->code)); @@ -908,9 +902,8 @@ spf_record_dns_callback (struct rdns_reply *reply, gpointer arg) case SPF_RESOLVE_EXISTS: if (!(cb->addr->flags & RSPAMD_SPF_FLAG_RESOLVED)) { msg_debug_spf ( - "<%s>: spf error for domain %s: cannot resolve EXISTS" + "spf error for domain %s: cannot resolve EXISTS" " record for %s: %s", - task->message_id, cb->rec->sender_domain, cb->resolved->cur_domain, rdns_strerror (reply->code)); @@ -922,9 +915,8 @@ spf_record_dns_callback (struct rdns_reply *reply, gpointer arg) else { cb->addr->flags |= RSPAMD_SPF_FLAG_TEMPFAIL; msg_info_spf ( - "<%s>: spf error for domain %s: cannot resolve %s DNS record for" + "spf error for domain %s: cannot resolve %s DNS record for" " %s: %s", - task->message_id, cb->rec->sender_domain, rspamd_spf_dns_action_to_str (cb->cur_action), cb->ptr_host, @@ -1622,8 +1614,8 @@ expand_spf_macro (struct spf_record *rec, struct spf_resolved_element *resolved, else { /* Something unknown */ msg_info_spf ( - "<%s>: spf error for domain %s: unknown spf element", - task->message_id, rec->sender_domain); + "spf error for domain %s: unknown spf element", + rec->sender_domain); return begin; } p++; @@ -1657,9 +1649,8 @@ expand_spf_macro (struct spf_record *rec, struct spf_resolved_element *resolved, break; default: msg_info_spf ( - "<%s>: spf error for domain %s: unknown or " + "spf error for domain %s: unknown or " "unsupported spf macro %c in %s", - task->message_id, rec->sender_domain, *p, begin); @@ -1740,8 +1731,8 @@ expand_spf_macro (struct spf_record *rec, struct spf_resolved_element *resolved, else { /* Something unknown */ msg_info_spf ( - "<%s>: spf error for domain %s: unknown spf element", - task->message_id, rec->sender_domain); + "spf error for domain %s: unknown spf element", + rec->sender_domain); return begin; } p++; @@ -1836,9 +1827,8 @@ expand_spf_macro (struct spf_record *rec, struct spf_resolved_element *resolved, break; default: msg_info_spf ( - "<%s>: spf error for domain %s: unknown or " + "spf error for domain %s: unknown or " "unsupported spf macro %c in %s", - task->message_id, rec->sender_domain, *p, begin); @@ -1881,10 +1871,8 @@ expand_spf_macro (struct spf_record *rec, struct spf_resolved_element *resolved, delim = *p; } else { - msg_info_spf ( - "<%s>: spf error for domain %s: unknown or " + msg_info_spf ("spf error for domain %s: unknown or " "unsupported spf macro %c in %s", - task->message_id, rec->sender_domain, *p, begin); @@ -1939,9 +1927,8 @@ parse_spf_record (struct spf_record *rec, struct spf_resolved_element *resolved, res = parse_spf_a (rec, resolved, addr); } else { - msg_info_spf ( - "<%s>: spf error for domain %s: bad spf command %s", - task->message_id, rec->sender_domain, begin); + msg_info_spf ("spf error for domain %s: bad spf command %s", + rec->sender_domain, begin); } break; case 'i': @@ -1959,9 +1946,8 @@ parse_spf_record (struct spf_record *rec, struct spf_resolved_element *resolved, res = parse_spf_ip6 (rec, addr); } else { - msg_info_spf ( - "<%s>: spf error for domain %s: bad spf command %s", - task->message_id, rec->sender_domain, begin); + msg_info_spf ("spf error for domain %s: bad spf command %s", + rec->sender_domain, begin); } break; case 'm': @@ -1970,9 +1956,8 @@ parse_spf_record (struct spf_record *rec, struct spf_resolved_element *resolved, res = parse_spf_mx (rec, resolved, addr); } else { - msg_info_spf ( - "<%s>: spf error for domain %s: bad spf command %s", - task->message_id, rec->sender_domain, begin); + msg_info_spf ("spf error for domain %s: bad spf command %s", + rec->sender_domain, begin); } break; case 'p': @@ -1982,9 +1967,8 @@ parse_spf_record (struct spf_record *rec, struct spf_resolved_element *resolved, res = parse_spf_ptr (rec, resolved, addr); } else { - msg_info_spf ( - "<%s>: spf error for domain %s: bad spf command %s", - task->message_id, rec->sender_domain, begin); + msg_info_spf ("spf error for domain %s: bad spf command %s", + rec->sender_domain, begin); } break; case 'e': @@ -1998,9 +1982,8 @@ parse_spf_record (struct spf_record *rec, struct spf_resolved_element *resolved, res = parse_spf_exists (rec, addr); } else { - msg_info_spf ( - "<%s>: spf error for domain %s: bad spf command %s", - task->message_id, rec->sender_domain, begin); + msg_info_spf ("spf error for domain %s: bad spf command %s", + rec->sender_domain, begin); } break; case 'r': @@ -2010,9 +1993,8 @@ parse_spf_record (struct spf_record *rec, struct spf_resolved_element *resolved, res = parse_spf_redirect (rec, resolved, addr); } else { - msg_info_spf ( - "<%s>: spf error for domain %s: bad spf command %s", - task->message_id, rec->sender_domain, begin); + msg_info_spf ("spf error for domain %s: bad spf command %s", + rec->sender_domain, begin); } break; case 'v': @@ -2025,8 +2007,8 @@ parse_spf_record (struct spf_record *rec, struct spf_resolved_element *resolved, } break; default: - msg_info_spf ("<%s>: spf error for domain %s: bad spf command %s", - task->message_id, rec->sender_domain, begin); + msg_info_spf ("spf error for domain %s: bad spf command %s", + rec->sender_domain, begin); break; } @@ -2088,8 +2070,8 @@ start_spf_parse (struct spf_record *rec, struct spf_resolved_element *resolved, /* Skip one number of record, so no we are here spf2.0/ */ begin += sizeof (SPF_VER2_STR); if (*begin != '/') { - msg_info_spf ("<%s>: spf error for domain %s: sender id is invalid", - rec->task->message_id, rec->sender_domain); + msg_info_spf ("spf error for domain %s: sender id is invalid", + rec->sender_domain); } else { begin++; @@ -2099,8 +2081,7 @@ start_spf_parse (struct spf_record *rec, struct spf_resolved_element *resolved, } else { msg_debug_spf ( - "<%s>: spf error for domain %s: bad spf record start: %*s", - rec->task->message_id, + "spf error for domain %s: bad spf record start: %*s", rec->sender_domain, (gint)len, begin); diff --git a/src/libserver/task.c b/src/libserver/task.c index 04be61744..950af5ec8 100644 --- a/src/libserver/task.c +++ b/src/libserver/task.c @@ -123,50 +123,18 @@ rspamd_task_new (struct rspamd_worker *worker, struct rspamd_config *cfg, new_task->task_pool = pool; } - new_task->raw_headers = g_hash_table_new_full (rspamd_strcase_hash, - rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard); - new_task->headers_order = g_queue_new (); new_task->request_headers = g_hash_table_new_full (rspamd_ftok_icase_hash, rspamd_ftok_icase_equal, rspamd_fstring_mapped_ftok_free, rspamd_request_header_dtor); rspamd_mempool_add_destructor (new_task->task_pool, - (rspamd_mempool_destruct_t) g_hash_table_unref, - new_task->request_headers); - new_task->reply_headers = g_hash_table_new_full (rspamd_ftok_icase_hash, - rspamd_ftok_icase_equal, rspamd_fstring_mapped_ftok_free, - rspamd_fstring_mapped_ftok_free); - rspamd_mempool_add_destructor (new_task->task_pool, - (rspamd_mempool_destruct_t) g_hash_table_unref, - new_task->reply_headers); - rspamd_mempool_add_destructor (new_task->task_pool, - (rspamd_mempool_destruct_t) g_hash_table_unref, - new_task->raw_headers); - rspamd_mempool_add_destructor (new_task->task_pool, - (rspamd_mempool_destruct_t) g_queue_free, - new_task->headers_order); - new_task->emails = g_hash_table_new (rspamd_email_hash, rspamd_emails_cmp); - rspamd_mempool_add_destructor (new_task->task_pool, - (rspamd_mempool_destruct_t) g_hash_table_unref, - new_task->emails); - new_task->urls = g_hash_table_new (rspamd_url_hash, rspamd_urls_cmp); - rspamd_mempool_add_destructor (new_task->task_pool, (rspamd_mempool_destruct_t) g_hash_table_unref, - new_task->urls); - new_task->parts = g_ptr_array_sized_new (4); - rspamd_mempool_add_destructor (new_task->task_pool, - rspamd_ptr_array_free_hard, new_task->parts); - new_task->text_parts = g_ptr_array_sized_new (2); - rspamd_mempool_add_destructor (new_task->task_pool, - rspamd_ptr_array_free_hard, new_task->text_parts); - new_task->received = g_ptr_array_sized_new (8); - rspamd_mempool_add_destructor (new_task->task_pool, - rspamd_ptr_array_free_hard, new_task->received); + new_task->request_headers); new_task->sock = -1; new_task->flags |= (RSPAMD_TASK_FLAG_MIME|RSPAMD_TASK_FLAG_JSON); new_task->result = rspamd_create_metric_result (new_task); - new_task->message_id = new_task->queue_id = "undef"; + new_task->queue_id = "undef"; new_task->messages = ucl_object_typed_new (UCL_OBJECT); new_task->lua_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); @@ -231,8 +199,6 @@ rspamd_task_restore (void *arg) void rspamd_task_free (struct rspamd_task *task) { - struct rspamd_mime_part *p; - struct rspamd_mime_text_part *tp; struct rspamd_email_address *addr; struct rspamd_lua_cached_entry *entry; static guint free_iters = 0; @@ -243,38 +209,6 @@ rspamd_task_free (struct rspamd_task *task) if (task) { debug_task ("free pointer %p", task); - for (i = 0; i < task->parts->len; i ++) { - p = g_ptr_array_index (task->parts, i); - - if (p->raw_headers) { - g_hash_table_unref (p->raw_headers); - } - - if (p->headers_order) { - g_queue_free (p->headers_order); - } - - if (IS_CT_MULTIPART (p->ct)) { - if (p->specific.mp->children) { - g_ptr_array_free (p->specific.mp->children, TRUE); - } - } - } - - for (i = 0; i < task->text_parts->len; i ++) { - tp = g_ptr_array_index (task->text_parts, i); - - if (tp->utf_words) { - g_array_free (tp->utf_words, TRUE); - } - if (tp->normalized_hashes) { - g_array_free (tp->normalized_hashes, TRUE); - } - if (tp->languages) { - g_ptr_array_unref (tp->languages); - } - } - if (task->rcpt_envelope) { for (i = 0; i < task->rcpt_envelope->len; i ++) { addr = g_ptr_array_index (task->rcpt_envelope, i); @@ -380,6 +314,8 @@ rspamd_task_free (struct rspamd_task *task) REF_RELEASE (task->cfg); } + rspamd_message_unref (task->message); + if (task->flags & RSPAMD_TASK_FLAG_OWN_POOL) { rspamd_mempool_delete (task->task_pool); } @@ -994,8 +930,9 @@ rspamd_task_get_principal_recipient (struct rspamd_task *task) } } - if (task->rcpt_mime != NULL && task->rcpt_mime->len > 0) { - PTR_ARRAY_FOREACH (task->rcpt_mime, i, addr) { + GPtrArray *rcpt_mime = MESSAGE_FIELD_CHECK (task, rcpt_mime); + if (rcpt_mime != NULL && rcpt_mime->len > 0) { + PTR_ARRAY_FOREACH (rcpt_mime, i, addr) { if (addr->addr && !(addr->flags & RSPAMD_EMAIL_ADDR_ORIGINAL)) { return rspamd_task_cache_principal_recipient (task, addr->addr, addr->addr_len); @@ -1032,7 +969,8 @@ rspamd_task_log_check_condition (struct rspamd_task *task, switch (lf->type) { case RSPAMD_LOG_MID: - if (task->message_id && strcmp (task->message_id, "undef") != 0) { + if (MESSAGE_FIELD_CHECK (task, message_id) && + strcmp (MESSAGE_FIELD (task, message_id) , "undef") != 0) { ret = TRUE; } break; @@ -1059,7 +997,8 @@ rspamd_task_log_check_condition (struct rspamd_task *task, break; case RSPAMD_LOG_MIME_RCPT: case RSPAMD_LOG_MIME_RCPTS: - if (task->rcpt_mime && task->rcpt_mime->len > 0) { + if (MESSAGE_FIELD_CHECK (task, rcpt_mime) && + MESSAGE_FIELD (task, rcpt_mime)->len > 0) { ret = TRUE; } break; @@ -1069,7 +1008,8 @@ rspamd_task_log_check_condition (struct rspamd_task *task, } break; case RSPAMD_LOG_MIME_FROM: - if (task->from_mime && task->from_mime->len > 0) { + if (MESSAGE_FIELD_CHECK (task, from_mime) && + MESSAGE_FIELD (task, from_mime)->len > 0) { ret = TRUE; } break; @@ -1388,8 +1328,8 @@ rspamd_task_log_variable (struct rspamd_task *task, switch (lf->type) { /* String vars */ case RSPAMD_LOG_MID: - if (task->message_id) { - var.begin = task->message_id; + if (MESSAGE_FIELD_CHECK (task, message_id)) { + var.begin = MESSAGE_FIELD (task, message_id); var.len = strlen (var.begin); } else { @@ -1458,8 +1398,11 @@ rspamd_task_log_variable (struct rspamd_task *task, } break; case RSPAMD_LOG_MIME_FROM: - if (task->from_mime) { - return rspamd_task_write_ialist (task, task->from_mime, 1, lf, + if (MESSAGE_FIELD_CHECK (task, from_mime)) { + return rspamd_task_write_ialist (task, + MESSAGE_FIELD (task, from_mime), + 1, + lf, logbuf); } break; @@ -1470,8 +1413,11 @@ rspamd_task_log_variable (struct rspamd_task *task, } break; case RSPAMD_LOG_MIME_RCPT: - if (task->rcpt_mime) { - return rspamd_task_write_ialist (task, task->rcpt_mime, 1, lf, + if (MESSAGE_FIELD_CHECK (task, rcpt_mime)) { + return rspamd_task_write_ialist (task, + MESSAGE_FIELD (task, rcpt_mime), + 1, + lf, logbuf); } break; @@ -1482,15 +1428,25 @@ rspamd_task_log_variable (struct rspamd_task *task, } break; case RSPAMD_LOG_MIME_RCPTS: - if (task->rcpt_mime) { - return rspamd_task_write_ialist (task, task->rcpt_mime, -1, lf, + if (MESSAGE_FIELD_CHECK (task, rcpt_mime)) { + return rspamd_task_write_ialist (task, + MESSAGE_FIELD (task, rcpt_mime), + -1, /* All addresses */ + lf, logbuf); } break; case RSPAMD_LOG_DIGEST: - var.len = rspamd_snprintf (numbuf, sizeof (numbuf), "%*xs", - (gint)sizeof (task->digest), task->digest); - var.begin = numbuf; + if (task->message) { + var.len = rspamd_snprintf (numbuf, sizeof (numbuf), "%*xs", + (gint) sizeof (MESSAGE_FIELD (task, digest)), + MESSAGE_FIELD (task, digest)); + var.begin = numbuf; + } + else { + var.begin = undef; + var.len = sizeof (undef) - 1; + } break; case RSPAMD_LOG_FILENAME: if (task->msg.fpath) { diff --git a/src/libserver/task.h b/src/libserver/task.h index ac55dd910..e0e1fc808 100644 --- a/src/libserver/task.h +++ b/src/libserver/task.h @@ -134,6 +134,7 @@ enum rspamd_task_stage { struct rspamd_email_address; struct rspamd_lang_detector; enum rspamd_newlines_type; +struct rspamd_message; /** * Worker task structure @@ -147,15 +148,12 @@ struct rspamd_task { gulong message_len; /**< Message length */ gchar *helo; /**< helo header value */ gchar *queue_id; /**< queue id if specified */ - const gchar *message_id; /**< message id */ rspamd_inet_addr_t *from_addr; /**< from addr for a task */ rspamd_inet_addr_t *client_addr; /**< address of connected socket */ gchar *deliver_to; /**< address to deliver */ gchar *user; /**< user to deliver */ - gchar *subject; /**< subject (for non-mime) */ const gchar *hostname; /**< hostname reported by MTA */ GHashTable *request_headers; /**< HTTP headers in a request */ - GHashTable *reply_headers; /**< Custom reply headers */ struct { const gchar *begin; gsize len; @@ -163,29 +161,14 @@ struct rspamd_task { } msg; /**< message buffer */ struct rspamd_http_connection *http_conn; /**< HTTP server connection */ struct rspamd_async_session *s; /**< async session object */ - GPtrArray *parts; /**< list of parsed parts */ - GPtrArray *text_parts; /**< list of text parts */ - struct { - const gchar *begin; - gsize len; - const gchar *body_start; - } raw_headers_content; /**< list of raw headers */ - GPtrArray *received; /**< list of received headers */ - GHashTable *urls; /**< list of parsed urls */ - GHashTable *emails; /**< list of parsed emails */ - GHashTable *raw_headers; /**< list of raw headers */ - GQueue *headers_order; /**< order of raw headers */ struct rspamd_metric_result *result; /**< Metric result */ GHashTable *lua_cache; /**< cache of lua objects */ GPtrArray *tokens; /**< statistics tokens */ GArray *meta_words; /**< rspamd_stat_token_t produced from meta headers (e.g. Subject) */ - GPtrArray *rcpt_mime; GPtrArray *rcpt_envelope; /**< array of rspamd_email_address */ - GPtrArray *from_mime; struct rspamd_email_address *from_envelope; - enum rspamd_newlines_type nlines_type; /**< type of newlines (detected on most of headers */ ucl_object_t *messages; /**< list of messages that would be reported */ struct rspamd_re_runtime *re_rt; /**< regexp runtime */ @@ -215,7 +198,7 @@ struct rspamd_task { const gchar *classifier; /**< Classifier to learn (if needed) */ struct rspamd_lang_detector *lang_det; /**< Languages detector */ - guchar digest[16]; + struct rspamd_message *message; }; /** @@ -252,7 +235,8 @@ gboolean rspamd_task_fin (void *arg); * @return */ gboolean rspamd_task_load_message (struct rspamd_task *task, - struct rspamd_http_message *msg, const gchar *start, gsize len); + struct rspamd_http_message *msg, + const gchar *start, gsize len); /** * Process task diff --git a/src/libserver/url.c b/src/libserver/url.c index 0b31007bb..26e328a6d 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -2949,11 +2949,11 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset, if (url->protocol == PROTOCOL_MAILTO) { if (url->userlen > 0) { - target_tbl = task->emails; + target_tbl = MESSAGE_FIELD (task, emails); } } else { - target_tbl = task->urls; + target_tbl = MESSAGE_FIELD (task, urls); } if (target_tbl) { @@ -2996,11 +2996,11 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset, if (query_url->protocol == PROTOCOL_MAILTO) { if (query_url->userlen > 0) { - target_tbl = task->emails; + target_tbl = MESSAGE_FIELD (task, emails); } } else { - target_tbl = task->urls; + target_tbl = MESSAGE_FIELD (task, urls); } if (target_tbl) { @@ -3115,9 +3115,10 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset, url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED|RSPAMD_URL_FLAG_SUBJECT; if (url->protocol == PROTOCOL_MAILTO) { - if (url->userlen > 0) { - if ((existing = g_hash_table_lookup (task->emails, url)) == NULL) { - g_hash_table_insert (task->emails, url, + if (url->userlen > 0 && url->hostlen > 0) { + if ((existing = g_hash_table_lookup (MESSAGE_FIELD (task, emails), + url)) == NULL) { + g_hash_table_insert (MESSAGE_FIELD (task, emails), url, url); } else { @@ -3126,8 +3127,9 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset, } } else { - if ((existing = g_hash_table_lookup (task->urls, url)) == NULL) { - g_hash_table_insert (task->urls, url, url); + if ((existing = g_hash_table_lookup (MESSAGE_FIELD (task, urls), + url)) == NULL) { + g_hash_table_insert (MESSAGE_FIELD (task, urls), url, url); } else { existing->count ++; @@ -3156,9 +3158,9 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset, query_url->flags |= RSPAMD_URL_FLAG_SCHEMALESS; } - if ((existing = g_hash_table_lookup (task->urls, + if ((existing = g_hash_table_lookup (MESSAGE_FIELD (task, urls), query_url)) == NULL) { - g_hash_table_insert (task->urls, + g_hash_table_insert (MESSAGE_FIELD (task, urls), query_url, query_url); } diff --git a/src/libstat/backends/sqlite3_backend.c b/src/libstat/backends/sqlite3_backend.c index a3d6ac9db..38f296177 100644 --- a/src/libstat/backends/sqlite3_backend.c +++ b/src/libstat/backends/sqlite3_backend.c @@ -387,8 +387,7 @@ rspamd_sqlite3_get_language (struct rspamd_stat_sqlite3_db *db, lua_State *L = db->L; if (db->cbref_language == -1) { - for (i = 0; i < task->text_parts->len; i++) { - tp = g_ptr_array_index (task->text_parts, i); + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, tp) { if (tp->language != NULL && tp->language[0] != '\0' && strcmp (tp->language, "en") != 0) { diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c index eca94156c..38e82d187 100644 --- a/src/libstat/classifiers/bayes.c +++ b/src/libstat/classifiers/bayes.c @@ -379,10 +379,9 @@ bayes_classify (struct rspamd_classifier * ctx, if (isfinite (s) && isfinite (h)) { final_prob = (s + 1.0 - h) / 2.; msg_debug_bayes ( - "<%s> got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f," + "got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f," " %L tokens processed of %ud total tokens;" " %uL text tokens found of %ud text tokens)", - task->message_id, cl.ham_prob, h, cl.spam_prob, @@ -399,18 +398,17 @@ bayes_classify (struct rspamd_classifier * ctx, */ if (isfinite (h)) { final_prob = 1.0; - msg_debug_bayes ("<%s> spam class is overflowed, as we have no" - " ham samples", task->message_id); + msg_debug_bayes ("spam class is overflowed, as we have no" + " ham samples"); } else if (isfinite (s)) { final_prob = 0.0; - msg_debug_bayes ("<%s> ham class is overflowed, as we have no" - " spam samples", task->message_id); + msg_debug_bayes ("ham class is overflowed, as we have no" + " spam samples"); } else { final_prob = 0.5; - msg_warn_bayes ("<%s> spam and ham classes are both overflowed", - task->message_id); + msg_warn_bayes ("spam and ham classes are both overflowed"); } } diff --git a/src/libstat/learn_cache/redis_cache.c b/src/libstat/learn_cache/redis_cache.c index 2313db0b2..82c354bb6 100644 --- a/src/libstat/learn_cache/redis_cache.c +++ b/src/libstat/learn_cache/redis_cache.c @@ -23,6 +23,7 @@ #include "hiredis.h" #include "adapters/libev.h" #include "lua/lua_common.h" +#include "libmime/message.h" #define REDIS_DEFAULT_TIMEOUT 0.5 #define REDIS_STAT_TIMEOUT 30 @@ -153,7 +154,7 @@ rspamd_stat_cache_redis_get (redisAsyncContext *c, gpointer r, gpointer priv) (val < 0 && (task->flags & RSPAMD_TASK_FLAG_LEARN_HAM))) { /* Already learned */ msg_info_task ("<%s> has been already " - "learned as %s, ignore it", task->message_id, + "learned as %s, ignore it", MESSAGE_FIELD (task, message_id), (task->flags & RSPAMD_TASK_FLAG_LEARN_SPAM) ? "spam" : "ham"); task->flags |= RSPAMD_TASK_FLAG_ALREADY_LEARNED; } diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c index e8e08f6d1..034e1a5be 100644 --- a/src/libstat/stat_process.c +++ b/src/libstat/stat_process.c @@ -131,9 +131,7 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, g_assert (st_ctx != NULL); - for (i = 0; i < task->text_parts->len; i++) { - part = g_ptr_array_index (task->text_parts, i); - + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) { if (!IS_PART_EMPTY (part) && part->utf_words != NULL) { reserved_len += part->utf_words->len; } @@ -146,9 +144,7 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, rspamd_ptr_array_free_hard, task->tokens); pdiff = rspamd_mempool_get_variable (task->task_pool, "parts_distance"); - for (i = 0; i < task->text_parts->len; i ++) { - part = g_ptr_array_index (task->text_parts, i); - + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) { if (!IS_PART_EMPTY (part) && part->utf_words != NULL) { st_ctx->tokenizer->tokenize_func (st_ctx, task, part->utf_words, IS_PART_UTF (part), @@ -382,9 +378,8 @@ rspamd_stat_classifiers_process (struct rspamd_stat_ctx *st_ctx, if (!skip) { if (cl->cfg->min_tokens > 0 && task->tokens->len < cl->cfg->min_tokens) { msg_debug_bayes ( - "<%s> contains less tokens than required for %s classifier: " + "contains less tokens than required for %s classifier: " "%ud < %ud", - task->message_id, cl->cfg->name, task->tokens->len, cl->cfg->min_tokens); @@ -392,9 +387,8 @@ rspamd_stat_classifiers_process (struct rspamd_stat_ctx *st_ctx, } else if (cl->cfg->max_tokens > 0 && task->tokens->len > cl->cfg->max_tokens) { msg_debug_bayes ( - "<%s> contains more tokens than allowed for %s classifier: " + "contains more tokens than allowed for %s classifier: " "%ud > %ud", - task->message_id, cl->cfg->name, task->tokens->len, cl->cfg->max_tokens); @@ -474,7 +468,7 @@ rspamd_stat_cache_check (struct rspamd_stat_ctx *st_ctx, if (learn_res == RSPAMD_LEARN_INGORE) { /* Do not learn twice */ g_set_error (err, rspamd_stat_quark (), 404, "<%s> has been already " - "learned as %s, ignore it", task->message_id, + "learned as %s, ignore it", MESSAGE_FIELD (task, message_id), spam ? "spam" : "ham"); task->flags |= RSPAMD_TASK_FLAG_ALREADY_LEARNED; @@ -522,7 +516,7 @@ rspamd_stat_classifiers_learn (struct rspamd_stat_ctx *st_ctx, *err == NULL) { /* Do not learn twice */ g_set_error (err, rspamd_stat_quark (), 208, "<%s> has been already " - "learned as %s, ignore it", task->message_id, + "learned as %s, ignore it", MESSAGE_FIELD (task, message_id), spam ? "spam" : "ham"); return FALSE; @@ -545,10 +539,10 @@ rspamd_stat_classifiers_learn (struct rspamd_stat_ctx *st_ctx, msg_info_task ( "<%s> contains less tokens than required for %s classifier: " "%ud < %ud", - task->message_id, - cl->cfg->name, - task->tokens->len, - cl->cfg->min_tokens); + MESSAGE_FIELD (task, message_id), + cl->cfg->name, + task->tokens->len, + cl->cfg->min_tokens); too_small = TRUE; continue; } @@ -556,10 +550,10 @@ rspamd_stat_classifiers_learn (struct rspamd_stat_ctx *st_ctx, msg_info_task ( "<%s> contains more tokens than allowed for %s classifier: " "%ud > %ud", - task->message_id, - cl->cfg->name, - task->tokens->len, - cl->cfg->max_tokens); + MESSAGE_FIELD (task, message_id), + cl->cfg->name, + task->tokens->len, + cl->cfg->max_tokens); too_large = TRUE; continue; } @@ -633,7 +627,7 @@ rspamd_stat_classifiers_learn (struct rspamd_stat_ctx *st_ctx, g_set_error (err, rspamd_stat_quark (), 204, "<%s> contains more tokens than allowed for %s classifier: " "%d > %d", - task->message_id, + MESSAGE_FIELD (task, message_id), sel->cfg->name, task->tokens->len, sel->cfg->max_tokens); @@ -642,7 +636,7 @@ rspamd_stat_classifiers_learn (struct rspamd_stat_ctx *st_ctx, g_set_error (err, rspamd_stat_quark (), 204, "<%s> contains less tokens than required for %s classifier: " "%d < %d", - task->message_id, + MESSAGE_FIELD (task, message_id), sel->cfg->name, task->tokens->len, sel->cfg->min_tokens); @@ -651,7 +645,7 @@ rspamd_stat_classifiers_learn (struct rspamd_stat_ctx *st_ctx, g_set_error (err, rspamd_stat_quark (), 204, "<%s> is skipped for %s classifier: " "%s", - task->message_id, + MESSAGE_FIELD (task, message_id), sel->cfg->name, cond_str ? cond_str : "unknown reason"); } @@ -1060,14 +1054,14 @@ rspamd_stat_check_autolearn (struct rspamd_task *task) msg_info_task ("<%s>: autolearn ham for classifier " "'%s' as message's " "score is negative: %.2f", - task->message_id, cl->cfg->name, + MESSAGE_FIELD (task, message_id), cl->cfg->name, mres->score); } else { msg_info_task ("<%s>: autolearn spam for classifier " "'%s' as message's " "action is reject, score: %.2f", - task->message_id, cl->cfg->name, + MESSAGE_FIELD (task, message_id), cl->cfg->name, mres->score); } diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c index f69378f9b..550ed2097 100644 --- a/src/libstat/tokenizers/tokenizers.c +++ b/src/libstat/tokenizers/tokenizers.c @@ -567,14 +567,15 @@ rspamd_tokenize_meta_words (struct rspamd_task *task) guint i = 0; rspamd_stat_token_t *tok; - if (task->subject) { - rspamd_add_metawords_from_str (task->subject, strlen (task->subject), task); + if (MESSAGE_FIELD (task, subject)) { + rspamd_add_metawords_from_str (MESSAGE_FIELD (task, subject), + strlen (MESSAGE_FIELD (task, subject)), task); } - if (task->from_mime && task->from_mime->len > 0) { + if (MESSAGE_FIELD (task, from_mime) && MESSAGE_FIELD (task, from_mime)->len > 0) { struct rspamd_email_address *addr; - addr = g_ptr_array_index (task->from_mime, 0); + addr = g_ptr_array_index (MESSAGE_FIELD (task, from_mime), 0); if (addr->name) { rspamd_add_metawords_from_str (addr->name, strlen (addr->name), task); @@ -584,8 +585,10 @@ rspamd_tokenize_meta_words (struct rspamd_task *task) if (task->meta_words != NULL) { const gchar *language = NULL; - if (task->text_parts && task->text_parts->len > 0) { - struct rspamd_mime_text_part *tp = g_ptr_array_index (task->text_parts, 0); + if (MESSAGE_FIELD (task, text_parts) && + MESSAGE_FIELD (task, text_parts)->len > 0) { + struct rspamd_mime_text_part *tp = g_ptr_array_index ( + MESSAGE_FIELD (task, text_parts), 0); if (tp->language) { language = tp->language; diff --git a/src/lua/lua_common.h b/src/lua/lua_common.h index d14ebba54..bef163c3c 100644 --- a/src/lua/lua_common.h +++ b/src/lua/lua_common.h @@ -120,7 +120,7 @@ struct rspamd_lua_map { struct rspamd_lua_cached_entry { gint ref; - guint id; + guchar id[4]; }; /* Common utility functions */ @@ -234,8 +234,10 @@ gint rspamd_lua_push_header (lua_State *L, * Push specific header to lua */ gint rspamd_lua_push_header_array (lua_State *L, - GPtrArray *hdrs, - enum rspamd_lua_task_header_type how); + const gchar *name, + struct rspamd_mime_header *rh, + enum rspamd_lua_task_header_type how, + gboolean strong); /** * Check for task at the specified position diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c index 5401ed031..dfc4ee8fa 100644 --- a/src/lua/lua_mimepart.c +++ b/src/lua/lua_mimepart.c @@ -21,6 +21,8 @@ #include "libcryptobox/cryptobox.h" #include "libutil/shingles.h" +#include "contrib/uthash/utlist.h" + /* Textpart methods */ /*** * @module rspamd_textpart @@ -1558,16 +1560,21 @@ lua_mimepart_get_header_common (lua_State *L, enum rspamd_lua_task_header_type h { struct rspamd_mime_part *part = lua_check_mimepart (L); const gchar *name; - GPtrArray *ar; + gboolean strong = FALSE; name = luaL_checkstring (L, 2); if (name && part) { - ar = rspamd_message_get_header_from_hash (part->raw_headers, NULL, - name, FALSE); + if (lua_isboolean (L, 3)) { + strong = lua_toboolean (L, 3); + } - return rspamd_lua_push_header_array (L, ar, how); + return rspamd_lua_push_header_array (L, + name, + rspamd_message_get_header_from_hash (part->raw_headers, name), + how, + strong); } lua_pushnil (L); @@ -1915,8 +1922,7 @@ lua_mimepart_headers_foreach (lua_State *L) struct rspamd_mime_part *part = lua_check_mimepart (L); enum rspamd_lua_task_header_type how = RSPAMD_TASK_HEADER_PUSH_SIMPLE; struct rspamd_lua_regexp *re = NULL; - GList *cur; - struct rspamd_mime_header *hdr; + struct rspamd_mime_header *hdr, *cur; gint old_top; if (part && lua_isfunction (L, 2)) { @@ -1951,23 +1957,20 @@ lua_mimepart_headers_foreach (lua_State *L) } if (part->headers_order) { - cur = part->headers_order->head; - - while (cur) { - hdr = cur->data; + hdr = part->headers_order; + LL_FOREACH (hdr, cur) { if (re && re->re) { - if (!rspamd_regexp_match (re->re, hdr->name, - strlen (hdr->name),FALSE)) { - cur = g_list_next (cur); + if (!rspamd_regexp_match (re->re, cur->name, + strlen (cur->name),FALSE)) { continue; } } old_top = lua_gettop (L); lua_pushvalue (L, 2); - lua_pushstring (L, hdr->name); - rspamd_lua_push_header (L, hdr, how); + lua_pushstring (L, cur->name); + rspamd_lua_push_header (L, cur, how); if (lua_pcall (L, 2, LUA_MULTRET, 0) != 0) { msg_err ("call to header_foreach failed: %s", @@ -1987,7 +1990,6 @@ lua_mimepart_headers_foreach (lua_State *L) } lua_settop (L, old_top); - cur = g_list_next (cur); } } } diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index 99faa4b3e..068459922 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -1251,7 +1251,7 @@ lua_check_text (lua_State * L, gint pos) static void lua_task_set_cached (lua_State *L, struct rspamd_task *task, const gchar *key, - gint pos, guint id) + gint pos) { LUA_TRACE_POINT; struct rspamd_lua_cached_entry *entry; @@ -1271,20 +1271,24 @@ lua_task_set_cached (lua_State *L, struct rspamd_task *task, const gchar *key, } entry->ref = luaL_ref (L, LUA_REGISTRYINDEX); - entry->id = id; + + if (task->message) { + memcpy (entry->id, MESSAGE_FIELD (task, digest), sizeof (entry->id)); + } } static gboolean -lua_task_get_cached (lua_State *L, struct rspamd_task *task, const gchar *key, - guint id) +lua_task_get_cached (lua_State *L, struct rspamd_task *task, const gchar *key) { LUA_TRACE_POINT; struct rspamd_lua_cached_entry *entry; entry = g_hash_table_lookup (task->lua_cache, key); - if (entry != NULL && entry->id == id) { + if (entry != NULL && (task->message && + memcmp (entry->id, MESSAGE_FIELD (task, digest), + sizeof (entry->id)) == 0)) { lua_rawgeti (L, LUA_REGISTRYINDEX, entry->ref); return TRUE; @@ -1970,7 +1974,7 @@ lua_task_get_urls (lua_State * L) PROTOCOL_FILE|PROTOCOL_FTP; gsize sz; - if (task) { + if (task && task->message) { if (lua_gettop (L) >= 2) { if (lua_type (L, 2) == LUA_TBOOLEAN) { protocols_mask = default_mask; @@ -2031,38 +2035,45 @@ lua_task_get_urls (lua_State * L) cb.mask = protocols_mask; if (protocols_mask & PROTOCOL_MAILTO) { - sz = g_hash_table_size (task->urls) + g_hash_table_size (task->emails); + sz = g_hash_table_size (MESSAGE_FIELD (task, urls)) + + g_hash_table_size (MESSAGE_FIELD (task, emails)); if (protocols_mask == (default_mask|PROTOCOL_MAILTO)) { /* Can use cached version */ - if (!lua_task_get_cached (L, task, "emails+urls", sz)) { + if (!lua_task_get_cached (L, task, "emails+urls")) { lua_createtable (L, sz, 0); - g_hash_table_foreach (task->urls, lua_tree_url_callback, &cb); - g_hash_table_foreach (task->emails, lua_tree_url_callback, &cb); + g_hash_table_foreach (MESSAGE_FIELD (task, urls), + lua_tree_url_callback, &cb); + g_hash_table_foreach (MESSAGE_FIELD (task, emails), + lua_tree_url_callback, &cb); - lua_task_set_cached (L, task, "emails+urls", -1, sz); + lua_task_set_cached (L, task, "emails+urls", -1); } } else { lua_createtable (L, sz, 0); - g_hash_table_foreach (task->urls, lua_tree_url_callback, &cb); - g_hash_table_foreach (task->emails, lua_tree_url_callback, &cb); + g_hash_table_foreach (MESSAGE_FIELD (task, urls), + lua_tree_url_callback, &cb); + g_hash_table_foreach (MESSAGE_FIELD (task, emails), + lua_tree_url_callback, &cb); } } else { - sz = g_hash_table_size (task->urls); + sz = g_hash_table_size (MESSAGE_FIELD (task, urls)); if (protocols_mask == (default_mask)) { - if (!lua_task_get_cached (L, task, "urls", sz)) { + if (!lua_task_get_cached (L, task, "urls")) { lua_createtable (L, sz, 0); - g_hash_table_foreach (task->urls, lua_tree_url_callback, &cb); - lua_task_set_cached (L, task, "urls", -1, sz); + g_hash_table_foreach (MESSAGE_FIELD (task, urls), + lua_tree_url_callback, &cb); + lua_task_set_cached (L, task, "urls", -1); } } else { lua_createtable (L, sz, 0); - g_hash_table_foreach (task->urls, lua_tree_url_callback, &cb); + g_hash_table_foreach (MESSAGE_FIELD (task, urls), + lua_tree_url_callback, &cb); } } } @@ -2080,16 +2091,16 @@ lua_task_has_urls (lua_State * L) struct rspamd_task *task = lua_check_task (L, 1); gboolean need_emails = FALSE, ret = FALSE; - if (task) { + if (task && task->message) { if (lua_gettop (L) >= 2) { need_emails = lua_toboolean (L, 2); } - if (g_hash_table_size (task->urls) > 0) { + if (g_hash_table_size (MESSAGE_FIELD (task, urls)) > 0) { ret = TRUE; } - if (need_emails && g_hash_table_size (task->emails) > 0) { + if (need_emails && g_hash_table_size (MESSAGE_FIELD (task, emails)) > 0) { ret = TRUE; } } @@ -2152,20 +2163,25 @@ lua_task_get_rawbody (lua_State * L) struct rspamd_lua_text *t; if (task) { - t = lua_newuserdata (L, sizeof (*t)); - rspamd_lua_setclass (L, "rspamd{text}", -1); + if (task->message != NULL) { + t = lua_newuserdata (L, sizeof (*t)); + rspamd_lua_setclass (L, "rspamd{text}", -1); - if (task->raw_headers_content.len > 0) { - g_assert (task->raw_headers_content.len <= task->msg.len); - t->start = task->msg.begin + task->raw_headers_content.len; - t->len = task->msg.len - task->raw_headers_content.len; + if (MESSAGE_FIELD (task, raw_headers_content).len > 0) { + g_assert (MESSAGE_FIELD (task, raw_headers_content).len <= task->msg.len); + t->start = task->msg.begin + MESSAGE_FIELD (task, raw_headers_content).len; + t->len = task->msg.len - MESSAGE_FIELD (task, raw_headers_content).len; + } + else { + t->len = task->msg.len; + t->start = task->msg.begin; + } + + t->flags = 0; } else { - t->len = task->msg.len; - t->start = task->msg.begin; + lua_pushnil (L); } - - t->flags = 0; } else { return luaL_error (L, "invalid arguments"); @@ -2182,11 +2198,12 @@ lua_task_get_emails (lua_State * L) struct lua_tree_cb_data cb; if (task) { - lua_createtable (L, g_hash_table_size (task->emails), 0); + lua_createtable (L, g_hash_table_size (MESSAGE_FIELD (task, emails)), 0); cb.i = 1; cb.L = L; cb.mask = PROTOCOL_MAILTO; - g_hash_table_foreach (task->emails, lua_tree_url_callback, &cb); + g_hash_table_foreach (MESSAGE_FIELD (task, emails), + lua_tree_url_callback, &cb); } else { return luaL_error (L, "invalid arguments"); @@ -2203,13 +2220,12 @@ lua_task_get_text_parts (lua_State * L) struct rspamd_task *task = lua_check_task (L, 1); struct rspamd_mime_text_part *part, **ppart; - if (task != NULL) { + if (task != NULL && task->message != NULL) { - if (!lua_task_get_cached (L, task, "text_parts", task->text_parts->len)) { - lua_createtable (L, task->text_parts->len, 0); + if (!lua_task_get_cached (L, task, "text_parts")) { + lua_createtable (L, MESSAGE_FIELD (task, text_parts)->len, 0); - for (i = 0; i < task->text_parts->len; i ++) { - part = g_ptr_array_index (task->text_parts, i); + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) { ppart = lua_newuserdata (L, sizeof (struct rspamd_mime_text_part *)); *ppart = part; rspamd_lua_setclass (L, "rspamd{textpart}", -1); @@ -2217,7 +2233,7 @@ lua_task_get_text_parts (lua_State * L) lua_rawseti (L, -2, i + 1); } - lua_task_set_cached (L, task, "text_parts", -1, task->text_parts->len); + lua_task_set_cached (L, task, "text_parts", -1); } } else { @@ -2235,12 +2251,11 @@ lua_task_get_parts (lua_State * L) struct rspamd_task *task = lua_check_task (L, 1); struct rspamd_mime_part *part, **ppart; - if (task != NULL) { - if (!lua_task_get_cached (L, task, "mime_parts", task->parts->len)) { - lua_createtable (L, task->parts->len, 0); + if (task != NULL && task->message != NULL) { + if (!lua_task_get_cached (L, task, "mime_parts")) { + lua_createtable (L, MESSAGE_FIELD (task, parts)->len, 0); - for (i = 0; i < task->parts->len; i ++) { - part = g_ptr_array_index (task->parts, i); + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) { ppart = lua_newuserdata (L, sizeof (struct rspamd_mime_part *)); *ppart = part; rspamd_lua_setclass (L, "rspamd{mimepart}", -1); @@ -2248,7 +2263,7 @@ lua_task_get_parts (lua_State * L) lua_rawseti (L, -2, i + 1); } - lua_task_set_cached (L, task, "mime_parts", -1, task->parts->len); + lua_task_set_cached (L, task, "mime_parts", -1); } } else { @@ -2301,7 +2316,7 @@ lua_task_set_request_header (lua_State *L) rspamd_fstring_t *buf; struct rspamd_lua_text *t; rspamd_ftok_t *hdr, *new_name; - gsize len, vlen; + gsize len, vlen = 0; s = luaL_checklstring (L, 2, &len); @@ -2364,10 +2379,10 @@ rspamd_lua_push_header (lua_State *L, struct rspamd_mime_header *rh, } lua_pushstring (L, "tab_separated"); - lua_pushboolean (L, rh->tab_separated); + lua_pushboolean (L, rh->flags & RSPAMD_HEADER_TAB_SEPARATED); lua_settable (L, -3); lua_pushstring (L, "empty_separator"); - lua_pushboolean (L, rh->empty_separator); + lua_pushboolean (L, rh->flags & RSPAMD_HEADER_EMPTY_SEPARATOR); lua_settable (L, -3); rspamd_lua_table_set (L, "separator", rh->separator); lua_pushstring (L, "order"); @@ -2400,15 +2415,17 @@ rspamd_lua_push_header (lua_State *L, struct rspamd_mime_header *rh, } gint -rspamd_lua_push_header_array (lua_State * L, - GPtrArray *ar, - enum rspamd_lua_task_header_type how) +rspamd_lua_push_header_array (lua_State *L, + const gchar *name, + struct rspamd_mime_header *rh, + enum rspamd_lua_task_header_type how, + gboolean strong) { LUA_TRACE_POINT; - struct rspamd_mime_header *rh; + struct rspamd_mime_header *cur; guint i; - if (ar == NULL || ar->len == 0) { + if (rh == NULL) { if (how == RSPAMD_TASK_HEADER_PUSH_COUNT) { lua_pushnumber (L, 0); } @@ -2420,19 +2437,36 @@ rspamd_lua_push_header_array (lua_State * L, } if (how == RSPAMD_TASK_HEADER_PUSH_FULL) { - lua_createtable (L, ar->len, 0); - PTR_ARRAY_FOREACH (ar, i, rh) { - rspamd_lua_push_header (L, rh, how); - lua_rawseti (L, -2, i + 1); + lua_createtable (L, 0, 0); + i = 0; + + DL_FOREACH (rh, cur) { + if (!strong || strcmp (name, cur->name) == 0) { + rspamd_lua_push_header (L, cur, how); + lua_rawseti (L, -2, ++i); + } } } else if (how == RSPAMD_TASK_HEADER_PUSH_COUNT) { - lua_pushinteger (L, ar->len); + i = 0; + + DL_FOREACH (rh, cur) { + if (!strong || strcmp (name, cur->name) == 0) { + i++; + } + } + + lua_pushinteger (L, i); } else { - rh = g_ptr_array_index (ar, 0); + DL_FOREACH (rh, cur) { + if (!strong || strcmp (name, cur->name) == 0) { + return rspamd_lua_push_header (L, cur, how); + } + } - return rspamd_lua_push_header (L, rh, how); + /* Not found with this case */ + lua_pushnil (L); } return 1; @@ -2444,8 +2478,8 @@ lua_task_get_header_common (lua_State *L, enum rspamd_lua_task_header_type how) LUA_TRACE_POINT; gboolean strong = FALSE; struct rspamd_task *task = lua_check_task (L, 1); + struct rspamd_mime_header *rh; const gchar *name; - GPtrArray *ar; name = luaL_checkstring (L, 2); @@ -2454,9 +2488,9 @@ lua_task_get_header_common (lua_State *L, enum rspamd_lua_task_header_type how) strong = lua_toboolean (L, 3); } - ar = rspamd_message_get_header_array (task, name, strong); + rh = rspamd_message_get_header_array (task, name); - return rspamd_lua_push_header_array (L, ar, how); + return rspamd_lua_push_header_array (L, name, rh, how, strong); } else { return luaL_error (L, "invalid arguments"); @@ -2494,11 +2528,11 @@ lua_task_get_raw_headers (lua_State *L) struct rspamd_task *task = lua_check_task (L, 1); struct rspamd_lua_text *t; - if (task) { + if (task && task->message) { t = lua_newuserdata (L, sizeof (*t)); rspamd_lua_setclass (L, "rspamd{text}", -1); - t->start = task->raw_headers_content.begin; - t->len = task->raw_headers_content.len; + t->start = MESSAGE_FIELD (task, raw_headers_content).begin; + t->len = MESSAGE_FIELD (task, raw_headers_content).len; t->flags = 0; } else { @@ -2514,17 +2548,15 @@ lua_task_get_received_headers (lua_State * L) { LUA_TRACE_POINT; struct rspamd_task *task = lua_check_task (L, 1); - struct received_header *rh; + struct rspamd_received_header *rh; const gchar *proto; - guint i, k = 1; - - if (task) { - if (!lua_task_get_cached (L, task, "received", task->received->len)) { - lua_createtable (L, task->received->len, 0); + guint k = 1; - for (i = 0; i < task->received->len; i ++) { - rh = g_ptr_array_index (task->received, i); + if (task && task->message) { + if (!lua_task_get_cached (L, task, "received")) { + lua_createtable (L, 0, 0); + DL_FOREACH (MESSAGE_FIELD (task, received), rh) { lua_createtable (L, 0, 10); if (rh->hdr && rh->hdr->decoded) { @@ -2581,7 +2613,7 @@ lua_task_get_received_headers (lua_State * L) lua_settable (L, -3); lua_pushstring (L, "proto"); - switch (rh->type) { + switch (rh->flags & RSPAMD_RECEIVED_FLAG_TYPE_MASK) { case RSPAMD_RECEIVED_SMTP: proto = "smtp"; break; @@ -2630,7 +2662,7 @@ lua_task_get_received_headers (lua_State * L) lua_rawseti (L, -2, k ++); } - lua_task_set_cached (L, task, "received", -1, task->received->len); + lua_task_set_cached (L, task, "received", -1); } } else { @@ -3082,7 +3114,7 @@ lua_task_get_recipients (lua_State *L) break; case RSPAMD_ADDRESS_MIME: /* Here we check merely mime rcpt */ - ptrs = task->rcpt_mime; + ptrs = MESSAGE_FIELD_CHECK (task, rcpt_mime); break; case RSPAMD_ADDRESS_ANY: default: @@ -3090,7 +3122,7 @@ lua_task_get_recipients (lua_State *L) ptrs = task->rcpt_envelope; } else { - ptrs = task->rcpt_mime; + ptrs = MESSAGE_FIELD_CHECK (task, rcpt_mime); } break; } @@ -3117,6 +3149,7 @@ lua_task_set_recipients (lua_State *L) struct rspamd_email_address *addr = NULL; gint what = 0, pos = 3; const gchar *how = "rewrite"; + gboolean need_update_digest = FALSE; if (task && lua_gettop (L) >= 3) { @@ -3134,7 +3167,8 @@ lua_task_set_recipients (lua_State *L) break; case RSPAMD_ADDRESS_MIME: /* Here we check merely mime rcpt */ - ptrs = task->rcpt_mime; + ptrs = MESSAGE_FIELD_CHECK (task, rcpt_mime); + need_update_digest = TRUE; break; case RSPAMD_ADDRESS_ANY: default: @@ -3142,7 +3176,8 @@ lua_task_set_recipients (lua_State *L) ptrs = task->rcpt_envelope; } else { - ptrs = task->rcpt_mime; + ptrs = MESSAGE_FIELD_CHECK (task, rcpt_mime); + need_update_digest = TRUE; } break; } @@ -3162,6 +3197,11 @@ lua_task_set_recipients (lua_State *L) for (lua_pushnil (L); lua_next (L, -2); lua_pop (L, 1)) { if (lua_import_email_address (L, task, lua_gettop (L), &addr)) { + + if (need_update_digest) { + rspamd_message_update_digest (task->message, + addr->addr, addr->addr_len); + } g_ptr_array_add (ptrs, addr); } } @@ -3220,14 +3260,14 @@ lua_task_has_from (lua_State *L) break; case RSPAMD_ADDRESS_MIME: /* Here we check merely mime rcpt */ - CHECK_EMAIL_ADDR_LIST (task->from_mime); + CHECK_EMAIL_ADDR_LIST (MESSAGE_FIELD_CHECK (task, from_mime)); break; case RSPAMD_ADDRESS_ANY: default: CHECK_EMAIL_ADDR (task->from_envelope); if (!ret) { - CHECK_EMAIL_ADDR_LIST (task->from_mime); + CHECK_EMAIL_ADDR_LIST (MESSAGE_FIELD_CHECK (task, from_mime)); } break; } @@ -3262,14 +3302,14 @@ lua_task_has_recipients (lua_State *L) break; case RSPAMD_ADDRESS_MIME: /* Here we check merely mime rcpt */ - CHECK_EMAIL_ADDR_LIST (task->rcpt_mime); + CHECK_EMAIL_ADDR_LIST (MESSAGE_FIELD_CHECK (task, rcpt_mime)); break; case RSPAMD_ADDRESS_ANY: default: CHECK_EMAIL_ADDR_LIST (task->rcpt_envelope); if (!ret) { - CHECK_EMAIL_ADDR_LIST (task->rcpt_mime); + CHECK_EMAIL_ADDR_LIST (MESSAGE_FIELD_CHECK (task, rcpt_mime)); } break; } @@ -3305,7 +3345,7 @@ lua_task_get_from (lua_State *L) break; case RSPAMD_ADDRESS_MIME: /* Here we check merely mime rcpt */ - addrs = task->from_mime; + addrs = MESSAGE_FIELD_CHECK (task, from_mime); break; case RSPAMD_ADDRESS_ANY: default: @@ -3313,7 +3353,7 @@ lua_task_get_from (lua_State *L) addr = task->from_envelope; } else { - addrs = task->from_mime; + addrs = MESSAGE_FIELD_CHECK (task, from_mime); } break; } @@ -3351,6 +3391,7 @@ lua_task_set_from (lua_State *L) const gchar *how = "rewrite"; GPtrArray *addrs = NULL; struct rspamd_email_address **paddr = NULL, *addr; + gboolean need_update_digest = FALSE; gint what = 0; if (task && lua_gettop (L) >= 3) { @@ -3367,7 +3408,8 @@ lua_task_set_from (lua_State *L) break; case RSPAMD_ADDRESS_MIME: /* Here we check merely mime rcpt */ - addrs = task->from_mime; + addrs = MESSAGE_FIELD_CHECK (task, from_mime); + need_update_digest = TRUE; break; case RSPAMD_ADDRESS_ANY: default: @@ -3375,7 +3417,8 @@ lua_task_set_from (lua_State *L) paddr = &task->from_envelope; } else { - addrs = task->from_mime; + addrs = MESSAGE_FIELD_CHECK (task, from_mime); + need_update_digest = TRUE; } break; } @@ -3393,6 +3436,11 @@ lua_task_set_from (lua_State *L) tmp->flags |= flags_add; } + if (need_update_digest) { + rspamd_message_update_digest (task->message, + addr->addr, addr->addr_len); + } + g_ptr_array_add (addrs, addr); lua_pushboolean (L, true); } @@ -3456,18 +3504,18 @@ lua_task_get_reply_sender (lua_State *L) struct rspamd_mime_header *rh; if (task) { - GPtrArray *ar; - ar = rspamd_message_get_header_array (task, "Reply-To", false); + rh = rspamd_message_get_header_array (task, "Reply-To"); - if (ar && ar->len == 1) { - rh = (struct rspamd_mime_header *)g_ptr_array_index (ar, 0); + if (rh) { lua_pushstring (L, rh->decoded); } - else if (task->from_mime && task->from_mime->len == 1) { + else if (MESSAGE_FIELD_CHECK (task, from_mime) && + MESSAGE_FIELD (task, from_mime)->len == 1) { struct rspamd_email_address *addr; - addr = (struct rspamd_email_address *)g_ptr_array_index (task->from_mime, 0); + addr = (struct rspamd_email_address *)g_ptr_array_index ( + MESSAGE_FIELD (task, from_mime), 0); lua_pushlstring (L, addr->addr, addr->addr_len); } @@ -3650,8 +3698,8 @@ lua_task_get_subject (lua_State *L) struct rspamd_task *task = lua_check_task (L, 1); if (task) { - if (task->subject != NULL) { - lua_pushstring (L, task->subject); + if (MESSAGE_FIELD_CHECK (task, subject) != NULL) { + lua_pushstring (L, MESSAGE_FIELD (task, subject)); return 1; } else { @@ -3750,13 +3798,11 @@ lua_task_get_images (lua_State *L) struct rspamd_mime_part *part; struct rspamd_image **pimg; - if (task) { - if (!lua_task_get_cached (L, task, "images", task->parts->len)) { - lua_createtable (L, task->parts->len, 0); - - for (i = 0; i < task->parts->len; i ++) { - part = g_ptr_array_index (task->parts, i); + if (task && task->message) { + if (!lua_task_get_cached (L, task, "images")) { + lua_createtable (L, MESSAGE_FIELD (task, parts)->len, 0); + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) { if (part->flags & RSPAMD_MIME_PART_IMAGE) { pimg = lua_newuserdata (L, sizeof (struct rspamd_image *)); rspamd_lua_setclass (L, "rspamd{image}", -1); @@ -3765,7 +3811,7 @@ lua_task_get_images (lua_State *L) } } - lua_task_set_cached (L, task, "images", -1, task->parts->len); + lua_task_set_cached (L, task, "images", -1); } } else { @@ -3784,13 +3830,11 @@ lua_task_get_archives (lua_State *L) struct rspamd_mime_part *part; struct rspamd_archive **parch; - if (task) { - if (!lua_task_get_cached (L, task, "archives", task->parts->len)) { - lua_createtable (L, task->parts->len, 0); - - for (i = 0; i < task->parts->len; i ++) { - part = g_ptr_array_index (task->parts, i); + if (task && task->message) { + if (!lua_task_get_cached (L, task, "archives")) { + lua_createtable (L, MESSAGE_FIELD (task, parts)->len, 0); + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) { if (part->flags & RSPAMD_MIME_PART_ARCHIVE) { parch = lua_newuserdata (L, sizeof (struct rspamd_archive *)); rspamd_lua_setclass (L, "rspamd{archive}", -1); @@ -3799,7 +3843,7 @@ lua_task_get_archives (lua_State *L) } } - lua_task_set_cached (L, task, "archives", -1, task->parts->len); + lua_task_set_cached (L, task, "archives", -1); } } else { @@ -3818,7 +3862,7 @@ lua_task_get_dkim_results (lua_State *L) struct rspamd_dkim_check_result **pres, **cur; if (task) { - if (!lua_task_get_cached (L, task, "dkim_results", 0)) { + if (!lua_task_get_cached (L, task, "dkim_results")) { pres = rspamd_mempool_get_variable (task->task_pool, RSPAMD_MEMPOOL_DKIM_CHECK_RESULTS); @@ -3883,7 +3927,7 @@ lua_task_get_dkim_results (lua_State *L) } } - lua_task_set_cached (L, task, "dkim_results", -1, 0); + lua_task_set_cached (L, task, "dkim_results", -1); } } else { @@ -4348,7 +4392,7 @@ lua_task_get_date (lua_State *L) { LUA_TRACE_POINT; struct rspamd_task *task = lua_check_task (L, 1); - GPtrArray *hdrs; + struct rspamd_mime_header *h; gdouble tim; enum lua_date_type type = DATE_CONNECT; gboolean gmt = TRUE; @@ -4375,15 +4419,12 @@ lua_task_get_date (lua_State *L) } } else { - hdrs = rspamd_message_get_header_array (task, "Date", - FALSE); + h = rspamd_message_get_header_array (task, "Date"); - if (hdrs && hdrs->len > 0) { + if (h) { time_t tt; struct tm t; - struct rspamd_mime_header *h; - h = g_ptr_array_index (hdrs, 0); tt = rspamd_parse_smtp_date (h->decoded, strlen (h->decoded)); if (!gmt) { @@ -4430,8 +4471,8 @@ lua_task_get_message_id (lua_State *L) struct rspamd_task *task = lua_check_task (L, 1); if (task != NULL) { - if (task->message_id != NULL) { - lua_pushstring (L, task->message_id); + if (MESSAGE_FIELD_CHECK (task, message_id) != NULL) { + lua_pushstring (L, MESSAGE_FIELD (task, message_id)); } else { lua_pushnil (L); @@ -4698,11 +4739,12 @@ lua_task_get_digest (lua_State *L) { LUA_TRACE_POINT; struct rspamd_task *task = lua_check_task (L, 1); - gchar hexbuf[33]; + gchar hexbuf[sizeof(MESSAGE_FIELD (task, digest)) * 2 + 1]; gint r; - if (task) { - r = rspamd_encode_hex_buf (task->digest, sizeof (task->digest), + if (task && task->message) { + r = rspamd_encode_hex_buf (MESSAGE_FIELD (task, digest), + sizeof (MESSAGE_FIELD (task, digest)), hexbuf, sizeof (hexbuf) - 1); if (r > 0) { @@ -4987,14 +5029,9 @@ lua_task_cache_get (lua_State *L) LUA_TRACE_POINT; struct rspamd_task *task = lua_check_task (L, 1); const gchar *key = luaL_checkstring (L, 2); - guint id = 0; if (task && key) { - if (lua_type (L, 3) == LUA_TNUMBER) { - id = lua_tonumber (L, 3); - } - - if (!lua_task_get_cached (L, task, key, id)) { + if (!lua_task_get_cached (L, task, key)) { lua_pushnil (L); } } @@ -5011,14 +5048,9 @@ lua_task_cache_set (lua_State *L) LUA_TRACE_POINT; struct rspamd_task *task = lua_check_task (L, 1); const gchar *key = luaL_checkstring (L, 2); - guint id = 0; if (task && key && lua_gettop (L) >= 3) { - if (lua_type (L, 4) == LUA_TNUMBER) { - id = lua_tonumber (L, 4); - } - - lua_task_set_cached (L, task, key, 3, id); + lua_task_set_cached (L, task, key, 3); } else { luaL_error (L, "invalid arguments"); @@ -5074,7 +5106,7 @@ lua_task_store_in_file (lua_State *L) } } else if (lua_isnumber (L, 2)) { - mode = lua_tonumber (L, 2); + mode = lua_tointeger (L, 2); } if (!force_new && (task->flags & RSPAMD_TASK_FLAG_FILE) && @@ -5385,17 +5417,22 @@ lua_task_get_newlines_type (lua_State *L) struct rspamd_task *task = lua_check_task (L, 1); if (task) { - switch (task->nlines_type) { - case RSPAMD_TASK_NEWLINES_CR: - lua_pushstring (L, "cr"); - break; - case RSPAMD_TASK_NEWLINES_LF: - lua_pushstring (L, "lf"); - break; - case RSPAMD_TASK_NEWLINES_CRLF: - default: + if (task->message) { + switch (MESSAGE_FIELD (task, nlines_type)) { + case RSPAMD_TASK_NEWLINES_CR: + lua_pushstring (L, "cr"); + break; + case RSPAMD_TASK_NEWLINES_LF: + lua_pushstring (L, "lf"); + break; + case RSPAMD_TASK_NEWLINES_CRLF: + default: + lua_pushstring (L, "crlf"); + break; + } + } + else { lua_pushstring (L, "crlf"); - break; } } else { @@ -5604,11 +5641,10 @@ lua_task_headers_foreach (lua_State *L) struct rspamd_task *task = lua_check_task (L, 1); enum rspamd_lua_task_header_type how = RSPAMD_TASK_HEADER_PUSH_SIMPLE; struct rspamd_lua_regexp *re = NULL; - GList *cur; - struct rspamd_mime_header *hdr; + struct rspamd_mime_header *hdr, *cur; gint old_top; - if (task && lua_isfunction (L, 2)) { + if (task && task->message && lua_isfunction (L, 2)) { if (lua_istable (L, 3)) { lua_pushstring (L, "full"); lua_gettable (L, 3); @@ -5639,24 +5675,21 @@ lua_task_headers_foreach (lua_State *L) lua_pop (L, 1); } - if (task->headers_order) { - cur = task->headers_order->head; - - while (cur) { - hdr = cur->data; + if (MESSAGE_FIELD (task, headers_order)) { + hdr = MESSAGE_FIELD (task, headers_order); + LL_FOREACH (hdr, cur) { if (re && re->re) { - if (!rspamd_regexp_match (re->re, hdr->name, - strlen (hdr->name),FALSE)) { - cur = g_list_next (cur); + if (!rspamd_regexp_match (re->re, cur->name, + strlen (cur->name),FALSE)) { continue; } } old_top = lua_gettop (L); lua_pushvalue (L, 2); - lua_pushstring (L, hdr->name); - rspamd_lua_push_header (L, hdr, how); + lua_pushstring (L, cur->name); + rspamd_lua_push_header (L, cur, how); if (lua_pcall (L, 2, LUA_MULTRET, 0) != 0) { msg_err ("call to header_foreach failed: %s", @@ -5676,7 +5709,6 @@ lua_task_headers_foreach (lua_State *L) } lua_settop (L, old_top); - cur = g_list_next (cur); } } } @@ -5801,7 +5833,8 @@ lua_task_lookup_words (lua_State *L) guint i, matches = 0; - if (task == NULL || map == NULL || lua_type (L, 3) != LUA_TFUNCTION) { + if (task == NULL || map == NULL || task->message == NULL + || lua_type (L, 3) != LUA_TFUNCTION) { return luaL_error (L, "invalid arguments"); } @@ -5812,7 +5845,7 @@ lua_task_lookup_words (lua_State *L) return luaL_error (L, "invalid map type"); } - PTR_ARRAY_FOREACH (task->text_parts, i, tp) { + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, tp) { if (tp->utf_words) { matches += lua_lookup_words_array (L, 3, task, map, tp->utf_words); } diff --git a/src/lua/lua_trie.c b/src/lua/lua_trie.c index e6a6052d4..bc90fef27 100644 --- a/src/lua/lua_trie.c +++ b/src/lua/lua_trie.c @@ -259,9 +259,7 @@ lua_trie_search_mime (lua_State *L) gboolean found = FALSE; if (trie && task) { - for (i = 0; i < task->text_parts->len; i ++) { - part = g_ptr_array_index (task->text_parts, i); - + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) { if (!IS_PART_EMPTY (part) && part->utf_content != NULL) { text = part->utf_content->data; len = part->utf_content->len; @@ -327,9 +325,9 @@ lua_trie_search_rawbody (lua_State *L) gboolean found = FALSE; if (trie && task) { - if (task->raw_headers_content.len > 0) { - text = task->msg.begin + task->raw_headers_content.len; - len = task->msg.len - task->raw_headers_content.len; + if (MESSAGE_FIELD (task, raw_headers_content).len > 0) { + text = task->msg.begin + MESSAGE_FIELD (task, raw_headers_content).len; + len = task->msg.len - MESSAGE_FIELD (task, raw_headers_content).len; } else { /* Treat as raw message */ diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c index b6e42457a..815afd95a 100644 --- a/src/plugins/chartable.c +++ b/src/plugins/chartable.c @@ -616,8 +616,7 @@ chartable_symbol_callback (struct rspamd_task *task, struct rspamd_mime_text_part *part; struct chartable_ctx *chartable_module_ctx = chartable_get_context (task->cfg); - for (i = 0; i < task->text_parts->len; i ++) { - part = g_ptr_array_index (task->text_parts, i); + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) { rspamd_chartable_process_part (task, part, chartable_module_ctx); } diff --git a/src/plugins/dkim_check.c b/src/plugins/dkim_check.c index 7d3475867..b55ce7bf5 100644 --- a/src/plugins/dkim_check.c +++ b/src/plugins/dkim_check.c @@ -1107,13 +1107,12 @@ dkim_symbol_callback (struct rspamd_task *task, struct rspamd_symcache_item *item, void *unused) { - GPtrArray *hlist; rspamd_dkim_context_t *ctx; rspamd_dkim_key_t *key; GError *err = NULL; - struct rspamd_mime_header *rh; + struct rspamd_mime_header *rh, *rh_cur; struct dkim_check_result *res = NULL, *cur; - guint checked = 0, i, *dmarc_checks; + guint checked = 0, *dmarc_checks; struct dkim_ctx *dkim_module_ctx = dkim_get_context (task->cfg); /* Allow dmarc */ @@ -1153,16 +1152,13 @@ dkim_symbol_callback (struct rspamd_task *task, rspamd_symcache_item_async_inc (task, item, M); /* Now check if a message has its signature */ - hlist = rspamd_message_get_header_array (task, - RSPAMD_DKIM_SIGNHEADER, - FALSE); - if (hlist != NULL && hlist->len > 0) { + rh = rspamd_message_get_header_array (task, RSPAMD_DKIM_SIGNHEADER); + if (rh) { msg_debug_task ("dkim signature found"); - PTR_ARRAY_FOREACH (hlist, i, rh) { - if (rh->decoded == NULL || rh->decoded[0] == '\0') { - msg_info_task ("<%s> cannot load empty DKIM context", - task->message_id); + DL_FOREACH (rh, rh_cur) { + if (rh_cur->decoded == NULL || rh_cur->decoded[0] == '\0') { + msg_info_task ("cannot load empty DKIM signature"); continue; } @@ -1174,7 +1170,7 @@ dkim_symbol_callback (struct rspamd_task *task, cur->mult_deny = 1.0; cur->item = item; - ctx = rspamd_create_dkim_context (rh->decoded, + ctx = rspamd_create_dkim_context (rh_cur->decoded, task->task_pool, dkim_module_ctx->time_jitter, RSPAMD_DKIM_NORMAL, @@ -1191,15 +1187,14 @@ dkim_symbol_callback (struct rspamd_task *task, if (ctx == NULL) { if (err != NULL) { - msg_info_task ("<%s> cannot parse DKIM context: %e", - task->message_id, err); + msg_info_task ("cannot parse DKIM signature: %e", + err); g_error_free (err); err = NULL; } else { - msg_info_task ("<%s> cannot parse DKIM context: " - "unknown error", - task->message_id); + msg_info_task ("cannot parse DKIM signature: " + "unknown error"); } continue; diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c index 8d1c63010..91d341507 100644 --- a/src/plugins/fuzzy_check.c +++ b/src/plugins/fuzzy_check.c @@ -1475,10 +1475,10 @@ fuzzy_cmd_from_text_part (struct rspamd_task *task, rspamd_cryptobox_hash_update (&st, part->utf_stripped_content->data, part->utf_stripped_content->len); - if (task->subject) { + if (MESSAGE_FIELD (task, subject)) { /* We also include subject */ - rspamd_cryptobox_hash_update (&st, task->subject, - strlen (task->subject)); + rspamd_cryptobox_hash_update (&st, MESSAGE_FIELD (task, subject), + strlen (MESSAGE_FIELD (task, subject))); } rspamd_cryptobox_hash_final (&st, cmd->digest); @@ -2127,7 +2127,7 @@ fuzzy_insert_metric_results (struct rspamd_task *task, GPtrArray *results) struct rspamd_mime_text_part *tp; /* About 5 words */ - static const text_length_cutoff = 25; + static const unsigned int text_length_cutoff = 25; PTR_ARRAY_FOREACH (results, i, res) { if (res->type == FUZZY_RESULT_TXT) { @@ -2139,7 +2139,7 @@ fuzzy_insert_metric_results (struct rspamd_task *task, GPtrArray *results) } } - PTR_ARRAY_FOREACH (task->text_parts, i, tp) { + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, tp) { if (!IS_PART_EMPTY (tp)) { seen_text = TRUE; } @@ -2434,8 +2434,8 @@ fuzzy_controller_io_callback (gint fd, short what, void *arg) return; } - msg_info_task ("cannot process fuzzy hash for message <%s>: %s", - session->task->message_id, strerror (errno)); + msg_info_task ("cannot process fuzzy hash for message: %s", + strerror (errno)); if (*(session->err) == NULL) { g_set_error (session->err, g_quark_from_static_string (M), @@ -2485,7 +2485,7 @@ fuzzy_controller_io_callback (gint fd, short what, void *arg) (gint)sizeof (rep->digest), rep->digest, symbol, rep->v1.flag, - session->task->message_id); + MESSAGE_FIELD (session->task, message_id)); } else { if (rep->v1.value == 401) { @@ -2495,7 +2495,7 @@ fuzzy_controller_io_callback (gint fd, short what, void *arg) "list %s:%d, skipped by server", ftype, op, - session->task->message_id, + MESSAGE_FIELD (session->task, message_id), (gint)sizeof (rep->digest), rep->digest, symbol, rep->v1.flag); @@ -2513,7 +2513,7 @@ fuzzy_controller_io_callback (gint fd, short what, void *arg) "list %s:%d, error: %d", ftype, op, - session->task->message_id, + MESSAGE_FIELD (session->task, message_id), (gint)sizeof (rep->digest), rep->digest, symbol, rep->v1.flag, @@ -2671,7 +2671,7 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule, GPtrArray *res; gboolean check_part, fuzzy_check; - res = g_ptr_array_sized_new (task->parts->len + 1); + res = g_ptr_array_sized_new (MESSAGE_FIELD (task, parts)->len + 1); if (c == FUZZY_STAT) { io = fuzzy_cmd_stat (rule, c, flag, value, task->task_pool); @@ -2682,7 +2682,7 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule, goto end; } - PTR_ARRAY_FOREACH (task->parts, i, mime_part) { + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, mime_part) { check_part = FALSE; fuzzy_check = FALSE; @@ -2829,8 +2829,8 @@ fuzzy_symbol_callback (struct rspamd_task *task, if (rspamd_match_radix_map_addr (fuzzy_module_ctx->whitelist, task->from_addr) != NULL) { msg_info_task ("<%s>, address %s is whitelisted, skip fuzzy check", - task->message_id, - rspamd_inet_address_to_string (task->from_addr)); + MESSAGE_FIELD (task, message_id), + rspamd_inet_address_to_string (task->from_addr)); rspamd_symcache_finalize_item (task, item); return; @@ -2963,7 +2963,7 @@ fuzzy_process_handler (struct rspamd_http_connection_entry *conn_ent, if (r == -1) { msg_warn_task ("<%s>: cannot process message for fuzzy", - task->message_id); + MESSAGE_FIELD (task, message_id)); rspamd_task_free (task); rspamd_controller_send_error (conn_ent, 400, "Message processing error"); @@ -3102,7 +3102,7 @@ fuzzy_process_handler (struct rspamd_http_connection_entry *conn_ent, } if (res == -1) { - msg_warn_task ("<%s>: cannot send fuzzy request: %s", task->message_id, + msg_warn_task ("cannot send fuzzy request: %s", strerror (errno)); rspamd_controller_send_error (conn_ent, 400, "Message sending error"); rspamd_task_free (task); @@ -3111,8 +3111,7 @@ fuzzy_process_handler (struct rspamd_http_connection_entry *conn_ent, } else if (!processed) { if (rules) { - msg_warn_task ("<%s>: no content to generate fuzzy", - task->message_id); + msg_warn_task ("no content to generate fuzzy"); rspamd_controller_send_error (conn_ent, 404, "No content to generate fuzzy for flag %d", flag); } @@ -3122,9 +3121,7 @@ fuzzy_process_handler (struct rspamd_http_connection_entry *conn_ent, "Message is conditionally skipped for flag %d", flag); } else { - msg_warn_task ("<%s>: no fuzzy rules found for flag %d", - task->message_id, - flag); + msg_warn_task ("no fuzzy rules found for flag %d", flag); rspamd_controller_send_error (conn_ent, 404, "No fuzzy rules matched for flag %d", flag); } @@ -3342,20 +3339,17 @@ fuzzy_check_lua_process_learn (struct rspamd_task *task, } if (res == -1) { - msg_warn_task ("<%s>: cannot send fuzzy request: %s", task->message_id, + msg_warn_task ("cannot send fuzzy request: %s", strerror (errno)); } else if (!processed) { if (rules) { - msg_warn_task ("<%s>: no content to generate fuzzy", - task->message_id); + msg_warn_task ("no content to generate fuzzy"); return FALSE; } else { - msg_warn_task ("<%s>: no fuzzy rules found for flag %d", - task->message_id, - flag); + msg_warn_task ("no fuzzy rules found for flag %d", flag); return FALSE; } } diff --git a/src/plugins/lua/greylist.lua b/src/plugins/lua/greylist.lua index f39f197f5..796623b7f 100644 --- a/src/plugins/lua/greylist.lua +++ b/src/plugins/lua/greylist.lua @@ -63,6 +63,7 @@ local settings = { local rspamd_logger = require "rspamd_logger" local rspamd_util = require "rspamd_util" +local lua_redis = require "lua_redis" local fun = require "fun" local hash = require "rspamd_cryptobox_hash" local rspamd_lua_utils = require "lua_util" @@ -258,13 +259,13 @@ local function greylist_check(task) end end - local ret = rspamd_redis_make_request(task, - redis_params, -- connect params - hash_key, -- hash key - false, -- is write - redis_get_cb, --callback - 'MGET', -- command - {body_key, meta_key} -- arguments + local ret = lua_redis.redis_make_request(task, + redis_params, -- connect params + hash_key, -- hash key + false, -- is write + redis_get_cb, --callback + 'MGET', -- command + {body_key, meta_key} -- arguments ) if not ret then rspamd_logger.errx(task, 'cannot make redis request to check results') @@ -373,7 +374,7 @@ local function greylist_set(task) if not settings.check_local and is_rspamc then return end - ret,conn,upstream = rspamd_redis_make_request(task, + ret,conn,upstream = lua_redis.make_request(task, redis_params, -- connect params hash_key, -- hash key true, -- is write @@ -396,7 +397,7 @@ local function greylist_set(task) rspamd_logger.infox(task, 'greylisted until "%s", new record', end_time) greylist_message(task, end_time, 'new record') -- Create new record - ret,conn,upstream = rspamd_redis_make_request(task, + ret,conn,upstream = lua_redis.redis_make_request(task, redis_params, -- connect params hash_key, -- hash key true, -- is write @@ -458,7 +459,7 @@ if opts then whitelist_domains_map = lua_map.rspamd_map_add(N, 'whitelist_domains_url', 'map', 'Greylist whitelist domains map') - redis_params = rspamd_parse_redis_server(N) + redis_params = lua_redis.parse_redis_server(N) if not redis_params then rspamd_logger.infox(rspamd_config, 'no servers are specified, disabling module') rspamd_lua_utils.disable_module(N, "redis") @@ -468,14 +469,12 @@ if opts then type = 'postfilter', callback = greylist_set, priority = 6, - flags = 'empty', }) rspamd_config:register_symbol({ name = 'GREYLIST_CHECK', type = 'prefilter', callback = greylist_check, priority = 6, - flags = 'empty', }) end end diff --git a/src/plugins/lua/rbl.lua b/src/plugins/lua/rbl.lua index eb2e3ed82..70339fd85 100644 --- a/src/plugins/lua/rbl.lua +++ b/src/plugins/lua/rbl.lua @@ -547,7 +547,7 @@ local function add_rbl(key, rbl) flags_tbl[#flags_tbl + 1] = 'nice' end - if not (rbl.dkim or rbl.emails) then + if not (rbl.dkim or rbl.emails or rbl.received) then flags_tbl[#flags_tbl + 1] = 'empty' end diff --git a/src/plugins/spf.c b/src/plugins/spf.c index 89adefbe7..9c54bb696 100644 --- a/src/plugins/spf.c +++ b/src/plugins/spf.c @@ -665,8 +665,7 @@ spf_symbol_callback (struct rspamd_task *task, else { if (!rspamd_spf_resolve (task, spf_plugin_callback, item)) { - msg_info_task ("cannot make spf request for [%s]", - task->message_id); + msg_info_task ("cannot make spf request for %s", domain); rspamd_task_insert_result (task, spf_module_ctx->symbol_dnsfail, 1, diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c index baf985c39..860f4b5e5 100644 --- a/src/plugins/surbl.c +++ b/src/plugins/surbl.c @@ -1579,8 +1579,7 @@ process_dns_results (struct rspamd_task *task, bit = g_hash_table_lookup (suffix->ips, &addr); if (bit != NULL) { - msg_info_surbl ("<%s> domain [%s] is in surbl %s(%xd)", - task->message_id, + msg_info_surbl ("domain [%s] is in surbl %s(%xd)", resolved_name, suffix->suffix, bit->bit); rspamd_task_insert_result (task, bit->symbol, 1, resolved_name); @@ -1602,8 +1601,7 @@ process_dns_results (struct rspamd_task *task, if (((gint)bit->bit & (gint)ntohl (addr)) != 0) { got_result = TRUE; - msg_info_surbl ("<%s> domain [%s] is in surbl %s(%xd)", - task->message_id, + msg_info_surbl ("domain [%s] is in surbl %s(%xd)", resolved_name, suffix->suffix, bit->bit); rspamd_task_insert_result (task, bit->symbol, 1, resolved_name); @@ -1617,8 +1615,7 @@ process_dns_results (struct rspamd_task *task, if (!got_result) { if ((suffix->bits == NULL || suffix->bits->len == 0) && suffix->ips == NULL) { - msg_info_surbl ("<%s> domain [%s] is in surbl %s", - task->message_id, + msg_info_surbl ("domain [%s] is in surbl %s", resolved_name, suffix->suffix); rspamd_task_insert_result (task, suffix->symbol, 1, resolved_name); @@ -1628,8 +1625,7 @@ process_dns_results (struct rspamd_task *task, } else { ina.s_addr = addr; - msg_info_surbl ("<%s> domain [%s] is in surbl %s but at unknown result: %s", - task->message_id, + msg_info_surbl ("domain [%s] is in surbl %s but at unknown result: %s", resolved_name, suffix->suffix, inet_ntoa (ina)); } @@ -1645,8 +1641,7 @@ surbl_dns_callback (struct rdns_reply *reply, gpointer arg) task = param->task; if (reply->code == RDNS_RC_NOERROR && reply->entries) { - msg_debug_surbl ("<%s> domain [%s] is in surbl %s", - param->task->message_id, + msg_debug_surbl ("domain [%s] is in surbl %s", param->host_orig, param->suffix->suffix); DL_FOREACH (reply->entries, elt) { @@ -1659,8 +1654,8 @@ surbl_dns_callback (struct rdns_reply *reply, gpointer arg) } else { if (reply->code == RDNS_RC_NXDOMAIN || reply->code == RDNS_RC_NOREC) { - msg_debug_surbl ("<%s> domain [%s] is not in surbl %s", - param->task->message_id, param->host_orig, + msg_debug_surbl ("domain [%s] is not in surbl %s", + param->host_orig, param->suffix->suffix); } else { @@ -1705,8 +1700,7 @@ surbl_dns_ip_callback (struct rdns_reply *reply, gpointer arg) ip_addr >> 8 & 0xff, ip_addr & 0xff, param->suffix->suffix); msg_debug_surbl ( - "<%s> domain [%s] send %v request to surbl", - param->task->message_id, + "domain [%s] send %v request to surbl", param->host_orig, to_resolve); @@ -1721,8 +1715,8 @@ surbl_dns_ip_callback (struct rdns_reply *reply, gpointer arg) } } else { - msg_debug_surbl ("<%s> domain [%s] cannot be resolved for SURBL check %s", - param->task->message_id, param->host_resolve, + msg_debug_surbl ("domain [%s] cannot be resolved for SURBL check %s", + param->host_resolve, param->suffix->suffix); } @@ -1778,8 +1772,7 @@ surbl_redirector_finish (struct rspamd_http_connection *conn, hdr = rspamd_http_message_find_header (msg, "Uri"); if (hdr != NULL) { - msg_info_surbl ("<%s> got reply from redirector: '%*s' -> '%T'", - param->task->message_id, + msg_info_surbl ("got reply from redirector: '%*s' -> '%T'", param->url->urllen, param->url->string, hdr); urllen = hdr->len; @@ -1792,8 +1785,9 @@ surbl_redirector_finish (struct rspamd_http_connection *conn, task->task_pool, RSPAMD_URL_PARSE_TEXT); if (r == URI_ERRNO_OK) { - if ((existing = g_hash_table_lookup (task->urls, redirected_url)) == NULL) { - g_hash_table_insert (task->urls, redirected_url, + if ((existing = g_hash_table_lookup (MESSAGE_FIELD (task, urls), + redirected_url)) == NULL) { + g_hash_table_insert (MESSAGE_FIELD (task, urls), redirected_url, redirected_url); redirected_url->phished_url = param->url; redirected_url->flags |= RSPAMD_URL_FLAG_REDIRECTED; @@ -1813,8 +1807,7 @@ surbl_redirector_finish (struct rspamd_http_connection *conn, } } else { - msg_info_surbl ("<%s> could not resolve '%*s' on redirector", - param->task->message_id, + msg_info_surbl ("could not resolve '%*s' on redirector", param->url->urllen, param->url->string); } @@ -1852,8 +1845,7 @@ register_redirector_call (struct rspamd_url *url, struct rspamd_task *task, } if (param->conn == NULL) { - msg_info_surbl ("<%s> cannot create tcp socket failed: %s", - task->message_id, + msg_info_surbl ("cannot create tcp socket failed: %s", strerror (errno)); return; @@ -1880,8 +1872,7 @@ register_redirector_call (struct rspamd_url *url, struct rspamd_task *task, NULL, param, surbl_module_ctx->read_timeout); msg_info_surbl ( - "<%s> registered redirector call for %*s to %s, according to rule: %s", - task->message_id, + "registered redirector call for %*s to %s, according to rule: %s", url->urllen, url->string, rspamd_upstream_name (param->redirector), rule); @@ -1909,8 +1900,7 @@ surbl_test_tags (struct rspamd_task *task, struct redirector_param *param, /* We know results for this URL */ DL_FOREACH (tag, cur) { - msg_info_surbl ("<%s> domain [%s] is in surbl %s (tags)", - task->message_id, + msg_info_surbl ("domain [%s] is in surbl %s (tags)", ftld, cur->data); rspamd_task_insert_result (task, cur->data, 1, ftld); } @@ -2084,19 +2074,19 @@ surbl_test_url (struct rspamd_task *task, rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_hash_table_unref, param->tree); - g_hash_table_foreach (task->urls, surbl_tree_url_callback, param); + g_hash_table_foreach (MESSAGE_FIELD (task, urls), + surbl_tree_url_callback, param); rspamd_symcache_item_async_inc (task, item, M); if (suffix->options & SURBL_OPTION_CHECKEMAILS) { - g_hash_table_foreach (task->emails, surbl_tree_url_callback, param); + g_hash_table_foreach (MESSAGE_FIELD (task, emails), + surbl_tree_url_callback, param); } /* We also need to check and process img URLs */ if (suffix->options & SURBL_OPTION_CHECKIMAGES) { - for (i = 0; i < task->text_parts->len; i ++) { - part = g_ptr_array_index (task->text_parts, i); - + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) { if (part->html && part->html->images) { for (j = 0; j < part->html->images->len; j ++) { img = g_ptr_array_index (part->html->images, j); @@ -2166,11 +2156,11 @@ surbl_test_redirector (struct rspamd_task *task, param->redirector_requests = 0; param->ctx = surbl_module_ctx; param->item = item; - g_hash_table_foreach (task->urls, surbl_tree_redirector_callback, param); + g_hash_table_foreach (MESSAGE_FIELD (task, urls), + surbl_tree_redirector_callback, param); /* We also need to check and process img URLs */ - for (i = 0; i < task->text_parts->len; i ++) { - part = g_ptr_array_index (task->text_parts, i); + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) { if (part->html && part->html->images) { for (j = 0; j < part->html->images->len; j ++) { img = g_ptr_array_index (part->html->images, j); @@ -2314,8 +2304,7 @@ surbl_continue_process_handler (lua_State *L) surbl_module_ctx = surbl_get_context (task->cfg); if (nurl != NULL) { - msg_info_surbl ("<%s> got reply from redirector: '%*s' -> '%*s'", - param->task->message_id, + msg_info_surbl ("got reply from redirector: '%*s' -> '%*s'", param->url->urllen, param->url->string, (gint)urllen, nurl); urlstr = rspamd_mempool_alloc (task->task_pool, @@ -2327,8 +2316,10 @@ surbl_continue_process_handler (lua_State *L) task->task_pool, RSPAMD_URL_PARSE_TEXT); if (r == URI_ERRNO_OK) { - if (!g_hash_table_lookup (task->urls, redirected_url)) { - g_hash_table_insert (task->urls, redirected_url, + if (!g_hash_table_lookup (MESSAGE_FIELD (task, urls), + redirected_url)) { + g_hash_table_insert (MESSAGE_FIELD (task, urls), + redirected_url, redirected_url); redirected_url->phished_url = param->url; redirected_url->flags |= RSPAMD_URL_FLAG_REDIRECTED; @@ -2340,14 +2331,12 @@ surbl_continue_process_handler (lua_State *L) } } else { - msg_info_surbl ("<%s> could not resolve '%*s' on redirector", - param->task->message_id, + msg_info_surbl ("could not resolve '%*s' on redirector", param->url->urllen, param->url->string); } } else { - msg_info_surbl ("<%s> could not resolve '%*s' on redirector", - param->task->message_id, + msg_info_surbl ("could not resolve '%*s' on redirector", param->url->urllen, param->url->string); } } diff --git a/test/lua/unit/selectors.lua b/test/lua/unit/selectors.lua index be77454e5..a9506fa31 100644 --- a/test/lua/unit/selectors.lua +++ b/test/lua/unit/selectors.lua @@ -62,7 +62,7 @@ context("Selectors test", function() ["digest"] = { selector = "digest", - expect = {"c459a21bd1f33fb4ba035481f46ef0c7"} + expect = {"5b756ff185494c36f26c17a70b042f21"} }, ["user"] = { |