From 4aee16e53d385803b2bae74255909aa0167fd823 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 26 Oct 2020 14:49:37 +0000 Subject: [PATCH] [Minor] Fix rspamd_has_only_html_part function + refactor macro names --- src/libmime/images.c | 2 +- src/libmime/message.c | 38 ++++++++++++++++++++-------------- src/libmime/message.h | 9 ++++---- src/libmime/mime_expressions.c | 23 +++++++++++--------- src/libserver/re_cache.c | 8 +++---- src/libstat/stat_process.c | 6 +++--- src/lua/lua_mimepart.c | 28 ++++++++++++------------- src/lua/lua_trie.c | 2 +- src/plugins/fuzzy_check.c | 2 +- 9 files changed, 64 insertions(+), 54 deletions(-) diff --git a/src/libmime/images.c b/src/libmime/images.c index c54f5845a..b3baa8e4c 100644 --- a/src/libmime/images.c +++ b/src/libmime/images.c @@ -684,7 +684,7 @@ rspamd_image_process_part (struct rspamd_task *task, struct rspamd_mime_part *pa } PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, tp) { - if (IS_PART_HTML (tp) && tp->html != NULL && + if (IS_TEXT_PART_HTML (tp) && tp->html != NULL && tp->html->images != NULL) { for (j = 0; j < tp->html->images->len; j ++) { himg = g_ptr_array_index (tp->html->images, j); diff --git a/src/libmime/message.c b/src/libmime/message.c index 2702d0f51..411b872c9 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -147,7 +147,7 @@ rspamd_mime_part_create_words (struct rspamd_task *task, { enum rspamd_tokenize_type tok_type; - if (IS_PART_UTF (part)) { + if (IS_TEXT_PART_UTF (part)) { #if U_ICU_VERSION_MAJOR_NUM < 50 /* Hack to prevent hang with Thai in old libicu */ @@ -209,8 +209,8 @@ rspamd_mime_part_detect_language (struct rspamd_task *task, { struct rspamd_lang_detector_res *lang; - if (!IS_PART_EMPTY (part) && part->utf_words && part->utf_words->len > 0 && - task->lang_det) { + if (!IS_TEXT_PART_EMPTY (part) && part->utf_words && part->utf_words->len > 0 && + task->lang_det) { if (rspamd_language_detector_detect (task, task->lang_det, part)) { lang = g_ptr_array_index (part->languages, 0); part->language = lang->lang; @@ -240,7 +240,7 @@ rspamd_strip_newlines_parse (struct rspamd_task *task, } state = normal_char; while (p < pe) { - if (IS_PART_UTF (part)) { + if (IS_TEXT_PART_UTF (part)) { gint32 off = p - begin; U8_NEXT (begin, off, pe - begin, uc); @@ -324,7 +324,7 @@ rspamd_strip_newlines_parse (struct rspamd_task *task, c = p + 1; - if (IS_PART_HTML (part) || !url_open_bracket) { + if (IS_TEXT_PART_HTML (part) || !url_open_bracket) { g_byte_array_append (part->utf_stripped_content, (const guint8 *)" ", 1); g_ptr_array_add (part->newlines, @@ -339,7 +339,7 @@ rspamd_strip_newlines_parse (struct rspamd_task *task, case seen_cr: /* \r\n */ if (!crlf_added) { - if (IS_PART_HTML (part) || !url_open_bracket) { + if (IS_TEXT_PART_HTML (part) || !url_open_bracket) { g_byte_array_append (part->utf_stripped_content, (const guint8 *) " ", 1); crlf_added = TRUE; @@ -509,7 +509,7 @@ rspamd_normalize_text_part (struct rspamd_task *task, part->newlines = g_ptr_array_sized_new (128); - if (IS_PART_EMPTY (part)) { + if (IS_TEXT_PART_EMPTY (part)) { part->utf_stripped_content = g_byte_array_new (); } else { @@ -532,7 +532,7 @@ rspamd_normalize_text_part (struct rspamd_task *task, } } - if (IS_PART_UTF (part)) { + if (IS_TEXT_PART_UTF (part)) { utext_openUTF8 (&part->utf_stripped_text, part->utf_stripped_content->data, part->utf_stripped_content->len, @@ -780,6 +780,7 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task, struct rspamd_mime_text_part *text_part; rspamd_ftok_t html_tok, xhtml_tok; gboolean found_html = FALSE, found_txt = FALSE; + guint flags = 0; enum rspamd_action_type act; if ((mime_part->ct && (mime_part->ct->flags & RSPAMD_CONTENT_TYPE_TEXT)) || @@ -802,10 +803,14 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task, /* Skip attachments */ if ((found_txt || found_html) && - (mime_part->cd && mime_part->cd->type == RSPAMD_CT_ATTACHMENT) && - (!task->cfg->check_text_attachements)) { - debug_task ("skip attachments for checking as text parts"); - return FALSE; + (mime_part->cd && mime_part->cd->type == RSPAMD_CT_ATTACHMENT)) { + if (!task->cfg->check_text_attachements) { + debug_task ("skip attachments for checking as text parts"); + return FALSE; + } + else { + flags |= RSPAMD_MIME_TEXT_PART_ATTACHMENT; + } } else if (!(found_txt || found_html)) { /* Not a text part */ @@ -820,6 +825,7 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task, text_part->parsed.begin = mime_part->parsed_data.begin; text_part->parsed.len = mime_part->parsed_data.len; text_part->utf_stripped_text = (UText)UTEXT_INITIALIZER; + text_part->flags |= flags; if (found_html) { if (!rspamd_message_process_html_text_part (task, text_part)) { @@ -859,7 +865,7 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task, /* Post process part */ rspamd_normalize_text_part (task, text_part); - if (!IS_PART_HTML (text_part)) { + if (!IS_TEXT_PART_HTML (text_part)) { if (mime_part->parent_part) { struct rspamd_mime_part *parent = mime_part->parent_part; @@ -1509,7 +1515,7 @@ rspamd_message_process (struct rspamd_task *task) srch.len = 11; if (rspamd_ftok_cmp (&p1->mime_part->parent_part->ct->subtype, &srch) == 0) { - if (!IS_PART_EMPTY (p1) && !IS_PART_EMPTY (p2) && + if (!IS_TEXT_PART_EMPTY (p1) && !IS_TEXT_PART_EMPTY (p2) && p1->normalized_hashes && p2->normalized_hashes) { /* * We also detect language on one part and propagate it to @@ -1518,10 +1524,10 @@ rspamd_message_process (struct rspamd_task *task) struct rspamd_mime_text_part *sel; /* Prefer HTML as text part is not displayed normally */ - if (IS_PART_HTML (p1)) { + if (IS_TEXT_PART_HTML (p1)) { sel = p1; } - else if (IS_PART_HTML (p2)) { + else if (IS_TEXT_PART_HTML (p2)) { sel = p2; } else { diff --git a/src/libmime/message.h b/src/libmime/message.h index d6f1b76c0..13e40e2ef 100644 --- a/src/libmime/message.h +++ b/src/libmime/message.h @@ -120,11 +120,12 @@ struct rspamd_mime_part { #define RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED (1 << 5) #define RSPAMD_MIME_TEXT_PART_HAS_SUBNORMAL (1 << 6) #define RSPAMD_MIME_TEXT_PART_NORMALISED (1 << 7) +#define RSPAMD_MIME_TEXT_PART_ATTACHMENT (1 << 8) -#define IS_PART_EMPTY(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_EMPTY) -#define IS_PART_UTF(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_UTF) -#define IS_PART_RAW(part) (!((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_UTF)) -#define IS_PART_HTML(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_HTML) +#define IS_TEXT_PART_EMPTY(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_EMPTY) +#define IS_TEXT_PART_UTF(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_UTF) +#define IS_TEXT_PART_HTML(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_HTML) +#define IS_TEXT_PART_ATTACHMENT(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_ATTACHMENT) struct rspamd_mime_text_part { diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c index d35bc136f..a06baffaf 100644 --- a/src/libmime/mime_expressions.c +++ b/src/libmime/mime_expressions.c @@ -1455,20 +1455,23 @@ rspamd_has_only_html_part (struct rspamd_task * task, GArray * args, void *unused) { struct rspamd_mime_text_part *p; + guint i, cnt_html = 0, cnt_txt = 0; gboolean res = FALSE; - if (MESSAGE_FIELD (task, text_parts)->len == 1) { + PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) { p = g_ptr_array_index (MESSAGE_FIELD (task, text_parts), 0); - if (IS_PART_HTML (p)) { - res = TRUE; - } - else { - res = FALSE; + if (!IS_TEXT_PART_ATTACHMENT (p)) { + if (IS_TEXT_PART_HTML (p)) { + cnt_html++; + } + else { + cnt_txt++; + } } } - return res; + return (cnt_html > 0 && cnt_txt == 0); } static gboolean @@ -1565,7 +1568,7 @@ rspamd_is_html_balanced (struct rspamd_task * task, GArray * args, void *unused) gboolean res = TRUE; PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) { - if (IS_PART_HTML (p)) { + if (IS_TEXT_PART_HTML (p)) { if (p->flags & RSPAMD_MIME_TEXT_PART_FLAG_BALANCED) { res = TRUE; } @@ -1600,7 +1603,7 @@ rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused) } PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) { - if (IS_PART_HTML (p) && p->html) { + if (IS_TEXT_PART_HTML (p) && p->html) { res = rspamd_html_tag_seen (p->html, arg->data); } @@ -1621,7 +1624,7 @@ rspamd_has_fake_html (struct rspamd_task * task, GArray * args, void *unused) gboolean res = FALSE; PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) { - if (IS_PART_HTML (p) && (p->html == NULL || p->html->html_tags == NULL)) { + if (IS_TEXT_PART_HTML (p) && (p->html == NULL || p->html->html_tags == NULL)) { res = TRUE; } diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c index 73082bb2d..078563103 100644 --- a/src/libserver/re_cache.c +++ b/src/libserver/re_cache.c @@ -1198,13 +1198,13 @@ rspamd_re_cache_exec_re (struct rspamd_task *task, } else { /* Skip empty parts */ - if (IS_PART_EMPTY (text_part)) { + if (IS_TEXT_PART_EMPTY (text_part)) { len = 0; in = ""; } else { /* Check raw flags */ - if (!IS_PART_UTF (text_part)) { + if (!IS_TEXT_PART_UTF (text_part)) { raw = TRUE; } @@ -1345,7 +1345,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task, scvec[i + 1] = (guchar *)text_part->utf_stripped_content->data; lenvec[i + 1] = text_part->utf_stripped_content->len; - if (!IS_PART_UTF (text_part)) { + if (!IS_TEXT_PART_UTF (text_part)) { raw = TRUE; } } @@ -1382,7 +1382,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task, scvec[i] = (guchar *)text_part->parsed.begin; lenvec[i] = text_part->parsed.len; - if (!IS_PART_UTF (text_part)) { + if (!IS_TEXT_PART_UTF (text_part)) { raw = TRUE; } } diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c index 9cd425206..0b22cbd11 100644 --- a/src/libstat/stat_process.c +++ b/src/libstat/stat_process.c @@ -132,7 +132,7 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, g_assert (st_ctx != NULL); PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) { - if (!IS_PART_EMPTY (part) && part->utf_words != NULL) { + if (!IS_TEXT_PART_EMPTY (part) && part->utf_words != NULL) { reserved_len += part->utf_words->len; } /* XXX: normal window size */ @@ -146,9 +146,9 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, pdiff = rspamd_mempool_get_variable (task->task_pool, "parts_distance"); PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) { - if (!IS_PART_EMPTY (part) && part->utf_words != NULL) { + if (!IS_TEXT_PART_EMPTY (part) && part->utf_words != NULL) { st_ctx->tokenizer->tokenize_func (st_ctx, task, - part->utf_words, IS_PART_UTF (part), + part->utf_words, IS_TEXT_PART_UTF (part), NULL, task->tokens); } diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c index 9748cfde3..9cc1374be 100644 --- a/src/lua/lua_mimepart.c +++ b/src/lua/lua_mimepart.c @@ -617,12 +617,12 @@ lua_textpart_is_utf (lua_State * L) LUA_TRACE_POINT; struct rspamd_mime_text_part *part = lua_check_textpart (L); - if (part == NULL || IS_PART_EMPTY (part)) { + if (part == NULL || IS_TEXT_PART_EMPTY (part)) { lua_pushboolean (L, FALSE); return 1; } - lua_pushboolean (L, IS_PART_UTF (part)); + lua_pushboolean (L, IS_TEXT_PART_UTF (part)); return 1; } @@ -690,7 +690,7 @@ lua_textpart_get_content (lua_State * L) } if (!type) { - if (IS_PART_EMPTY (part)) { + if (IS_TEXT_PART_EMPTY (part)) { lua_pushnil (L); return 1; } @@ -698,7 +698,7 @@ lua_textpart_get_content (lua_State * L) len = part->utf_content->len; } else if (strcmp (type, "content") == 0) { - if (IS_PART_EMPTY (part)) { + if (IS_TEXT_PART_EMPTY (part)) { lua_pushnil (L); return 1; } @@ -707,7 +707,7 @@ lua_textpart_get_content (lua_State * L) len = part->utf_content->len; } else if (strcmp (type, "content_oneline") == 0) { - if (IS_PART_EMPTY (part)) { + if (IS_TEXT_PART_EMPTY (part)) { lua_pushnil (L); return 1; } @@ -763,7 +763,7 @@ lua_textpart_get_raw_content (lua_State * L) struct rspamd_mime_text_part *part = lua_check_textpart (L); struct rspamd_lua_text *t; - if (part == NULL || IS_PART_EMPTY (part)) { + if (part == NULL || IS_TEXT_PART_EMPTY (part)) { lua_pushnil (L); return 1; } @@ -784,7 +784,7 @@ lua_textpart_get_content_oneline (lua_State * L) struct rspamd_mime_text_part *part = lua_check_textpart (L); struct rspamd_lua_text *t; - if (part == NULL || IS_PART_EMPTY (part)) { + if (part == NULL || IS_TEXT_PART_EMPTY (part)) { lua_pushnil (L); return 1; } @@ -809,7 +809,7 @@ lua_textpart_get_length (lua_State * L) return 1; } - if (IS_PART_EMPTY (part) || part->utf_content == NULL) { + if (IS_TEXT_PART_EMPTY (part) || part->utf_content == NULL) { lua_pushinteger (L, 0); } else { @@ -873,7 +873,7 @@ lua_textpart_get_lines_count (lua_State * L) return 1; } - if (IS_PART_EMPTY (part)) { + if (IS_TEXT_PART_EMPTY (part)) { lua_pushinteger (L, 0); } else { @@ -894,7 +894,7 @@ lua_textpart_get_words_count (lua_State *L) return 1; } - if (IS_PART_EMPTY (part) || part->utf_words == NULL) { + if (IS_TEXT_PART_EMPTY (part) || part->utf_words == NULL) { lua_pushinteger (L, 0); } else { @@ -936,7 +936,7 @@ lua_textpart_get_words (lua_State *L) return luaL_error (L, "invalid arguments"); } - if (IS_PART_EMPTY (part) || part->utf_words == NULL) { + if (IS_TEXT_PART_EMPTY (part) || part->utf_words == NULL) { lua_createtable (L, 0, 0); } else { @@ -969,7 +969,7 @@ lua_textpart_filter_words (lua_State *L) return luaL_error (L, "invalid arguments"); } - if (IS_PART_EMPTY (part) || part->utf_words == NULL) { + if (IS_TEXT_PART_EMPTY (part) || part->utf_words == NULL) { lua_createtable (L, 0, 0); } else { @@ -1055,7 +1055,7 @@ lua_textpart_is_empty (lua_State * L) return 1; } - lua_pushboolean (L, IS_PART_EMPTY (part)); + lua_pushboolean (L, IS_TEXT_PART_EMPTY (part)); return 1; } @@ -1071,7 +1071,7 @@ lua_textpart_is_html (lua_State * L) return 1; } - lua_pushboolean (L, IS_PART_HTML (part)); + lua_pushboolean (L, IS_TEXT_PART_HTML (part)); return 1; } diff --git a/src/lua/lua_trie.c b/src/lua/lua_trie.c index 3941a5a85..7c63fc687 100644 --- a/src/lua/lua_trie.c +++ b/src/lua/lua_trie.c @@ -342,7 +342,7 @@ lua_trie_search_mime (lua_State *L) if (trie && task) { PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) { - if (!IS_PART_EMPTY (part) && part->utf_content != NULL) { + if (!IS_TEXT_PART_EMPTY (part) && part->utf_content != NULL) { text = part->utf_content->data; len = part->utf_content->len; diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c index 640afcc32..633ce50ae 100644 --- a/src/plugins/fuzzy_check.c +++ b/src/plugins/fuzzy_check.c @@ -2399,7 +2399,7 @@ fuzzy_insert_metric_results (struct rspamd_task *task, struct fuzzy_rule *rule, if (task->message) { PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, tp) { - if (!IS_PART_EMPTY (tp) && tp->utf_words != NULL && tp->utf_words->len > 0) { + if (!IS_TEXT_PART_EMPTY (tp) && tp->utf_words != NULL && tp->utf_words->len > 0) { seen_text_part = TRUE; if (tp->utf_stripped_text.magic == UTEXT_MAGIC) { -- 2.39.5