diff options
-rw-r--r-- | src/libmime/message.c | 45 | ||||
-rw-r--r-- | src/libmime/message.h | 16 | ||||
-rw-r--r-- | src/libmime/mime_expressions.c | 20 | ||||
-rw-r--r-- | src/libserver/html.c | 2 | ||||
-rw-r--r-- | src/libstat/stat_process.c | 6 | ||||
-rw-r--r-- | src/libutil/fuzzy.c | 4 | ||||
-rw-r--r-- | src/lua/lua_mimepart.c | 21 | ||||
-rw-r--r-- | src/lua/lua_trie.c | 2 | ||||
-rw-r--r-- | src/plugins/chartable.c | 4 | ||||
-rw-r--r-- | src/plugins/fuzzy_check.c | 4 |
10 files changed, 67 insertions, 57 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c index 95a9bea95..d29aeb2cd 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -38,6 +38,9 @@ #define RECURSION_LIMIT 30 #define UTF8_CHARSET "UTF-8" +#define SET_PART_RAW(part) ((part)->flags &= ~RSPAMD_MIME_PART_FLAG_UTF) +#define SET_PART_UTF(part) ((part)->flags |= RSPAMD_MIME_PART_FLAG_UTF) + GByteArray * strip_html_tags (struct rspamd_task *task, rspamd_mempool_t * pool, @@ -280,7 +283,10 @@ reg_char: /* Check tag balancing */ if (level_ptr && level_ptr->data != NULL) { - part->is_balanced = FALSE; + part->flags &= ~RSPAMD_MIME_PART_FLAG_BALANCED; + } + else { + part->flags |= RSPAMD_MIME_PART_FLAG_BALANCED; } if (stateptr) { @@ -957,6 +963,7 @@ rspamd_text_to_utf8 (struct rspamd_task *task, return res; } + static GByteArray * convert_text_to_utf (struct rspamd_task *task, GByteArray * part_content, @@ -970,35 +977,34 @@ convert_text_to_utf (struct rspamd_task *task, GByteArray *result_array; if (task->cfg->raw_mode) { - text_part->is_raw = TRUE; + SET_PART_RAW (text_part); return part_content; } if ((charset = g_mime_content_type_get_parameter (type, "charset")) == NULL) { - text_part->is_raw = TRUE; + SET_PART_RAW (text_part); return part_content; } if (!charset_validate (task->task_pool, charset, &ocharset)) { msg_info ( "<%s>: has invalid charset", task->message_id); - text_part->is_raw = TRUE; + SET_PART_RAW (text_part); return part_content; } if (g_ascii_strcasecmp (ocharset, "utf-8") == 0 || g_ascii_strcasecmp (ocharset, "utf8") == 0) { if (g_utf8_validate (part_content->data, part_content->len, NULL)) { - text_part->is_raw = FALSE; - text_part->is_utf = TRUE; + SET_PART_UTF (text_part); return part_content; } else { msg_info ( "<%s>: contains invalid utf8 characters, assume it as raw", task->message_id); - text_part->is_raw = TRUE; + SET_PART_RAW (text_part); return part_content; } } @@ -1013,8 +1019,7 @@ convert_text_to_utf (struct rspamd_task *task, task->message_id, ocharset, 
err ? err->message : "unknown problem"); - text_part->is_raw = TRUE; - text_part->is_utf = FALSE; + SET_PART_RAW (text_part); g_error_free (err); return part_content; } @@ -1023,8 +1028,7 @@ convert_text_to_utf (struct rspamd_task *task, result_array = rspamd_mempool_alloc (task->task_pool, sizeof (GByteArray)); result_array->data = res_str; result_array->len = write_bytes; - text_part->is_raw = FALSE; - text_part->is_utf = TRUE; + SET_PART_UTF (text_part); return result_array; } @@ -1128,7 +1132,7 @@ detect_text_language (struct mime_text_part *part) const int max_chars = 32; if (part != NULL) { - if (part->is_utf) { + if (IS_PART_UTF (part)) { /* Try to detect encoding by several symbols */ const gchar *p, *pp; gunichar c; @@ -1183,7 +1187,7 @@ rspamd_normalize_text_part (struct rspamd_task *task, guint i, nlen; GArray *tmp; - if (part->language && part->language[0] != '\0' && part->is_utf) { + if (part->language && part->language[0] != '\0' && IS_PART_UTF (part)) { stem = sb_stemmer_new (part->language, "UTF_8"); if (stem == NULL) { msg_info ("<%s> cannot create lemmatizer for %s language", @@ -1193,7 +1197,7 @@ rspamd_normalize_text_part (struct rspamd_task *task, /* Ugly workaround */ tmp = rspamd_tokenize_text (part->content->data, - part->content->len, part->is_utf, task->cfg->min_word_len, + part->content->len, IS_PART_UTF (part), task->cfg->min_word_len, part->urls_offset, FALSE); if (tmp) { @@ -1210,7 +1214,7 @@ rspamd_normalize_text_part (struct rspamd_task *task, w->len = nlen; } else { - if (part->is_utf) { + if (IS_PART_UTF (part)) { rspamd_str_lc_utf8 (w->begin, w->len); } else { @@ -1263,9 +1267,9 @@ process_text_part (struct rspamd_task *task, text_part = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct mime_text_part)); - text_part->is_html = TRUE; + text_part->flags |= RSPAMD_MIME_PART_FLAG_HTML; if (is_empty) { - text_part->is_empty = TRUE; + text_part->flags |= RSPAMD_MIME_PART_FLAG_EMPTY; text_part->orig = NULL; text_part->content = 
NULL; task->text_parts = g_list_prepend (task->text_parts, text_part); @@ -1276,10 +1280,10 @@ process_text_part (struct rspamd_task *task, text_part->orig, type, text_part); - text_part->is_balanced = TRUE; text_part->html_nodes = NULL; text_part->parent = parent; + text_part->flags |= RSPAMD_MIME_PART_FLAG_BALANCED; text_part->content = strip_html_tags (task, task->task_pool, text_part, @@ -1303,10 +1307,9 @@ process_text_part (struct rspamd_task *task, text_part = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct mime_text_part)); - text_part->is_html = FALSE; text_part->parent = parent; if (is_empty) { - text_part->is_empty = TRUE; + text_part->flags |= RSPAMD_MIME_PART_FLAG_EMPTY; text_part->orig = NULL; text_part->content = NULL; task->text_parts = g_list_prepend (task->text_parts, text_part); @@ -1328,7 +1331,7 @@ process_text_part (struct rspamd_task *task, /* Post process part */ detect_text_language (text_part); text_part->words = rspamd_tokenize_text (text_part->content->data, - text_part->content->len, text_part->is_utf, task->cfg->min_word_len, + text_part->content->len, IS_PART_UTF (text_part), task->cfg->min_word_len, text_part->urls_offset, TRUE); rspamd_normalize_text_part (task, text_part); } diff --git a/src/libmime/message.h b/src/libmime/message.h index ef881ebd1..0f3a0ccb4 100644 --- a/src/libmime/message.h +++ b/src/libmime/message.h @@ -21,12 +21,18 @@ struct mime_part { const gchar *filename; }; +#define RSPAMD_MIME_PART_FLAG_UTF (1 << 0) +#define RSPAMD_MIME_PART_FLAG_BALANCED (1 << 1) +#define RSPAMD_MIME_PART_FLAG_EMPTY (1 << 2) +#define RSPAMD_MIME_PART_FLAG_HTML (1 << 3) + +#define IS_PART_EMPTY(part) ((part)->flags & RSPAMD_MIME_PART_FLAG_EMPTY) +#define IS_PART_UTF(part) ((part)->flags & RSPAMD_MIME_PART_FLAG_UTF) +#define IS_PART_RAW(part) (!((part)->flags & RSPAMD_MIME_PART_FLAG_UTF)) +#define IS_PART_HTML(part) ((part)->flags & RSPAMD_MIME_PART_FLAG_HTML) + struct mime_text_part { - gboolean is_html; - gboolean is_raw; - 
gboolean is_balanced; - gboolean is_empty; - gboolean is_utf; + guint flags; GUnicodeScript script; const gchar *lang_code; const gchar *language; diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c index b4271f1dc..aaeb19619 100644 --- a/src/libmime/mime_expressions.c +++ b/src/libmime/mime_expressions.c @@ -867,13 +867,13 @@ rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re, while (cur) { part = (struct mime_text_part *)cur->data; /* Skip empty parts */ - if (part->is_empty) { + if (IS_PART_EMPTY (part)) { cur = g_list_next (cur); continue; } /* Check raw flags */ - if (part->is_raw) { + if (!IS_PART_UTF (part)) { raw = TRUE; } /* Select data for regexp */ @@ -1248,7 +1248,7 @@ rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused) NULL); return FALSE; } - if (!p1->is_empty && !p2->is_empty) { + if (!IS_PART_EMPTY (p1) && !IS_PART_EMPTY (p2)) { if (p1->diff_str != NULL && p2->diff_str != NULL) { diff = rspamd_diff_distance_normalized (p1->diff_str, p2->diff_str); @@ -1278,8 +1278,8 @@ rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused) } } } - else if ((p1->is_empty && - !p2->is_empty) || (!p1->is_empty && p2->is_empty)) { + else if ((IS_PART_EMPTY (p1) && + !IS_PART_EMPTY (p2)) || (!IS_PART_EMPTY (p1)&& IS_PART_EMPTY (p2))) { /* Empty and non empty parts are different */ *pdiff = 0; rspamd_mempool_set_variable (task->task_pool, @@ -1430,7 +1430,7 @@ rspamd_has_only_html_part (struct rspamd_task * task, GArray * args, cur = g_list_first (task->text_parts); while (cur) { p = cur->data; - if (p->is_html) { + if (IS_PART_HTML (p)) { res = TRUE; } else { @@ -1601,8 +1601,8 @@ rspamd_is_html_balanced (struct rspamd_task * task, GArray * args, void *unused) cur = g_list_first (task->text_parts); while (cur) { p = cur->data; - if (!p->is_empty && p->is_html) { - if (p->is_balanced) { + if (!IS_PART_EMPTY (p) && IS_PART_HTML (p)) { + if (p->flags & RSPAMD_MIME_PART_FLAG_BALANCED) { 
res = TRUE; } else { @@ -1673,7 +1673,7 @@ rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused) while (cur && res == FALSE) { p = cur->data; - if (!p->is_empty && p->is_html && p->html_nodes) { + if (!IS_PART_EMPTY (p) && IS_PART_HTML (p) && p->html_nodes) { g_node_traverse (p->html_nodes, G_PRE_ORDER, G_TRAVERSE_ALL, @@ -1699,7 +1699,7 @@ rspamd_has_fake_html (struct rspamd_task * task, GArray * args, void *unused) while (cur && res == FALSE) { p = cur->data; - if (!p->is_empty && p->is_html && p->html_nodes == NULL) { + if (!IS_PART_EMPTY (p) && IS_PART_HTML (p) && p->html_nodes == NULL) { res = TRUE; } cur = g_list_next (cur); diff --git a/src/libserver/html.c b/src/libserver/html.c index f978ff1c7..2470310b4 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -941,7 +941,7 @@ add_html_node (struct rspamd_task *task, if (!check_balance (new, cur_level)) { debug_task ( "mark part as unbalanced as it has not pairable closing tags"); - part->is_balanced = FALSE; + part->flags &= ~RSPAMD_MIME_PART_FLAG_BALANCED; } } else if ((data->flags & (FL_XML|FL_SGML)) == 0) { diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c index 8bea1baea..e4e4c692b 100644 --- a/src/libstat/stat_process.c +++ b/src/libstat/stat_process.c @@ -296,14 +296,14 @@ rspamd_stat_process_tokenize (struct rspamd_tokenizer_config *cf, while (cur != NULL) { part = (struct mime_text_part *)cur->data; - if (!part->is_empty && part->words != NULL) { + if (!IS_PART_EMPTY (part) && part->words != NULL) { if (compat) { tok->tokenizer->tokenize_func (cf, task->task_pool, - part->words, tok->tokens, part->is_utf); + part->words, tok->tokens, IS_PART_UTF (part)); } else { tok->tokenizer->tokenize_func (cf, task->task_pool, - part->normalized_words, tok->tokens, part->is_utf); + part->normalized_words, tok->tokens, IS_PART_UTF (part)); } } diff --git a/src/libutil/fuzzy.c b/src/libutil/fuzzy.c index 83cb9cd29..a15be12e2 100644 --- a/src/libutil/fuzzy.c +++ 
b/src/libutil/fuzzy.c @@ -340,7 +340,7 @@ rspamd_fuzzy_from_text_part (struct mime_text_part *part, bzero (&rs, sizeof (rs)); end = c + len; - if (part->is_utf) { + if (IS_PART_UTF (part)) { while (c < end) { if (cur_ex != NULL && (gint)cur_ex->pos == c - begin) { c += cur_ex->len + 1; @@ -400,7 +400,7 @@ rspamd_fuzzy_from_text_part (struct mime_text_part *part, begin = (gchar *)part->content->data; c = begin; end = c + len; - if (part->is_utf) { + if (IS_PART_UTF (part)) { while (c < end) { if (cur_ex != NULL && (gint)cur_ex->pos == c - begin) { diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c index a19b7a8f4..e83ba1770 100644 --- a/src/lua/lua_mimepart.c +++ b/src/lua/lua_mimepart.c @@ -238,12 +238,12 @@ lua_textpart_is_utf (lua_State * L) { struct mime_text_part *part = lua_check_textpart (L); - if (part == NULL || part->is_empty) { + if (part == NULL || IS_PART_EMPTY (part)) { lua_pushboolean (L, FALSE); return 1; } - lua_pushboolean (L, part->is_utf); + lua_pushboolean (L, IS_PART_UTF (part)); return 1; } @@ -255,7 +255,7 @@ lua_textpart_get_content (lua_State * L) struct mime_text_part *part = lua_check_textpart (L); struct rspamd_lua_text *t; - if (part == NULL || part->is_empty) { + if (part == NULL || IS_PART_EMPTY (part)) { lua_pushnil (L); return 1; } @@ -278,7 +278,7 @@ lua_textpart_get_length (lua_State * L) return 1; } - if (part->is_empty) { + if (IS_PART_EMPTY (part)) { lua_pushnumber (L, 0); } else { @@ -298,7 +298,7 @@ lua_textpart_is_empty (lua_State * L) return 1; } - lua_pushboolean (L, part->is_empty); + lua_pushboolean (L, IS_PART_EMPTY (part)); return 1; } @@ -313,7 +313,7 @@ lua_textpart_is_html (lua_State * L) return 1; } - lua_pushboolean (L, part->is_html); + lua_pushboolean (L, IS_PART_HTML (part)); return 1; } @@ -324,7 +324,7 @@ lua_textpart_get_fuzzy (lua_State * L) struct mime_text_part *part = lua_check_textpart (L); gchar *out; - if (part == NULL || part->is_empty) { + if (part == NULL || IS_PART_EMPTY (part)) { 
lua_pushnil (L); return 1; } @@ -380,7 +380,7 @@ lua_textpart_compare_distance (lua_State * L) } else { - if (!part->is_empty && !other->is_empty) { + if (!IS_PART_EMPTY (part) && !IS_PART_EMPTY (other)) { if (part->diff_str != NULL && other->diff_str != NULL) { diff = rspamd_diff_distance (part->diff_str, other->diff_str); @@ -389,8 +389,9 @@ lua_textpart_compare_distance (lua_State * L) diff = rspamd_fuzzy_compare_parts (part, other); } } - else if ((part->is_empty && - !other->is_empty) || (!part->is_empty && other->is_empty)) { + else if ((IS_PART_EMPTY (part) && + !IS_PART_EMPTY (other)) || (!IS_PART_EMPTY (part) && + IS_PART_EMPTY (other))) { /* Empty and non empty parts are different */ diff = 0; } diff --git a/src/lua/lua_trie.c b/src/lua/lua_trie.c index 63b716756..f1b9088db 100644 --- a/src/lua/lua_trie.c +++ b/src/lua/lua_trie.c @@ -272,7 +272,7 @@ lua_trie_search_mime (lua_State *L) while (cur) { part = cur->data; - if (!part->is_empty && part->content != NULL) { + if (!IS_PART_EMPTY (part) && part->content != NULL) { text = part->content->data; len = part->content->len; diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c index f8ad15be9..3efec040b 100644 --- a/src/plugins/chartable.c +++ b/src/plugins/chartable.c @@ -133,7 +133,7 @@ check_part (struct mime_text_part *part, gboolean raw_mode) p = part->content->data; - if (part->is_raw || raw_mode) { + if (IS_PART_RAW (part) || raw_mode) { while (remain > 1) { if ((g_ascii_isalpha (*p) && (*(p + 1) & 0x80)) || @@ -213,7 +213,7 @@ chartable_symbol_callback (struct rspamd_task *task, void *unused) cur = g_list_first (task->text_parts); while (cur) { part = cur->data; - if (!part->is_empty && check_part (part, task->cfg->raw_mode)) { + if (!IS_PART_EMPTY (part) && check_part (part, task->cfg->raw_mode)) { rspamd_task_insert_result (task, chartable_module_ctx->symbol, 1, NULL); } cur = g_list_next (cur); diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c index b5410e6cf..af0aab420 
100644 --- a/src/plugins/fuzzy_check.c +++ b/src/plugins/fuzzy_check.c @@ -524,7 +524,7 @@ fuzzy_preprocess_words (struct mime_text_part *part, rspamd_mempool_t *pool) { GArray *res; - if (!part->is_utf || !part->language || part->language[0] == '\0' || + if (!IS_PART_UTF (part) || !part->language || part->language[0] == '\0' || part->normalized_words == NULL) { res = part->words; } @@ -961,7 +961,7 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule, while (cur) { part = cur->data; - if (part->is_empty) { + if (IS_PART_EMPTY (part)) { cur = g_list_next (cur); continue; } |