From d664f2fd52d6050460c9ca0ddebe8ef990fa0fb5 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Thu, 29 Nov 2018 15:45:34 +0000 Subject: [PATCH] [Feature] Allow to get task flags in C expressions --- src/libmime/content_type.c | 3 +- src/libmime/email_addr.c | 2 +- src/libmime/message.c | 6 ++ src/libmime/mime_expressions.c | 177 ++++++++++++++++++++++----------- src/libmime/mime_headers.c | 15 ++- src/libmime/mime_headers.h | 2 +- src/libserver/task.h | 1 + src/lua/lua_task.c | 2 + 8 files changed, 146 insertions(+), 62 deletions(-) diff --git a/src/libmime/content_type.c b/src/libmime/content_type.c index 524b6f636..91c09e4bc 100644 --- a/src/libmime/content_type.c +++ b/src/libmime/content_type.c @@ -477,7 +477,8 @@ rspamd_content_disposition_add_param (rspamd_mempool_t *pool, nparam = rspamd_mempool_alloc (pool, sizeof (*nparam)); nparam->name.begin = name_start; nparam->name.len = name_end - name_start; - decoded = rspamd_mime_header_decode (pool, value_start, value_end - value_start); + decoded = rspamd_mime_header_decode (pool, value_start, + value_end - value_start, NULL); RSPAMD_FTOK_FROM_STR (&nparam->value, decoded); if (!found) { diff --git a/src/libmime/email_addr.c b/src/libmime/email_addr.c index e1d52e185..b936af586 100644 --- a/src/libmime/email_addr.c +++ b/src/libmime/email_addr.c @@ -137,7 +137,7 @@ rspamd_email_address_add (rspamd_mempool_t *pool, } if (name->len > 0) { - elt->name = rspamd_mime_header_decode (pool, name->str, name->len); + elt->name = rspamd_mime_header_decode (pool, name->str, name->len, NULL); } g_ptr_array_add (ar, elt); diff --git a/src/libmime/message.c b/src/libmime/message.c index 35ea5bb25..3c29b1170 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -88,6 +88,12 @@ rspamd_mime_part_extract_words (struct rspamd_task *task, short_len++; } } + + if (w->flags & (RSPAMD_STAT_TOKEN_FLAG_BROKEN_UNICODE| + RSPAMD_STAT_TOKEN_FLAG_NORMALISED| + RSPAMD_STAT_TOKEN_FLAG_INVISIBLE_SPACES)) { + task->flags |= 
RSPAMD_TASK_FLAG_BAD_UNICODE; + } } if (part->utf_words && part->utf_words->len) { diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c index 535b8a124..7bfbf34d4 100644 --- a/src/libmime/mime_expressions.c +++ b/src/libmime/mime_expressions.c @@ -24,62 +24,65 @@ #include "utlist.h" gboolean rspamd_compare_encoding (struct rspamd_task *task, - GArray * args, - void *unused); + GArray * args, + void *unused); gboolean rspamd_header_exists (struct rspamd_task *task, - GArray * args, - void *unused); + GArray * args, + void *unused); gboolean rspamd_parts_distance (struct rspamd_task *task, - GArray * args, - void *unused); + GArray * args, + void *unused); gboolean rspamd_recipients_distance (struct rspamd_task *task, - GArray * args, - void *unused); + GArray * args, + void *unused); gboolean rspamd_has_only_html_part (struct rspamd_task *task, - GArray * args, - void *unused); + GArray * args, + void *unused); gboolean rspamd_is_recipients_sorted (struct rspamd_task *task, - GArray * args, - void *unused); + GArray * args, + void *unused); gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task, - GArray * args, - void *unused); + GArray * args, + void *unused); gboolean rspamd_is_html_balanced (struct rspamd_task *task, - GArray * args, - void *unused); + GArray * args, + void *unused); gboolean rspamd_has_html_tag (struct rspamd_task *task, - GArray * args, - void *unused); + GArray * args, + void *unused); gboolean rspamd_has_fake_html (struct rspamd_task *task, - GArray * args, - void *unused); + GArray * args, + void *unused); static gboolean rspamd_raw_header_exists (struct rspamd_task *task, - GArray * args, - void *unused); + GArray * args, + void *unused); static gboolean rspamd_check_smtp_data (struct rspamd_task *task, - GArray * args, - void *unused); + GArray * args, + void *unused); static gboolean rspamd_content_type_is_type (struct rspamd_task * task, - GArray * args, - void *unused); + GArray * args, + void 
*unused); static gboolean rspamd_content_type_is_subtype (struct rspamd_task *task, - GArray * args, - void *unused); + GArray * args, + void *unused); static gboolean rspamd_content_type_has_param (struct rspamd_task * task, - GArray * args, - void *unused); + GArray * args, + void *unused); static gboolean rspamd_content_type_compare_param (struct rspamd_task * task, - GArray * args, - void *unused); + GArray * args, + void *unused); static gboolean rspamd_has_content_part (struct rspamd_task *task, - GArray * args, - void *unused); + GArray * args, + void *unused); static gboolean rspamd_has_content_part_len (struct rspamd_task *task, - GArray * args, - void *unused); + GArray * args, + void *unused); static gboolean rspamd_is_empty_body (struct rspamd_task *task, - GArray * args, - void *unused); + GArray * args, + void *unused); +static gboolean rspamd_has_flag_expr (struct rspamd_task *task, + GArray * args, + void *unused); static rspamd_expression_atom_t * rspamd_mime_expr_parse (const gchar *line, gsize len, rspamd_mempool_t *pool, gpointer ud, GError **err); @@ -136,25 +139,26 @@ static struct _fl { rspamd_internal_func_t func; void *user_data; } rspamd_functions_list[] = { - {"check_smtp_data", rspamd_check_smtp_data, NULL}, - {"compare_encoding", rspamd_compare_encoding, NULL}, - {"compare_parts_distance", rspamd_parts_distance, NULL}, - {"compare_recipients_distance", rspamd_recipients_distance, NULL}, - {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL}, - {"content_type_compare_param", rspamd_content_type_compare_param, NULL}, - {"content_type_has_param", rspamd_content_type_has_param, NULL}, - {"content_type_is_subtype", rspamd_content_type_is_subtype, NULL}, - {"content_type_is_type", rspamd_content_type_is_type, NULL}, - {"has_content_part", rspamd_has_content_part, NULL}, - {"has_content_part_len", rspamd_has_content_part_len, NULL}, - {"has_fake_html", rspamd_has_fake_html, NULL}, - {"has_html_tag", rspamd_has_html_tag, NULL}, 
- {"has_only_html_part", rspamd_has_only_html_part, NULL}, - {"header_exists", rspamd_header_exists, NULL}, - {"is_empty_body", rspamd_is_empty_body, NULL}, - {"is_html_balanced", rspamd_is_html_balanced, NULL}, - {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL}, - {"raw_header_exists", rspamd_raw_header_exists, NULL} + {"check_smtp_data", rspamd_check_smtp_data, NULL}, + {"compare_encoding", rspamd_compare_encoding, NULL}, + {"compare_parts_distance", rspamd_parts_distance, NULL}, + {"compare_recipients_distance", rspamd_recipients_distance, NULL}, + {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL}, + {"content_type_compare_param", rspamd_content_type_compare_param, NULL}, + {"content_type_has_param", rspamd_content_type_has_param, NULL}, + {"content_type_is_subtype", rspamd_content_type_is_subtype, NULL}, + {"content_type_is_type", rspamd_content_type_is_type, NULL}, + {"has_content_part", rspamd_has_content_part, NULL}, + {"has_content_part_len", rspamd_has_content_part_len, NULL}, + {"has_fake_html", rspamd_has_fake_html, NULL}, + {"has_flag", rspamd_has_flag_expr, NULL}, + {"has_html_tag", rspamd_has_html_tag, NULL}, + {"has_only_html_part", rspamd_has_only_html_part, NULL}, + {"header_exists", rspamd_header_exists, NULL}, + {"is_empty_body", rspamd_is_empty_body, NULL}, + {"is_html_balanced", rspamd_is_html_balanced, NULL}, + {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL}, + {"raw_header_exists", rspamd_raw_header_exists, NULL}, }; const struct rspamd_atom_subr mime_expr_subr = { @@ -2060,4 +2064,63 @@ rspamd_is_empty_body (struct rspamd_task *task, } return TRUE; +} + +#define TASK_FLAG_READ(flag) do { \ + result = !!(task->flags & (flag)); \ +} while(0) + +#define TASK_GET_FLAG(flag, strname, macro) do { \ + if (!found && strcmp ((flag), strname) == 0) { \ + TASK_FLAG_READ((macro)); \ + found = TRUE; \ + } \ +} while(0) +/* has_flag(name): TRUE only if the named flag is known and set in task->flags; warns and returns FALSE on bad input */ +static gboolean +rspamd_has_flag_expr (struct rspamd_task *task, + GArray * args, + void 
*unused) +{ + gboolean found = FALSE, result = FALSE; + struct expression_argument *flag_arg; + const gchar *flag_str; + + if (args == NULL || args->len == 0) { /* g_array_index() does no bounds checking */ + msg_warn_task ("no parameters to function"); + return FALSE; + } + + flag_arg = &g_array_index (args, struct expression_argument, 0); + + if (flag_arg->type != EXPRESSION_ARGUMENT_NORMAL) { + msg_warn_task ("invalid parameter to function"); + return FALSE; + } + + flag_str = (const gchar *)flag_arg->data; + + TASK_GET_FLAG (flag_str, "pass_all", RSPAMD_TASK_FLAG_PASS_ALL); + TASK_GET_FLAG (flag_str, "no_log", RSPAMD_TASK_FLAG_NO_LOG); + TASK_GET_FLAG (flag_str, "no_stat", RSPAMD_TASK_FLAG_NO_STAT); + TASK_GET_FLAG (flag_str, "skip", RSPAMD_TASK_FLAG_SKIP); + TASK_GET_FLAG (flag_str, "extended_urls", RSPAMD_TASK_FLAG_EXT_URLS); + TASK_GET_FLAG (flag_str, "learn_spam", RSPAMD_TASK_FLAG_LEARN_SPAM); + TASK_GET_FLAG (flag_str, "learn_ham", RSPAMD_TASK_FLAG_LEARN_HAM); + TASK_GET_FLAG (flag_str, "greylisted", RSPAMD_TASK_FLAG_GREYLISTED); + TASK_GET_FLAG (flag_str, "broken_headers", + RSPAMD_TASK_FLAG_BROKEN_HEADERS); + TASK_GET_FLAG (flag_str, "skip_process", + RSPAMD_TASK_FLAG_SKIP_PROCESS); + TASK_GET_FLAG (flag_str, "milter", + RSPAMD_TASK_FLAG_MILTER); + TASK_GET_FLAG (flag_str, "bad_unicode", + RSPAMD_TASK_FLAG_BAD_UNICODE); + + if (!found) { + msg_warn_task ("invalid flag name %s", flag_str); + return FALSE; + } + + return result; } \ No newline at end of file diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c index b782db010..12661f0ea 100644 --- a/src/libmime/mime_headers.c +++ b/src/libmime/mime_headers.c @@ -348,8 +348,15 @@ rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target, } nh->value = tmp; + + gboolean broken_utf = FALSE; + nh->decoded = rspamd_mime_header_decode (task->task_pool, - nh->value, strlen (tmp)); + nh->value, strlen (tmp), &broken_utf); + + if (broken_utf) { + task->flags |= RSPAMD_TASK_FLAG_BAD_UNICODE; + } if (nh->decoded == NULL) { nh->decoded = ""; @@ 
-531,7 +538,7 @@ rspamd_mime_header_sanity_check (GString *str) gchar * rspamd_mime_header_decode (rspamd_mempool_t *pool, const gchar *in, - gsize inlen) + gsize inlen, gboolean *invalid_utf) { GString *out; const guchar *c, *p, *end; @@ -583,6 +590,10 @@ rspamd_mime_header_decode (rspamd_mempool_t *pool, const gchar *in, off = 0; U8_APPEND_UNSAFE (out->str + out->len - 3, off, 0xfffd); + + if (invalid_utf) { + *invalid_utf = TRUE; + } } else { c = p; diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h index 03a7beae9..3c0c23a36 100644 --- a/src/libmime/mime_headers.h +++ b/src/libmime/mime_headers.h @@ -76,7 +76,7 @@ void rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target, * @return */ gchar * rspamd_mime_header_decode (rspamd_mempool_t *pool, const gchar *in, - gsize inlen); + gsize inlen, gboolean *invalid_utf); /** * Encode mime header if needed diff --git a/src/libserver/task.h b/src/libserver/task.h index b41b308e4..34e160dc0 100644 --- a/src/libserver/task.h +++ b/src/libserver/task.h @@ -116,6 +116,7 @@ enum rspamd_task_stage { #define RSPAMD_TASK_FLAG_OWN_POOL (1 << 27) #define RSPAMD_TASK_FLAG_MILTER (1 << 28) #define RSPAMD_TASK_FLAG_SSL (1 << 29) +#define RSPAMD_TASK_FLAG_BAD_UNICODE (1 << 30) #define RSPAMD_TASK_IS_SKIPPED(task) (((task)->flags & RSPAMD_TASK_FLAG_SKIP)) #define RSPAMD_TASK_IS_JSON(task) (((task)->flags & RSPAMD_TASK_FLAG_JSON)) diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index 1b5f33cb7..05257fcdd 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -3980,6 +3980,8 @@ lua_task_has_flag (lua_State *L) RSPAMD_TASK_FLAG_SKIP_PROCESS); LUA_TASK_GET_FLAG (flag, "milter", RSPAMD_TASK_FLAG_MILTER); + LUA_TASK_GET_FLAG (flag, "bad_unicode", + RSPAMD_TASK_FLAG_BAD_UNICODE); if (!found) { msg_warn_task ("unknown flag requested: %s", flag); -- 2.39.5