diff options
author | Vsevolod Stakhov <vsevolod@rspamd.com> | 2023-07-26 10:49:23 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rspamd.com> | 2023-07-26 10:49:23 +0100 |
commit | 537a7180a0d5132c11636c4fd8b1450cd99d352c (patch) | |
tree | fb9f8c84955a411bdffbd6371ea32f2716fb3687 /src/libmime/mime_parser.c | |
parent | 5fd7a90fdaa33f52c59bdb0ca84451e5c1e22365 (diff) | |
download | rspamd-537a7180a0d5132c11636c4fd8b1450cd99d352c.tar.gz rspamd-537a7180a0d5132c11636c4fd8b1450cd99d352c.zip |
[Rework] Use clang-format to unify formatting in all sources
No meaningful changes.
Diffstat (limited to 'src/libmime/mime_parser.c')
-rw-r--r-- | src/libmime/mime_parser.c | 1030 |
1 files changed, 542 insertions, 488 deletions
diff --git a/src/libmime/mime_parser.c b/src/libmime/mime_parser.c index 48b946d8f..217f0b87d 100644 --- a/src/libmime/mime_parser.c +++ b/src/libmime/mime_parser.c @@ -38,10 +38,10 @@ struct rspamd_mime_parser_lib_ctx *lib_ctx = NULL; static const guint max_nested = 64; static const guint max_key_usages = 10000; -#define msg_debug_mime(...) rspamd_conditional_debug_fast (NULL, task->from_addr, \ - rspamd_mime_log_id, "mime", task->task_pool->tag.uid, \ - RSPAMD_LOG_FUNC, \ - __VA_ARGS__) +#define msg_debug_mime(...) rspamd_conditional_debug_fast(NULL, task->from_addr, \ + rspamd_mime_log_id, "mime", task->task_pool->tag.uid, \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) INIT_LOG_MODULE(mime) @@ -57,7 +57,7 @@ struct rspamd_mime_boundary { }; struct rspamd_mime_parser_ctx { - GPtrArray *stack; /* Stack of parts */ + GPtrArray *stack; /* Stack of parts */ GArray *boundaries; /* Boundaries found in the whole message */ const gchar *start; const gchar *pos; @@ -67,40 +67,40 @@ struct rspamd_mime_parser_ctx { }; static enum rspamd_mime_parse_error -rspamd_mime_parse_multipart_part (struct rspamd_task *task, - struct rspamd_mime_part *part, - struct rspamd_mime_parser_ctx *st, - GError **err); +rspamd_mime_parse_multipart_part(struct rspamd_task *task, + struct rspamd_mime_part *part, + struct rspamd_mime_parser_ctx *st, + GError **err); static enum rspamd_mime_parse_error -rspamd_mime_parse_message (struct rspamd_task *task, - struct rspamd_mime_part *part, - struct rspamd_mime_parser_ctx *st, - GError **err); +rspamd_mime_parse_message(struct rspamd_task *task, + struct rspamd_mime_part *part, + struct rspamd_mime_parser_ctx *st, + GError **err); static enum rspamd_mime_parse_error -rspamd_mime_parse_normal_part (struct rspamd_task *task, - struct rspamd_mime_part *part, - struct rspamd_mime_parser_ctx *st, - struct rspamd_content_type *ct, - GError **err); +rspamd_mime_parse_normal_part(struct rspamd_task *task, + struct rspamd_mime_part *part, + struct rspamd_mime_parser_ctx *st, + struct rspamd_content_type *ct, + GError **err); static enum rspamd_mime_parse_error -rspamd_mime_process_multipart_node (struct rspamd_task *task, - struct rspamd_mime_parser_ctx *st, - struct rspamd_mime_part *multipart, - const gchar *start, const gchar *end, - gboolean is_finished, - GError **err); +rspamd_mime_process_multipart_node(struct rspamd_task *task, + struct rspamd_mime_parser_ctx *st, + struct rspamd_mime_part *multipart, + const gchar *start, const gchar *end, + gboolean is_finished, + GError **err); #define RSPAMD_MIME_QUARK (rspamd_mime_parser_quark()) static GQuark -rspamd_mime_parser_quark (void) +rspamd_mime_parser_quark(void) { - return g_quark_from_static_string ("mime-parser"); + return g_quark_from_static_string("mime-parser"); } -const gchar* -rspamd_cte_to_string (enum rspamd_cte ct) +const gchar * +rspamd_cte_to_string(enum rspamd_cte ct) { const gchar *ret = "unknown"; @@ -128,31 +128,31 @@ rspamd_cte_to_string (enum rspamd_cte ct) } enum rspamd_cte -rspamd_cte_from_string (const gchar *str) +rspamd_cte_from_string(const gchar *str) { enum rspamd_cte ret = RSPAMD_CTE_UNKNOWN; - g_assert (str != NULL); + g_assert(str != NULL); - if (strcmp (str, "7bit") == 0) { + if (strcmp(str, "7bit") == 0) { ret = RSPAMD_CTE_7BIT; } - else if (strcmp (str, "8bit") == 0) { + else if (strcmp(str, "8bit") == 0) { ret = RSPAMD_CTE_8BIT; } - else if (strcmp (str, "quoted-printable") == 0) { + else if (strcmp(str, "quoted-printable") == 0) { ret = RSPAMD_CTE_QP; } - else if (strcmp (str, "base64") == 0) { + else if (strcmp(str, "base64") == 0) { ret = RSPAMD_CTE_B64; } - else if (strcmp (str, "X-uuencode") == 0) { + else if (strcmp(str, "X-uuencode") == 0) { ret = RSPAMD_CTE_UUE; } - else if (strcmp (str, "uuencode") == 0) { + else if (strcmp(str, "uuencode") == 0) { ret = RSPAMD_CTE_UUE; } - else if (strcmp (str, "X-uue") == 0) { + else if (strcmp(str, "X-uue") == 0) { ret = RSPAMD_CTE_UUE; } @@ -160,32 +160,32 @@ rspamd_cte_from_string (const gchar *str) } static void -rspamd_mime_parser_init_lib (void) +rspamd_mime_parser_init_lib(void) { - lib_ctx = g_malloc0 (sizeof (*lib_ctx)); - lib_ctx->mp_boundary = rspamd_multipattern_create (RSPAMD_MULTIPATTERN_DEFAULT); - g_assert (lib_ctx->mp_boundary != NULL); - rspamd_multipattern_add_pattern (lib_ctx->mp_boundary, "\r--", 0); - rspamd_multipattern_add_pattern (lib_ctx->mp_boundary, "\n--", 0); + lib_ctx = g_malloc0(sizeof(*lib_ctx)); + lib_ctx->mp_boundary = rspamd_multipattern_create(RSPAMD_MULTIPATTERN_DEFAULT); + g_assert(lib_ctx->mp_boundary != NULL); + rspamd_multipattern_add_pattern(lib_ctx->mp_boundary, "\r--", 0); + rspamd_multipattern_add_pattern(lib_ctx->mp_boundary, "\n--", 0); GError *err = NULL; - if (!rspamd_multipattern_compile (lib_ctx->mp_boundary, &err)) { - msg_err ("fatal error: cannot compile multipattern for mime parser boundaries: %e", err); - g_error_free (err); + if (!rspamd_multipattern_compile(lib_ctx->mp_boundary, &err)) { + msg_err("fatal error: cannot compile multipattern for mime parser boundaries: %e", err); + g_error_free(err); g_abort(); } - ottery_rand_bytes (lib_ctx->hkey, sizeof (lib_ctx->hkey)); + ottery_rand_bytes(lib_ctx->hkey, sizeof(lib_ctx->hkey)); } static enum rspamd_cte -rspamd_mime_parse_cte (const gchar *in, gsize len) +rspamd_mime_parse_cte(const gchar *in, gsize len) { guint64 h; enum rspamd_cte ret = RSPAMD_CTE_UNKNOWN; - in = rspamd_string_len_strip (in, &len, " \t;,.+-#!`~'"); - h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, - in, len, 0xdeadbabe); + in = rspamd_string_len_strip(in, &len, " \t;,.+-#!`~'"); + h = rspamd_cryptobox_fast_hash_specific(RSPAMD_CRYPTOBOX_XXHASH64, + in, len, 0xdeadbabe); switch (h) { case 0xCEDAA7056B4753F7ULL: /* 7bit */ @@ -213,65 +213,65 @@ rspamd_mime_parse_cte (const gchar *in, gsize len) } static enum rspamd_cte -rspamd_mime_part_get_cte_heuristic (struct rspamd_task *task, - struct rspamd_mime_part *part) +rspamd_mime_part_get_cte_heuristic(struct rspamd_task *task, + struct rspamd_mime_part *part) { const guint check_len = 128; guint real_len, nspaces = 0, neqsign = 0, n8bit = 0, nqpencoded = 0, - padeqsign = 0, nupper = 0, nlower = 0; + padeqsign = 0, nupper = 0, nlower = 0; gboolean b64_chars = TRUE; const guchar *p, *end; enum rspamd_cte ret = RSPAMD_CTE_UNKNOWN; - real_len = MIN (check_len, part->raw_data.len); - p = (const guchar *)part->raw_data.begin; + real_len = MIN(check_len, part->raw_data.len); + p = (const guchar *) part->raw_data.begin; end = p + part->raw_data.len; - while (p < end && g_ascii_isspace (*p)) { - p ++; + while (p < end && g_ascii_isspace(*p)) { + p++; } - if (end - p > sizeof ("begin-base64 ")) { + if (end - p > sizeof("begin-base64 ")) { const guchar *uue_start; - if (memcmp (p, "begin ", sizeof ("begin ") - 1) == 0) { - uue_start = p + sizeof ("begin ") - 1; + if (memcmp(p, "begin ", sizeof("begin ") - 1) == 0) { + uue_start = p + sizeof("begin ") - 1; - while (uue_start < end && g_ascii_isspace (*uue_start)) { - uue_start ++; + while (uue_start < end && g_ascii_isspace(*uue_start)) { + uue_start++; } - if (uue_start < end && g_ascii_isdigit (*uue_start)) { + if (uue_start < end && g_ascii_isdigit(*uue_start)) { return RSPAMD_CTE_UUE; } } - else if (memcmp (p, "begin-base64 ", sizeof ("begin-base64 ") - 1) == 0) { - uue_start = p + sizeof ("begin ") - 1; + else if (memcmp(p, "begin-base64 ", sizeof("begin-base64 ") - 1) == 0) { + uue_start = p + sizeof("begin ") - 1; - while (uue_start < end && g_ascii_isspace (*uue_start)) { - uue_start ++; + while (uue_start < end && g_ascii_isspace(*uue_start)) { + uue_start++; } - if (uue_start < end && g_ascii_isdigit (*uue_start)) { + if (uue_start < end && g_ascii_isdigit(*uue_start)) { return RSPAMD_CTE_UUE; } } } /* Skip trailing spaces */ - while (end > p && g_ascii_isspace (*(end - 1))) { - end --; + while (end > p && g_ascii_isspace(*(end - 1))) { + end--; } if (end > p + 2) { if (*(end - 1) == '=') { - padeqsign ++; - end --; + padeqsign++; + end--; } if (*(end - 1) == '=') { - padeqsign ++; - end --; + padeqsign++; + end--; } } @@ -282,35 +282,35 @@ rspamd_mime_part_get_cte_heuristic (struct rspamd_task *task, while (p < end) { if (*p == ' ') { - nspaces ++; + nspaces++; } else if (*p == '=') { b64_chars = FALSE; /* Eqsign must not be inside base64 */ - neqsign ++; - p ++; + neqsign++; + p++; - if (p + 2 < end && g_ascii_isxdigit (*p) && g_ascii_isxdigit (*(p + 1))) { - p ++; - nqpencoded ++; + if (p + 2 < end && g_ascii_isxdigit(*p) && g_ascii_isxdigit(*(p + 1))) { + p++; + nqpencoded++; } continue; } else if (*p >= 0x80) { - n8bit ++; + n8bit++; b64_chars = FALSE; } - else if (!(g_ascii_isalnum (*p) || *p == '/' || *p == '+')) { + else if (!(g_ascii_isalnum(*p) || *p == '/' || *p == '+')) { b64_chars = FALSE; } - else if (g_ascii_isupper (*p)) { - nupper ++; + else if (g_ascii_isupper(*p)) { + nupper++; } - else if (g_ascii_islower (*p)) { - nlower ++; + else if (g_ascii_islower(*p)) { + nlower++; } - p ++; + p++; } if (b64_chars && neqsign <= 2 && nspaces == 0) { @@ -342,7 +342,7 @@ rspamd_mime_part_get_cte_heuristic (struct rspamd_task *task, } else { - if (((end - (const guchar *)part->raw_data.begin) + padeqsign) % 4 == 0) { + if (((end - (const guchar *) part->raw_data.begin) + padeqsign) % 4 == 0) { if (padeqsign == 0) { /* * It can be either base64 or plain text, hard to say @@ -355,7 +355,6 @@ rspamd_mime_part_get_cte_heuristic (struct rspamd_task *task, else { ret = RSPAMD_CTE_7BIT; } - } else { ret = RSPAMD_CTE_B64; @@ -384,16 +383,16 @@ rspamd_mime_part_get_cte_heuristic (struct rspamd_task *task, ret = RSPAMD_CTE_8BIT; } - msg_debug_mime ("detected cte: %s", rspamd_cte_to_string (ret)); + msg_debug_mime("detected cte: %s", rspamd_cte_to_string(ret)); return ret; } static void -rspamd_mime_part_get_cte (struct rspamd_task *task, - struct rspamd_mime_headers_table *hdrs, - struct rspamd_mime_part *part, - gboolean apply_heuristic) +rspamd_mime_part_get_cte(struct rspamd_task *task, + struct rspamd_mime_headers_table *hdrs, + struct rspamd_mime_part *part, + gboolean apply_heuristic) { struct rspamd_mime_header *hdr, *cur; enum rspamd_cte cte = RSPAMD_CTE_UNKNOWN; @@ -403,7 +402,7 @@ rspamd_mime_part_get_cte (struct rspamd_task *task, if (hdr == NULL) { if (part->parent_part && part->parent_part->cte != RSPAMD_CTE_UNKNOWN && - !(part->parent_part->flags & RSPAMD_MIME_PART_MISSING_CTE)) { + !(part->parent_part->flags & RSPAMD_MIME_PART_MISSING_CTE)) { part->cte = part->parent_part->cte; parent_propagated = TRUE; @@ -411,21 +410,22 @@ rspamd_mime_part_get_cte (struct rspamd_task *task, } if (apply_heuristic) { - part->cte = rspamd_mime_part_get_cte_heuristic (task, part); - msg_info_task ("detected missing CTE for part as: %s", - rspamd_cte_to_string (part->cte)); + part->cte = rspamd_mime_part_get_cte_heuristic(task, part); + msg_info_task("detected missing CTE for part as: %s", + rspamd_cte_to_string(part->cte)); } part->flags |= RSPAMD_MIME_PART_MISSING_CTE; } else { - DL_FOREACH (hdr, cur) { + DL_FOREACH(hdr, cur) + { gsize hlen; gchar lc_buf[128]; - hlen = rspamd_snprintf (lc_buf, sizeof (lc_buf), "%s", cur->value); - rspamd_str_lc (lc_buf, hlen); - cte = rspamd_mime_parse_cte (lc_buf, hlen); + hlen = rspamd_snprintf(lc_buf, sizeof(lc_buf), "%s", cur->value); + rspamd_str_lc(lc_buf, hlen); + cte = rspamd_mime_parse_cte(lc_buf, hlen); if (cte != RSPAMD_CTE_UNKNOWN) { part->cte = cte; @@ -433,45 +433,45 @@ rspamd_mime_part_get_cte (struct rspamd_task *task, } } -check_cte: + check_cte: if (apply_heuristic) { if (part->cte == RSPAMD_CTE_UNKNOWN) { - part->cte = rspamd_mime_part_get_cte_heuristic (task, part); + part->cte = rspamd_mime_part_get_cte_heuristic(task, part); - msg_info_task ("corrected bad CTE for part to: %s", - rspamd_cte_to_string (part->cte)); + msg_info_task("corrected bad CTE for part to: %s", + rspamd_cte_to_string(part->cte)); } else if (part->cte == RSPAMD_CTE_B64 || - part->cte == RSPAMD_CTE_QP) { + part->cte == RSPAMD_CTE_QP) { /* Additionally check sanity */ - cte = rspamd_mime_part_get_cte_heuristic (task, part); + cte = rspamd_mime_part_get_cte_heuristic(task, part); if (cte == RSPAMD_CTE_8BIT) { - msg_info_task ( - "incorrect cte specified for part: %s, %s detected", - rspamd_cte_to_string (part->cte), - rspamd_cte_to_string (cte)); + msg_info_task( + "incorrect cte specified for part: %s, %s detected", + rspamd_cte_to_string(part->cte), + rspamd_cte_to_string(cte)); part->cte = cte; part->flags |= RSPAMD_MIME_PART_BAD_CTE; } else if (cte != part->cte && parent_propagated) { part->cte = cte; - msg_info_task ("detected missing CTE for part as: %s", - rspamd_cte_to_string (part->cte)); + msg_info_task("detected missing CTE for part as: %s", + rspamd_cte_to_string(part->cte)); } } else { - msg_debug_mime ("processed cte: %s", - rspamd_cte_to_string (cte)); + msg_debug_mime("processed cte: %s", + rspamd_cte_to_string(cte)); } } else { - msg_debug_mime ("processed cte: %s", rspamd_cte_to_string (cte)); + msg_debug_mime("processed cte: %s", rspamd_cte_to_string(cte)); } } } static void -rspamd_mime_part_get_cd (struct rspamd_task *task, struct rspamd_mime_part *part) +rspamd_mime_part_get_cd(struct rspamd_task *task, struct rspamd_mime_part *part) { struct rspamd_mime_header *hdr, *cur; struct rspamd_content_disposition *cd = NULL; @@ -479,62 +479,63 @@ rspamd_mime_part_get_cd (struct rspamd_task *task, struct rspamd_mime_part *part struct rspamd_content_type_param *found; hdr = rspamd_message_get_header_from_hash(part->raw_headers, - "Content-Disposition", FALSE); + "Content-Disposition", FALSE); if (hdr == NULL) { - cd = rspamd_mempool_alloc0 (task->task_pool, sizeof (*cd)); + cd = rspamd_mempool_alloc0(task->task_pool, sizeof(*cd)); cd->type = RSPAMD_CT_INLINE; /* We can also have content disposition definitions in Content-Type */ if (part->ct && part->ct->attrs) { - RSPAMD_FTOK_ASSIGN (&srch, "name"); - found = g_hash_table_lookup (part->ct->attrs, &srch); + RSPAMD_FTOK_ASSIGN(&srch, "name"); + found = g_hash_table_lookup(part->ct->attrs, &srch); if (!found) { - RSPAMD_FTOK_ASSIGN (&srch, "filename"); - found = g_hash_table_lookup (part->ct->attrs, &srch); + RSPAMD_FTOK_ASSIGN(&srch, "filename"); + found = g_hash_table_lookup(part->ct->attrs, &srch); } if (found) { cd->type = RSPAMD_CT_ATTACHMENT; - memcpy (&cd->filename, &found->value, sizeof (cd->filename)); + memcpy(&cd->filename, &found->value, sizeof(cd->filename)); } } } else { - DL_FOREACH (hdr, cur) { + DL_FOREACH(hdr, cur) + { gsize hlen; cd = NULL; if (cur->value) { - hlen = strlen (cur->value); - cd = rspamd_content_disposition_parse (cur->value, hlen, - task->task_pool); + hlen = strlen(cur->value); + cd = rspamd_content_disposition_parse(cur->value, hlen, + task->task_pool); } if (cd) { /* We still need to check filename */ if (cd->filename.len == 0) { if (part->ct && part->ct->attrs) { - RSPAMD_FTOK_ASSIGN (&srch, "name"); - found = g_hash_table_lookup (part->ct->attrs, &srch); + RSPAMD_FTOK_ASSIGN(&srch, "name"); + found = g_hash_table_lookup(part->ct->attrs, &srch); if (!found) { - RSPAMD_FTOK_ASSIGN (&srch, "filename"); - found = g_hash_table_lookup (part->ct->attrs, &srch); + RSPAMD_FTOK_ASSIGN(&srch, "filename"); + found = g_hash_table_lookup(part->ct->attrs, &srch); } if (found) { cd->type = RSPAMD_CT_ATTACHMENT; - memcpy (&cd->filename, &found->value, - sizeof (cd->filename)); + memcpy(&cd->filename, &found->value, + sizeof(cd->filename)); } } } - msg_debug_mime ("processed content disposition: %s, file: \"%T\"", - cd->lc_data, &cd->filename); + msg_debug_mime("processed content disposition: %s, file: \"%T\"", + cd->lc_data, &cd->filename); break; } else if (part->ct) { @@ -542,22 +543,22 @@ rspamd_mime_part_get_cd (struct rspamd_task *task, struct rspamd_mime_part *part * Even in case of malformed Content-Disposition, we can still * fall back to Content-Type */ - cd = rspamd_mempool_alloc0 (task->task_pool, sizeof (*cd)); + cd = rspamd_mempool_alloc0(task->task_pool, sizeof(*cd)); cd->type = RSPAMD_CT_INLINE; /* We can also have content disposition definitions in Content-Type */ if (part->ct->attrs) { - RSPAMD_FTOK_ASSIGN (&srch, "name"); - found = g_hash_table_lookup (part->ct->attrs, &srch); + RSPAMD_FTOK_ASSIGN(&srch, "name"); + found = g_hash_table_lookup(part->ct->attrs, &srch); if (!found) { - RSPAMD_FTOK_ASSIGN (&srch, "filename"); - found = g_hash_table_lookup (part->ct->attrs, &srch); + RSPAMD_FTOK_ASSIGN(&srch, "filename"); + found = g_hash_table_lookup(part->ct->attrs, &srch); } if (found) { cd->type = RSPAMD_CT_ATTACHMENT; - memcpy (&cd->filename, &found->value, sizeof (cd->filename)); + memcpy(&cd->filename, &found->value, sizeof(cd->filename)); } } } @@ -567,43 +568,98 @@ rspamd_mime_part_get_cd (struct rspamd_task *task, struct rspamd_mime_part *part part->cd = cd; } -void -rspamd_mime_parser_calc_digest (struct rspamd_mime_part *part) +void rspamd_mime_parser_calc_digest(struct rspamd_mime_part *part) { /* Blake2b applied to string 'rspamd' */ static const guchar hash_key[] = { - 0xef,0x43,0xae,0x80,0xcc,0x8d,0xc3,0x4c, - 0x6f,0x1b,0xd6,0x18,0x1b,0xae,0x87,0x74, - 0x0c,0xca,0xf7,0x8e,0x5f,0x2e,0x54,0x32, - 0xf6,0x79,0xb9,0x27,0x26,0x96,0x20,0x92, - 0x70,0x07,0x85,0xeb,0x83,0xf7,0x89,0xe0, - 0xd7,0x32,0x2a,0xd2,0x1a,0x64,0x41,0xef, - 0x49,0xff,0xc3,0x8c,0x54,0xf9,0x67,0x74, - 0x30,0x1e,0x70,0x2e,0xb7,0x12,0x09,0xfe, + 0xef, + 0x43, + 0xae, + 0x80, + 0xcc, + 0x8d, + 0xc3, + 0x4c, + 0x6f, + 0x1b, + 0xd6, + 0x18, + 0x1b, + 0xae, + 0x87, + 0x74, + 0x0c, + 0xca, + 0xf7, + 0x8e, + 0x5f, + 0x2e, + 0x54, + 0x32, + 0xf6, + 0x79, + 0xb9, + 0x27, + 0x26, + 0x96, + 0x20, + 0x92, + 0x70, + 0x07, + 0x85, + 0xeb, + 0x83, + 0xf7, + 0x89, + 0xe0, + 0xd7, + 0x32, + 0x2a, + 0xd2, + 0x1a, + 0x64, + 0x41, + 0xef, + 0x49, + 0xff, + 0xc3, + 0x8c, + 0x54, + 0xf9, + 0x67, + 0x74, + 0x30, + 0x1e, + 0x70, + 0x2e, + 0xb7, + 0x12, + 0x09, + 0xfe, }; if (part->parsed_data.len > 0) { - rspamd_cryptobox_hash (part->digest, - part->parsed_data.begin, part->parsed_data.len, - hash_key, sizeof (hash_key)); + rspamd_cryptobox_hash(part->digest, + part->parsed_data.begin, part->parsed_data.len, + hash_key, sizeof(hash_key)); } } static enum rspamd_mime_parse_error -rspamd_mime_parse_normal_part (struct rspamd_task *task, - struct rspamd_mime_part *part, - struct rspamd_mime_parser_ctx *st, - struct rspamd_content_type *ct, - GError **err) +rspamd_mime_parse_normal_part(struct rspamd_task *task, + struct rspamd_mime_part *part, + struct rspamd_mime_parser_ctx *st, + struct rspamd_content_type *ct, + GError **err) { rspamd_fstring_t *parsed; gssize r; - g_assert (part != NULL); + g_assert(part != NULL); - rspamd_mime_part_get_cte (task, part->raw_headers, part, - part->ct && !(part->ct->flags & RSPAMD_CONTENT_TYPE_MESSAGE)); - rspamd_mime_part_get_cd (task, part); + rspamd_mime_part_get_cte(task, part->raw_headers, part, + part->ct && !(part->ct->flags & RSPAMD_CONTENT_TYPE_MESSAGE)); + rspamd_mime_part_get_cd(task, part); switch (part->cte) { case RSPAMD_CTE_7BIT: @@ -626,8 +682,8 @@ rspamd_mime_parse_normal_part (struct rspamd_task *task, * UTF8, we can still imply Content-Type == text/plain */ - if (rspamd_str_has_8bit (part->raw_data.begin, part->raw_data.len) && - !rspamd_fast_utf8_validate (part->raw_data.begin, part->raw_data.len)) { + if (rspamd_str_has_8bit(part->raw_data.begin, part->raw_data.len) && + !rspamd_fast_utf8_validate(part->raw_data.begin, part->raw_data.len)) { part->ct->flags &= ~RSPAMD_CONTENT_TYPE_TEXT; part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN; } @@ -636,14 +692,14 @@ rspamd_mime_parse_normal_part (struct rspamd_task *task, if (part->ct && (part->ct->flags & RSPAMD_CONTENT_TYPE_TEXT)) { /* Need to copy text as we have couple of in-place change functions */ - parsed = rspamd_fstring_sized_new (part->raw_data.len); + parsed = rspamd_fstring_sized_new(part->raw_data.len); parsed->len = part->raw_data.len; - memcpy (parsed->str, part->raw_data.begin, parsed->len); + memcpy(parsed->str, part->raw_data.begin, parsed->len); part->parsed_data.begin = parsed->str; part->parsed_data.len = parsed->len; - rspamd_mempool_notify_alloc (task->task_pool, parsed->len); - rspamd_mempool_add_destructor (task->task_pool, - (rspamd_mempool_destruct_t)rspamd_fstring_free, parsed); + rspamd_mempool_notify_alloc(task->task_pool, parsed->len); + rspamd_mempool_add_destructor(task->task_pool, + (rspamd_mempool_destruct_t) rspamd_fstring_free, parsed); } else { part->parsed_data.begin = part->raw_data.begin; @@ -651,138 +707,138 @@ rspamd_mime_parse_normal_part (struct rspamd_task *task, } break; case RSPAMD_CTE_QP: - parsed = rspamd_fstring_sized_new (part->raw_data.len); - r = rspamd_decode_qp_buf (part->raw_data.begin, part->raw_data.len, - parsed->str, parsed->allocated); + parsed = rspamd_fstring_sized_new(part->raw_data.len); + r = rspamd_decode_qp_buf(part->raw_data.begin, part->raw_data.len, + parsed->str, parsed->allocated); if (r != -1) { parsed->len = r; part->parsed_data.begin = parsed->str; part->parsed_data.len = parsed->len; - rspamd_mempool_notify_alloc (task->task_pool, parsed->len); - rspamd_mempool_add_destructor (task->task_pool, - (rspamd_mempool_destruct_t)rspamd_fstring_free, parsed); + rspamd_mempool_notify_alloc(task->task_pool, parsed->len); + rspamd_mempool_add_destructor(task->task_pool, + (rspamd_mempool_destruct_t) rspamd_fstring_free, parsed); } else { - msg_err_task ("invalid quoted-printable encoded part, assume 8bit"); + msg_err_task("invalid quoted-printable encoded part, assume 8bit"); if (part->ct) { part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN; } part->cte = RSPAMD_CTE_8BIT; - memcpy (parsed->str, part->raw_data.begin, part->raw_data.len); + memcpy(parsed->str, part->raw_data.begin, part->raw_data.len); parsed->len = part->raw_data.len; part->parsed_data.begin = parsed->str; part->parsed_data.len = parsed->len; - rspamd_mempool_notify_alloc (task->task_pool, parsed->len); - rspamd_mempool_add_destructor (task->task_pool, - (rspamd_mempool_destruct_t)rspamd_fstring_free, parsed); + rspamd_mempool_notify_alloc(task->task_pool, parsed->len); + rspamd_mempool_add_destructor(task->task_pool, + (rspamd_mempool_destruct_t) rspamd_fstring_free, parsed); } break; case RSPAMD_CTE_B64: - parsed = rspamd_fstring_sized_new (part->raw_data.len / 4 * 3 + 12); - rspamd_cryptobox_base64_decode (part->raw_data.begin, - part->raw_data.len, - parsed->str, &parsed->len); + parsed = rspamd_fstring_sized_new(part->raw_data.len / 4 * 3 + 12); + rspamd_cryptobox_base64_decode(part->raw_data.begin, + part->raw_data.len, + parsed->str, &parsed->len); part->parsed_data.begin = parsed->str; part->parsed_data.len = parsed->len; - rspamd_mempool_notify_alloc (task->task_pool, parsed->len); - rspamd_mempool_add_destructor (task->task_pool, - (rspamd_mempool_destruct_t)rspamd_fstring_free, parsed); + rspamd_mempool_notify_alloc(task->task_pool, parsed->len); + rspamd_mempool_add_destructor(task->task_pool, + (rspamd_mempool_destruct_t) rspamd_fstring_free, parsed); break; case RSPAMD_CTE_UUE: - parsed = rspamd_fstring_sized_new (part->raw_data.len / 4 * 3 + 12); - r = rspamd_decode_uue_buf (part->raw_data.begin, part->raw_data.len, - parsed->str, parsed->allocated); - rspamd_mempool_notify_alloc (task->task_pool, parsed->len); - rspamd_mempool_add_destructor (task->task_pool, - (rspamd_mempool_destruct_t)rspamd_fstring_free, parsed); + parsed = rspamd_fstring_sized_new(part->raw_data.len / 4 * 3 + 12); + r = rspamd_decode_uue_buf(part->raw_data.begin, part->raw_data.len, + parsed->str, parsed->allocated); + rspamd_mempool_notify_alloc(task->task_pool, parsed->len); + rspamd_mempool_add_destructor(task->task_pool, + (rspamd_mempool_destruct_t) rspamd_fstring_free, parsed); if (r != -1) { parsed->len = r; part->parsed_data.begin = parsed->str; part->parsed_data.len = parsed->len; } else { - msg_err_task ("invalid uuencoding in encoded part, assume 8bit"); + msg_err_task("invalid uuencoding in encoded part, assume 8bit"); if (part->ct) { part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN; } part->cte = RSPAMD_CTE_8BIT; - parsed->len = MIN (part->raw_data.len, parsed->allocated); - memcpy (parsed->str, part->raw_data.begin, parsed->len); - rspamd_mempool_notify_alloc (task->task_pool, parsed->len); + parsed->len = MIN(part->raw_data.len, parsed->allocated); + memcpy(parsed->str, part->raw_data.begin, parsed->len); + rspamd_mempool_notify_alloc(task->task_pool, parsed->len); part->parsed_data.begin = parsed->str; part->parsed_data.len = parsed->len; } break; default: - g_assert_not_reached (); + g_assert_not_reached(); } - part->part_number = MESSAGE_FIELD (task, parts)->len; - part->urls = g_ptr_array_new (); - g_ptr_array_add (MESSAGE_FIELD (task, parts), part); - msg_debug_mime ("parsed data part %T/%T of length %z (%z orig), %s cte", - &part->ct->type, &part->ct->subtype, part->parsed_data.len, - part->raw_data.len, rspamd_cte_to_string (part->cte)); - rspamd_mime_parser_calc_digest (part); + part->part_number = MESSAGE_FIELD(task, parts)->len; + part->urls = g_ptr_array_new(); + g_ptr_array_add(MESSAGE_FIELD(task, parts), part); + msg_debug_mime("parsed data part %T/%T of length %z (%z orig), %s cte", + &part->ct->type, &part->ct->subtype, part->parsed_data.len, + part->raw_data.len, rspamd_cte_to_string(part->cte)); + rspamd_mime_parser_calc_digest(part); if (ct && (ct->flags & RSPAMD_CONTENT_TYPE_SMIME)) { CMS_ContentInfo *cms; const unsigned char *der_beg = part->parsed_data.begin; - cms = d2i_CMS_ContentInfo (NULL, &der_beg, part->parsed_data.len); + cms = d2i_CMS_ContentInfo(NULL, &der_beg, part->parsed_data.len); if (cms) { - const ASN1_OBJECT *asn_ct = CMS_get0_eContentType (cms); - int ct_nid = OBJ_obj2nid (asn_ct); + const ASN1_OBJECT *asn_ct = CMS_get0_eContentType(cms); + int ct_nid = OBJ_obj2nid(asn_ct); if (ct_nid == NID_pkcs7_data) { - BIO *bio = BIO_new_mem_buf (part->parsed_data.begin, - part->parsed_data.len); + BIO *bio = BIO_new_mem_buf(part->parsed_data.begin, + part->parsed_data.len); PKCS7 *p7; - p7 = d2i_PKCS7_bio (bio, NULL); + p7 = d2i_PKCS7_bio(bio, NULL); if (p7) { - ct_nid = OBJ_obj2nid (p7->type); + ct_nid = OBJ_obj2nid(p7->type); if (ct_nid == NID_pkcs7_signed) { PKCS7 *p7_signed_content = p7->d.sign->contents; - ct_nid = OBJ_obj2nid (p7_signed_content->type); + ct_nid = OBJ_obj2nid(p7_signed_content->type); if (ct_nid == NID_pkcs7_data && p7_signed_content->d.data) { int ret; - msg_debug_mime ("found an additional part inside of " - "smime structure of type %T/%T; length=%d", - &ct->type, &ct->subtype, p7_signed_content->d.data->length); + msg_debug_mime("found an additional part inside of " + "smime structure of type %T/%T; length=%d", + &ct->type, &ct->subtype, p7_signed_content->d.data->length); /* * Since ASN.1 structures are freed, we need to copy * the content */ - gchar *cpy = rspamd_mempool_alloc (task->task_pool, - p7_signed_content->d.data->length); - memcpy (cpy, p7_signed_content->d.data->data, - p7_signed_content->d.data->length); - ret = rspamd_mime_process_multipart_node (task, - st, NULL, - cpy,cpy + p7_signed_content->d.data->length, - TRUE, err); - - PKCS7_free (p7); - BIO_free (bio); - CMS_ContentInfo_free (cms); + gchar *cpy = rspamd_mempool_alloc(task->task_pool, + p7_signed_content->d.data->length); + memcpy(cpy, p7_signed_content->d.data->data, + p7_signed_content->d.data->length); + ret = rspamd_mime_process_multipart_node(task, + st, NULL, + cpy, cpy + p7_signed_content->d.data->length, + TRUE, err); + + PKCS7_free(p7); + BIO_free(bio); + CMS_ContentInfo_free(cms); return ret; } } - PKCS7_free (p7); + PKCS7_free(p7); } - BIO_free (bio); + BIO_free(bio); } - CMS_ContentInfo_free (cms); + CMS_ContentInfo_free(cms); } } @@ -800,12 +856,12 @@ struct rspamd_mime_multipart_cbdata { }; static enum rspamd_mime_parse_error -rspamd_mime_process_multipart_node (struct rspamd_task *task, - struct rspamd_mime_parser_ctx *st, - struct rspamd_mime_part *multipart, - const gchar *start, const gchar *end, - gboolean is_finished, - GError **err) +rspamd_mime_process_multipart_node(struct rspamd_task *task, + struct rspamd_mime_parser_ctx *st, + struct rspamd_mime_part *multipart, + const gchar *start, const gchar *end, + gboolean is_finished, + GError **err) { struct rspamd_content_type *ct, *sel = NULL; struct rspamd_mime_header *hdr = NULL, *cur; @@ -815,7 +871,7 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task, enum rspamd_mime_parse_error ret = RSPAMD_MIME_PARSE_FATAL; - str.str = (gchar *)start; + str.str = (gchar *) start; str.len = end - start; if (*start == '\n' || *start == '\r') { @@ -833,11 +889,11 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task, gboolean seen_something = FALSE; while (p < end) { - if (g_ascii_isalnum (*p)) { + if (g_ascii_isalnum(*p)) { seen_something = TRUE; break; } - p ++; + p++; } if (!seen_something) { @@ -846,21 +902,21 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task, } } else { - hdr_pos = rspamd_string_find_eoh (&str, &body_pos); + hdr_pos = rspamd_string_find_eoh(&str, &body_pos); } - npart = rspamd_mempool_alloc0 (task->task_pool, - sizeof (struct rspamd_mime_part)); + npart = rspamd_mempool_alloc0(task->task_pool, + sizeof(struct rspamd_mime_part)); npart->parent_part = multipart; - npart->raw_headers = rspamd_message_headers_new (); + npart->raw_headers = rspamd_message_headers_new(); npart->headers_order = NULL; if (multipart) { if (multipart->specific.mp->children == NULL) { - multipart->specific.mp->children = g_ptr_array_sized_new (2); + multipart->specific.mp->children = g_ptr_array_sized_new(2); } - g_ptr_array_add (multipart->specific.mp->children, npart); + g_ptr_array_add(multipart->specific.mp->children, npart); } if (hdr_pos > 0 && hdr_pos < str.len) { @@ -870,21 +926,20 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task, npart->raw_data.len = (end - start) - body_pos; if (npart->raw_headers_len > 0) { - rspamd_mime_headers_process (task, npart->raw_headers, - &npart->headers_order, - npart->raw_headers_str, - npart->raw_headers_len, - FALSE); + rspamd_mime_headers_process(task, npart->raw_headers, + &npart->headers_order, + npart->raw_headers_str, + npart->raw_headers_len, + FALSE); /* Preserve the natural order */ if (npart->headers_order) { - LL_REVERSE2 (npart->headers_order, ord_next); + LL_REVERSE2(npart->headers_order, ord_next); } } hdr = rspamd_message_get_header_from_hash(npart->raw_headers, - "Content-Type", FALSE); - + "Content-Type", FALSE); } else { npart->raw_headers_str = 0; @@ -896,9 +951,10 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task, if (hdr != NULL) { - DL_FOREACH (hdr, cur) { - ct = rspamd_content_type_parse (cur->value, strlen (cur->value), - task->task_pool); + DL_FOREACH(hdr, cur) + { + ct = rspamd_content_type_parse(cur->value, strlen(cur->value), + task->task_pool); /* Here we prefer multipart content-type or any content-type */ if (ct) { @@ -913,46 +969,45 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task, } if (sel == NULL) { - sel = rspamd_mempool_alloc0 (task->task_pool, sizeof (*sel)); - RSPAMD_FTOK_ASSIGN (&sel->type, "text"); - RSPAMD_FTOK_ASSIGN (&sel->subtype, "plain"); + sel = rspamd_mempool_alloc0(task->task_pool, sizeof(*sel)); + RSPAMD_FTOK_ASSIGN(&sel->type, "text"); + RSPAMD_FTOK_ASSIGN(&sel->subtype, "plain"); } npart->ct = sel; if (sel->flags & RSPAMD_CONTENT_TYPE_MULTIPART) { - st->nesting ++; - g_ptr_array_add (st->stack, npart); + st->nesting++; + g_ptr_array_add(st->stack, npart); npart->part_type = RSPAMD_MIME_PART_MULTIPART; - npart->specific.mp = rspamd_mempool_alloc0 (task->task_pool, - sizeof (struct rspamd_mime_multipart)); - memcpy (&npart->specific.mp->boundary, &sel->orig_boundary, - sizeof (rspamd_ftok_t)); - ret = rspamd_mime_parse_multipart_part (task, npart, st, err); + npart->specific.mp = rspamd_mempool_alloc0(task->task_pool, + sizeof(struct rspamd_mime_multipart)); + memcpy(&npart->specific.mp->boundary, &sel->orig_boundary, + sizeof(rspamd_ftok_t)); + ret = rspamd_mime_parse_multipart_part(task, npart, st, err); } else if (sel->flags & RSPAMD_CONTENT_TYPE_MESSAGE) { - st->nesting ++; - g_ptr_array_add (st->stack, npart); + st->nesting++; + g_ptr_array_add(st->stack, npart); npart->part_type = RSPAMD_MIME_PART_MESSAGE; - if ((ret = rspamd_mime_parse_normal_part (task, npart, st, sel, err)) - == RSPAMD_MIME_PARSE_OK) { - ret = rspamd_mime_parse_message (task, npart, st, err); + if ((ret = rspamd_mime_parse_normal_part(task, npart, st, sel, err)) == RSPAMD_MIME_PARSE_OK) { + ret = rspamd_mime_parse_message(task, npart, st, err); } } else { - ret = rspamd_mime_parse_normal_part (task, npart, st, sel, err); + ret = rspamd_mime_parse_normal_part(task, npart, st, sel, err); } return ret; } static enum rspamd_mime_parse_error -rspamd_mime_parse_multipart_cb (struct rspamd_task *task, - struct rspamd_mime_part *multipart, - struct rspamd_mime_parser_ctx *st, - struct rspamd_mime_multipart_cbdata *cb, - struct rspamd_mime_boundary *b) +rspamd_mime_parse_multipart_cb(struct rspamd_task *task, + struct rspamd_mime_part *multipart, + struct rspamd_mime_parser_ctx *st, + struct rspamd_mime_multipart_cbdata *cb, + struct rspamd_mime_boundary *b) { const gchar *pos = st->start + b->boundary; enum rspamd_mime_parse_error ret; @@ -971,9 +1026,8 @@ rspamd_mime_parse_multipart_cb (struct rspamd_task *task, */ if (cb->part_start < pos && cb->cur_boundary) { - if ((ret = rspamd_mime_process_multipart_node (task, cb->st, - cb->multipart, cb->part_start, pos, TRUE, cb->err)) - != RSPAMD_MIME_PARSE_OK) { + if ((ret = rspamd_mime_process_multipart_node(task, cb->st, + cb->multipart, cb->part_start, pos, TRUE, cb->err)) != RSPAMD_MIME_PARSE_OK) { return ret; } @@ -992,10 +1046,10 @@ rspamd_mime_parse_multipart_cb (struct rspamd_task *task, } static enum rspamd_mime_parse_error -rspamd_multipart_boundaries_filter (struct rspamd_task *task, - struct rspamd_mime_part *multipart, - struct rspamd_mime_parser_ctx *st, - struct rspamd_mime_multipart_cbdata *cb) +rspamd_multipart_boundaries_filter(struct rspamd_task *task, + struct rspamd_mime_part *multipart, + struct rspamd_mime_parser_ctx *st, + struct rspamd_mime_multipart_cbdata *cb) { struct rspamd_mime_boundary *cur; goffset last_offset; @@ -1003,17 +1057,17 @@ rspamd_multipart_boundaries_filter (struct rspamd_task *task, enum rspamd_mime_parse_error ret; last_offset = (multipart->raw_data.begin - st->start) + - multipart->raw_data.len; + multipart->raw_data.len; /* Find the first offset suitable for this part */ - for (i = 0; i < st->boundaries->len; i ++) { - cur = &g_array_index (st->boundaries, struct rspamd_mime_boundary, i); + for (i = 0; i < st->boundaries->len; i++) { + cur = &g_array_index(st->boundaries, struct rspamd_mime_boundary, i); if (cur->start >= multipart->raw_data.begin - st->start) { if (cb->cur_boundary) { /* Check boundary */ - msg_debug_mime ("compare %L and %L (and %L)", - cb->bhash, cur->hash, cur->closed_hash); + msg_debug_mime("compare %L and %L (and %L)", + cb->bhash, cur->hash, cur->closed_hash); if (cb->bhash == cur->hash) { sel = i; @@ -1029,8 +1083,8 @@ rspamd_multipart_boundaries_filter (struct rspamd_task *task, } else { /* Set current boundary */ - cb->cur_boundary = rspamd_mempool_alloc (task->task_pool, - sizeof (rspamd_ftok_t)); + cb->cur_boundary = rspamd_mempool_alloc(task->task_pool, + sizeof(rspamd_ftok_t)); cb->cur_boundary->begin = st->start + cur->boundary; cb->cur_boundary->len = 0; cb->bhash = cur->hash; @@ -1041,16 +1095,16 @@ rspamd_multipart_boundaries_filter (struct rspamd_task *task, } /* Now we can go forward with boundaries that are same to what we have */ - for (i = sel; i < st->boundaries->len; i ++) { - cur = &g_array_index (st->boundaries, struct rspamd_mime_boundary, i); + for (i = sel; i < st->boundaries->len; i++) { + cur = &g_array_index(st->boundaries, struct rspamd_mime_boundary, i); if (cur->boundary > last_offset) { break; } if (cur->hash == cb->bhash || cur->closed_hash == cb->bhash) { - if ((ret = rspamd_mime_parse_multipart_cb (task, multipart, st, - cb, cur)) != RSPAMD_MIME_PARSE_OK) { + if ((ret = rspamd_mime_parse_multipart_cb(task, multipart, st, + cb, cur)) != RSPAMD_MIME_PARSE_OK) { return ret; } @@ -1060,11 +1114,11 @@ rspamd_multipart_boundaries_filter (struct rspamd_task *task, cur->hash = cur->closed_hash; } - if (RSPAMD_BOUNDARY_IS_CLOSED (cur)) { + if (RSPAMD_BOUNDARY_IS_CLOSED(cur)) { /* We also might check the next boundary... */ if (i < st->boundaries->len - 1) { - cur = &g_array_index (st->boundaries, - struct rspamd_mime_boundary, i + 1); + cur = &g_array_index(st->boundaries, + struct rspamd_mime_boundary, i + 1); if (cur->hash == cb->bhash) { continue; @@ -1089,8 +1143,8 @@ rspamd_multipart_boundaries_filter (struct rspamd_task *task, fb.boundary = last_offset; fb.start = -1; - if ((ret = rspamd_mime_parse_multipart_cb (task, multipart, st, - cb, &fb)) != RSPAMD_MIME_PARSE_OK) { + if ((ret = rspamd_mime_parse_multipart_cb(task, multipart, st, + cb, &fb)) != RSPAMD_MIME_PARSE_OK) { return ret; } } @@ -1099,25 +1153,25 @@ rspamd_multipart_boundaries_filter (struct rspamd_task *task, } static enum rspamd_mime_parse_error -rspamd_mime_parse_multipart_part (struct rspamd_task *task, - struct rspamd_mime_part *part, - struct rspamd_mime_parser_ctx *st, - GError **err) +rspamd_mime_parse_multipart_part(struct rspamd_task *task, + struct rspamd_mime_part *part, + struct rspamd_mime_parser_ctx *st, + GError **err) { struct rspamd_mime_multipart_cbdata cbdata; enum rspamd_mime_parse_error ret; if (st->nesting > max_nested) { - g_set_error (err, RSPAMD_MIME_QUARK, E2BIG, "Nesting level is too high: %d", - st->nesting); + g_set_error(err, RSPAMD_MIME_QUARK, E2BIG, "Nesting level is too high: %d", + st->nesting); return RSPAMD_MIME_PARSE_NESTING; } - part->part_number = MESSAGE_FIELD (task, parts)->len; - part->urls = g_ptr_array_new (); - g_ptr_array_add (MESSAGE_FIELD (task, parts), part); - st->nesting ++; - rspamd_mime_part_get_cte (task, part->raw_headers, part, FALSE); + part->part_number = MESSAGE_FIELD(task, parts)->len; + part->urls = g_ptr_array_new(); + g_ptr_array_add(MESSAGE_FIELD(task, parts), part); + st->nesting++; + rspamd_mime_part_get_cte(task, part->raw_headers, part, FALSE); st->pos = part->raw_data.begin; cbdata.multipart = part; @@ -1129,10 +1183,10 @@ rspamd_mime_parse_multipart_part (struct rspamd_task *task, if (part->ct->boundary.len > 0) { /* We know our boundary */ cbdata.cur_boundary = &part->ct->boundary; - rspamd_cryptobox_siphash ((guchar *)&cbdata.bhash, - cbdata.cur_boundary->begin, cbdata.cur_boundary->len, - lib_ctx->hkey); - msg_debug_mime ("hash: %T -> %L", cbdata.cur_boundary, cbdata.bhash); + rspamd_cryptobox_siphash((guchar *) &cbdata.bhash, + cbdata.cur_boundary->begin, cbdata.cur_boundary->len, + lib_ctx->hkey); + msg_debug_mime("hash: %T -> %L", cbdata.cur_boundary, cbdata.bhash); } else { /* Guess boundary */ @@ -1140,23 +1194,23 @@ rspamd_mime_parse_multipart_part (struct rspamd_task *task, cbdata.bhash = 0; } - ret = rspamd_multipart_boundaries_filter (task, part, st, &cbdata); + ret = rspamd_multipart_boundaries_filter(task, part, st, &cbdata); /* Cleanup stack */ - st->nesting --; - g_ptr_array_remove_index_fast (st->stack, st->stack->len - 1); + st->nesting--; + g_ptr_array_remove_index_fast(st->stack, st->stack->len - 1); return ret; } /* Process boundary like structures in a message */ static gint -rspamd_mime_preprocess_cb (struct rspamd_multipattern *mp, - guint strnum, - gint match_start, - gint match_pos, - const gchar *text, - gsize len, - void *context) +rspamd_mime_preprocess_cb(struct rspamd_multipattern *mp, + guint strnum, + gint match_start, + gint match_pos, + const gchar *text, + gsize len, + void *context) { const gchar *end = text + len, *p = text + match_pos, *bend; gsize blen; @@ -1167,7 +1221,7 @@ rspamd_mime_preprocess_cb (struct rspamd_multipattern *mp, task = st->task; - if (G_LIKELY (p < end)) { + if (G_LIKELY(p < end)) { blen = 0; @@ -1176,8 +1230,8 @@ rspamd_mime_preprocess_cb (struct rspamd_multipattern *mp, break; } - blen ++; - p ++; + blen++; + p++; } if (blen > 0) { @@ -1189,34 +1243,34 @@ rspamd_mime_preprocess_cb (struct rspamd_multipattern *mp, /* We need to verify last -- */ if (bend > p + 1 && *(bend - 1) == '-') { closing = TRUE; - bend --; + bend--; blen -= 2; } else { /* Not a closing boundary somehow, e.g. if a boundary=='-' */ - bend ++; + bend++; } } else { - bend ++; + bend++; } while (bend < end) { if (*bend == '\r') { - bend ++; + bend++; /* \r\n */ if (bend < end && *bend == '\n') { - bend ++; + bend++; } } else if (*bend == '\n') { /* \n */ - bend ++; + bend++; } - else if (g_ascii_isspace (*bend)){ + else if (g_ascii_isspace(*bend)) { /* Spaces in the same line, skip them */ - bend ++; + bend++; continue; } @@ -1233,32 +1287,32 @@ rspamd_mime_preprocess_cb (struct rspamd_multipattern *mp, lc_copy = lc_copy_buf; } else { - lc_copy = g_malloc (blen + 2); + lc_copy = g_malloc(blen + 2); } if (closing) { - memcpy (lc_copy, p, blen + 2); - rspamd_str_lc (lc_copy, blen + 2); + memcpy(lc_copy, p, blen + 2); + rspamd_str_lc(lc_copy, blen + 2); } else { - memcpy (lc_copy, p, blen); - rspamd_str_lc (lc_copy, blen); + memcpy(lc_copy, p, blen); + rspamd_str_lc(lc_copy, blen); } - rspamd_cryptobox_siphash ((guchar *)&b.hash, lc_copy, blen, - lib_ctx->hkey); - msg_debug_mime ("normal hash: %*s -> %L, %d boffset, %d data offset", - (gint)blen, lc_copy, b.hash, (int)b.boundary, (int)b.start); + rspamd_cryptobox_siphash((guchar *) &b.hash, lc_copy, blen, + lib_ctx->hkey); + msg_debug_mime("normal hash: %*s -> %L, %d boffset, %d data offset", + (gint) blen, lc_copy, b.hash, (int) b.boundary, (int) b.start); if (closing) { b.flags = RSPAMD_MIME_BOUNDARY_FLAG_CLOSED; - rspamd_cryptobox_siphash ((guchar *)&b.closed_hash, lc_copy, - blen + 2, - lib_ctx->hkey); - msg_debug_mime ("closing hash: %*s -> %L, %d boffset, %d data offset", - (gint)blen + 2, lc_copy, - b.closed_hash, - (int)b.boundary, (int)b.start); + rspamd_cryptobox_siphash((guchar *) &b.closed_hash, lc_copy, + blen + 2, + lib_ctx->hkey); + msg_debug_mime("closing hash: %*s -> %L, %d boffset, %d data offset", + (gint) blen + 2, lc_copy, + b.closed_hash, + (int) b.boundary, (int) b.start); } else { b.flags = 0; @@ -1269,7 +1323,7 @@ rspamd_mime_preprocess_cb (struct rspamd_multipattern *mp, if (blen + 2 >= sizeof(lc_copy_buf)) { g_free(lc_copy); } - g_array_append_val (st->boundaries, b); + g_array_append_val(st->boundaries, b); } } @@ -1277,10 +1331,10 @@ rspamd_mime_preprocess_cb (struct rspamd_multipattern *mp, } static goffset -rspamd_mime_parser_headers_heuristic (GString *input, goffset *body_start) +rspamd_mime_parser_headers_heuristic(GString *input, goffset *body_start) { const gsize default_max_len = 76; - gsize max_len = MIN (input->len, default_max_len); + gsize max_len = MIN(input->len, default_max_len); const gchar *p, *end; enum { st_before_colon = 0, @@ -1296,29 +1350,29 @@ rspamd_mime_parser_headers_heuristic (GString *input, goffset *body_start) while (p < end) { switch (state) { case st_before_colon: - if (G_UNLIKELY (*p == ':')) { + if (G_UNLIKELY(*p == ':')) { state = st_colon; } - else if (G_UNLIKELY (!g_ascii_isgraph (*p))) { + else if (G_UNLIKELY(!g_ascii_isgraph(*p))) { state = st_error; } - p ++; + p++; break; case st_colon: - if (g_ascii_isspace (*p)) { + if (g_ascii_isspace(*p)) { state = st_spaces_after_colon; } else { state = st_value; } - p ++; + p++; break; case st_spaces_after_colon: - if (!g_ascii_isspace (*p)) { + if (!g_ascii_isspace(*p)) { state = st_value; } - p ++; + p++; break; case st_value: /* We accept any value */ @@ -1343,40 +1397,40 @@ end: } static void -rspamd_mime_preprocess_message (struct rspamd_task *task, - struct rspamd_mime_part *top, - struct rspamd_mime_parser_ctx *st) +rspamd_mime_preprocess_message(struct rspamd_task *task, + struct rspamd_mime_part *top, + struct rspamd_mime_parser_ctx *st) { if (top->raw_data.begin >= st->pos) { - rspamd_multipattern_lookup (lib_ctx->mp_boundary, - top->raw_data.begin - 1, - top->raw_data.len + 1, - rspamd_mime_preprocess_cb, st, NULL); + rspamd_multipattern_lookup(lib_ctx->mp_boundary, + top->raw_data.begin - 1, + top->raw_data.len + 1, + rspamd_mime_preprocess_cb, st, NULL); } else { - rspamd_multipattern_lookup (lib_ctx->mp_boundary, - st->pos, - st->end - st->pos, - rspamd_mime_preprocess_cb, st, NULL); + rspamd_multipattern_lookup(lib_ctx->mp_boundary, + st->pos, + st->end - st->pos, + rspamd_mime_preprocess_cb, st, NULL); } } static void -rspamd_mime_parse_stack_free (struct rspamd_mime_parser_ctx *st) +rspamd_mime_parse_stack_free(struct rspamd_mime_parser_ctx *st) { if (st) { - g_ptr_array_free (st->stack, TRUE); - g_array_free (st->boundaries, TRUE); - g_free (st); + g_ptr_array_free(st->stack, TRUE); + g_array_free(st->boundaries, TRUE); + g_free(st); } } static enum rspamd_mime_parse_error -rspamd_mime_parse_message (struct rspamd_task *task, - struct rspamd_mime_part *part, - struct rspamd_mime_parser_ctx *st, - GError **err) +rspamd_mime_parse_message(struct rspamd_task *task, + struct rspamd_mime_part *part, + struct rspamd_mime_parser_ctx *st, + GError **err) { struct rspamd_content_type *ct, *sel = NULL; struct rspamd_mime_header *hdr = NULL, *cur; @@ -1390,80 +1444,80 @@ rspamd_mime_parse_message (struct rspamd_task *task, struct rspamd_mime_parser_ctx *nst = st; if (st->nesting > max_nested) { - g_set_error (err, RSPAMD_MIME_QUARK, E2BIG, "Nesting level is too high: %d", - st->nesting); + g_set_error(err, RSPAMD_MIME_QUARK, E2BIG, "Nesting level is too high: %d", + st->nesting); return RSPAMD_MIME_PARSE_NESTING; } /* Allocate real part */ - npart = rspamd_mempool_alloc0 (task->task_pool, - sizeof (struct rspamd_mime_part)); + npart = rspamd_mempool_alloc0(task->task_pool, + sizeof(struct rspamd_mime_part)); if (part == NULL) { /* Top level message */ p = task->msg.begin; len = task->msg.len; - str.str = (gchar *)p; + str.str = (gchar *) p; str.len = len; - hdr_pos = rspamd_string_find_eoh (&str, &body_pos); + hdr_pos = rspamd_string_find_eoh(&str, &body_pos); if (hdr_pos > 0 && hdr_pos < str.len) { - MESSAGE_FIELD (task, raw_headers_content).begin = str.str; - MESSAGE_FIELD (task, raw_headers_content).len = hdr_pos; - MESSAGE_FIELD (task, raw_headers_content).body_start = str.str + body_pos; + MESSAGE_FIELD(task, raw_headers_content).begin = str.str; + MESSAGE_FIELD(task, raw_headers_content).len = hdr_pos; + MESSAGE_FIELD(task, raw_headers_content).body_start = str.str + body_pos; - if (MESSAGE_FIELD (task, raw_headers_content).len > 0) { - rspamd_mime_headers_process (task, - MESSAGE_FIELD (task, raw_headers), - &MESSAGE_FIELD (task, headers_order), - MESSAGE_FIELD (task, raw_headers_content).begin, - MESSAGE_FIELD (task, raw_headers_content).len, - TRUE); - npart->raw_headers = rspamd_message_headers_ref ( - MESSAGE_FIELD (task, raw_headers)); + if (MESSAGE_FIELD(task, raw_headers_content).len > 0) { + rspamd_mime_headers_process(task, + MESSAGE_FIELD(task, raw_headers), + &MESSAGE_FIELD(task, headers_order), + MESSAGE_FIELD(task, raw_headers_content).begin, + MESSAGE_FIELD(task, raw_headers_content).len, + TRUE); + npart->raw_headers = rspamd_message_headers_ref( + MESSAGE_FIELD(task, raw_headers)); /* Preserve the natural order */ - if (MESSAGE_FIELD (task, headers_order)) { - LL_REVERSE2 (MESSAGE_FIELD (task, headers_order), ord_next); + if (MESSAGE_FIELD(task, headers_order)) { + LL_REVERSE2(MESSAGE_FIELD(task, headers_order), ord_next); } } hdr = rspamd_message_get_header_from_hash( - MESSAGE_FIELD (task, raw_headers), - "Content-Type", FALSE); + MESSAGE_FIELD(task, raw_headers), + "Content-Type", FALSE); } else { /* First apply heuristic, maybe we have just headers */ - hdr_pos = rspamd_mime_parser_headers_heuristic (&str, &body_pos); + hdr_pos = rspamd_mime_parser_headers_heuristic(&str, &body_pos); if (hdr_pos > 0 && hdr_pos <= str.len) { - MESSAGE_FIELD (task, raw_headers_content).begin = str.str; - MESSAGE_FIELD (task, raw_headers_content).len = hdr_pos; - MESSAGE_FIELD (task, raw_headers_content).body_start = str.str + - body_pos; - - if (MESSAGE_FIELD (task, raw_headers_content).len > 0) { - rspamd_mime_headers_process (task, - MESSAGE_FIELD (task, raw_headers), - &MESSAGE_FIELD (task, headers_order), - MESSAGE_FIELD (task, raw_headers_content).begin, - MESSAGE_FIELD (task, raw_headers_content).len, - TRUE); - npart->raw_headers = rspamd_message_headers_ref ( - MESSAGE_FIELD (task, raw_headers)); + MESSAGE_FIELD(task, raw_headers_content).begin = str.str; + MESSAGE_FIELD(task, raw_headers_content).len = hdr_pos; + MESSAGE_FIELD(task, raw_headers_content).body_start = str.str + + body_pos; + + if (MESSAGE_FIELD(task, raw_headers_content).len > 0) { + rspamd_mime_headers_process(task, + MESSAGE_FIELD(task, raw_headers), + &MESSAGE_FIELD(task, headers_order), + MESSAGE_FIELD(task, raw_headers_content).begin, + MESSAGE_FIELD(task, raw_headers_content).len, + TRUE); + npart->raw_headers = rspamd_message_headers_ref( + MESSAGE_FIELD(task, raw_headers)); /* Preserve the natural order */ - if (MESSAGE_FIELD (task, headers_order)) { - LL_REVERSE2 (MESSAGE_FIELD (task, headers_order), ord_next); + if (MESSAGE_FIELD(task, headers_order)) { + LL_REVERSE2(MESSAGE_FIELD(task, headers_order), ord_next); } } hdr = rspamd_message_get_header_from_hash( - MESSAGE_FIELD (task, raw_headers), - "Content-Type", FALSE); + MESSAGE_FIELD(task, raw_headers), + "Content-Type", FALSE); task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS; } else { @@ -1480,22 +1534,22 @@ rspamd_mime_parse_message (struct rspamd_task *task, * Here are dragons: * We allocate new parser context as we need to shift pointers */ - nst = g_malloc0 (sizeof (*st)); - nst->stack = g_ptr_array_sized_new (4); - nst->boundaries = g_array_sized_new (FALSE, FALSE, - sizeof (struct rspamd_mime_boundary), 8); + nst = g_malloc0(sizeof(*st)); + nst->stack = g_ptr_array_sized_new(4); + nst->boundaries = g_array_sized_new(FALSE, FALSE, + sizeof(struct rspamd_mime_boundary), 8); nst->start = part->parsed_data.begin; nst->end = nst->start + part->parsed_data.len; nst->pos = nst->start; nst->task = st->task; nst->nesting = st->nesting; - st->nesting ++; + st->nesting++; - str.str = (gchar *)part->parsed_data.begin; + str.str = (gchar *) part->parsed_data.begin; str.len = part->parsed_data.len; - hdr_pos = rspamd_string_find_eoh (&str, &body_pos); - npart->raw_headers = rspamd_message_headers_new (); + hdr_pos = rspamd_string_find_eoh(&str, &body_pos); + npart->raw_headers = rspamd_message_headers_new(); npart->headers_order = NULL; if (hdr_pos > 0 && hdr_pos < str.len) { @@ -1504,21 +1558,21 @@ rspamd_mime_parse_message (struct rspamd_task *task, npart->raw_data.begin = str.str + body_pos; if (npart->raw_headers_len > 0) { - rspamd_mime_headers_process (task, - npart->raw_headers, - &npart->headers_order, - npart->raw_headers_str, - npart->raw_headers_len, - FALSE); + rspamd_mime_headers_process(task, + npart->raw_headers, + &npart->headers_order, + npart->raw_headers_str, + npart->raw_headers_len, + FALSE); /* Preserve the natural order */ if (npart->headers_order) { - LL_REVERSE2 (npart->headers_order, ord_next); + LL_REVERSE2(npart->headers_order, ord_next); } } hdr = rspamd_message_get_header_from_hash(npart->raw_headers, - "Content-Type", FALSE); + "Content-Type", FALSE); } else { body_pos = 0; @@ -1536,9 +1590,10 @@ rspamd_mime_parse_message (struct rspamd_task *task, sel = NULL; } else { - DL_FOREACH (hdr, cur) { - ct = rspamd_content_type_parse (cur->value, strlen (cur->value), - task->task_pool); + DL_FOREACH(hdr, cur) + { + ct = rspamd_content_type_parse(cur->value, strlen(cur->value), + task->task_pool); /* Here we prefer multipart content-type or any content-type */ if (ct) { @@ -1554,40 +1609,39 @@ rspamd_mime_parse_message (struct rspamd_task *task, if (sel == NULL) { /* For messages we automatically assume plaintext */ - msg_info_task ("cannot find content-type for a message, assume text/plain"); - sel = rspamd_mempool_alloc0 (task->task_pool, sizeof (*sel)); - sel->flags = RSPAMD_CONTENT_TYPE_TEXT|RSPAMD_CONTENT_TYPE_MISSING; - RSPAMD_FTOK_ASSIGN (&sel->type, "text"); - RSPAMD_FTOK_ASSIGN (&sel->subtype, "plain"); + msg_info_task("cannot find content-type for a message, assume text/plain"); + sel = rspamd_mempool_alloc0(task->task_pool, sizeof(*sel)); + sel->flags = RSPAMD_CONTENT_TYPE_TEXT | RSPAMD_CONTENT_TYPE_MISSING; + RSPAMD_FTOK_ASSIGN(&sel->type, "text"); + RSPAMD_FTOK_ASSIGN(&sel->subtype, "plain"); } npart->ct = sel; if ((part == NULL || nst != st) && - (sel->flags & (RSPAMD_CONTENT_TYPE_MULTIPART|RSPAMD_CONTENT_TYPE_MESSAGE))) { + (sel->flags & (RSPAMD_CONTENT_TYPE_MULTIPART | RSPAMD_CONTENT_TYPE_MESSAGE))) { /* Not a trivial message, need to preprocess */ - rspamd_mime_preprocess_message (task, npart, nst); + rspamd_mime_preprocess_message(task, npart, nst); } if (sel->flags & RSPAMD_CONTENT_TYPE_MULTIPART) { - g_ptr_array_add (nst->stack, npart); - nst->nesting ++; + g_ptr_array_add(nst->stack, npart); + nst->nesting++; npart->part_type = RSPAMD_MIME_PART_MULTIPART; - npart->specific.mp = rspamd_mempool_alloc0 (task->task_pool, - sizeof (struct rspamd_mime_multipart)); - memcpy (&npart->specific.mp->boundary, &sel->orig_boundary, - sizeof (rspamd_ftok_t)); - ret = rspamd_mime_parse_multipart_part (task, npart, nst, err); + npart->specific.mp = rspamd_mempool_alloc0(task->task_pool, + sizeof(struct rspamd_mime_multipart)); + memcpy(&npart->specific.mp->boundary, &sel->orig_boundary, + sizeof(rspamd_ftok_t)); + ret = rspamd_mime_parse_multipart_part(task, npart, nst, err); } else if (sel->flags & RSPAMD_CONTENT_TYPE_MESSAGE) { - if ((ret = rspamd_mime_parse_normal_part (task, npart, nst, sel, err)) - == RSPAMD_MIME_PARSE_OK) { + if ((ret = rspamd_mime_parse_normal_part(task, npart, nst, sel, err)) == RSPAMD_MIME_PARSE_OK) { npart->part_type = RSPAMD_MIME_PART_MESSAGE; - ret = rspamd_mime_parse_message (task, npart, nst, err); + ret = rspamd_mime_parse_message(task, npart, nst, err); } } else { - ret = rspamd_mime_parse_normal_part (task, npart, nst, sel, err); + ret = rspamd_mime_parse_normal_part(task, npart, nst, sel, err); } if (ret != RSPAMD_MIME_PARSE_OK) { @@ -1596,28 +1650,28 @@ rspamd_mime_parse_message (struct rspamd_task *task, if (part && st->stack->len > 0) { /* Remove message part from the parent stack */ - g_ptr_array_remove_index_fast (st->stack, st->stack->len - 1); - st->nesting --; + g_ptr_array_remove_index_fast(st->stack, st->stack->len - 1); + st->nesting--; } /* Process leftovers for boundaries */ if (nst->boundaries) { struct rspamd_mime_boundary *boundary, *start_boundary = NULL, - *end_boundary = NULL; + *end_boundary = NULL; goffset cur_offset = nst->pos - nst->start, - end_offset = st->end - st->start; + end_offset = st->end - st->start; guint sel_idx = 0; for (;;) { start_boundary = NULL; for (i = sel_idx; i < nst->boundaries->len; i++) { - boundary = &g_array_index (nst->boundaries, - struct rspamd_mime_boundary, i); + boundary = &g_array_index(nst->boundaries, + struct rspamd_mime_boundary, i); if (boundary->start > cur_offset && boundary->boundary < end_offset && - !RSPAMD_BOUNDARY_IS_CLOSED (boundary)) { + !RSPAMD_BOUNDARY_IS_CLOSED(boundary)) { start_boundary = boundary; sel_idx = i; break; @@ -1628,24 +1682,24 @@ rspamd_mime_parse_message (struct rspamd_task *task, const gchar *start, *end; if (nst->boundaries->len > sel_idx + 1) { - end_boundary = &g_array_index (nst->boundaries, - struct rspamd_mime_boundary, sel_idx + 1); + end_boundary = &g_array_index(nst->boundaries, + struct rspamd_mime_boundary, sel_idx + 1); end = nst->start + end_boundary->boundary; } else { end = nst->end; } - sel_idx ++; + sel_idx++; start = nst->start + start_boundary->start; if (end > start && - (ret = rspamd_mime_process_multipart_node (task, nst, - NULL, start, end, FALSE, err)) != RSPAMD_MIME_PARSE_OK) { + (ret = rspamd_mime_process_multipart_node(task, nst, + NULL, start, end, FALSE, err)) != RSPAMD_MIME_PARSE_OK) { if (nst != st) { - rspamd_mime_parse_stack_free (nst); + rspamd_mime_parse_stack_free(nst); } if (ret == RSPAMD_MIME_PARSE_NO_PART) { @@ -1662,34 +1716,34 @@ rspamd_mime_parse_message (struct rspamd_task *task, } if (nst != st) { - rspamd_mime_parse_stack_free (nst); + rspamd_mime_parse_stack_free(nst); } return ret; } enum rspamd_mime_parse_error -rspamd_mime_parse_task (struct rspamd_task *task, GError **err) +rspamd_mime_parse_task(struct rspamd_task *task, GError **err) { struct rspamd_mime_parser_ctx *st; enum rspamd_mime_parse_error ret = RSPAMD_MIME_PARSE_OK; if (lib_ctx == NULL) { - rspamd_mime_parser_init_lib (); + rspamd_mime_parser_init_lib(); } if (++lib_ctx->key_usages > max_key_usages) { /* Regenerate siphash key */ - ottery_rand_bytes (lib_ctx->hkey, sizeof (lib_ctx->hkey)); + ottery_rand_bytes(lib_ctx->hkey, sizeof(lib_ctx->hkey)); lib_ctx->key_usages = 0; } - st = g_malloc0 (sizeof (*st)); - st->stack = g_ptr_array_sized_new (4); - st->pos = MESSAGE_FIELD (task, raw_headers_content).body_start; + st = g_malloc0(sizeof(*st)); + st->stack = g_ptr_array_sized_new(4); + st->pos = MESSAGE_FIELD(task, raw_headers_content).body_start; st->end = task->msg.begin + task->msg.len; - st->boundaries = g_array_sized_new (FALSE, FALSE, - sizeof (struct rspamd_mime_boundary), 8); + st->boundaries = g_array_sized_new(FALSE, FALSE, + sizeof(struct rspamd_mime_boundary), 8); st->task = task; if (st->pos == NULL) { @@ -1697,8 +1751,8 @@ rspamd_mime_parse_task (struct rspamd_task *task, GError **err) } st->start = task->msg.begin; - ret = rspamd_mime_parse_message (task, NULL, st, err); - rspamd_mime_parse_stack_free (st); + ret = rspamd_mime_parse_message(task, NULL, st, err); + rspamd_mime_parse_stack_free(st); return ret; } |